| { |
| "best_metric": 0.10333551466464996, |
| "best_model_checkpoint": "checkpoints/mHubert-basque-ASR-30ep/checkpoint-144000", |
| "epoch": 24.116286752560292, |
| "eval_steps": 1000, |
| "global_step": 146000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.016518004625041296, |
| "grad_norm": 11.923916816711426, |
| "learning_rate": 5.506001541680432e-08, |
| "loss": 24.4951, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.03303600925008259, |
| "grad_norm": 14.201896667480469, |
| "learning_rate": 1.1012003083360864e-07, |
| "loss": 24.611, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.049554013875123884, |
| "grad_norm": 16.669321060180664, |
| "learning_rate": 1.6518004625041296e-07, |
| "loss": 24.4459, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.06607201850016518, |
| "grad_norm": 15.999144554138184, |
| "learning_rate": 2.2024006166721728e-07, |
| "loss": 23.9418, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.08259002312520647, |
| "grad_norm": 15.337122917175293, |
| "learning_rate": 2.753000770840216e-07, |
| "loss": 23.6429, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.09910802775024777, |
| "grad_norm": 20.767488479614258, |
| "learning_rate": 3.3036009250082593e-07, |
| "loss": 22.9927, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.11562603237528907, |
| "grad_norm": 25.313589096069336, |
| "learning_rate": 3.8542010791763027e-07, |
| "loss": 22.5284, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.13214403700033037, |
| "grad_norm": 24.19838523864746, |
| "learning_rate": 4.4048012333443456e-07, |
| "loss": 21.2884, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.14866204162537167, |
| "grad_norm": 27.155614852905273, |
| "learning_rate": 4.955401387512389e-07, |
| "loss": 20.3421, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.16518004625041294, |
| "grad_norm": 30.608129501342773, |
| "learning_rate": 5.506001541680432e-07, |
| "loss": 19.4542, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.16518004625041294, |
| "eval_cer": 0.9883050441448223, |
| "eval_loss": 16.246232986450195, |
| "eval_runtime": 48.061, |
| "eval_samples_per_second": 35.184, |
| "eval_steps_per_second": 8.801, |
| "eval_wer": 0.9999375507400238, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.18169805087545424, |
| "grad_norm": 27.388935089111328, |
| "learning_rate": 6.056601695848476e-07, |
| "loss": 18.666, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.19821605550049554, |
| "grad_norm": 35.63807678222656, |
| "learning_rate": 6.607201850016519e-07, |
| "loss": 17.1664, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.21473406012553684, |
| "grad_norm": 46.33199691772461, |
| "learning_rate": 7.157802004184563e-07, |
| "loss": 15.5691, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.23125206475057813, |
| "grad_norm": 37.326534271240234, |
| "learning_rate": 7.708402158352605e-07, |
| "loss": 14.6982, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.24777006937561943, |
| "grad_norm": 32.61579895019531, |
| "learning_rate": 8.259002312520647e-07, |
| "loss": 14.1957, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.26428807400066073, |
| "grad_norm": 32.68245315551758, |
| "learning_rate": 8.809602466688691e-07, |
| "loss": 13.7794, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.280806078625702, |
| "grad_norm": 36.71652603149414, |
| "learning_rate": 9.360202620856734e-07, |
| "loss": 13.1926, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.29732408325074333, |
| "grad_norm": 33.95559310913086, |
| "learning_rate": 9.910802775024778e-07, |
| "loss": 12.549, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.3138420878757846, |
| "grad_norm": 34.27357864379883, |
| "learning_rate": 1.0461402929192822e-06, |
| "loss": 12.9556, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.3303600925008259, |
| "grad_norm": 29.778766632080078, |
| "learning_rate": 1.1012003083360864e-06, |
| "loss": 11.7038, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.3303600925008259, |
| "eval_cer": 0.9883050441448223, |
| "eval_loss": 10.252079963684082, |
| "eval_runtime": 47.4771, |
| "eval_samples_per_second": 35.617, |
| "eval_steps_per_second": 8.91, |
| "eval_wer": 0.9999375507400238, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.3468780971258672, |
| "grad_norm": 34.33523941040039, |
| "learning_rate": 1.1562603237528908e-06, |
| "loss": 12.5611, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.3633961017509085, |
| "grad_norm": 34.92203140258789, |
| "learning_rate": 1.2113203391696951e-06, |
| "loss": 11.634, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.3799141063759498, |
| "grad_norm": 35.76356887817383, |
| "learning_rate": 1.2663803545864995e-06, |
| "loss": 12.158, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.39643211100099107, |
| "grad_norm": 33.04209518432617, |
| "learning_rate": 1.3214403700033037e-06, |
| "loss": 11.245, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.4129501156260324, |
| "grad_norm": 36.18589782714844, |
| "learning_rate": 1.3765003854201081e-06, |
| "loss": 11.5037, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.42946812025107367, |
| "grad_norm": 30.83759307861328, |
| "learning_rate": 1.4315604008369125e-06, |
| "loss": 11.6204, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.44598612487611494, |
| "grad_norm": 31.8559627532959, |
| "learning_rate": 1.4866204162537167e-06, |
| "loss": 10.6165, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.46250412950115627, |
| "grad_norm": 2.7403502464294434, |
| "learning_rate": 1.541680431670521e-06, |
| "loss": 10.7605, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.47902213412619754, |
| "grad_norm": 3.411168098449707, |
| "learning_rate": 1.5967404470873255e-06, |
| "loss": 10.6777, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.49554013875123887, |
| "grad_norm": 31.218460083007812, |
| "learning_rate": 1.6518004625041294e-06, |
| "loss": 10.5214, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.49554013875123887, |
| "eval_cer": 0.9883050441448223, |
| "eval_loss": 8.999906539916992, |
| "eval_runtime": 47.8317, |
| "eval_samples_per_second": 35.353, |
| "eval_steps_per_second": 8.844, |
| "eval_wer": 0.9999375507400238, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.5120581433762802, |
| "grad_norm": 33.284549713134766, |
| "learning_rate": 1.7068604779209338e-06, |
| "loss": 10.5589, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.5285761480013215, |
| "grad_norm": 29.90840721130371, |
| "learning_rate": 1.7619204933377382e-06, |
| "loss": 10.1695, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.5450941526263627, |
| "grad_norm": 37.552734375, |
| "learning_rate": 1.8169805087545426e-06, |
| "loss": 10.1221, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.561612157251404, |
| "grad_norm": 33.36090850830078, |
| "learning_rate": 1.8720405241713468e-06, |
| "loss": 9.6691, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.5781301618764453, |
| "grad_norm": 28.224496841430664, |
| "learning_rate": 1.9271005395881514e-06, |
| "loss": 9.6393, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.5946481665014867, |
| "grad_norm": 29.11280059814453, |
| "learning_rate": 1.9821605550049556e-06, |
| "loss": 9.6595, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.6111661711265279, |
| "grad_norm": 30.484336853027344, |
| "learning_rate": 2.0372205704217598e-06, |
| "loss": 9.4354, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.6276841757515692, |
| "grad_norm": 28.713830947875977, |
| "learning_rate": 2.0922805858385644e-06, |
| "loss": 9.107, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.6442021803766105, |
| "grad_norm": 28.442527770996094, |
| "learning_rate": 2.1473406012553685e-06, |
| "loss": 9.1297, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.6607201850016517, |
| "grad_norm": 30.577152252197266, |
| "learning_rate": 2.2024006166721727e-06, |
| "loss": 8.8754, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.6607201850016517, |
| "eval_cer": 0.9883050441448223, |
| "eval_loss": 7.591804504394531, |
| "eval_runtime": 48.3109, |
| "eval_samples_per_second": 35.002, |
| "eval_steps_per_second": 8.756, |
| "eval_wer": 0.9999375507400238, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.6772381896266931, |
| "grad_norm": 33.752967834472656, |
| "learning_rate": 2.2574606320889773e-06, |
| "loss": 8.5952, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.6937561942517344, |
| "grad_norm": 29.171703338623047, |
| "learning_rate": 2.3125206475057815e-06, |
| "loss": 8.363, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.7102741988767757, |
| "grad_norm": 29.746253967285156, |
| "learning_rate": 2.3675806629225857e-06, |
| "loss": 8.3281, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.726792203501817, |
| "grad_norm": 31.43389129638672, |
| "learning_rate": 2.4226406783393903e-06, |
| "loss": 8.0488, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.7433102081268583, |
| "grad_norm": 26.350412368774414, |
| "learning_rate": 2.4777006937561945e-06, |
| "loss": 8.0053, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.7598282127518996, |
| "grad_norm": 24.809051513671875, |
| "learning_rate": 2.532760709172999e-06, |
| "loss": 7.8292, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.7763462173769409, |
| "grad_norm": 5.660928726196289, |
| "learning_rate": 2.587820724589803e-06, |
| "loss": 7.2419, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.7928642220019821, |
| "grad_norm": 25.451852798461914, |
| "learning_rate": 2.6428807400066074e-06, |
| "loss": 7.3732, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.8093822266270234, |
| "grad_norm": 26.097301483154297, |
| "learning_rate": 2.6979407554234116e-06, |
| "loss": 7.1173, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.8259002312520648, |
| "grad_norm": 22.98796844482422, |
| "learning_rate": 2.7530007708402162e-06, |
| "loss": 6.9653, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.8259002312520648, |
| "eval_cer": 0.9883050441448223, |
| "eval_loss": 5.927879810333252, |
| "eval_runtime": 47.9693, |
| "eval_samples_per_second": 35.252, |
| "eval_steps_per_second": 8.818, |
| "eval_wer": 0.9999375507400238, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.8424182358771061, |
| "grad_norm": 22.172231674194336, |
| "learning_rate": 2.8080607862570204e-06, |
| "loss": 6.5446, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.8589362405021473, |
| "grad_norm": 20.156213760375977, |
| "learning_rate": 2.863120801673825e-06, |
| "loss": 6.5427, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.8754542451271886, |
| "grad_norm": 23.265291213989258, |
| "learning_rate": 2.9181808170906288e-06, |
| "loss": 6.1793, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.8919722497522299, |
| "grad_norm": 19.185626983642578, |
| "learning_rate": 2.9732408325074334e-06, |
| "loss": 6.0592, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.9084902543772713, |
| "grad_norm": 2.092716693878174, |
| "learning_rate": 3.0283008479242375e-06, |
| "loss": 5.7659, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.9250082590023125, |
| "grad_norm": 21.02352523803711, |
| "learning_rate": 3.083360863341042e-06, |
| "loss": 5.6754, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.9415262636273538, |
| "grad_norm": 18.27204132080078, |
| "learning_rate": 3.1384208787578463e-06, |
| "loss": 5.5381, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.9580442682523951, |
| "grad_norm": 18.369115829467773, |
| "learning_rate": 3.193480894174651e-06, |
| "loss": 5.4687, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.9745622728774364, |
| "grad_norm": 19.799074172973633, |
| "learning_rate": 3.248540909591455e-06, |
| "loss": 5.2652, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.9910802775024777, |
| "grad_norm": 16.94482421875, |
| "learning_rate": 3.303600925008259e-06, |
| "loss": 5.0711, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.9910802775024777, |
| "eval_cer": 0.9883050441448223, |
| "eval_loss": 4.399995803833008, |
| "eval_runtime": 48.2208, |
| "eval_samples_per_second": 35.068, |
| "eval_steps_per_second": 8.772, |
| "eval_wer": 0.9999375507400238, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.007598282127519, |
| "grad_norm": 13.568734169006348, |
| "learning_rate": 3.3586609404250635e-06, |
| "loss": 4.7523, |
| "step": 6100 |
| }, |
| { |
| "epoch": 1.0241162867525604, |
| "grad_norm": 13.136434555053711, |
| "learning_rate": 3.4137209558418677e-06, |
| "loss": 4.5941, |
| "step": 6200 |
| }, |
| { |
| "epoch": 1.0406342913776017, |
| "grad_norm": 12.08462905883789, |
| "learning_rate": 3.4687809712586723e-06, |
| "loss": 4.431, |
| "step": 6300 |
| }, |
| { |
| "epoch": 1.057152296002643, |
| "grad_norm": 11.658442497253418, |
| "learning_rate": 3.5238409866754764e-06, |
| "loss": 4.4636, |
| "step": 6400 |
| }, |
| { |
| "epoch": 1.0736703006276842, |
| "grad_norm": 11.084771156311035, |
| "learning_rate": 3.578901002092281e-06, |
| "loss": 4.2883, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.0901883052527255, |
| "grad_norm": 9.543913841247559, |
| "learning_rate": 3.6339610175090852e-06, |
| "loss": 4.1244, |
| "step": 6600 |
| }, |
| { |
| "epoch": 1.1067063098777667, |
| "grad_norm": 9.388916015625, |
| "learning_rate": 3.68902103292589e-06, |
| "loss": 3.9917, |
| "step": 6700 |
| }, |
| { |
| "epoch": 1.123224314502808, |
| "grad_norm": 6.666196346282959, |
| "learning_rate": 3.7440810483426936e-06, |
| "loss": 3.8466, |
| "step": 6800 |
| }, |
| { |
| "epoch": 1.1397423191278493, |
| "grad_norm": 16.961889266967773, |
| "learning_rate": 3.799141063759498e-06, |
| "loss": 3.7877, |
| "step": 6900 |
| }, |
| { |
| "epoch": 1.1562603237528906, |
| "grad_norm": 5.992101669311523, |
| "learning_rate": 3.854201079176303e-06, |
| "loss": 3.6487, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.1562603237528906, |
| "eval_cer": 0.9883050441448223, |
| "eval_loss": 3.413706064224243, |
| "eval_runtime": 47.9091, |
| "eval_samples_per_second": 35.296, |
| "eval_steps_per_second": 8.829, |
| "eval_wer": 0.9999375507400238, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.172778328377932, |
| "grad_norm": 6.866231441497803, |
| "learning_rate": 3.909261094593107e-06, |
| "loss": 3.5613, |
| "step": 7100 |
| }, |
| { |
| "epoch": 1.1892963330029733, |
| "grad_norm": 2.480027675628662, |
| "learning_rate": 3.964321110009911e-06, |
| "loss": 3.4725, |
| "step": 7200 |
| }, |
| { |
| "epoch": 1.2058143376280146, |
| "grad_norm": 4.010397911071777, |
| "learning_rate": 4.019381125426716e-06, |
| "loss": 3.4202, |
| "step": 7300 |
| }, |
| { |
| "epoch": 1.2223323422530559, |
| "grad_norm": 5.230499267578125, |
| "learning_rate": 4.0744411408435195e-06, |
| "loss": 3.3595, |
| "step": 7400 |
| }, |
| { |
| "epoch": 1.2388503468780971, |
| "grad_norm": 3.2052972316741943, |
| "learning_rate": 4.129501156260324e-06, |
| "loss": 3.3002, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.2553683515031384, |
| "grad_norm": 1.894927978515625, |
| "learning_rate": 4.184561171677129e-06, |
| "loss": 3.1986, |
| "step": 7600 |
| }, |
| { |
| "epoch": 1.2718863561281797, |
| "grad_norm": 2.197263240814209, |
| "learning_rate": 4.239621187093933e-06, |
| "loss": 3.1351, |
| "step": 7700 |
| }, |
| { |
| "epoch": 1.288404360753221, |
| "grad_norm": 2.1114447116851807, |
| "learning_rate": 4.294681202510737e-06, |
| "loss": 3.126, |
| "step": 7800 |
| }, |
| { |
| "epoch": 1.3049223653782622, |
| "grad_norm": 1.9289065599441528, |
| "learning_rate": 4.349741217927541e-06, |
| "loss": 3.0814, |
| "step": 7900 |
| }, |
| { |
| "epoch": 1.3214403700033035, |
| "grad_norm": 1.314790964126587, |
| "learning_rate": 4.4048012333443454e-06, |
| "loss": 3.061, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.3214403700033035, |
| "eval_cer": 0.9883050441448223, |
| "eval_loss": 2.9990100860595703, |
| "eval_runtime": 47.905, |
| "eval_samples_per_second": 35.299, |
| "eval_steps_per_second": 8.83, |
| "eval_wer": 0.9999375507400238, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.3379583746283448, |
| "grad_norm": 1.4136910438537598, |
| "learning_rate": 4.45986124876115e-06, |
| "loss": 3.0138, |
| "step": 8100 |
| }, |
| { |
| "epoch": 1.3544763792533863, |
| "grad_norm": 1.2747600078582764, |
| "learning_rate": 4.514921264177955e-06, |
| "loss": 2.9893, |
| "step": 8200 |
| }, |
| { |
| "epoch": 1.3709943838784275, |
| "grad_norm": 1.3073844909667969, |
| "learning_rate": 4.569981279594758e-06, |
| "loss": 2.9698, |
| "step": 8300 |
| }, |
| { |
| "epoch": 1.3875123885034688, |
| "grad_norm": 1.0685036182403564, |
| "learning_rate": 4.625041295011563e-06, |
| "loss": 2.9459, |
| "step": 8400 |
| }, |
| { |
| "epoch": 1.40403039312851, |
| "grad_norm": 1.4280977249145508, |
| "learning_rate": 4.680101310428367e-06, |
| "loss": 2.9276, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.4205483977535514, |
| "grad_norm": 0.5592168569564819, |
| "learning_rate": 4.735161325845171e-06, |
| "loss": 2.9128, |
| "step": 8600 |
| }, |
| { |
| "epoch": 1.4370664023785926, |
| "grad_norm": 0.6144903302192688, |
| "learning_rate": 4.790221341261976e-06, |
| "loss": 2.89, |
| "step": 8700 |
| }, |
| { |
| "epoch": 1.453584407003634, |
| "grad_norm": 0.5364680886268616, |
| "learning_rate": 4.845281356678781e-06, |
| "loss": 2.884, |
| "step": 8800 |
| }, |
| { |
| "epoch": 1.4701024116286754, |
| "grad_norm": 0.42214369773864746, |
| "learning_rate": 4.900341372095584e-06, |
| "loss": 2.8763, |
| "step": 8900 |
| }, |
| { |
| "epoch": 1.4866204162537167, |
| "grad_norm": 1.1128541231155396, |
| "learning_rate": 4.955401387512389e-06, |
| "loss": 2.8674, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.4866204162537167, |
| "eval_cer": 0.9883050441448223, |
| "eval_loss": 2.8672590255737305, |
| "eval_runtime": 47.8727, |
| "eval_samples_per_second": 35.323, |
| "eval_steps_per_second": 8.836, |
| "eval_wer": 0.9999375507400238, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.503138420878758, |
| "grad_norm": 1.4287844896316528, |
| "learning_rate": 5.0104614029291935e-06, |
| "loss": 2.8587, |
| "step": 9100 |
| }, |
| { |
| "epoch": 1.5196564255037992, |
| "grad_norm": 0.7117812037467957, |
| "learning_rate": 5.065521418345998e-06, |
| "loss": 2.8496, |
| "step": 9200 |
| }, |
| { |
| "epoch": 1.5361744301288405, |
| "grad_norm": 0.346927285194397, |
| "learning_rate": 5.120581433762803e-06, |
| "loss": 2.8843, |
| "step": 9300 |
| }, |
| { |
| "epoch": 1.5526924347538817, |
| "grad_norm": 0.28466975688934326, |
| "learning_rate": 5.175641449179606e-06, |
| "loss": 2.8361, |
| "step": 9400 |
| }, |
| { |
| "epoch": 1.569210439378923, |
| "grad_norm": 0.9709968566894531, |
| "learning_rate": 5.23070146459641e-06, |
| "loss": 2.8347, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.5857284440039643, |
| "grad_norm": 0.41504138708114624, |
| "learning_rate": 5.285761480013215e-06, |
| "loss": 2.8281, |
| "step": 9600 |
| }, |
| { |
| "epoch": 1.6022464486290056, |
| "grad_norm": 0.7209063172340393, |
| "learning_rate": 5.3408214954300195e-06, |
| "loss": 2.8216, |
| "step": 9700 |
| }, |
| { |
| "epoch": 1.6187644532540468, |
| "grad_norm": 0.17556777596473694, |
| "learning_rate": 5.395881510846823e-06, |
| "loss": 2.8191, |
| "step": 9800 |
| }, |
| { |
| "epoch": 1.635282457879088, |
| "grad_norm": 0.24542377889156342, |
| "learning_rate": 5.450941526263628e-06, |
| "loss": 2.8171, |
| "step": 9900 |
| }, |
| { |
| "epoch": 1.6518004625041294, |
| "grad_norm": 0.5793740153312683, |
| "learning_rate": 5.5060015416804324e-06, |
| "loss": 2.8139, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.6518004625041294, |
| "eval_cer": 0.9883050441448223, |
| "eval_loss": 2.8268349170684814, |
| "eval_runtime": 47.7716, |
| "eval_samples_per_second": 35.398, |
| "eval_steps_per_second": 8.855, |
| "eval_wer": 0.9999375507400238, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.6683184671291706, |
| "grad_norm": 0.5430779457092285, |
| "learning_rate": 5.561061557097236e-06, |
| "loss": 2.8118, |
| "step": 10100 |
| }, |
| { |
| "epoch": 1.6848364717542121, |
| "grad_norm": 0.16136805713176727, |
| "learning_rate": 5.616121572514041e-06, |
| "loss": 2.8084, |
| "step": 10200 |
| }, |
| { |
| "epoch": 1.7013544763792534, |
| "grad_norm": 0.180739626288414, |
| "learning_rate": 5.671181587930845e-06, |
| "loss": 2.8075, |
| "step": 10300 |
| }, |
| { |
| "epoch": 1.7178724810042947, |
| "grad_norm": 0.33964207768440247, |
| "learning_rate": 5.72624160334765e-06, |
| "loss": 2.8029, |
| "step": 10400 |
| }, |
| { |
| "epoch": 1.734390485629336, |
| "grad_norm": 0.3545405864715576, |
| "learning_rate": 5.781301618764453e-06, |
| "loss": 2.8026, |
| "step": 10500 |
| }, |
| { |
| "epoch": 1.7509084902543772, |
| "grad_norm": 0.6962557435035706, |
| "learning_rate": 5.8363616341812575e-06, |
| "loss": 2.8008, |
| "step": 10600 |
| }, |
| { |
| "epoch": 1.7674264948794187, |
| "grad_norm": 0.49546900391578674, |
| "learning_rate": 5.891421649598062e-06, |
| "loss": 2.798, |
| "step": 10700 |
| }, |
| { |
| "epoch": 1.78394449950446, |
| "grad_norm": 0.10469625890254974, |
| "learning_rate": 5.946481665014867e-06, |
| "loss": 2.803, |
| "step": 10800 |
| }, |
| { |
| "epoch": 1.8004625041295013, |
| "grad_norm": 0.48037204146385193, |
| "learning_rate": 6.0015416804316705e-06, |
| "loss": 2.7972, |
| "step": 10900 |
| }, |
| { |
| "epoch": 1.8169805087545425, |
| "grad_norm": 1.4760863780975342, |
| "learning_rate": 6.056601695848475e-06, |
| "loss": 2.7957, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.8169805087545425, |
| "eval_cer": 0.9883050441448223, |
| "eval_loss": 2.815251588821411, |
| "eval_runtime": 47.7131, |
| "eval_samples_per_second": 35.441, |
| "eval_steps_per_second": 8.865, |
| "eval_wer": 0.9999375507400238, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.8334985133795838, |
| "grad_norm": 0.6480938196182251, |
| "learning_rate": 6.11166171126528e-06, |
| "loss": 2.7959, |
| "step": 11100 |
| }, |
| { |
| "epoch": 1.850016518004625, |
| "grad_norm": 0.09613073617219925, |
| "learning_rate": 6.166721726682084e-06, |
| "loss": 2.795, |
| "step": 11200 |
| }, |
| { |
| "epoch": 1.8665345226296663, |
| "grad_norm": 0.7749711275100708, |
| "learning_rate": 6.221781742098888e-06, |
| "loss": 2.7929, |
| "step": 11300 |
| }, |
| { |
| "epoch": 1.8830525272547076, |
| "grad_norm": 0.3546479046344757, |
| "learning_rate": 6.276841757515693e-06, |
| "loss": 2.7918, |
| "step": 11400 |
| }, |
| { |
| "epoch": 1.899570531879749, |
| "grad_norm": 1.12019681930542, |
| "learning_rate": 6.331901772932497e-06, |
| "loss": 2.7918, |
| "step": 11500 |
| }, |
| { |
| "epoch": 1.9160885365047902, |
| "grad_norm": 0.8891560435295105, |
| "learning_rate": 6.386961788349302e-06, |
| "loss": 2.7891, |
| "step": 11600 |
| }, |
| { |
| "epoch": 1.9326065411298314, |
| "grad_norm": 0.17968548834323883, |
| "learning_rate": 6.442021803766106e-06, |
| "loss": 2.7893, |
| "step": 11700 |
| }, |
| { |
| "epoch": 1.9491245457548727, |
| "grad_norm": 0.7718554139137268, |
| "learning_rate": 6.49708181918291e-06, |
| "loss": 2.7906, |
| "step": 11800 |
| }, |
| { |
| "epoch": 1.965642550379914, |
| "grad_norm": 0.20580369234085083, |
| "learning_rate": 6.552141834599715e-06, |
| "loss": 2.7855, |
| "step": 11900 |
| }, |
| { |
| "epoch": 1.9821605550049552, |
| "grad_norm": 0.12557658553123474, |
| "learning_rate": 6.607201850016518e-06, |
| "loss": 2.7821, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.9821605550049552, |
| "eval_cer": 0.9883050441448223, |
| "eval_loss": 2.801440477371216, |
| "eval_runtime": 48.1673, |
| "eval_samples_per_second": 35.107, |
| "eval_steps_per_second": 8.782, |
| "eval_wer": 0.9999375507400238, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.9986785596299967, |
| "grad_norm": 0.29796159267425537, |
| "learning_rate": 6.662261865433322e-06, |
| "loss": 2.8061, |
| "step": 12100 |
| }, |
| { |
| "epoch": 2.015196564255038, |
| "grad_norm": 0.43523645401000977, |
| "learning_rate": 6.717321880850127e-06, |
| "loss": 2.7722, |
| "step": 12200 |
| }, |
| { |
| "epoch": 2.031714568880079, |
| "grad_norm": 0.8194575905799866, |
| "learning_rate": 6.7723818962669316e-06, |
| "loss": 2.7428, |
| "step": 12300 |
| }, |
| { |
| "epoch": 2.048232573505121, |
| "grad_norm": 0.5913192629814148, |
| "learning_rate": 6.827441911683735e-06, |
| "loss": 2.6796, |
| "step": 12400 |
| }, |
| { |
| "epoch": 2.064750578130162, |
| "grad_norm": 1.2272390127182007, |
| "learning_rate": 6.88250192710054e-06, |
| "loss": 2.5647, |
| "step": 12500 |
| }, |
| { |
| "epoch": 2.0812685827552033, |
| "grad_norm": 0.7809718251228333, |
| "learning_rate": 6.9375619425173445e-06, |
| "loss": 2.4445, |
| "step": 12600 |
| }, |
| { |
| "epoch": 2.0977865873802446, |
| "grad_norm": 1.4848648309707642, |
| "learning_rate": 6.992621957934149e-06, |
| "loss": 2.3472, |
| "step": 12700 |
| }, |
| { |
| "epoch": 2.114304592005286, |
| "grad_norm": 0.9019191265106201, |
| "learning_rate": 7.047681973350953e-06, |
| "loss": 2.2088, |
| "step": 12800 |
| }, |
| { |
| "epoch": 2.130822596630327, |
| "grad_norm": 1.6210988759994507, |
| "learning_rate": 7.1027419887677575e-06, |
| "loss": 2.0813, |
| "step": 12900 |
| }, |
| { |
| "epoch": 2.1473406012553684, |
| "grad_norm": 0.9672953486442566, |
| "learning_rate": 7.157802004184562e-06, |
| "loss": 1.9636, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.1473406012553684, |
| "eval_cer": 0.42876942029977416, |
| "eval_loss": 1.6961537599563599, |
| "eval_runtime": 48.2183, |
| "eval_samples_per_second": 35.07, |
| "eval_steps_per_second": 8.773, |
| "eval_wer": 0.9999375507400238, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.1638586058804097, |
| "grad_norm": 1.5448602437973022, |
| "learning_rate": 7.212862019601367e-06, |
| "loss": 1.8347, |
| "step": 13100 |
| }, |
| { |
| "epoch": 2.180376610505451, |
| "grad_norm": 1.3155843019485474, |
| "learning_rate": 7.2679220350181704e-06, |
| "loss": 1.6876, |
| "step": 13200 |
| }, |
| { |
| "epoch": 2.1968946151304922, |
| "grad_norm": 1.4173344373703003, |
| "learning_rate": 7.322982050434975e-06, |
| "loss": 1.6086, |
| "step": 13300 |
| }, |
| { |
| "epoch": 2.2134126197555335, |
| "grad_norm": 1.2968000173568726, |
| "learning_rate": 7.37804206585178e-06, |
| "loss": 1.4922, |
| "step": 13400 |
| }, |
| { |
| "epoch": 2.2299306243805748, |
| "grad_norm": 1.3589733839035034, |
| "learning_rate": 7.433102081268584e-06, |
| "loss": 1.4186, |
| "step": 13500 |
| }, |
| { |
| "epoch": 2.246448629005616, |
| "grad_norm": 1.5690885782241821, |
| "learning_rate": 7.488162096685387e-06, |
| "loss": 1.3541, |
| "step": 13600 |
| }, |
| { |
| "epoch": 2.2629666336306573, |
| "grad_norm": 1.1378659009933472, |
| "learning_rate": 7.543222112102192e-06, |
| "loss": 1.2945, |
| "step": 13700 |
| }, |
| { |
| "epoch": 2.2794846382556986, |
| "grad_norm": 1.2513694763183594, |
| "learning_rate": 7.598282127518996e-06, |
| "loss": 1.1947, |
| "step": 13800 |
| }, |
| { |
| "epoch": 2.29600264288074, |
| "grad_norm": 1.4649907350540161, |
| "learning_rate": 7.6533421429358e-06, |
| "loss": 1.1518, |
| "step": 13900 |
| }, |
| { |
| "epoch": 2.312520647505781, |
| "grad_norm": 1.2642732858657837, |
| "learning_rate": 7.708402158352606e-06, |
| "loss": 1.117, |
| "step": 14000 |
| }, |
| { |
| "epoch": 2.312520647505781, |
| "eval_cer": 0.13544589692697284, |
| "eval_loss": 0.8784080147743225, |
| "eval_runtime": 48.3911, |
| "eval_samples_per_second": 34.944, |
| "eval_steps_per_second": 8.741, |
| "eval_wer": 0.7124836070692562, |
| "step": 14000 |
| }, |
| { |
| "epoch": 2.329038652130823, |
| "grad_norm": 1.14898681640625, |
| "learning_rate": 7.76346217376941e-06, |
| "loss": 1.0267, |
| "step": 14100 |
| }, |
| { |
| "epoch": 2.345556656755864, |
| "grad_norm": 1.1903839111328125, |
| "learning_rate": 7.818522189186215e-06, |
| "loss": 0.994, |
| "step": 14200 |
| }, |
| { |
| "epoch": 2.3620746613809054, |
| "grad_norm": 1.2429312467575073, |
| "learning_rate": 7.873582204603017e-06, |
| "loss": 0.9589, |
| "step": 14300 |
| }, |
| { |
| "epoch": 2.3785926660059467, |
| "grad_norm": 1.2845401763916016, |
| "learning_rate": 7.928642220019822e-06, |
| "loss": 0.9027, |
| "step": 14400 |
| }, |
| { |
| "epoch": 2.395110670630988, |
| "grad_norm": 1.204010009765625, |
| "learning_rate": 7.983702235436626e-06, |
| "loss": 0.8773, |
| "step": 14500 |
| }, |
| { |
| "epoch": 2.411628675256029, |
| "grad_norm": 1.1621061563491821, |
| "learning_rate": 8.038762250853432e-06, |
| "loss": 0.8351, |
| "step": 14600 |
| }, |
| { |
| "epoch": 2.4281466798810705, |
| "grad_norm": 1.153045654296875, |
| "learning_rate": 8.093822266270235e-06, |
| "loss": 0.8008, |
| "step": 14700 |
| }, |
| { |
| "epoch": 2.4446646845061117, |
| "grad_norm": 1.1481854915618896, |
| "learning_rate": 8.148882281687039e-06, |
| "loss": 0.7573, |
| "step": 14800 |
| }, |
| { |
| "epoch": 2.461182689131153, |
| "grad_norm": 1.1236218214035034, |
| "learning_rate": 8.203942297103844e-06, |
| "loss": 0.7381, |
| "step": 14900 |
| }, |
| { |
| "epoch": 2.4777006937561943, |
| "grad_norm": 0.9569886326789856, |
| "learning_rate": 8.259002312520648e-06, |
| "loss": 0.7118, |
| "step": 15000 |
| }, |
| { |
| "epoch": 2.4777006937561943, |
| "eval_cer": 0.1136900280610499, |
| "eval_loss": 0.5557882189750671, |
| "eval_runtime": 48.3382, |
| "eval_samples_per_second": 34.983, |
| "eval_steps_per_second": 8.751, |
| "eval_wer": 0.6227440204833573, |
| "step": 15000 |
| }, |
| { |
| "epoch": 2.4942186983812356, |
| "grad_norm": 1.3618088960647583, |
| "learning_rate": 8.314062327937452e-06, |
| "loss": 0.7074, |
| "step": 15100 |
| }, |
| { |
| "epoch": 2.510736703006277, |
| "grad_norm": 0.933047354221344, |
| "learning_rate": 8.369122343354257e-06, |
| "loss": 0.6549, |
| "step": 15200 |
| }, |
| { |
| "epoch": 2.527254707631318, |
| "grad_norm": 1.266646146774292, |
| "learning_rate": 8.424182358771061e-06, |
| "loss": 0.6476, |
| "step": 15300 |
| }, |
| { |
| "epoch": 2.5437727122563594, |
| "grad_norm": 1.2776057720184326, |
| "learning_rate": 8.479242374187867e-06, |
| "loss": 0.6121, |
| "step": 15400 |
| }, |
| { |
| "epoch": 2.5602907168814006, |
| "grad_norm": 0.9074415564537048, |
| "learning_rate": 8.534302389604669e-06, |
| "loss": 0.5804, |
| "step": 15500 |
| }, |
| { |
| "epoch": 2.576808721506442, |
| "grad_norm": 0.9598638415336609, |
| "learning_rate": 8.589362405021474e-06, |
| "loss": 0.5695, |
| "step": 15600 |
| }, |
| { |
| "epoch": 2.593326726131483, |
| "grad_norm": 1.142428994178772, |
| "learning_rate": 8.644422420438278e-06, |
| "loss": 0.5548, |
| "step": 15700 |
| }, |
| { |
| "epoch": 2.6098447307565245, |
| "grad_norm": 1.1081598997116089, |
| "learning_rate": 8.699482435855082e-06, |
| "loss": 0.5482, |
| "step": 15800 |
| }, |
| { |
| "epoch": 2.6263627353815657, |
| "grad_norm": 1.1400047540664673, |
| "learning_rate": 8.754542451271887e-06, |
| "loss": 0.5071, |
| "step": 15900 |
| }, |
| { |
| "epoch": 2.642880740006607, |
| "grad_norm": 1.0958024263381958, |
| "learning_rate": 8.809602466688691e-06, |
| "loss": 0.4936, |
| "step": 16000 |
| }, |
| { |
| "epoch": 2.642880740006607, |
| "eval_cer": 0.09633153103825885, |
| "eval_loss": 0.38487282395362854, |
| "eval_runtime": 48.7501, |
| "eval_samples_per_second": 34.687, |
| "eval_steps_per_second": 8.677, |
| "eval_wer": 0.5314432023980515, |
| "step": 16000 |
| }, |
| { |
| "epoch": 2.6593987446316483, |
| "grad_norm": 1.1065205335617065, |
| "learning_rate": 8.864662482105496e-06, |
| "loss": 0.4899, |
| "step": 16100 |
| }, |
| { |
| "epoch": 2.6759167492566895, |
| "grad_norm": 1.138617992401123, |
| "learning_rate": 8.9197224975223e-06, |
| "loss": 0.4721, |
| "step": 16200 |
| }, |
| { |
| "epoch": 2.692434753881731, |
| "grad_norm": 1.2217905521392822, |
| "learning_rate": 8.974782512939104e-06, |
| "loss": 0.4723, |
| "step": 16300 |
| }, |
| { |
| "epoch": 2.7089527585067725, |
| "grad_norm": 1.0747772455215454, |
| "learning_rate": 9.02984252835591e-06, |
| "loss": 0.4861, |
| "step": 16400 |
| }, |
| { |
| "epoch": 2.725470763131814, |
| "grad_norm": 1.0680921077728271, |
| "learning_rate": 9.084902543772713e-06, |
| "loss": 0.4374, |
| "step": 16500 |
| }, |
| { |
| "epoch": 2.741988767756855, |
| "grad_norm": 1.0042054653167725, |
| "learning_rate": 9.139962559189517e-06, |
| "loss": 0.4247, |
| "step": 16600 |
| }, |
| { |
| "epoch": 2.7585067723818963, |
| "grad_norm": 0.9904269576072693, |
| "learning_rate": 9.195022574606322e-06, |
| "loss": 0.4356, |
| "step": 16700 |
| }, |
| { |
| "epoch": 2.7750247770069376, |
| "grad_norm": 1.1831291913986206, |
| "learning_rate": 9.250082590023126e-06, |
| "loss": 0.41, |
| "step": 16800 |
| }, |
| { |
| "epoch": 2.791542781631979, |
| "grad_norm": 0.973407506942749, |
| "learning_rate": 9.305142605439931e-06, |
| "loss": 0.4053, |
| "step": 16900 |
| }, |
| { |
| "epoch": 2.80806078625702, |
| "grad_norm": 1.0600470304489136, |
| "learning_rate": 9.360202620856734e-06, |
| "loss": 0.4109, |
| "step": 17000 |
| }, |
| { |
| "epoch": 2.80806078625702, |
| "eval_cer": 0.08471357196632674, |
| "eval_loss": 0.3102828562259674, |
| "eval_runtime": 50.4826, |
| "eval_samples_per_second": 33.497, |
| "eval_steps_per_second": 8.379, |
| "eval_wer": 0.46568413164304, |
| "step": 17000 |
| }, |
| { |
| "epoch": 2.8245787908820614, |
| "grad_norm": 1.3490804433822632, |
| "learning_rate": 9.415262636273539e-06, |
| "loss": 0.4253, |
| "step": 17100 |
| }, |
| { |
| "epoch": 2.8410967955071027, |
| "grad_norm": 0.9531931281089783, |
| "learning_rate": 9.470322651690343e-06, |
| "loss": 0.4057, |
| "step": 17200 |
| }, |
| { |
| "epoch": 2.857614800132144, |
| "grad_norm": 1.31855046749115, |
| "learning_rate": 9.525382667107148e-06, |
| "loss": 0.3935, |
| "step": 17300 |
| }, |
| { |
| "epoch": 2.8741328047571852, |
| "grad_norm": 0.9209637641906738, |
| "learning_rate": 9.580442682523952e-06, |
| "loss": 0.3822, |
| "step": 17400 |
| }, |
| { |
| "epoch": 2.8906508093822265, |
| "grad_norm": 1.0796180963516235, |
| "learning_rate": 9.635502697940756e-06, |
| "loss": 0.3829, |
| "step": 17500 |
| }, |
| { |
| "epoch": 2.907168814007268, |
| "grad_norm": 0.9043625593185425, |
| "learning_rate": 9.690562713357561e-06, |
| "loss": 0.358, |
| "step": 17600 |
| }, |
| { |
| "epoch": 2.923686818632309, |
| "grad_norm": 0.956969678401947, |
| "learning_rate": 9.745622728774365e-06, |
| "loss": 0.3563, |
| "step": 17700 |
| }, |
| { |
| "epoch": 2.9402048232573508, |
| "grad_norm": 0.9611093997955322, |
| "learning_rate": 9.800682744191169e-06, |
| "loss": 0.3886, |
| "step": 17800 |
| }, |
| { |
| "epoch": 2.956722827882392, |
| "grad_norm": 0.9433591365814209, |
| "learning_rate": 9.855742759607974e-06, |
| "loss": 0.3646, |
| "step": 17900 |
| }, |
| { |
| "epoch": 2.9732408325074333, |
| "grad_norm": 1.0778321027755737, |
| "learning_rate": 9.910802775024778e-06, |
| "loss": 0.3928, |
| "step": 18000 |
| }, |
| { |
| "epoch": 2.9732408325074333, |
| "eval_cer": 0.07708233522688386, |
| "eval_loss": 0.2756275534629822, |
| "eval_runtime": 48.2974, |
| "eval_samples_per_second": 35.012, |
| "eval_steps_per_second": 8.758, |
| "eval_wer": 0.42696559045775306, |
| "step": 18000 |
| }, |
| { |
| "epoch": 2.9897588371324746, |
| "grad_norm": 0.880342423915863, |
| "learning_rate": 9.965862790441582e-06, |
| "loss": 0.3485, |
| "step": 18100 |
| }, |
| { |
| "epoch": 3.006276841757516, |
| "grad_norm": 0.955066442489624, |
| "learning_rate": 9.997675243793513e-06, |
| "loss": 0.3558, |
| "step": 18200 |
| }, |
| { |
| "epoch": 3.022794846382557, |
| "grad_norm": 0.9584730863571167, |
| "learning_rate": 9.991557464302758e-06, |
| "loss": 0.3374, |
| "step": 18300 |
| }, |
| { |
| "epoch": 3.0393128510075984, |
| "grad_norm": 0.9742453694343567, |
| "learning_rate": 9.985439684812002e-06, |
| "loss": 0.3238, |
| "step": 18400 |
| }, |
| { |
| "epoch": 3.0558308556326397, |
| "grad_norm": 0.9343051910400391, |
| "learning_rate": 9.979321905321245e-06, |
| "loss": 0.325, |
| "step": 18500 |
| }, |
| { |
| "epoch": 3.072348860257681, |
| "grad_norm": 0.9533226490020752, |
| "learning_rate": 9.973204125830489e-06, |
| "loss": 0.3221, |
| "step": 18600 |
| }, |
| { |
| "epoch": 3.088866864882722, |
| "grad_norm": 1.0769504308700562, |
| "learning_rate": 9.967086346339734e-06, |
| "loss": 0.3212, |
| "step": 18700 |
| }, |
| { |
| "epoch": 3.1053848695077635, |
| "grad_norm": 1.0930256843566895, |
| "learning_rate": 9.960968566848977e-06, |
| "loss": 0.323, |
| "step": 18800 |
| }, |
| { |
| "epoch": 3.1219028741328048, |
| "grad_norm": 1.0789791345596313, |
| "learning_rate": 9.954850787358221e-06, |
| "loss": 0.3082, |
| "step": 18900 |
| }, |
| { |
| "epoch": 3.138420878757846, |
| "grad_norm": 0.8638594150543213, |
| "learning_rate": 9.948733007867464e-06, |
| "loss": 0.3282, |
| "step": 19000 |
| }, |
| { |
| "epoch": 3.138420878757846, |
| "eval_cer": 0.07125624529464103, |
| "eval_loss": 0.25253963470458984, |
| "eval_runtime": 48.5825, |
| "eval_samples_per_second": 34.807, |
| "eval_steps_per_second": 8.707, |
| "eval_wer": 0.3938050334103541, |
| "step": 19000 |
| }, |
| { |
| "epoch": 3.1549388833828873, |
| "grad_norm": 1.095402479171753, |
| "learning_rate": 9.94261522837671e-06, |
| "loss": 0.3278, |
| "step": 19100 |
| }, |
| { |
| "epoch": 3.1714568880079286, |
| "grad_norm": 0.7922815680503845, |
| "learning_rate": 9.936497448885953e-06, |
| "loss": 0.3157, |
| "step": 19200 |
| }, |
| { |
| "epoch": 3.18797489263297, |
| "grad_norm": 0.9539555907249451, |
| "learning_rate": 9.930379669395196e-06, |
| "loss": 0.3025, |
| "step": 19300 |
| }, |
| { |
| "epoch": 3.204492897258011, |
| "grad_norm": 0.8902342915534973, |
| "learning_rate": 9.92426188990444e-06, |
| "loss": 0.3065, |
| "step": 19400 |
| }, |
| { |
| "epoch": 3.2210109018830524, |
| "grad_norm": 0.8279675841331482, |
| "learning_rate": 9.918144110413685e-06, |
| "loss": 0.2993, |
| "step": 19500 |
| }, |
| { |
| "epoch": 3.2375289065080937, |
| "grad_norm": 0.8788127899169922, |
| "learning_rate": 9.912026330922929e-06, |
| "loss": 0.3052, |
| "step": 19600 |
| }, |
| { |
| "epoch": 3.254046911133135, |
| "grad_norm": 1.043555736541748, |
| "learning_rate": 9.905908551432174e-06, |
| "loss": 0.3041, |
| "step": 19700 |
| }, |
| { |
| "epoch": 3.270564915758176, |
| "grad_norm": 1.0483660697937012, |
| "learning_rate": 9.899790771941417e-06, |
| "loss": 0.3075, |
| "step": 19800 |
| }, |
| { |
| "epoch": 3.2870829203832175, |
| "grad_norm": 1.1182364225387573, |
| "learning_rate": 9.89367299245066e-06, |
| "loss": 0.2896, |
| "step": 19900 |
| }, |
| { |
| "epoch": 3.303600925008259, |
| "grad_norm": 0.951245903968811, |
| "learning_rate": 9.887555212959906e-06, |
| "loss": 0.2924, |
| "step": 20000 |
| }, |
| { |
| "epoch": 3.303600925008259, |
| "eval_cer": 0.06822770515365136, |
| "eval_loss": 0.23321548104286194, |
| "eval_runtime": 47.9978, |
| "eval_samples_per_second": 35.231, |
| "eval_steps_per_second": 8.813, |
| "eval_wer": 0.3738837194779242, |
| "step": 20000 |
| }, |
| { |
| "epoch": 3.3201189296333005, |
| "grad_norm": 0.8600097894668579, |
| "learning_rate": 9.88143743346915e-06, |
| "loss": 0.2907, |
| "step": 20100 |
| }, |
| { |
| "epoch": 3.3366369342583417, |
| "grad_norm": 1.1580179929733276, |
| "learning_rate": 9.875319653978393e-06, |
| "loss": 0.2864, |
| "step": 20200 |
| }, |
| { |
| "epoch": 3.353154938883383, |
| "grad_norm": 0.8637755513191223, |
| "learning_rate": 9.869201874487638e-06, |
| "loss": 0.2851, |
| "step": 20300 |
| }, |
| { |
| "epoch": 3.3696729435084243, |
| "grad_norm": 0.8564406633377075, |
| "learning_rate": 9.863084094996881e-06, |
| "loss": 0.2808, |
| "step": 20400 |
| }, |
| { |
| "epoch": 3.3861909481334656, |
| "grad_norm": 0.8136361241340637, |
| "learning_rate": 9.856966315506125e-06, |
| "loss": 0.3224, |
| "step": 20500 |
| }, |
| { |
| "epoch": 3.402708952758507, |
| "grad_norm": 1.0240442752838135, |
| "learning_rate": 9.850848536015368e-06, |
| "loss": 0.2779, |
| "step": 20600 |
| }, |
| { |
| "epoch": 3.419226957383548, |
| "grad_norm": 1.09860360622406, |
| "learning_rate": 9.844730756524613e-06, |
| "loss": 0.2817, |
| "step": 20700 |
| }, |
| { |
| "epoch": 3.4357449620085894, |
| "grad_norm": 1.0666810274124146, |
| "learning_rate": 9.838612977033857e-06, |
| "loss": 0.2917, |
| "step": 20800 |
| }, |
| { |
| "epoch": 3.4522629666336306, |
| "grad_norm": 0.9965100288391113, |
| "learning_rate": 9.8324951975431e-06, |
| "loss": 0.3204, |
| "step": 20900 |
| }, |
| { |
| "epoch": 3.468780971258672, |
| "grad_norm": 0.9994562864303589, |
| "learning_rate": 9.826377418052344e-06, |
| "loss": 0.2647, |
| "step": 21000 |
| }, |
| { |
| "epoch": 3.468780971258672, |
| "eval_cer": 0.06489973307781809, |
| "eval_loss": 0.22496198117733002, |
| "eval_runtime": 48.3102, |
| "eval_samples_per_second": 35.003, |
| "eval_steps_per_second": 8.756, |
| "eval_wer": 0.3570224192843315, |
| "step": 21000 |
| }, |
| { |
| "epoch": 3.485298975883713, |
| "grad_norm": 0.7928122878074646, |
| "learning_rate": 9.820259638561589e-06, |
| "loss": 0.3177, |
| "step": 21100 |
| }, |
| { |
| "epoch": 3.5018169805087545, |
| "grad_norm": 0.8977111577987671, |
| "learning_rate": 9.814141859070832e-06, |
| "loss": 0.2634, |
| "step": 21200 |
| }, |
| { |
| "epoch": 3.5183349851337957, |
| "grad_norm": 0.8508104085922241, |
| "learning_rate": 9.808024079580076e-06, |
| "loss": 0.2672, |
| "step": 21300 |
| }, |
| { |
| "epoch": 3.534852989758837, |
| "grad_norm": 0.8211712837219238, |
| "learning_rate": 9.80190630008932e-06, |
| "loss": 0.2742, |
| "step": 21400 |
| }, |
| { |
| "epoch": 3.5513709943838783, |
| "grad_norm": 0.739600658416748, |
| "learning_rate": 9.795788520598564e-06, |
| "loss": 0.2756, |
| "step": 21500 |
| }, |
| { |
| "epoch": 3.56788899900892, |
| "grad_norm": 1.0395748615264893, |
| "learning_rate": 9.789670741107808e-06, |
| "loss": 0.277, |
| "step": 21600 |
| }, |
| { |
| "epoch": 3.5844070036339613, |
| "grad_norm": 0.8592670559883118, |
| "learning_rate": 9.783552961617051e-06, |
| "loss": 0.2576, |
| "step": 21700 |
| }, |
| { |
| "epoch": 3.6009250082590025, |
| "grad_norm": 2.0866379737854004, |
| "learning_rate": 9.777435182126297e-06, |
| "loss": 0.2561, |
| "step": 21800 |
| }, |
| { |
| "epoch": 3.617443012884044, |
| "grad_norm": 0.7784512042999268, |
| "learning_rate": 9.77131740263554e-06, |
| "loss": 0.2649, |
| "step": 21900 |
| }, |
| { |
| "epoch": 3.633961017509085, |
| "grad_norm": 0.8441452383995056, |
| "learning_rate": 9.765199623144783e-06, |
| "loss": 0.2516, |
| "step": 22000 |
| }, |
| { |
| "epoch": 3.633961017509085, |
| "eval_cer": 0.06247861200465403, |
| "eval_loss": 0.20949821174144745, |
| "eval_runtime": 48.003, |
| "eval_samples_per_second": 35.227, |
| "eval_steps_per_second": 8.812, |
| "eval_wer": 0.3444701180291014, |
| "step": 22000 |
| }, |
| { |
| "epoch": 3.6504790221341263, |
| "grad_norm": 1.5408446788787842, |
| "learning_rate": 9.759081843654029e-06, |
| "loss": 0.2725, |
| "step": 22100 |
| }, |
| { |
| "epoch": 3.6669970267591676, |
| "grad_norm": 0.9491544961929321, |
| "learning_rate": 9.752964064163272e-06, |
| "loss": 0.2484, |
| "step": 22200 |
| }, |
| { |
| "epoch": 3.683515031384209, |
| "grad_norm": 0.9039120674133301, |
| "learning_rate": 9.746846284672517e-06, |
| "loss": 0.2673, |
| "step": 22300 |
| }, |
| { |
| "epoch": 3.70003303600925, |
| "grad_norm": 2.336216926574707, |
| "learning_rate": 9.74072850518176e-06, |
| "loss": 0.2588, |
| "step": 22400 |
| }, |
| { |
| "epoch": 3.7165510406342914, |
| "grad_norm": 0.889430046081543, |
| "learning_rate": 9.734610725691004e-06, |
| "loss": 0.2418, |
| "step": 22500 |
| }, |
| { |
| "epoch": 3.7330690452593327, |
| "grad_norm": 0.9641227722167969, |
| "learning_rate": 9.728492946200248e-06, |
| "loss": 0.2447, |
| "step": 22600 |
| }, |
| { |
| "epoch": 3.749587049884374, |
| "grad_norm": 1.2751914262771606, |
| "learning_rate": 9.722375166709493e-06, |
| "loss": 0.2481, |
| "step": 22700 |
| }, |
| { |
| "epoch": 3.7661050545094152, |
| "grad_norm": 0.8244801759719849, |
| "learning_rate": 9.716257387218736e-06, |
| "loss": 0.281, |
| "step": 22800 |
| }, |
| { |
| "epoch": 3.7826230591344565, |
| "grad_norm": 0.8751392364501953, |
| "learning_rate": 9.71013960772798e-06, |
| "loss": 0.2613, |
| "step": 22900 |
| }, |
| { |
| "epoch": 3.799141063759498, |
| "grad_norm": 0.8482999801635742, |
| "learning_rate": 9.704021828237223e-06, |
| "loss": 0.2372, |
| "step": 23000 |
| }, |
| { |
| "epoch": 3.799141063759498, |
| "eval_cer": 0.06080179316952981, |
| "eval_loss": 0.20372046530246735, |
| "eval_runtime": 48.3892, |
| "eval_samples_per_second": 34.946, |
| "eval_steps_per_second": 8.742, |
| "eval_wer": 0.33447823643289826, |
| "step": 23000 |
| }, |
| { |
| "epoch": 3.815659068384539, |
| "grad_norm": 0.7899025082588196, |
| "learning_rate": 9.697904048746468e-06, |
| "loss": 0.239, |
| "step": 23100 |
| }, |
| { |
| "epoch": 3.8321770730095803, |
| "grad_norm": 1.0239996910095215, |
| "learning_rate": 9.691786269255712e-06, |
| "loss": 0.2355, |
| "step": 23200 |
| }, |
| { |
| "epoch": 3.8486950776346216, |
| "grad_norm": 1.0043885707855225, |
| "learning_rate": 9.685668489764955e-06, |
| "loss": 0.2517, |
| "step": 23300 |
| }, |
| { |
| "epoch": 3.865213082259663, |
| "grad_norm": 0.7398520708084106, |
| "learning_rate": 9.679550710274199e-06, |
| "loss": 0.2349, |
| "step": 23400 |
| }, |
| { |
| "epoch": 3.881731086884704, |
| "grad_norm": 0.8725094199180603, |
| "learning_rate": 9.673432930783444e-06, |
| "loss": 0.2357, |
| "step": 23500 |
| }, |
| { |
| "epoch": 3.8982490915097454, |
| "grad_norm": 0.9465588927268982, |
| "learning_rate": 9.667315151292687e-06, |
| "loss": 0.249, |
| "step": 23600 |
| }, |
| { |
| "epoch": 3.9147670961347867, |
| "grad_norm": 0.814136266708374, |
| "learning_rate": 9.66119737180193e-06, |
| "loss": 0.229, |
| "step": 23700 |
| }, |
| { |
| "epoch": 3.931285100759828, |
| "grad_norm": 0.7686489820480347, |
| "learning_rate": 9.655079592311174e-06, |
| "loss": 0.2287, |
| "step": 23800 |
| }, |
| { |
| "epoch": 3.9478031053848697, |
| "grad_norm": 1.0001749992370605, |
| "learning_rate": 9.64896181282042e-06, |
| "loss": 0.2334, |
| "step": 23900 |
| }, |
| { |
| "epoch": 3.964321110009911, |
| "grad_norm": 0.9546142220497131, |
| "learning_rate": 9.642844033329663e-06, |
| "loss": 0.2447, |
| "step": 24000 |
| }, |
| { |
| "epoch": 3.964321110009911, |
| "eval_cer": 0.058791321607008416, |
| "eval_loss": 0.1985878199338913, |
| "eval_runtime": 48.7285, |
| "eval_samples_per_second": 34.702, |
| "eval_steps_per_second": 8.681, |
| "eval_wer": 0.32417410853681383, |
| "step": 24000 |
| }, |
| { |
| "epoch": 3.980839114634952, |
| "grad_norm": 0.7841982841491699, |
| "learning_rate": 9.636726253838908e-06, |
| "loss": 0.2366, |
| "step": 24100 |
| }, |
| { |
| "epoch": 3.9973571192599935, |
| "grad_norm": 0.7850095629692078, |
| "learning_rate": 9.630608474348151e-06, |
| "loss": 0.2393, |
| "step": 24200 |
| }, |
| { |
| "epoch": 4.013875123885034, |
| "grad_norm": 0.8924582004547119, |
| "learning_rate": 9.624490694857395e-06, |
| "loss": 0.2382, |
| "step": 24300 |
| }, |
| { |
| "epoch": 4.030393128510076, |
| "grad_norm": 0.7775335907936096, |
| "learning_rate": 9.61837291536664e-06, |
| "loss": 0.2317, |
| "step": 24400 |
| }, |
| { |
| "epoch": 4.046911133135117, |
| "grad_norm": 0.8482388257980347, |
| "learning_rate": 9.612255135875884e-06, |
| "loss": 0.2673, |
| "step": 24500 |
| }, |
| { |
| "epoch": 4.063429137760158, |
| "grad_norm": 0.8175519704818726, |
| "learning_rate": 9.606137356385127e-06, |
| "loss": 0.2271, |
| "step": 24600 |
| }, |
| { |
| "epoch": 4.0799471423852, |
| "grad_norm": 0.6910988688468933, |
| "learning_rate": 9.600019576894372e-06, |
| "loss": 0.2321, |
| "step": 24700 |
| }, |
| { |
| "epoch": 4.096465147010242, |
| "grad_norm": 0.8976187109947205, |
| "learning_rate": 9.593901797403616e-06, |
| "loss": 0.2348, |
| "step": 24800 |
| }, |
| { |
| "epoch": 4.112983151635283, |
| "grad_norm": 0.8644862174987793, |
| "learning_rate": 9.587784017912859e-06, |
| "loss": 0.2219, |
| "step": 24900 |
| }, |
| { |
| "epoch": 4.129501156260324, |
| "grad_norm": 0.7493522763252258, |
| "learning_rate": 9.581666238422103e-06, |
| "loss": 0.2304, |
| "step": 25000 |
| }, |
| { |
| "epoch": 4.129501156260324, |
| "eval_cer": 0.05679796044076381, |
| "eval_loss": 0.18797007203102112, |
| "eval_runtime": 48.2897, |
| "eval_samples_per_second": 35.018, |
| "eval_steps_per_second": 8.76, |
| "eval_wer": 0.3129956910010616, |
| "step": 25000 |
| }, |
| { |
| "epoch": 4.146019160885365, |
| "grad_norm": 0.7841621041297913, |
| "learning_rate": 9.575548458931348e-06, |
| "loss": 0.2297, |
| "step": 25100 |
| }, |
| { |
| "epoch": 4.162537165510407, |
| "grad_norm": 0.8792735934257507, |
| "learning_rate": 9.569430679440591e-06, |
| "loss": 0.2305, |
| "step": 25200 |
| }, |
| { |
| "epoch": 4.179055170135448, |
| "grad_norm": 1.1668968200683594, |
| "learning_rate": 9.563312899949835e-06, |
| "loss": 0.2595, |
| "step": 25300 |
| }, |
| { |
| "epoch": 4.195573174760489, |
| "grad_norm": 0.859511137008667, |
| "learning_rate": 9.557195120459078e-06, |
| "loss": 0.217, |
| "step": 25400 |
| }, |
| { |
| "epoch": 4.2120911793855305, |
| "grad_norm": 0.9139505624771118, |
| "learning_rate": 9.551077340968323e-06, |
| "loss": 0.2263, |
| "step": 25500 |
| }, |
| { |
| "epoch": 4.228609184010572, |
| "grad_norm": 0.776094377040863, |
| "learning_rate": 9.544959561477567e-06, |
| "loss": 0.2208, |
| "step": 25600 |
| }, |
| { |
| "epoch": 4.245127188635613, |
| "grad_norm": 0.8811630606651306, |
| "learning_rate": 9.53884178198681e-06, |
| "loss": 0.2229, |
| "step": 25700 |
| }, |
| { |
| "epoch": 4.261645193260654, |
| "grad_norm": 0.9367398619651794, |
| "learning_rate": 9.532724002496054e-06, |
| "loss": 0.2221, |
| "step": 25800 |
| }, |
| { |
| "epoch": 4.2781631978856955, |
| "grad_norm": 0.8413158655166626, |
| "learning_rate": 9.526606223005299e-06, |
| "loss": 0.2616, |
| "step": 25900 |
| }, |
| { |
| "epoch": 4.294681202510737, |
| "grad_norm": 0.9497280120849609, |
| "learning_rate": 9.520488443514542e-06, |
| "loss": 0.2828, |
| "step": 26000 |
| }, |
| { |
| "epoch": 4.294681202510737, |
| "eval_cer": 0.05571145027718842, |
| "eval_loss": 0.18053312599658966, |
| "eval_runtime": 48.2767, |
| "eval_samples_per_second": 35.027, |
| "eval_steps_per_second": 8.762, |
| "eval_wer": 0.30774995316305503, |
| "step": 26000 |
| }, |
| { |
| "epoch": 4.311199207135778, |
| "grad_norm": 0.7471584677696228, |
| "learning_rate": 9.514370664023786e-06, |
| "loss": 0.217, |
| "step": 26100 |
| }, |
| { |
| "epoch": 4.327717211760819, |
| "grad_norm": 0.9198097586631775, |
| "learning_rate": 9.508252884533031e-06, |
| "loss": 0.2093, |
| "step": 26200 |
| }, |
| { |
| "epoch": 4.344235216385861, |
| "grad_norm": 1.2914992570877075, |
| "learning_rate": 9.502135105042274e-06, |
| "loss": 0.2117, |
| "step": 26300 |
| }, |
| { |
| "epoch": 4.360753221010902, |
| "grad_norm": 1.1176624298095703, |
| "learning_rate": 9.496017325551518e-06, |
| "loss": 0.2213, |
| "step": 26400 |
| }, |
| { |
| "epoch": 4.377271225635943, |
| "grad_norm": 0.799958348274231, |
| "learning_rate": 9.489899546060763e-06, |
| "loss": 0.2175, |
| "step": 26500 |
| }, |
| { |
| "epoch": 4.3937892302609844, |
| "grad_norm": 0.8993442058563232, |
| "learning_rate": 9.483781766570006e-06, |
| "loss": 0.2126, |
| "step": 26600 |
| }, |
| { |
| "epoch": 4.410307234886026, |
| "grad_norm": 1.000191330909729, |
| "learning_rate": 9.477663987079252e-06, |
| "loss": 0.2044, |
| "step": 26700 |
| }, |
| { |
| "epoch": 4.426825239511067, |
| "grad_norm": 1.1478374004364014, |
| "learning_rate": 9.471546207588495e-06, |
| "loss": 0.2325, |
| "step": 26800 |
| }, |
| { |
| "epoch": 4.443343244136108, |
| "grad_norm": 0.7666307091712952, |
| "learning_rate": 9.465428428097739e-06, |
| "loss": 0.2086, |
| "step": 26900 |
| }, |
| { |
| "epoch": 4.4598612487611495, |
| "grad_norm": 0.9440354108810425, |
| "learning_rate": 9.459310648606982e-06, |
| "loss": 0.2054, |
| "step": 27000 |
| }, |
| { |
| "epoch": 4.4598612487611495, |
| "eval_cer": 0.05489870645404148, |
| "eval_loss": 0.17803701758384705, |
| "eval_runtime": 48.3147, |
| "eval_samples_per_second": 35.0, |
| "eval_steps_per_second": 8.755, |
| "eval_wer": 0.30181727346530945, |
| "step": 27000 |
| }, |
| { |
| "epoch": 4.476379253386191, |
| "grad_norm": 0.975606381893158, |
| "learning_rate": 9.453192869116227e-06, |
| "loss": 0.2188, |
| "step": 27100 |
| }, |
| { |
| "epoch": 4.492897258011232, |
| "grad_norm": 1.0825639963150024, |
| "learning_rate": 9.44707508962547e-06, |
| "loss": 0.1951, |
| "step": 27200 |
| }, |
| { |
| "epoch": 4.509415262636273, |
| "grad_norm": 0.858279824256897, |
| "learning_rate": 9.440957310134714e-06, |
| "loss": 0.2202, |
| "step": 27300 |
| }, |
| { |
| "epoch": 4.525933267261315, |
| "grad_norm": 0.8295080661773682, |
| "learning_rate": 9.434839530643958e-06, |
| "loss": 0.2143, |
| "step": 27400 |
| }, |
| { |
| "epoch": 4.542451271886356, |
| "grad_norm": 1.0606642961502075, |
| "learning_rate": 9.428721751153203e-06, |
| "loss": 0.2116, |
| "step": 27500 |
| }, |
| { |
| "epoch": 4.558969276511397, |
| "grad_norm": 0.8650080561637878, |
| "learning_rate": 9.422603971662446e-06, |
| "loss": 0.202, |
| "step": 27600 |
| }, |
| { |
| "epoch": 4.575487281136438, |
| "grad_norm": 0.7315616011619568, |
| "learning_rate": 9.41648619217169e-06, |
| "loss": 0.2088, |
| "step": 27700 |
| }, |
| { |
| "epoch": 4.59200528576148, |
| "grad_norm": 0.6952201724052429, |
| "learning_rate": 9.410368412680933e-06, |
| "loss": 0.2007, |
| "step": 27800 |
| }, |
| { |
| "epoch": 4.608523290386521, |
| "grad_norm": 1.3911277055740356, |
| "learning_rate": 9.404250633190178e-06, |
| "loss": 0.2242, |
| "step": 27900 |
| }, |
| { |
| "epoch": 4.625041295011562, |
| "grad_norm": 0.9672855734825134, |
| "learning_rate": 9.398132853699422e-06, |
| "loss": 0.2021, |
| "step": 28000 |
| }, |
| { |
| "epoch": 4.625041295011562, |
| "eval_cer": 0.05435117377318459, |
| "eval_loss": 0.17107851803302765, |
| "eval_runtime": 48.3911, |
| "eval_samples_per_second": 34.944, |
| "eval_steps_per_second": 8.741, |
| "eval_wer": 0.2986323612065197, |
| "step": 28000 |
| }, |
| { |
| "epoch": 4.6415592996366035, |
| "grad_norm": 0.869714617729187, |
| "learning_rate": 9.392015074208665e-06, |
| "loss": 0.2054, |
| "step": 28100 |
| }, |
| { |
| "epoch": 4.658077304261646, |
| "grad_norm": 0.7512599229812622, |
| "learning_rate": 9.385897294717909e-06, |
| "loss": 0.2449, |
| "step": 28200 |
| }, |
| { |
| "epoch": 4.674595308886687, |
| "grad_norm": 0.8734112977981567, |
| "learning_rate": 9.379779515227154e-06, |
| "loss": 0.2067, |
| "step": 28300 |
| }, |
| { |
| "epoch": 4.691113313511728, |
| "grad_norm": 1.0213971138000488, |
| "learning_rate": 9.373661735736397e-06, |
| "loss": 0.1941, |
| "step": 28400 |
| }, |
| { |
| "epoch": 4.7076313181367695, |
| "grad_norm": 0.8136150240898132, |
| "learning_rate": 9.367543956245642e-06, |
| "loss": 0.197, |
| "step": 28500 |
| }, |
| { |
| "epoch": 4.724149322761811, |
| "grad_norm": 0.889690637588501, |
| "learning_rate": 9.361426176754886e-06, |
| "loss": 0.1887, |
| "step": 28600 |
| }, |
| { |
| "epoch": 4.740667327386852, |
| "grad_norm": 0.8246539235115051, |
| "learning_rate": 9.35530839726413e-06, |
| "loss": 0.1963, |
| "step": 28700 |
| }, |
| { |
| "epoch": 4.757185332011893, |
| "grad_norm": 1.07891845703125, |
| "learning_rate": 9.349190617773374e-06, |
| "loss": 0.2013, |
| "step": 28800 |
| }, |
| { |
| "epoch": 4.773703336636935, |
| "grad_norm": 0.8867871165275574, |
| "learning_rate": 9.343072838282618e-06, |
| "loss": 0.1969, |
| "step": 28900 |
| }, |
| { |
| "epoch": 4.790221341261976, |
| "grad_norm": 1.0651185512542725, |
| "learning_rate": 9.336955058791861e-06, |
| "loss": 0.1944, |
| "step": 29000 |
| }, |
| { |
| "epoch": 4.790221341261976, |
| "eval_cer": 0.05322188761891725, |
| "eval_loss": 0.16940154135227203, |
| "eval_runtime": 48.8775, |
| "eval_samples_per_second": 34.597, |
| "eval_steps_per_second": 8.654, |
| "eval_wer": 0.29307437706863176, |
| "step": 29000 |
| }, |
| { |
| "epoch": 4.806739345887017, |
| "grad_norm": 0.8285331726074219, |
| "learning_rate": 9.330837279301107e-06, |
| "loss": 0.2038, |
| "step": 29100 |
| }, |
| { |
| "epoch": 4.823257350512058, |
| "grad_norm": 0.8632585406303406, |
| "learning_rate": 9.32471949981035e-06, |
| "loss": 0.1947, |
| "step": 29200 |
| }, |
| { |
| "epoch": 4.8397753551371, |
| "grad_norm": 0.7332074046134949, |
| "learning_rate": 9.318601720319593e-06, |
| "loss": 0.1929, |
| "step": 29300 |
| }, |
| { |
| "epoch": 4.856293359762141, |
| "grad_norm": 0.8279902935028076, |
| "learning_rate": 9.312483940828837e-06, |
| "loss": 0.188, |
| "step": 29400 |
| }, |
| { |
| "epoch": 4.872811364387182, |
| "grad_norm": 0.887501060962677, |
| "learning_rate": 9.306366161338082e-06, |
| "loss": 0.2087, |
| "step": 29500 |
| }, |
| { |
| "epoch": 4.8893293690122235, |
| "grad_norm": 0.8915200233459473, |
| "learning_rate": 9.300248381847326e-06, |
| "loss": 0.1861, |
| "step": 29600 |
| }, |
| { |
| "epoch": 4.905847373637265, |
| "grad_norm": 0.8219689130783081, |
| "learning_rate": 9.294130602356569e-06, |
| "loss": 0.1884, |
| "step": 29700 |
| }, |
| { |
| "epoch": 4.922365378262306, |
| "grad_norm": 0.8272607326507568, |
| "learning_rate": 9.288012822865812e-06, |
| "loss": 0.1991, |
| "step": 29800 |
| }, |
| { |
| "epoch": 4.938883382887347, |
| "grad_norm": 2.43432354927063, |
| "learning_rate": 9.281895043375058e-06, |
| "loss": 0.196, |
| "step": 29900 |
| }, |
| { |
| "epoch": 4.955401387512389, |
| "grad_norm": 0.922092616558075, |
| "learning_rate": 9.275777263884301e-06, |
| "loss": 0.1933, |
| "step": 30000 |
| }, |
| { |
| "epoch": 4.955401387512389, |
| "eval_cer": 0.0528796796933817, |
| "eval_loss": 0.1640305370092392, |
| "eval_runtime": 50.5291, |
| "eval_samples_per_second": 33.466, |
| "eval_steps_per_second": 8.371, |
| "eval_wer": 0.29063885592955724, |
| "step": 30000 |
| }, |
| { |
| "epoch": 4.97191939213743, |
| "grad_norm": 0.6855641603469849, |
| "learning_rate": 9.269659484393545e-06, |
| "loss": 0.1918, |
| "step": 30100 |
| }, |
| { |
| "epoch": 4.988437396762471, |
| "grad_norm": 0.7922428846359253, |
| "learning_rate": 9.263541704902788e-06, |
| "loss": 0.2464, |
| "step": 30200 |
| }, |
| { |
| "epoch": 5.004955401387512, |
| "grad_norm": 0.9734669923782349, |
| "learning_rate": 9.257423925412033e-06, |
| "loss": 0.1909, |
| "step": 30300 |
| }, |
| { |
| "epoch": 5.021473406012554, |
| "grad_norm": 0.9491944313049316, |
| "learning_rate": 9.251306145921277e-06, |
| "loss": 0.1824, |
| "step": 30400 |
| }, |
| { |
| "epoch": 5.037991410637595, |
| "grad_norm": 1.4745386838912964, |
| "learning_rate": 9.24518836643052e-06, |
| "loss": 0.1934, |
| "step": 30500 |
| }, |
| { |
| "epoch": 5.054509415262636, |
| "grad_norm": 0.8815566897392273, |
| "learning_rate": 9.239070586939765e-06, |
| "loss": 0.1844, |
| "step": 30600 |
| }, |
| { |
| "epoch": 5.0710274198876775, |
| "grad_norm": 0.741477906703949, |
| "learning_rate": 9.232952807449009e-06, |
| "loss": 0.2308, |
| "step": 30700 |
| }, |
| { |
| "epoch": 5.087545424512719, |
| "grad_norm": 0.784695029258728, |
| "learning_rate": 9.226835027958252e-06, |
| "loss": 0.1852, |
| "step": 30800 |
| }, |
| { |
| "epoch": 5.10406342913776, |
| "grad_norm": 0.7334086298942566, |
| "learning_rate": 9.220717248467497e-06, |
| "loss": 0.1885, |
| "step": 30900 |
| }, |
| { |
| "epoch": 5.120581433762801, |
| "grad_norm": 1.0273959636688232, |
| "learning_rate": 9.21459946897674e-06, |
| "loss": 0.1885, |
| "step": 31000 |
| }, |
| { |
| "epoch": 5.120581433762801, |
| "eval_cer": 0.05156217918006981, |
| "eval_loss": 0.16276109218597412, |
| "eval_runtime": 48.3633, |
| "eval_samples_per_second": 34.965, |
| "eval_steps_per_second": 8.746, |
| "eval_wer": 0.28289514769249985, |
| "step": 31000 |
| }, |
| { |
| "epoch": 5.1370994383878426, |
| "grad_norm": 0.8171842694282532, |
| "learning_rate": 9.208481689485986e-06, |
| "loss": 0.186, |
| "step": 31100 |
| }, |
| { |
| "epoch": 5.153617443012884, |
| "grad_norm": 0.9652734994888306, |
| "learning_rate": 9.20236390999523e-06, |
| "loss": 0.1813, |
| "step": 31200 |
| }, |
| { |
| "epoch": 5.170135447637925, |
| "grad_norm": 1.062037467956543, |
| "learning_rate": 9.196246130504473e-06, |
| "loss": 0.1744, |
| "step": 31300 |
| }, |
| { |
| "epoch": 5.186653452262966, |
| "grad_norm": 0.8532341718673706, |
| "learning_rate": 9.190128351013718e-06, |
| "loss": 0.1833, |
| "step": 31400 |
| }, |
| { |
| "epoch": 5.203171456888008, |
| "grad_norm": 0.7094704508781433, |
| "learning_rate": 9.184010571522962e-06, |
| "loss": 0.1827, |
| "step": 31500 |
| }, |
| { |
| "epoch": 5.219689461513049, |
| "grad_norm": 0.8176188468933105, |
| "learning_rate": 9.177892792032205e-06, |
| "loss": 0.1896, |
| "step": 31600 |
| }, |
| { |
| "epoch": 5.23620746613809, |
| "grad_norm": 0.9988218545913696, |
| "learning_rate": 9.171775012541448e-06, |
| "loss": 0.1953, |
| "step": 31700 |
| }, |
| { |
| "epoch": 5.2527254707631315, |
| "grad_norm": 1.0254257917404175, |
| "learning_rate": 9.165657233050694e-06, |
| "loss": 0.1858, |
| "step": 31800 |
| }, |
| { |
| "epoch": 5.269243475388173, |
| "grad_norm": 0.7182506918907166, |
| "learning_rate": 9.159539453559937e-06, |
| "loss": 0.2522, |
| "step": 31900 |
| }, |
| { |
| "epoch": 5.285761480013214, |
| "grad_norm": 0.8318942189216614, |
| "learning_rate": 9.15342167406918e-06, |
| "loss": 0.1871, |
| "step": 32000 |
| }, |
| { |
| "epoch": 5.285761480013214, |
| "eval_cer": 0.05092909451782903, |
| "eval_loss": 0.15823741257190704, |
| "eval_runtime": 48.5699, |
| "eval_samples_per_second": 34.816, |
| "eval_steps_per_second": 8.709, |
| "eval_wer": 0.28108411915318804, |
| "step": 32000 |
| }, |
| { |
| "epoch": 5.302279484638255, |
| "grad_norm": 0.6533938050270081, |
| "learning_rate": 9.147303894578424e-06, |
| "loss": 0.1801, |
| "step": 32100 |
| }, |
| { |
| "epoch": 5.3187974892632965, |
| "grad_norm": 0.8273053169250488, |
| "learning_rate": 9.141186115087669e-06, |
| "loss": 0.1815, |
| "step": 32200 |
| }, |
| { |
| "epoch": 5.335315493888339, |
| "grad_norm": 0.8062841892242432, |
| "learning_rate": 9.135068335596913e-06, |
| "loss": 0.2024, |
| "step": 32300 |
| }, |
| { |
| "epoch": 5.35183349851338, |
| "grad_norm": 0.9883460402488708, |
| "learning_rate": 9.128950556106156e-06, |
| "loss": 0.176, |
| "step": 32400 |
| }, |
| { |
| "epoch": 5.368351503138421, |
| "grad_norm": 1.0027878284454346, |
| "learning_rate": 9.1228327766154e-06, |
| "loss": 0.2513, |
| "step": 32500 |
| }, |
| { |
| "epoch": 5.3848695077634625, |
| "grad_norm": 0.6766846776008606, |
| "learning_rate": 9.116714997124645e-06, |
| "loss": 0.2155, |
| "step": 32600 |
| }, |
| { |
| "epoch": 5.401387512388504, |
| "grad_norm": 0.7808175086975098, |
| "learning_rate": 9.110597217633888e-06, |
| "loss": 0.188, |
| "step": 32700 |
| }, |
| { |
| "epoch": 5.417905517013545, |
| "grad_norm": 0.7467240691184998, |
| "learning_rate": 9.104479438143132e-06, |
| "loss": 0.182, |
| "step": 32800 |
| }, |
| { |
| "epoch": 5.434423521638586, |
| "grad_norm": 0.6580876708030701, |
| "learning_rate": 9.098361658652377e-06, |
| "loss": 0.1771, |
| "step": 32900 |
| }, |
| { |
| "epoch": 5.450941526263628, |
| "grad_norm": 0.7481684684753418, |
| "learning_rate": 9.09224387916162e-06, |
| "loss": 0.1811, |
| "step": 33000 |
| }, |
| { |
| "epoch": 5.450941526263628, |
| "eval_cer": 0.05002224351515981, |
| "eval_loss": 0.15250813961029053, |
| "eval_runtime": 49.105, |
| "eval_samples_per_second": 34.436, |
| "eval_steps_per_second": 8.614, |
| "eval_wer": 0.2747142946356086, |
| "step": 33000 |
| }, |
| { |
| "epoch": 5.467459530888669, |
| "grad_norm": 0.9323834180831909, |
| "learning_rate": 9.086126099670864e-06, |
| "loss": 0.2123, |
| "step": 33100 |
| }, |
| { |
| "epoch": 5.48397753551371, |
| "grad_norm": 0.779329776763916, |
| "learning_rate": 9.080008320180109e-06, |
| "loss": 0.1807, |
| "step": 33200 |
| }, |
| { |
| "epoch": 5.500495540138751, |
| "grad_norm": 1.0125453472137451, |
| "learning_rate": 9.073890540689352e-06, |
| "loss": 0.1746, |
| "step": 33300 |
| }, |
| { |
| "epoch": 5.517013544763793, |
| "grad_norm": 0.8062576055526733, |
| "learning_rate": 9.067772761198596e-06, |
| "loss": 0.1747, |
| "step": 33400 |
| }, |
| { |
| "epoch": 5.533531549388834, |
| "grad_norm": 0.817570686340332, |
| "learning_rate": 9.061654981707841e-06, |
| "loss": 0.1707, |
| "step": 33500 |
| }, |
| { |
| "epoch": 5.550049554013875, |
| "grad_norm": 0.7053462266921997, |
| "learning_rate": 9.055537202217084e-06, |
| "loss": 0.1777, |
| "step": 33600 |
| }, |
| { |
| "epoch": 5.5665675586389165, |
| "grad_norm": 0.8066178560256958, |
| "learning_rate": 9.049419422726328e-06, |
| "loss": 0.171, |
| "step": 33700 |
| }, |
| { |
| "epoch": 5.583085563263958, |
| "grad_norm": 0.8742169141769409, |
| "learning_rate": 9.043301643235573e-06, |
| "loss": 0.1721, |
| "step": 33800 |
| }, |
| { |
| "epoch": 5.599603567888999, |
| "grad_norm": 0.7562609314918518, |
| "learning_rate": 9.037183863744816e-06, |
| "loss": 0.2377, |
| "step": 33900 |
| }, |
| { |
| "epoch": 5.61612157251404, |
| "grad_norm": 1.084651231765747, |
| "learning_rate": 9.03106608425406e-06, |
| "loss": 0.2145, |
| "step": 34000 |
| }, |
| { |
| "epoch": 5.61612157251404, |
| "eval_cer": 0.04938060365478064, |
| "eval_loss": 0.14984501898288727, |
| "eval_runtime": 48.5125, |
| "eval_samples_per_second": 34.857, |
| "eval_steps_per_second": 8.719, |
| "eval_wer": 0.2715918316367951, |
| "step": 34000 |
| }, |
| { |
| "epoch": 5.632639577139082, |
| "grad_norm": 0.8906255960464478, |
| "learning_rate": 9.024948304763303e-06, |
| "loss": 0.188, |
| "step": 34100 |
| }, |
| { |
| "epoch": 5.649157581764123, |
| "grad_norm": 0.8091058135032654, |
| "learning_rate": 9.018830525272549e-06, |
| "loss": 0.1696, |
| "step": 34200 |
| }, |
| { |
| "epoch": 5.665675586389164, |
| "grad_norm": 0.9051063656806946, |
| "learning_rate": 9.012712745781792e-06, |
| "loss": 0.1744, |
| "step": 34300 |
| }, |
| { |
| "epoch": 5.682193591014205, |
| "grad_norm": 1.0009392499923706, |
| "learning_rate": 9.006594966291035e-06, |
| "loss": 0.1738, |
| "step": 34400 |
| }, |
| { |
| "epoch": 5.698711595639247, |
| "grad_norm": 0.8981117010116577, |
| "learning_rate": 9.000477186800279e-06, |
| "loss": 0.1855, |
| "step": 34500 |
| }, |
| { |
| "epoch": 5.715229600264288, |
| "grad_norm": 0.9005815386772156, |
| "learning_rate": 8.994359407309524e-06, |
| "loss": 0.1784, |
| "step": 34600 |
| }, |
| { |
| "epoch": 5.731747604889329, |
| "grad_norm": 0.9097332954406738, |
| "learning_rate": 8.988241627818768e-06, |
| "loss": 0.1833, |
| "step": 34700 |
| }, |
| { |
| "epoch": 5.7482656095143705, |
| "grad_norm": 0.7952153086662292, |
| "learning_rate": 8.982123848328011e-06, |
| "loss": 0.1676, |
| "step": 34800 |
| }, |
| { |
| "epoch": 5.764783614139412, |
| "grad_norm": 0.840761661529541, |
| "learning_rate": 8.976006068837254e-06, |
| "loss": 0.1744, |
| "step": 34900 |
| }, |
| { |
| "epoch": 5.781301618764453, |
| "grad_norm": 0.972186267375946, |
| "learning_rate": 8.9698882893465e-06, |
| "loss": 0.1713, |
| "step": 35000 |
| }, |
| { |
| "epoch": 5.781301618764453, |
| "eval_cer": 0.049508931626856476, |
| "eval_loss": 0.14971515536308289, |
| "eval_runtime": 48.1943, |
| "eval_samples_per_second": 35.087, |
| "eval_steps_per_second": 8.777, |
| "eval_wer": 0.2710297882970087, |
| "step": 35000 |
| }, |
| { |
| "epoch": 5.797819623389494, |
| "grad_norm": 0.7469506859779358, |
| "learning_rate": 8.963770509855743e-06, |
| "loss": 0.165, |
| "step": 35100 |
| }, |
| { |
| "epoch": 5.814337628014536, |
| "grad_norm": 0.8786357045173645, |
| "learning_rate": 8.957652730364987e-06, |
| "loss": 0.1715, |
| "step": 35200 |
| }, |
| { |
| "epoch": 5.830855632639577, |
| "grad_norm": 0.8044286370277405, |
| "learning_rate": 8.951534950874232e-06, |
| "loss": 0.1687, |
| "step": 35300 |
| }, |
| { |
| "epoch": 5.847373637264618, |
| "grad_norm": 0.859174370765686, |
| "learning_rate": 8.945417171383475e-06, |
| "loss": 0.1658, |
| "step": 35400 |
| }, |
| { |
| "epoch": 5.863891641889659, |
| "grad_norm": 0.8090763092041016, |
| "learning_rate": 8.93929939189272e-06, |
| "loss": 0.1836, |
| "step": 35500 |
| }, |
| { |
| "epoch": 5.880409646514701, |
| "grad_norm": 0.8392448425292969, |
| "learning_rate": 8.933181612401964e-06, |
| "loss": 0.1796, |
| "step": 35600 |
| }, |
| { |
| "epoch": 5.896927651139742, |
| "grad_norm": 0.8285984396934509, |
| "learning_rate": 8.927063832911207e-06, |
| "loss": 0.1703, |
| "step": 35700 |
| }, |
| { |
| "epoch": 5.913445655764784, |
| "grad_norm": 0.9240791201591492, |
| "learning_rate": 8.920946053420452e-06, |
| "loss": 0.2069, |
| "step": 35800 |
| }, |
| { |
| "epoch": 5.929963660389825, |
| "grad_norm": 0.9001137018203735, |
| "learning_rate": 8.914828273929696e-06, |
| "loss": 0.1724, |
| "step": 35900 |
| }, |
| { |
| "epoch": 5.946481665014867, |
| "grad_norm": 0.8891072273254395, |
| "learning_rate": 8.90871049443894e-06, |
| "loss": 0.1766, |
| "step": 36000 |
| }, |
| { |
| "epoch": 5.946481665014867, |
| "eval_cer": 0.04906406132366026, |
| "eval_loss": 0.14797988533973694, |
| "eval_runtime": 48.6094, |
| "eval_samples_per_second": 34.787, |
| "eval_steps_per_second": 8.702, |
| "eval_wer": 0.2692187597576969, |
| "step": 36000 |
| }, |
| { |
| "epoch": 5.962999669639908, |
| "grad_norm": 1.0912429094314575, |
| "learning_rate": 8.902592714948183e-06, |
| "loss": 0.1653, |
| "step": 36100 |
| }, |
| { |
| "epoch": 5.979517674264949, |
| "grad_norm": 1.203782320022583, |
| "learning_rate": 8.896474935457428e-06, |
| "loss": 0.1675, |
| "step": 36200 |
| }, |
| { |
| "epoch": 5.9960356788899905, |
| "grad_norm": 0.7841401696205139, |
| "learning_rate": 8.890357155966671e-06, |
| "loss": 0.1652, |
| "step": 36300 |
| }, |
| { |
| "epoch": 6.012553683515032, |
| "grad_norm": 0.841820478439331, |
| "learning_rate": 8.884239376475915e-06, |
| "loss": 0.1677, |
| "step": 36400 |
| }, |
| { |
| "epoch": 6.029071688140073, |
| "grad_norm": 0.8913053870201111, |
| "learning_rate": 8.878121596985158e-06, |
| "loss": 0.1613, |
| "step": 36500 |
| }, |
| { |
| "epoch": 6.045589692765114, |
| "grad_norm": 1.3167953491210938, |
| "learning_rate": 8.872003817494404e-06, |
| "loss": 0.1639, |
| "step": 36600 |
| }, |
| { |
| "epoch": 6.0621076973901555, |
| "grad_norm": 0.7834457159042358, |
| "learning_rate": 8.865886038003647e-06, |
| "loss": 0.1743, |
| "step": 36700 |
| }, |
| { |
| "epoch": 6.078625702015197, |
| "grad_norm": 0.790767252445221, |
| "learning_rate": 8.85976825851289e-06, |
| "loss": 0.1736, |
| "step": 36800 |
| }, |
| { |
| "epoch": 6.095143706640238, |
| "grad_norm": 0.8585237860679626, |
| "learning_rate": 8.853650479022134e-06, |
| "loss": 0.1764, |
| "step": 36900 |
| }, |
| { |
| "epoch": 6.111661711265279, |
| "grad_norm": 0.9544495344161987, |
| "learning_rate": 8.847532699531379e-06, |
| "loss": 0.1662, |
| "step": 37000 |
| }, |
| { |
| "epoch": 6.111661711265279, |
| "eval_cer": 0.04808021353774553, |
| "eval_loss": 0.1443062424659729, |
| "eval_runtime": 48.3927, |
| "eval_samples_per_second": 34.943, |
| "eval_steps_per_second": 8.741, |
| "eval_wer": 0.2633485293199276, |
| "step": 37000 |
| }, |
| { |
| "epoch": 6.128179715890321, |
| "grad_norm": 0.6950782537460327, |
| "learning_rate": 8.841414920040623e-06, |
| "loss": 0.1574, |
| "step": 37100 |
| }, |
| { |
| "epoch": 6.144697720515362, |
| "grad_norm": 0.7609145641326904, |
| "learning_rate": 8.835297140549866e-06, |
| "loss": 0.1699, |
| "step": 37200 |
| }, |
| { |
| "epoch": 6.161215725140403, |
| "grad_norm": 0.6571762561798096, |
| "learning_rate": 8.829179361059111e-06, |
| "loss": 0.1767, |
| "step": 37300 |
| }, |
| { |
| "epoch": 6.177733729765444, |
| "grad_norm": 1.0556763410568237, |
| "learning_rate": 8.823061581568355e-06, |
| "loss": 0.1562, |
| "step": 37400 |
| }, |
| { |
| "epoch": 6.194251734390486, |
| "grad_norm": 2.556347131729126, |
| "learning_rate": 8.816943802077598e-06, |
| "loss": 0.1628, |
| "step": 37500 |
| }, |
| { |
| "epoch": 6.210769739015527, |
| "grad_norm": 0.7034619450569153, |
| "learning_rate": 8.810826022586843e-06, |
| "loss": 0.1953, |
| "step": 37600 |
| }, |
| { |
| "epoch": 6.227287743640568, |
| "grad_norm": 0.7905510067939758, |
| "learning_rate": 8.804708243096087e-06, |
| "loss": 0.163, |
| "step": 37700 |
| }, |
| { |
| "epoch": 6.2438057482656095, |
| "grad_norm": 0.5802010893821716, |
| "learning_rate": 8.79859046360533e-06, |
| "loss": 0.1669, |
| "step": 37800 |
| }, |
| { |
| "epoch": 6.260323752890651, |
| "grad_norm": 0.6908354163169861, |
| "learning_rate": 8.792472684114575e-06, |
| "loss": 0.1594, |
| "step": 37900 |
| }, |
| { |
| "epoch": 6.276841757515692, |
| "grad_norm": 0.5971213579177856, |
| "learning_rate": 8.786354904623819e-06, |
| "loss": 0.1629, |
| "step": 38000 |
| }, |
| { |
| "epoch": 6.276841757515692, |
| "eval_cer": 0.04748134966805831, |
| "eval_loss": 0.14269790053367615, |
| "eval_runtime": 48.0871, |
| "eval_samples_per_second": 35.165, |
| "eval_steps_per_second": 8.797, |
| "eval_wer": 0.2614126022606632, |
| "step": 38000 |
| }, |
| { |
| "epoch": 6.293359762140733, |
| "grad_norm": 0.6142451763153076, |
| "learning_rate": 8.780237125133062e-06, |
| "loss": 0.1564, |
| "step": 38100 |
| }, |
| { |
| "epoch": 6.309877766765775, |
| "grad_norm": 0.6337829232215881, |
| "learning_rate": 8.774119345642307e-06, |
| "loss": 0.1605, |
| "step": 38200 |
| }, |
| { |
| "epoch": 6.326395771390816, |
| "grad_norm": 0.9899505972862244, |
| "learning_rate": 8.76800156615155e-06, |
| "loss": 0.1568, |
| "step": 38300 |
| }, |
| { |
| "epoch": 6.342913776015857, |
| "grad_norm": 0.8140648007392883, |
| "learning_rate": 8.761883786660794e-06, |
| "loss": 0.1671, |
| "step": 38400 |
| }, |
| { |
| "epoch": 6.359431780640898, |
| "grad_norm": 0.9894407987594604, |
| "learning_rate": 8.755766007170038e-06, |
| "loss": 0.1635, |
| "step": 38500 |
| }, |
| { |
| "epoch": 6.37594978526594, |
| "grad_norm": 0.6572480797767639, |
| "learning_rate": 8.749648227679283e-06, |
| "loss": 0.1673, |
| "step": 38600 |
| }, |
| { |
| "epoch": 6.392467789890981, |
| "grad_norm": 0.6784759759902954, |
| "learning_rate": 8.743530448188526e-06, |
| "loss": 0.1602, |
| "step": 38700 |
| }, |
| { |
| "epoch": 6.408985794516022, |
| "grad_norm": 1.053363561630249, |
| "learning_rate": 8.73741266869777e-06, |
| "loss": 0.1649, |
| "step": 38800 |
| }, |
| { |
| "epoch": 6.4255037991410635, |
| "grad_norm": 0.8504717946052551, |
| "learning_rate": 8.731294889207013e-06, |
| "loss": 0.1679, |
| "step": 38900 |
| }, |
| { |
| "epoch": 6.442021803766105, |
| "grad_norm": 0.8182229399681091, |
| "learning_rate": 8.725177109716258e-06, |
| "loss": 0.1607, |
| "step": 39000 |
| }, |
| { |
| "epoch": 6.442021803766105, |
| "eval_cer": 0.047164807336937925, |
| "eval_loss": 0.139786496758461, |
| "eval_runtime": 48.4561, |
| "eval_samples_per_second": 34.898, |
| "eval_steps_per_second": 8.73, |
| "eval_wer": 0.2596015737213514, |
| "step": 39000 |
| }, |
| { |
| "epoch": 6.458539808391146, |
| "grad_norm": 0.7049907445907593, |
| "learning_rate": 8.719059330225502e-06, |
| "loss": 0.1582, |
| "step": 39100 |
| }, |
| { |
| "epoch": 6.475057813016187, |
| "grad_norm": 0.928734540939331, |
| "learning_rate": 8.712941550734745e-06, |
| "loss": 0.1542, |
| "step": 39200 |
| }, |
| { |
| "epoch": 6.491575817641229, |
| "grad_norm": 0.7158243656158447, |
| "learning_rate": 8.706823771243989e-06, |
| "loss": 0.2327, |
| "step": 39300 |
| }, |
| { |
| "epoch": 6.50809382226627, |
| "grad_norm": 0.7434096336364746, |
| "learning_rate": 8.700705991753234e-06, |
| "loss": 0.1557, |
| "step": 39400 |
| }, |
| { |
| "epoch": 6.524611826891311, |
| "grad_norm": 0.9645175933837891, |
| "learning_rate": 8.694588212262477e-06, |
| "loss": 0.1658, |
| "step": 39500 |
| }, |
| { |
| "epoch": 6.541129831516352, |
| "grad_norm": 0.7772352695465088, |
| "learning_rate": 8.688470432771721e-06, |
| "loss": 0.1707, |
| "step": 39600 |
| }, |
| { |
| "epoch": 6.557647836141394, |
| "grad_norm": 0.7710452675819397, |
| "learning_rate": 8.682352653280966e-06, |
| "loss": 0.1546, |
| "step": 39700 |
| }, |
| { |
| "epoch": 6.574165840766435, |
| "grad_norm": 0.6807363033294678, |
| "learning_rate": 8.67623487379021e-06, |
| "loss": 0.1524, |
| "step": 39800 |
| }, |
| { |
| "epoch": 6.590683845391476, |
| "grad_norm": 0.6985335350036621, |
| "learning_rate": 8.670117094299455e-06, |
| "loss": 0.2057, |
| "step": 39900 |
| }, |
| { |
| "epoch": 6.607201850016518, |
| "grad_norm": 0.6793562173843384, |
| "learning_rate": 8.663999314808698e-06, |
| "loss": 0.1616, |
| "step": 40000 |
| }, |
| { |
| "epoch": 6.607201850016518, |
| "eval_cer": 0.04697659297789337, |
| "eval_loss": 0.14157184958457947, |
| "eval_runtime": 48.5024, |
| "eval_samples_per_second": 34.864, |
| "eval_steps_per_second": 8.721, |
| "eval_wer": 0.25810279148192095, |
| "step": 40000 |
| }, |
| { |
| "epoch": 6.62371985464156, |
| "grad_norm": 0.9306012988090515, |
| "learning_rate": 8.657881535317942e-06, |
| "loss": 0.1916, |
| "step": 40100 |
| }, |
| { |
| "epoch": 6.640237859266601, |
| "grad_norm": 0.7695144414901733, |
| "learning_rate": 8.651763755827187e-06, |
| "loss": 0.1581, |
| "step": 40200 |
| }, |
| { |
| "epoch": 6.656755863891642, |
| "grad_norm": 0.9468954205513, |
| "learning_rate": 8.64564597633643e-06, |
| "loss": 0.1543, |
| "step": 40300 |
| }, |
| { |
| "epoch": 6.6732738685166835, |
| "grad_norm": 0.9969133138656616, |
| "learning_rate": 8.639528196845674e-06, |
| "loss": 0.2421, |
| "step": 40400 |
| }, |
| { |
| "epoch": 6.689791873141725, |
| "grad_norm": 0.7657153606414795, |
| "learning_rate": 8.633410417354917e-06, |
| "loss": 0.1498, |
| "step": 40500 |
| }, |
| { |
| "epoch": 6.706309877766766, |
| "grad_norm": 0.6543861031532288, |
| "learning_rate": 8.627292637864162e-06, |
| "loss": 0.1592, |
| "step": 40600 |
| }, |
| { |
| "epoch": 6.722827882391807, |
| "grad_norm": 0.7110750675201416, |
| "learning_rate": 8.621174858373406e-06, |
| "loss": 0.1871, |
| "step": 40700 |
| }, |
| { |
| "epoch": 6.739345887016849, |
| "grad_norm": 0.6737387776374817, |
| "learning_rate": 8.61505707888265e-06, |
| "loss": 0.1634, |
| "step": 40800 |
| }, |
| { |
| "epoch": 6.75586389164189, |
| "grad_norm": 0.9051392078399658, |
| "learning_rate": 8.608939299391893e-06, |
| "loss": 0.1535, |
| "step": 40900 |
| }, |
| { |
| "epoch": 6.772381896266931, |
| "grad_norm": 0.7674338221549988, |
| "learning_rate": 8.602821519901138e-06, |
| "loss": 0.1512, |
| "step": 41000 |
| }, |
| { |
| "epoch": 6.772381896266931, |
| "eval_cer": 0.046300732324960646, |
| "eval_loss": 0.13961651921272278, |
| "eval_runtime": 48.6061, |
| "eval_samples_per_second": 34.79, |
| "eval_steps_per_second": 8.703, |
| "eval_wer": 0.2526697058639855, |
| "step": 41000 |
| }, |
| { |
| "epoch": 6.788899900891972, |
| "grad_norm": 0.9694798588752747, |
| "learning_rate": 8.596703740410381e-06, |
| "loss": 0.1555, |
| "step": 41100 |
| }, |
| { |
| "epoch": 6.805417905517014, |
| "grad_norm": 0.7667569518089294, |
| "learning_rate": 8.590585960919625e-06, |
| "loss": 0.1791, |
| "step": 41200 |
| }, |
| { |
| "epoch": 6.821935910142055, |
| "grad_norm": 0.8261126279830933, |
| "learning_rate": 8.584468181428868e-06, |
| "loss": 0.1554, |
| "step": 41300 |
| }, |
| { |
| "epoch": 6.838453914767096, |
| "grad_norm": 0.8309662342071533, |
| "learning_rate": 8.578350401938113e-06, |
| "loss": 0.1863, |
| "step": 41400 |
| }, |
| { |
| "epoch": 6.8549719193921375, |
| "grad_norm": 0.8523270487785339, |
| "learning_rate": 8.572232622447357e-06, |
| "loss": 0.1847, |
| "step": 41500 |
| }, |
| { |
| "epoch": 6.871489924017179, |
| "grad_norm": 0.7406333684921265, |
| "learning_rate": 8.5661148429566e-06, |
| "loss": 0.1486, |
| "step": 41600 |
| }, |
| { |
| "epoch": 6.88800792864222, |
| "grad_norm": 0.6426775455474854, |
| "learning_rate": 8.559997063465845e-06, |
| "loss": 0.1485, |
| "step": 41700 |
| }, |
| { |
| "epoch": 6.904525933267261, |
| "grad_norm": 0.705653727054596, |
| "learning_rate": 8.553879283975089e-06, |
| "loss": 0.1645, |
| "step": 41800 |
| }, |
| { |
| "epoch": 6.9210439378923025, |
| "grad_norm": 0.7143226265907288, |
| "learning_rate": 8.547761504484332e-06, |
| "loss": 0.1483, |
| "step": 41900 |
| }, |
| { |
| "epoch": 6.937561942517344, |
| "grad_norm": 0.6951993107795715, |
| "learning_rate": 8.541643724993578e-06, |
| "loss": 0.1497, |
| "step": 42000 |
| }, |
| { |
| "epoch": 6.937561942517344, |
| "eval_cer": 0.046266511532407094, |
| "eval_loss": 0.13769099116325378, |
| "eval_runtime": 48.8917, |
| "eval_samples_per_second": 34.587, |
| "eval_steps_per_second": 8.652, |
| "eval_wer": 0.2522950103041279, |
| "step": 42000 |
| }, |
| { |
| "epoch": 6.954079947142385, |
| "grad_norm": 0.781966507434845, |
| "learning_rate": 8.535525945502821e-06, |
| "loss": 0.1469, |
| "step": 42100 |
| }, |
| { |
| "epoch": 6.970597951767426, |
| "grad_norm": 0.7758731842041016, |
| "learning_rate": 8.529408166012065e-06, |
| "loss": 0.1523, |
| "step": 42200 |
| }, |
| { |
| "epoch": 6.987115956392468, |
| "grad_norm": 0.9695360064506531, |
| "learning_rate": 8.52329038652131e-06, |
| "loss": 0.1475, |
| "step": 42300 |
| }, |
| { |
| "epoch": 7.003633961017509, |
| "grad_norm": 0.8952251672744751, |
| "learning_rate": 8.517172607030553e-06, |
| "loss": 0.1509, |
| "step": 42400 |
| }, |
| { |
| "epoch": 7.02015196564255, |
| "grad_norm": 0.649591326713562, |
| "learning_rate": 8.511054827539797e-06, |
| "loss": 0.1454, |
| "step": 42500 |
| }, |
| { |
| "epoch": 7.0366699702675914, |
| "grad_norm": 0.6509200930595398, |
| "learning_rate": 8.504937048049042e-06, |
| "loss": 0.1546, |
| "step": 42600 |
| }, |
| { |
| "epoch": 7.053187974892633, |
| "grad_norm": 0.7854897975921631, |
| "learning_rate": 8.498819268558285e-06, |
| "loss": 0.1519, |
| "step": 42700 |
| }, |
| { |
| "epoch": 7.069705979517674, |
| "grad_norm": 0.7244015336036682, |
| "learning_rate": 8.492701489067529e-06, |
| "loss": 0.1485, |
| "step": 42800 |
| }, |
| { |
| "epoch": 7.086223984142715, |
| "grad_norm": 0.8875409960746765, |
| "learning_rate": 8.486583709576772e-06, |
| "loss": 0.1577, |
| "step": 42900 |
| }, |
| { |
| "epoch": 7.1027419887677565, |
| "grad_norm": 1.101585030555725, |
| "learning_rate": 8.480465930086017e-06, |
| "loss": 0.158, |
| "step": 43000 |
| }, |
| { |
| "epoch": 7.1027419887677565, |
| "eval_cer": 0.045624871672027924, |
| "eval_loss": 0.13763436675071716, |
| "eval_runtime": 48.2628, |
| "eval_samples_per_second": 35.037, |
| "eval_steps_per_second": 8.765, |
| "eval_wer": 0.24960969212514833, |
| "step": 43000 |
| }, |
| { |
| "epoch": 7.119259993392798, |
| "grad_norm": 0.7627003788948059, |
| "learning_rate": 8.47434815059526e-06, |
| "loss": 0.1601, |
| "step": 43100 |
| }, |
| { |
| "epoch": 7.135777998017839, |
| "grad_norm": 0.9609680771827698, |
| "learning_rate": 8.468230371104504e-06, |
| "loss": 0.1512, |
| "step": 43200 |
| }, |
| { |
| "epoch": 7.15229600264288, |
| "grad_norm": 0.9379695057868958, |
| "learning_rate": 8.462112591613748e-06, |
| "loss": 0.153, |
| "step": 43300 |
| }, |
| { |
| "epoch": 7.168814007267922, |
| "grad_norm": 0.6494946479797363, |
| "learning_rate": 8.455994812122993e-06, |
| "loss": 0.1518, |
| "step": 43400 |
| }, |
| { |
| "epoch": 7.185332011892964, |
| "grad_norm": 1.0411746501922607, |
| "learning_rate": 8.449877032632236e-06, |
| "loss": 0.1513, |
| "step": 43500 |
| }, |
| { |
| "epoch": 7.201850016518005, |
| "grad_norm": 0.6832746863365173, |
| "learning_rate": 8.44375925314148e-06, |
| "loss": 0.146, |
| "step": 43600 |
| }, |
| { |
| "epoch": 7.218368021143046, |
| "grad_norm": 0.7576162219047546, |
| "learning_rate": 8.437641473650723e-06, |
| "loss": 0.153, |
| "step": 43700 |
| }, |
| { |
| "epoch": 7.234886025768088, |
| "grad_norm": 0.7100921273231506, |
| "learning_rate": 8.431523694159968e-06, |
| "loss": 0.1504, |
| "step": 43800 |
| }, |
| { |
| "epoch": 7.251404030393129, |
| "grad_norm": 1.0959173440933228, |
| "learning_rate": 8.425405914669212e-06, |
| "loss": 0.1456, |
| "step": 43900 |
| }, |
| { |
| "epoch": 7.26792203501817, |
| "grad_norm": 0.6423998475074768, |
| "learning_rate": 8.419288135178455e-06, |
| "loss": 0.1498, |
| "step": 44000 |
| }, |
| { |
| "epoch": 7.26792203501817, |
| "eval_cer": 0.04530832934090753, |
| "eval_loss": 0.13714681565761566, |
| "eval_runtime": 48.9228, |
| "eval_samples_per_second": 34.565, |
| "eval_steps_per_second": 8.646, |
| "eval_wer": 0.24786111284581278, |
| "step": 44000 |
| }, |
| { |
| "epoch": 7.284440039643211, |
| "grad_norm": 0.8662620782852173, |
| "learning_rate": 8.4131703556877e-06, |
| "loss": 0.152, |
| "step": 44100 |
| }, |
| { |
| "epoch": 7.300958044268253, |
| "grad_norm": 0.6693345308303833, |
| "learning_rate": 8.407052576196944e-06, |
| "loss": 0.1499, |
| "step": 44200 |
| }, |
| { |
| "epoch": 7.317476048893294, |
| "grad_norm": 1.1424570083618164, |
| "learning_rate": 8.400934796706189e-06, |
| "loss": 0.1524, |
| "step": 44300 |
| }, |
| { |
| "epoch": 7.333994053518335, |
| "grad_norm": 0.9028821587562561, |
| "learning_rate": 8.394817017215433e-06, |
| "loss": 0.1433, |
| "step": 44400 |
| }, |
| { |
| "epoch": 7.3505120581433765, |
| "grad_norm": 0.7449358701705933, |
| "learning_rate": 8.388699237724676e-06, |
| "loss": 0.1497, |
| "step": 44500 |
| }, |
| { |
| "epoch": 7.367030062768418, |
| "grad_norm": 0.8447193503379822, |
| "learning_rate": 8.382581458233921e-06, |
| "loss": 0.1531, |
| "step": 44600 |
| }, |
| { |
| "epoch": 7.383548067393459, |
| "grad_norm": 0.661593496799469, |
| "learning_rate": 8.376463678743165e-06, |
| "loss": 0.1463, |
| "step": 44700 |
| }, |
| { |
| "epoch": 7.4000660720185, |
| "grad_norm": 0.9252416491508484, |
| "learning_rate": 8.370345899252408e-06, |
| "loss": 0.1459, |
| "step": 44800 |
| }, |
| { |
| "epoch": 7.416584076643542, |
| "grad_norm": 0.8766170740127563, |
| "learning_rate": 8.364228119761652e-06, |
| "loss": 0.1447, |
| "step": 44900 |
| }, |
| { |
| "epoch": 7.433102081268583, |
| "grad_norm": 0.7817343473434448, |
| "learning_rate": 8.358110340270897e-06, |
| "loss": 0.1477, |
| "step": 45000 |
| }, |
| { |
| "epoch": 7.433102081268583, |
| "eval_cer": 0.04499178700978715, |
| "eval_loss": 0.13690289855003357, |
| "eval_runtime": 48.2135, |
| "eval_samples_per_second": 35.073, |
| "eval_steps_per_second": 8.773, |
| "eval_wer": 0.24592518578654843, |
| "step": 45000 |
| }, |
| { |
| "epoch": 7.449620085893624, |
| "grad_norm": 0.891497790813446, |
| "learning_rate": 8.35199256078014e-06, |
| "loss": 0.1461, |
| "step": 45100 |
| }, |
| { |
| "epoch": 7.466138090518665, |
| "grad_norm": 0.7994738221168518, |
| "learning_rate": 8.345874781289384e-06, |
| "loss": 0.1498, |
| "step": 45200 |
| }, |
| { |
| "epoch": 7.482656095143707, |
| "grad_norm": 0.9430075287818909, |
| "learning_rate": 8.339757001798627e-06, |
| "loss": 0.1939, |
| "step": 45300 |
| }, |
| { |
| "epoch": 7.499174099768748, |
| "grad_norm": 0.7117358446121216, |
| "learning_rate": 8.333639222307872e-06, |
| "loss": 0.149, |
| "step": 45400 |
| }, |
| { |
| "epoch": 7.515692104393789, |
| "grad_norm": 0.6447555422782898, |
| "learning_rate": 8.327521442817116e-06, |
| "loss": 0.1509, |
| "step": 45500 |
| }, |
| { |
| "epoch": 7.5322101090188305, |
| "grad_norm": 0.6948108077049255, |
| "learning_rate": 8.32140366332636e-06, |
| "loss": 0.1488, |
| "step": 45600 |
| }, |
| { |
| "epoch": 7.548728113643872, |
| "grad_norm": 1.0623235702514648, |
| "learning_rate": 8.315285883835603e-06, |
| "loss": 0.135, |
| "step": 45700 |
| }, |
| { |
| "epoch": 7.565246118268913, |
| "grad_norm": 0.7654304504394531, |
| "learning_rate": 8.309168104344848e-06, |
| "loss": 0.1352, |
| "step": 45800 |
| }, |
| { |
| "epoch": 7.581764122893954, |
| "grad_norm": 0.8501843810081482, |
| "learning_rate": 8.303050324854091e-06, |
| "loss": 0.1487, |
| "step": 45900 |
| }, |
| { |
| "epoch": 7.598282127518996, |
| "grad_norm": 0.7539622783660889, |
| "learning_rate": 8.296932545363335e-06, |
| "loss": 0.1449, |
| "step": 46000 |
| }, |
| { |
| "epoch": 7.598282127518996, |
| "eval_cer": 0.04526555335021559, |
| "eval_loss": 0.13489525020122528, |
| "eval_runtime": 48.4075, |
| "eval_samples_per_second": 34.933, |
| "eval_steps_per_second": 8.738, |
| "eval_wer": 0.24605008430650097, |
| "step": 46000 |
| }, |
| { |
| "epoch": 7.614800132144037, |
| "grad_norm": 0.679431140422821, |
| "learning_rate": 8.29081476587258e-06, |
| "loss": 0.1418, |
| "step": 46100 |
| }, |
| { |
| "epoch": 7.631318136769078, |
| "grad_norm": 0.9577202796936035, |
| "learning_rate": 8.284696986381823e-06, |
| "loss": 0.1444, |
| "step": 46200 |
| }, |
| { |
| "epoch": 7.647836141394119, |
| "grad_norm": 0.7873533964157104, |
| "learning_rate": 8.278579206891067e-06, |
| "loss": 0.1443, |
| "step": 46300 |
| }, |
| { |
| "epoch": 7.664354146019161, |
| "grad_norm": 0.9830496907234192, |
| "learning_rate": 8.272461427400312e-06, |
| "loss": 0.1447, |
| "step": 46400 |
| }, |
| { |
| "epoch": 7.680872150644202, |
| "grad_norm": 0.7039462327957153, |
| "learning_rate": 8.266343647909555e-06, |
| "loss": 0.1546, |
| "step": 46500 |
| }, |
| { |
| "epoch": 7.697390155269243, |
| "grad_norm": 1.1326159238815308, |
| "learning_rate": 8.260225868418799e-06, |
| "loss": 0.1681, |
| "step": 46600 |
| }, |
| { |
| "epoch": 7.7139081598942845, |
| "grad_norm": 0.9435326457023621, |
| "learning_rate": 8.254108088928044e-06, |
| "loss": 0.1441, |
| "step": 46700 |
| }, |
| { |
| "epoch": 7.730426164519326, |
| "grad_norm": 1.0461517572402954, |
| "learning_rate": 8.247990309437287e-06, |
| "loss": 0.1438, |
| "step": 46800 |
| }, |
| { |
| "epoch": 7.746944169144367, |
| "grad_norm": 0.6983148455619812, |
| "learning_rate": 8.241872529946531e-06, |
| "loss": 0.1461, |
| "step": 46900 |
| }, |
| { |
| "epoch": 7.763462173769408, |
| "grad_norm": 0.6941544413566589, |
| "learning_rate": 8.235754750455776e-06, |
| "loss": 0.1414, |
| "step": 47000 |
| }, |
| { |
| "epoch": 7.763462173769408, |
| "eval_cer": 0.044256039969885703, |
| "eval_loss": 0.13219207525253296, |
| "eval_runtime": 48.7223, |
| "eval_samples_per_second": 34.707, |
| "eval_steps_per_second": 8.682, |
| "eval_wer": 0.24261537500780617, |
| "step": 47000 |
| }, |
| { |
| "epoch": 7.7799801783944496, |
| "grad_norm": 0.7406185865402222, |
| "learning_rate": 8.22963697096502e-06, |
| "loss": 0.1462, |
| "step": 47100 |
| }, |
| { |
| "epoch": 7.796498183019491, |
| "grad_norm": 0.7421987652778625, |
| "learning_rate": 8.223519191474263e-06, |
| "loss": 0.1377, |
| "step": 47200 |
| }, |
| { |
| "epoch": 7.813016187644532, |
| "grad_norm": 0.9080411195755005, |
| "learning_rate": 8.217401411983506e-06, |
| "loss": 0.1447, |
| "step": 47300 |
| }, |
| { |
| "epoch": 7.829534192269573, |
| "grad_norm": 0.9030922651290894, |
| "learning_rate": 8.211283632492752e-06, |
| "loss": 0.1488, |
| "step": 47400 |
| }, |
| { |
| "epoch": 7.846052196894615, |
| "grad_norm": 0.8082269430160522, |
| "learning_rate": 8.205165853001995e-06, |
| "loss": 0.1695, |
| "step": 47500 |
| }, |
| { |
| "epoch": 7.862570201519657, |
| "grad_norm": 0.8809986114501953, |
| "learning_rate": 8.199048073511239e-06, |
| "loss": 0.144, |
| "step": 47600 |
| }, |
| { |
| "epoch": 7.879088206144698, |
| "grad_norm": 0.6915613412857056, |
| "learning_rate": 8.192930294020482e-06, |
| "loss": 0.172, |
| "step": 47700 |
| }, |
| { |
| "epoch": 7.895606210769739, |
| "grad_norm": 0.8618057370185852, |
| "learning_rate": 8.186812514529727e-06, |
| "loss": 0.1362, |
| "step": 47800 |
| }, |
| { |
| "epoch": 7.912124215394781, |
| "grad_norm": 0.8087924122810364, |
| "learning_rate": 8.18069473503897e-06, |
| "loss": 0.1714, |
| "step": 47900 |
| }, |
| { |
| "epoch": 7.928642220019822, |
| "grad_norm": 1.0039881467819214, |
| "learning_rate": 8.174576955548214e-06, |
| "loss": 0.148, |
| "step": 48000 |
| }, |
| { |
| "epoch": 7.928642220019822, |
| "eval_cer": 0.04386250085551981, |
| "eval_loss": 0.13279926776885986, |
| "eval_runtime": 48.5969, |
| "eval_samples_per_second": 34.796, |
| "eval_steps_per_second": 8.704, |
| "eval_wer": 0.24136638980828076, |
| "step": 48000 |
| }, |
| { |
| "epoch": 7.945160224644863, |
| "grad_norm": 0.8676062822341919, |
| "learning_rate": 8.168459176057458e-06, |
| "loss": 0.1426, |
| "step": 48100 |
| }, |
| { |
| "epoch": 7.961678229269904, |
| "grad_norm": 0.7867946028709412, |
| "learning_rate": 8.162341396566703e-06, |
| "loss": 0.149, |
| "step": 48200 |
| }, |
| { |
| "epoch": 7.978196233894946, |
| "grad_norm": 0.7121617794036865, |
| "learning_rate": 8.156223617075946e-06, |
| "loss": 0.1447, |
| "step": 48300 |
| }, |
| { |
| "epoch": 7.994714238519987, |
| "grad_norm": 0.7431650757789612, |
| "learning_rate": 8.15010583758519e-06, |
| "loss": 0.1352, |
| "step": 48400 |
| }, |
| { |
| "epoch": 8.011232243145027, |
| "grad_norm": 1.1219011545181274, |
| "learning_rate": 8.143988058094435e-06, |
| "loss": 0.1427, |
| "step": 48500 |
| }, |
| { |
| "epoch": 8.027750247770069, |
| "grad_norm": 0.6559448838233948, |
| "learning_rate": 8.137870278603678e-06, |
| "loss": 0.1434, |
| "step": 48600 |
| }, |
| { |
| "epoch": 8.04426825239511, |
| "grad_norm": 0.7558749318122864, |
| "learning_rate": 8.131752499112923e-06, |
| "loss": 0.1396, |
| "step": 48700 |
| }, |
| { |
| "epoch": 8.060786257020151, |
| "grad_norm": 0.7132017016410828, |
| "learning_rate": 8.125634719622167e-06, |
| "loss": 0.1384, |
| "step": 48800 |
| }, |
| { |
| "epoch": 8.077304261645192, |
| "grad_norm": 0.7818734645843506, |
| "learning_rate": 8.11951694013141e-06, |
| "loss": 0.1386, |
| "step": 48900 |
| }, |
| { |
| "epoch": 8.093822266270234, |
| "grad_norm": 1.4067357778549194, |
| "learning_rate": 8.113399160640656e-06, |
| "loss": 0.1356, |
| "step": 49000 |
| }, |
| { |
| "epoch": 8.093822266270234, |
| "eval_cer": 0.04489767983026487, |
| "eval_loss": 0.1310672014951706, |
| "eval_runtime": 48.4665, |
| "eval_samples_per_second": 34.89, |
| "eval_steps_per_second": 8.728, |
| "eval_wer": 0.2447386498469993, |
| "step": 49000 |
| }, |
| { |
| "epoch": 8.110340270895275, |
| "grad_norm": 0.6177778840065002, |
| "learning_rate": 8.107281381149899e-06, |
| "loss": 0.1602, |
| "step": 49100 |
| }, |
| { |
| "epoch": 8.126858275520316, |
| "grad_norm": 0.6477259993553162, |
| "learning_rate": 8.101163601659142e-06, |
| "loss": 0.1356, |
| "step": 49200 |
| }, |
| { |
| "epoch": 8.14337628014536, |
| "grad_norm": 0.7413625717163086, |
| "learning_rate": 8.095045822168388e-06, |
| "loss": 0.1564, |
| "step": 49300 |
| }, |
| { |
| "epoch": 8.1598942847704, |
| "grad_norm": 0.6410078406333923, |
| "learning_rate": 8.088928042677631e-06, |
| "loss": 0.1344, |
| "step": 49400 |
| }, |
| { |
| "epoch": 8.176412289395442, |
| "grad_norm": 0.7430760860443115, |
| "learning_rate": 8.082810263186875e-06, |
| "loss": 0.1423, |
| "step": 49500 |
| }, |
| { |
| "epoch": 8.192930294020483, |
| "grad_norm": 0.7136946320533752, |
| "learning_rate": 8.076692483696118e-06, |
| "loss": 0.1328, |
| "step": 49600 |
| }, |
| { |
| "epoch": 8.209448298645524, |
| "grad_norm": 0.6934331059455872, |
| "learning_rate": 8.070574704205363e-06, |
| "loss": 0.1372, |
| "step": 49700 |
| }, |
| { |
| "epoch": 8.225966303270566, |
| "grad_norm": 0.9232444167137146, |
| "learning_rate": 8.064456924714607e-06, |
| "loss": 0.1333, |
| "step": 49800 |
| }, |
| { |
| "epoch": 8.242484307895607, |
| "grad_norm": 0.9037547707557678, |
| "learning_rate": 8.05833914522385e-06, |
| "loss": 0.1304, |
| "step": 49900 |
| }, |
| { |
| "epoch": 8.259002312520648, |
| "grad_norm": 0.7326195240020752, |
| "learning_rate": 8.052221365733094e-06, |
| "loss": 0.1426, |
| "step": 50000 |
| }, |
| { |
| "epoch": 8.259002312520648, |
| "eval_cer": 0.043101088221203204, |
| "eval_loss": 0.1275395154953003, |
| "eval_runtime": 48.7537, |
| "eval_samples_per_second": 34.685, |
| "eval_steps_per_second": 8.676, |
| "eval_wer": 0.23793168050958596, |
| "step": 50000 |
| }, |
| { |
| "epoch": 8.27552031714569, |
| "grad_norm": 0.8698177933692932, |
| "learning_rate": 8.046103586242339e-06, |
| "loss": 0.1436, |
| "step": 50100 |
| }, |
| { |
| "epoch": 8.29203832177073, |
| "grad_norm": 0.6039656400680542, |
| "learning_rate": 8.039985806751582e-06, |
| "loss": 0.1381, |
| "step": 50200 |
| }, |
| { |
| "epoch": 8.308556326395772, |
| "grad_norm": 0.6586022973060608, |
| "learning_rate": 8.033868027260826e-06, |
| "loss": 0.1419, |
| "step": 50300 |
| }, |
| { |
| "epoch": 8.325074331020813, |
| "grad_norm": 0.7226503491401672, |
| "learning_rate": 8.027750247770069e-06, |
| "loss": 0.1383, |
| "step": 50400 |
| }, |
| { |
| "epoch": 8.341592335645855, |
| "grad_norm": 0.6569647192955017, |
| "learning_rate": 8.021632468279314e-06, |
| "loss": 0.14, |
| "step": 50500 |
| }, |
| { |
| "epoch": 8.358110340270896, |
| "grad_norm": 0.7345595955848694, |
| "learning_rate": 8.015514688788558e-06, |
| "loss": 0.1335, |
| "step": 50600 |
| }, |
| { |
| "epoch": 8.374628344895937, |
| "grad_norm": 0.9025945067405701, |
| "learning_rate": 8.009396909297801e-06, |
| "loss": 0.1327, |
| "step": 50700 |
| }, |
| { |
| "epoch": 8.391146349520978, |
| "grad_norm": 0.693868100643158, |
| "learning_rate": 8.003279129807046e-06, |
| "loss": 0.1405, |
| "step": 50800 |
| }, |
| { |
| "epoch": 8.40766435414602, |
| "grad_norm": 0.726399302482605, |
| "learning_rate": 7.99716135031629e-06, |
| "loss": 0.1412, |
| "step": 50900 |
| }, |
| { |
| "epoch": 8.424182358771061, |
| "grad_norm": 0.8612838983535767, |
| "learning_rate": 7.991043570825533e-06, |
| "loss": 0.1605, |
| "step": 51000 |
| }, |
| { |
| "epoch": 8.424182358771061, |
| "eval_cer": 0.04358873451509137, |
| "eval_loss": 0.12762609124183655, |
| "eval_runtime": 48.4653, |
| "eval_samples_per_second": 34.891, |
| "eval_steps_per_second": 8.728, |
| "eval_wer": 0.2394929120089927, |
| "step": 51000 |
| }, |
| { |
| "epoch": 8.440700363396102, |
| "grad_norm": 0.7443174719810486, |
| "learning_rate": 7.984925791334778e-06, |
| "loss": 0.1586, |
| "step": 51100 |
| }, |
| { |
| "epoch": 8.457218368021143, |
| "grad_norm": 0.8640626668930054, |
| "learning_rate": 7.978808011844022e-06, |
| "loss": 0.1315, |
| "step": 51200 |
| }, |
| { |
| "epoch": 8.473736372646185, |
| "grad_norm": 0.8193531632423401, |
| "learning_rate": 7.972690232353267e-06, |
| "loss": 0.1345, |
| "step": 51300 |
| }, |
| { |
| "epoch": 8.490254377271226, |
| "grad_norm": 0.922201931476593, |
| "learning_rate": 7.96657245286251e-06, |
| "loss": 0.1406, |
| "step": 51400 |
| }, |
| { |
| "epoch": 8.506772381896267, |
| "grad_norm": 0.6063607931137085, |
| "learning_rate": 7.960454673371754e-06, |
| "loss": 0.1311, |
| "step": 51500 |
| }, |
| { |
| "epoch": 8.523290386521309, |
| "grad_norm": 0.6972509622573853, |
| "learning_rate": 7.954336893880997e-06, |
| "loss": 0.1313, |
| "step": 51600 |
| }, |
| { |
| "epoch": 8.53980839114635, |
| "grad_norm": 0.8324514627456665, |
| "learning_rate": 7.948219114390243e-06, |
| "loss": 0.1357, |
| "step": 51700 |
| }, |
| { |
| "epoch": 8.556326395771391, |
| "grad_norm": 0.8537706136703491, |
| "learning_rate": 7.942101334899486e-06, |
| "loss": 0.1326, |
| "step": 51800 |
| }, |
| { |
| "epoch": 8.572844400396432, |
| "grad_norm": 0.7509739398956299, |
| "learning_rate": 7.93598355540873e-06, |
| "loss": 0.141, |
| "step": 51900 |
| }, |
| { |
| "epoch": 8.589362405021474, |
| "grad_norm": 0.8359591364860535, |
| "learning_rate": 7.929865775917973e-06, |
| "loss": 0.1619, |
| "step": 52000 |
| }, |
| { |
| "epoch": 8.589362405021474, |
| "eval_cer": 0.04345185134487715, |
| "eval_loss": 0.128912091255188, |
| "eval_runtime": 48.8649, |
| "eval_samples_per_second": 34.606, |
| "eval_steps_per_second": 8.657, |
| "eval_wer": 0.23874352088927747, |
| "step": 52000 |
| }, |
| { |
| "epoch": 8.605880409646515, |
| "grad_norm": 0.9481146335601807, |
| "learning_rate": 7.923747996427218e-06, |
| "loss": 0.1305, |
| "step": 52100 |
| }, |
| { |
| "epoch": 8.622398414271556, |
| "grad_norm": 0.7615697979927063, |
| "learning_rate": 7.917630216936462e-06, |
| "loss": 0.1575, |
| "step": 52200 |
| }, |
| { |
| "epoch": 8.638916418896597, |
| "grad_norm": 0.8259508609771729, |
| "learning_rate": 7.911512437445705e-06, |
| "loss": 0.1378, |
| "step": 52300 |
| }, |
| { |
| "epoch": 8.655434423521639, |
| "grad_norm": 2.3674380779266357, |
| "learning_rate": 7.905394657954948e-06, |
| "loss": 0.1279, |
| "step": 52400 |
| }, |
| { |
| "epoch": 8.67195242814668, |
| "grad_norm": 0.6892039179801941, |
| "learning_rate": 7.899276878464194e-06, |
| "loss": 0.1387, |
| "step": 52500 |
| }, |
| { |
| "epoch": 8.688470432771721, |
| "grad_norm": 0.7626641988754272, |
| "learning_rate": 7.893159098973437e-06, |
| "loss": 0.1354, |
| "step": 52600 |
| }, |
| { |
| "epoch": 8.704988437396763, |
| "grad_norm": 0.9490280747413635, |
| "learning_rate": 7.88704131948268e-06, |
| "loss": 0.1273, |
| "step": 52700 |
| }, |
| { |
| "epoch": 8.721506442021804, |
| "grad_norm": 0.7699686884880066, |
| "learning_rate": 7.880923539991924e-06, |
| "loss": 0.1277, |
| "step": 52800 |
| }, |
| { |
| "epoch": 8.738024446646845, |
| "grad_norm": 0.819313108921051, |
| "learning_rate": 7.87480576050117e-06, |
| "loss": 0.1337, |
| "step": 52900 |
| }, |
| { |
| "epoch": 8.754542451271886, |
| "grad_norm": 0.7003619074821472, |
| "learning_rate": 7.868687981010413e-06, |
| "loss": 0.1594, |
| "step": 53000 |
| }, |
| { |
| "epoch": 8.754542451271886, |
| "eval_cer": 0.042955649852850594, |
| "eval_loss": 0.12719017267227173, |
| "eval_runtime": 48.6494, |
| "eval_samples_per_second": 34.759, |
| "eval_steps_per_second": 8.695, |
| "eval_wer": 0.237244738649847, |
| "step": 53000 |
| }, |
| { |
| "epoch": 8.771060455896928, |
| "grad_norm": 0.6663370728492737, |
| "learning_rate": 7.862570201519658e-06, |
| "loss": 0.1376, |
| "step": 53100 |
| }, |
| { |
| "epoch": 8.787578460521969, |
| "grad_norm": 0.5805346369743347, |
| "learning_rate": 7.856452422028901e-06, |
| "loss": 0.1379, |
| "step": 53200 |
| }, |
| { |
| "epoch": 8.80409646514701, |
| "grad_norm": 0.7736044526100159, |
| "learning_rate": 7.850334642538145e-06, |
| "loss": 0.1323, |
| "step": 53300 |
| }, |
| { |
| "epoch": 8.820614469772051, |
| "grad_norm": 0.9529663324356079, |
| "learning_rate": 7.84421686304739e-06, |
| "loss": 0.1289, |
| "step": 53400 |
| }, |
| { |
| "epoch": 8.837132474397093, |
| "grad_norm": 0.583885908126831, |
| "learning_rate": 7.838099083556633e-06, |
| "loss": 0.1322, |
| "step": 53500 |
| }, |
| { |
| "epoch": 8.853650479022134, |
| "grad_norm": 0.7250145673751831, |
| "learning_rate": 7.831981304065877e-06, |
| "loss": 0.133, |
| "step": 53600 |
| }, |
| { |
| "epoch": 8.870168483647175, |
| "grad_norm": 0.6262508034706116, |
| "learning_rate": 7.825863524575122e-06, |
| "loss": 0.1599, |
| "step": 53700 |
| }, |
| { |
| "epoch": 8.886686488272217, |
| "grad_norm": 0.7456868290901184, |
| "learning_rate": 7.819745745084365e-06, |
| "loss": 0.1389, |
| "step": 53800 |
| }, |
| { |
| "epoch": 8.903204492897258, |
| "grad_norm": 0.944975733757019, |
| "learning_rate": 7.813627965593609e-06, |
| "loss": 0.1339, |
| "step": 53900 |
| }, |
| { |
| "epoch": 8.919722497522299, |
| "grad_norm": 0.7597010731697083, |
| "learning_rate": 7.807510186102852e-06, |
| "loss": 0.1247, |
| "step": 54000 |
| }, |
| { |
| "epoch": 8.919722497522299, |
| "eval_cer": 0.042852987475189924, |
| "eval_loss": 0.12474211305379868, |
| "eval_runtime": 48.5952, |
| "eval_samples_per_second": 34.798, |
| "eval_steps_per_second": 8.705, |
| "eval_wer": 0.23530881159058264, |
| "step": 54000 |
| }, |
| { |
| "epoch": 8.93624050214734, |
| "grad_norm": 0.8667477965354919, |
| "learning_rate": 7.801392406612098e-06, |
| "loss": 0.1305, |
| "step": 54100 |
| }, |
| { |
| "epoch": 8.952758506772382, |
| "grad_norm": 0.7182130813598633, |
| "learning_rate": 7.795274627121341e-06, |
| "loss": 0.1315, |
| "step": 54200 |
| }, |
| { |
| "epoch": 8.969276511397423, |
| "grad_norm": 0.7797411680221558, |
| "learning_rate": 7.789156847630584e-06, |
| "loss": 0.1342, |
| "step": 54300 |
| }, |
| { |
| "epoch": 8.985794516022464, |
| "grad_norm": 0.7155345678329468, |
| "learning_rate": 7.783039068139828e-06, |
| "loss": 0.128, |
| "step": 54400 |
| }, |
| { |
| "epoch": 9.002312520647505, |
| "grad_norm": 0.8948495388031006, |
| "learning_rate": 7.776921288649073e-06, |
| "loss": 0.1352, |
| "step": 54500 |
| }, |
| { |
| "epoch": 9.018830525272547, |
| "grad_norm": 1.397560954093933, |
| "learning_rate": 7.770803509158317e-06, |
| "loss": 0.1362, |
| "step": 54600 |
| }, |
| { |
| "epoch": 9.035348529897588, |
| "grad_norm": 0.7347814440727234, |
| "learning_rate": 7.76468572966756e-06, |
| "loss": 0.1331, |
| "step": 54700 |
| }, |
| { |
| "epoch": 9.05186653452263, |
| "grad_norm": 0.9538244009017944, |
| "learning_rate": 7.758567950176803e-06, |
| "loss": 0.1339, |
| "step": 54800 |
| }, |
| { |
| "epoch": 9.06838453914767, |
| "grad_norm": 0.7012799382209778, |
| "learning_rate": 7.752450170686049e-06, |
| "loss": 0.1572, |
| "step": 54900 |
| }, |
| { |
| "epoch": 9.084902543772712, |
| "grad_norm": 0.6902897357940674, |
| "learning_rate": 7.746332391195292e-06, |
| "loss": 0.1382, |
| "step": 55000 |
| }, |
| { |
| "epoch": 9.084902543772712, |
| "eval_cer": 0.04332352337280131, |
| "eval_loss": 0.1268453598022461, |
| "eval_runtime": 48.6908, |
| "eval_samples_per_second": 34.729, |
| "eval_steps_per_second": 8.687, |
| "eval_wer": 0.23674514457003684, |
| "step": 55000 |
| }, |
| { |
| "epoch": 9.101420548397753, |
| "grad_norm": 0.785682737827301, |
| "learning_rate": 7.740214611704536e-06, |
| "loss": 0.1246, |
| "step": 55100 |
| }, |
| { |
| "epoch": 9.117938553022794, |
| "grad_norm": 0.6153995990753174, |
| "learning_rate": 7.73409683221378e-06, |
| "loss": 0.1308, |
| "step": 55200 |
| }, |
| { |
| "epoch": 9.134456557647836, |
| "grad_norm": 0.9429919719696045, |
| "learning_rate": 7.727979052723024e-06, |
| "loss": 0.1297, |
| "step": 55300 |
| }, |
| { |
| "epoch": 9.150974562272877, |
| "grad_norm": 0.509573221206665, |
| "learning_rate": 7.721861273232268e-06, |
| "loss": 0.1536, |
| "step": 55400 |
| }, |
| { |
| "epoch": 9.167492566897918, |
| "grad_norm": 0.8129322528839111, |
| "learning_rate": 7.715743493741513e-06, |
| "loss": 0.1335, |
| "step": 55500 |
| }, |
| { |
| "epoch": 9.18401057152296, |
| "grad_norm": 0.6652195453643799, |
| "learning_rate": 7.709625714250756e-06, |
| "loss": 0.1227, |
| "step": 55600 |
| }, |
| { |
| "epoch": 9.200528576148, |
| "grad_norm": 0.9443718791007996, |
| "learning_rate": 7.703507934760001e-06, |
| "loss": 0.1327, |
| "step": 55700 |
| }, |
| { |
| "epoch": 9.217046580773042, |
| "grad_norm": 1.0480358600616455, |
| "learning_rate": 7.697390155269245e-06, |
| "loss": 0.1333, |
| "step": 55800 |
| }, |
| { |
| "epoch": 9.233564585398083, |
| "grad_norm": 0.8347544074058533, |
| "learning_rate": 7.691272375778488e-06, |
| "loss": 0.1265, |
| "step": 55900 |
| }, |
| { |
| "epoch": 9.250082590023124, |
| "grad_norm": 0.9064726233482361, |
| "learning_rate": 7.685154596287732e-06, |
| "loss": 0.1263, |
| "step": 56000 |
| }, |
| { |
| "epoch": 9.250082590023124, |
| "eval_cer": 0.043246526589555814, |
| "eval_loss": 0.12526705861091614, |
| "eval_runtime": 48.6324, |
| "eval_samples_per_second": 34.771, |
| "eval_steps_per_second": 8.698, |
| "eval_wer": 0.23574595641041654, |
| "step": 56000 |
| }, |
| { |
| "epoch": 9.266600594648166, |
| "grad_norm": 0.6867343783378601, |
| "learning_rate": 7.679036816796977e-06, |
| "loss": 0.1634, |
| "step": 56100 |
| }, |
| { |
| "epoch": 9.283118599273207, |
| "grad_norm": 0.5822398662567139, |
| "learning_rate": 7.67291903730622e-06, |
| "loss": 0.1232, |
| "step": 56200 |
| }, |
| { |
| "epoch": 9.299636603898248, |
| "grad_norm": 0.70078444480896, |
| "learning_rate": 7.666801257815464e-06, |
| "loss": 0.1334, |
| "step": 56300 |
| }, |
| { |
| "epoch": 9.31615460852329, |
| "grad_norm": 0.7892748117446899, |
| "learning_rate": 7.660683478324707e-06, |
| "loss": 0.1364, |
| "step": 56400 |
| }, |
| { |
| "epoch": 9.33267261314833, |
| "grad_norm": 0.6174075603485107, |
| "learning_rate": 7.654565698833952e-06, |
| "loss": 0.1279, |
| "step": 56500 |
| }, |
| { |
| "epoch": 9.349190617773372, |
| "grad_norm": 0.5943549275398254, |
| "learning_rate": 7.648447919343196e-06, |
| "loss": 0.1211, |
| "step": 56600 |
| }, |
| { |
| "epoch": 9.365708622398415, |
| "grad_norm": 0.7577424645423889, |
| "learning_rate": 7.64233013985244e-06, |
| "loss": 0.1282, |
| "step": 56700 |
| }, |
| { |
| "epoch": 9.382226627023456, |
| "grad_norm": 0.6361554861068726, |
| "learning_rate": 7.636212360361683e-06, |
| "loss": 0.146, |
| "step": 56800 |
| }, |
| { |
| "epoch": 9.398744631648498, |
| "grad_norm": 0.7385000586509705, |
| "learning_rate": 7.630094580870928e-06, |
| "loss": 0.1218, |
| "step": 56900 |
| }, |
| { |
| "epoch": 9.415262636273539, |
| "grad_norm": 0.8645774126052856, |
| "learning_rate": 7.6239768013801715e-06, |
| "loss": 0.1296, |
| "step": 57000 |
| }, |
| { |
| "epoch": 9.415262636273539, |
| "eval_cer": 0.041988912463212645, |
| "eval_loss": 0.12412171810865402, |
| "eval_runtime": 48.7466, |
| "eval_samples_per_second": 34.69, |
| "eval_steps_per_second": 8.678, |
| "eval_wer": 0.23099981265222008, |
| "step": 57000 |
| }, |
| { |
| "epoch": 9.43178064089858, |
| "grad_norm": 0.8043680787086487, |
| "learning_rate": 7.617859021889416e-06, |
| "loss": 0.1263, |
| "step": 57100 |
| }, |
| { |
| "epoch": 9.448298645523622, |
| "grad_norm": 0.6487779021263123, |
| "learning_rate": 7.611741242398659e-06, |
| "loss": 0.1409, |
| "step": 57200 |
| }, |
| { |
| "epoch": 9.464816650148663, |
| "grad_norm": 0.8494729399681091, |
| "learning_rate": 7.605623462907904e-06, |
| "loss": 0.1292, |
| "step": 57300 |
| }, |
| { |
| "epoch": 9.481334654773704, |
| "grad_norm": 0.6968827247619629, |
| "learning_rate": 7.599505683417148e-06, |
| "loss": 0.1237, |
| "step": 57400 |
| }, |
| { |
| "epoch": 9.497852659398745, |
| "grad_norm": 0.7528768181800842, |
| "learning_rate": 7.593387903926391e-06, |
| "loss": 0.121, |
| "step": 57500 |
| }, |
| { |
| "epoch": 9.514370664023787, |
| "grad_norm": 0.8891571164131165, |
| "learning_rate": 7.587270124435635e-06, |
| "loss": 0.1271, |
| "step": 57600 |
| }, |
| { |
| "epoch": 9.530888668648828, |
| "grad_norm": 0.60162353515625, |
| "learning_rate": 7.58115234494488e-06, |
| "loss": 0.1321, |
| "step": 57700 |
| }, |
| { |
| "epoch": 9.54740667327387, |
| "grad_norm": 0.9320480823516846, |
| "learning_rate": 7.575034565454123e-06, |
| "loss": 0.136, |
| "step": 57800 |
| }, |
| { |
| "epoch": 9.56392467789891, |
| "grad_norm": 0.7406982779502869, |
| "learning_rate": 7.568916785963367e-06, |
| "loss": 0.128, |
| "step": 57900 |
| }, |
| { |
| "epoch": 9.580442682523952, |
| "grad_norm": 0.753324568271637, |
| "learning_rate": 7.562799006472611e-06, |
| "loss": 0.1263, |
| "step": 58000 |
| }, |
| { |
| "epoch": 9.580442682523952, |
| "eval_cer": 0.042305454794333036, |
| "eval_loss": 0.12526877224445343, |
| "eval_runtime": 48.8065, |
| "eval_samples_per_second": 34.647, |
| "eval_steps_per_second": 8.667, |
| "eval_wer": 0.23212389933179292, |
| "step": 58000 |
| }, |
| { |
| "epoch": 9.596960687148993, |
| "grad_norm": 0.6665119528770447, |
| "learning_rate": 7.5566812269818555e-06, |
| "loss": 0.1285, |
| "step": 58100 |
| }, |
| { |
| "epoch": 9.613478691774034, |
| "grad_norm": 0.6425819993019104, |
| "learning_rate": 7.5505634474911e-06, |
| "loss": 0.1303, |
| "step": 58200 |
| }, |
| { |
| "epoch": 9.629996696399076, |
| "grad_norm": 0.7088574767112732, |
| "learning_rate": 7.544445668000343e-06, |
| "loss": 0.1246, |
| "step": 58300 |
| }, |
| { |
| "epoch": 9.646514701024117, |
| "grad_norm": 0.6304578185081482, |
| "learning_rate": 7.538327888509587e-06, |
| "loss": 0.1357, |
| "step": 58400 |
| }, |
| { |
| "epoch": 9.663032705649158, |
| "grad_norm": 0.9591554403305054, |
| "learning_rate": 7.532210109018832e-06, |
| "loss": 0.1352, |
| "step": 58500 |
| }, |
| { |
| "epoch": 9.6795507102742, |
| "grad_norm": 0.8646364808082581, |
| "learning_rate": 7.526092329528075e-06, |
| "loss": 0.1225, |
| "step": 58600 |
| }, |
| { |
| "epoch": 9.69606871489924, |
| "grad_norm": 0.729404866695404, |
| "learning_rate": 7.519974550037319e-06, |
| "loss": 0.1241, |
| "step": 58700 |
| }, |
| { |
| "epoch": 9.712586719524282, |
| "grad_norm": 0.719990611076355, |
| "learning_rate": 7.513856770546562e-06, |
| "loss": 0.1184, |
| "step": 58800 |
| }, |
| { |
| "epoch": 9.729104724149323, |
| "grad_norm": 0.7622655630111694, |
| "learning_rate": 7.507738991055807e-06, |
| "loss": 0.1281, |
| "step": 58900 |
| }, |
| { |
| "epoch": 9.745622728774364, |
| "grad_norm": 0.8316338658332825, |
| "learning_rate": 7.501621211565051e-06, |
| "loss": 0.1327, |
| "step": 59000 |
| }, |
| { |
| "epoch": 9.745622728774364, |
| "eval_cer": 0.042014578057627816, |
| "eval_loss": 0.12281199544668198, |
| "eval_runtime": 48.6883, |
| "eval_samples_per_second": 34.731, |
| "eval_steps_per_second": 8.688, |
| "eval_wer": 0.23018797227252857, |
| "step": 59000 |
| }, |
| { |
| "epoch": 9.762140733399406, |
| "grad_norm": 0.6978473663330078, |
| "learning_rate": 7.495503432074294e-06, |
| "loss": 0.1218, |
| "step": 59100 |
| }, |
| { |
| "epoch": 9.778658738024447, |
| "grad_norm": 0.7994058728218079, |
| "learning_rate": 7.489385652583539e-06, |
| "loss": 0.1217, |
| "step": 59200 |
| }, |
| { |
| "epoch": 9.795176742649488, |
| "grad_norm": 0.6791940927505493, |
| "learning_rate": 7.483267873092783e-06, |
| "loss": 0.1598, |
| "step": 59300 |
| }, |
| { |
| "epoch": 9.81169474727453, |
| "grad_norm": 0.7519752383232117, |
| "learning_rate": 7.477150093602027e-06, |
| "loss": 0.1267, |
| "step": 59400 |
| }, |
| { |
| "epoch": 9.82821275189957, |
| "grad_norm": 0.6616401672363281, |
| "learning_rate": 7.471032314111271e-06, |
| "loss": 0.126, |
| "step": 59500 |
| }, |
| { |
| "epoch": 9.844730756524612, |
| "grad_norm": 0.8174837231636047, |
| "learning_rate": 7.464914534620514e-06, |
| "loss": 0.1287, |
| "step": 59600 |
| }, |
| { |
| "epoch": 9.861248761149653, |
| "grad_norm": 0.746444046497345, |
| "learning_rate": 7.458796755129759e-06, |
| "loss": 0.1576, |
| "step": 59700 |
| }, |
| { |
| "epoch": 9.877766765774695, |
| "grad_norm": 0.9220054745674133, |
| "learning_rate": 7.452678975639003e-06, |
| "loss": 0.1396, |
| "step": 59800 |
| }, |
| { |
| "epoch": 9.894284770399736, |
| "grad_norm": 0.8045241832733154, |
| "learning_rate": 7.446561196148246e-06, |
| "loss": 0.1217, |
| "step": 59900 |
| }, |
| { |
| "epoch": 9.910802775024777, |
| "grad_norm": 0.759081244468689, |
| "learning_rate": 7.44044341665749e-06, |
| "loss": 0.1262, |
| "step": 60000 |
| }, |
| { |
| "epoch": 9.910802775024777, |
| "eval_cer": 0.041911915679967146, |
| "eval_loss": 0.12326313555240631, |
| "eval_runtime": 48.7409, |
| "eval_samples_per_second": 34.694, |
| "eval_steps_per_second": 8.679, |
| "eval_wer": 0.23018797227252857, |
| "step": 60000 |
| }, |
| { |
| "epoch": 9.927320779649818, |
| "grad_norm": 0.8618572354316711, |
| "learning_rate": 7.434325637166735e-06, |
| "loss": 0.1251, |
| "step": 60100 |
| }, |
| { |
| "epoch": 9.94383878427486, |
| "grad_norm": 0.6436170935630798, |
| "learning_rate": 7.428207857675978e-06, |
| "loss": 0.1201, |
| "step": 60200 |
| }, |
| { |
| "epoch": 9.960356788899901, |
| "grad_norm": 0.8590137958526611, |
| "learning_rate": 7.422090078185223e-06, |
| "loss": 0.129, |
| "step": 60300 |
| }, |
| { |
| "epoch": 9.976874793524942, |
| "grad_norm": 0.7668434381484985, |
| "learning_rate": 7.415972298694466e-06, |
| "loss": 0.1255, |
| "step": 60400 |
| }, |
| { |
| "epoch": 9.993392798149983, |
| "grad_norm": 0.7597298622131348, |
| "learning_rate": 7.4098545192037104e-06, |
| "loss": 0.1231, |
| "step": 60500 |
| }, |
| { |
| "epoch": 10.009910802775025, |
| "grad_norm": 0.8070718050003052, |
| "learning_rate": 7.403736739712955e-06, |
| "loss": 0.1642, |
| "step": 60600 |
| }, |
| { |
| "epoch": 10.026428807400066, |
| "grad_norm": 0.7364042401313782, |
| "learning_rate": 7.397618960222198e-06, |
| "loss": 0.1243, |
| "step": 60700 |
| }, |
| { |
| "epoch": 10.042946812025107, |
| "grad_norm": 0.8967491984367371, |
| "learning_rate": 7.391501180731442e-06, |
| "loss": 0.1321, |
| "step": 60800 |
| }, |
| { |
| "epoch": 10.059464816650149, |
| "grad_norm": 0.8420141339302063, |
| "learning_rate": 7.385383401240687e-06, |
| "loss": 0.1274, |
| "step": 60900 |
| }, |
| { |
| "epoch": 10.07598282127519, |
| "grad_norm": 0.7145309448242188, |
| "learning_rate": 7.37926562174993e-06, |
| "loss": 0.1207, |
| "step": 61000 |
| }, |
| { |
| "epoch": 10.07598282127519, |
| "eval_cer": 0.041920470878105534, |
| "eval_loss": 0.12428971379995346, |
| "eval_runtime": 48.748, |
| "eval_samples_per_second": 34.689, |
| "eval_steps_per_second": 8.677, |
| "eval_wer": 0.22881408855305066, |
| "step": 61000 |
| }, |
| { |
| "epoch": 10.092500825900231, |
| "grad_norm": 0.7484616041183472, |
| "learning_rate": 7.373147842259174e-06, |
| "loss": 0.1279, |
| "step": 61100 |
| }, |
| { |
| "epoch": 10.109018830525272, |
| "grad_norm": 0.7732555270195007, |
| "learning_rate": 7.367030062768418e-06, |
| "loss": 0.1251, |
| "step": 61200 |
| }, |
| { |
| "epoch": 10.125536835150314, |
| "grad_norm": 0.7943729162216187, |
| "learning_rate": 7.360912283277662e-06, |
| "loss": 0.1194, |
| "step": 61300 |
| }, |
| { |
| "epoch": 10.142054839775355, |
| "grad_norm": 0.7555480003356934, |
| "learning_rate": 7.354794503786906e-06, |
| "loss": 0.1171, |
| "step": 61400 |
| }, |
| { |
| "epoch": 10.158572844400396, |
| "grad_norm": 0.6439567804336548, |
| "learning_rate": 7.34867672429615e-06, |
| "loss": 0.1203, |
| "step": 61500 |
| }, |
| { |
| "epoch": 10.175090849025437, |
| "grad_norm": 0.5505064725875854, |
| "learning_rate": 7.342558944805394e-06, |
| "loss": 0.1198, |
| "step": 61600 |
| }, |
| { |
| "epoch": 10.191608853650479, |
| "grad_norm": 0.6508448123931885, |
| "learning_rate": 7.336441165314639e-06, |
| "loss": 0.1227, |
| "step": 61700 |
| }, |
| { |
| "epoch": 10.20812685827552, |
| "grad_norm": 0.6717207431793213, |
| "learning_rate": 7.330323385823882e-06, |
| "loss": 0.1258, |
| "step": 61800 |
| }, |
| { |
| "epoch": 10.224644862900561, |
| "grad_norm": 0.7035212516784668, |
| "learning_rate": 7.324205606333126e-06, |
| "loss": 0.1235, |
| "step": 61900 |
| }, |
| { |
| "epoch": 10.241162867525603, |
| "grad_norm": 0.6881560683250427, |
| "learning_rate": 7.318087826842369e-06, |
| "loss": 0.1411, |
| "step": 62000 |
| }, |
| { |
| "epoch": 10.241162867525603, |
| "eval_cer": 0.04166381493395387, |
| "eval_loss": 0.12054095417261124, |
| "eval_runtime": 48.5305, |
| "eval_samples_per_second": 34.844, |
| "eval_steps_per_second": 8.716, |
| "eval_wer": 0.22781490039343033, |
| "step": 62000 |
| }, |
| { |
| "epoch": 10.257680872150644, |
| "grad_norm": 0.8385311961174011, |
| "learning_rate": 7.311970047351614e-06, |
| "loss": 0.1223, |
| "step": 62100 |
| }, |
| { |
| "epoch": 10.274198876775685, |
| "grad_norm": 0.5517755746841431, |
| "learning_rate": 7.305852267860858e-06, |
| "loss": 0.1251, |
| "step": 62200 |
| }, |
| { |
| "epoch": 10.290716881400726, |
| "grad_norm": 0.736827552318573, |
| "learning_rate": 7.299734488370101e-06, |
| "loss": 0.1215, |
| "step": 62300 |
| }, |
| { |
| "epoch": 10.307234886025768, |
| "grad_norm": 0.8742785453796387, |
| "learning_rate": 7.2936167088793455e-06, |
| "loss": 0.1284, |
| "step": 62400 |
| }, |
| { |
| "epoch": 10.323752890650809, |
| "grad_norm": 0.6363995671272278, |
| "learning_rate": 7.28749892938859e-06, |
| "loss": 0.1287, |
| "step": 62500 |
| }, |
| { |
| "epoch": 10.34027089527585, |
| "grad_norm": 0.8067464232444763, |
| "learning_rate": 7.281381149897834e-06, |
| "loss": 0.1206, |
| "step": 62600 |
| }, |
| { |
| "epoch": 10.356788899900891, |
| "grad_norm": 0.7671234011650085, |
| "learning_rate": 7.275263370407078e-06, |
| "loss": 0.1216, |
| "step": 62700 |
| }, |
| { |
| "epoch": 10.373306904525933, |
| "grad_norm": 1.0161293745040894, |
| "learning_rate": 7.269145590916321e-06, |
| "loss": 0.1244, |
| "step": 62800 |
| }, |
| { |
| "epoch": 10.389824909150974, |
| "grad_norm": 0.7212845683097839, |
| "learning_rate": 7.263027811425566e-06, |
| "loss": 0.1298, |
| "step": 62900 |
| }, |
| { |
| "epoch": 10.406342913776015, |
| "grad_norm": 0.6176585555076599, |
| "learning_rate": 7.25691003193481e-06, |
| "loss": 0.1401, |
| "step": 63000 |
| }, |
| { |
| "epoch": 10.406342913776015, |
| "eval_cer": 0.04089384710149887, |
| "eval_loss": 0.12006353586912155, |
| "eval_runtime": 48.5157, |
| "eval_samples_per_second": 34.855, |
| "eval_steps_per_second": 8.719, |
| "eval_wer": 0.22494223443452194, |
| "step": 63000 |
| }, |
| { |
| "epoch": 10.422860918401057, |
| "grad_norm": 0.724654495716095, |
| "learning_rate": 7.250792252444053e-06, |
| "loss": 0.1246, |
| "step": 63100 |
| }, |
| { |
| "epoch": 10.439378923026098, |
| "grad_norm": 2.0829966068267822, |
| "learning_rate": 7.244674472953297e-06, |
| "loss": 0.1206, |
| "step": 63200 |
| }, |
| { |
| "epoch": 10.455896927651139, |
| "grad_norm": 0.7068758606910706, |
| "learning_rate": 7.238556693462542e-06, |
| "loss": 0.1214, |
| "step": 63300 |
| }, |
| { |
| "epoch": 10.47241493227618, |
| "grad_norm": 0.717832624912262, |
| "learning_rate": 7.232438913971785e-06, |
| "loss": 0.1233, |
| "step": 63400 |
| }, |
| { |
| "epoch": 10.488932936901222, |
| "grad_norm": 0.7591824531555176, |
| "learning_rate": 7.226321134481029e-06, |
| "loss": 0.1223, |
| "step": 63500 |
| }, |
| { |
| "epoch": 10.505450941526263, |
| "grad_norm": 0.8358705639839172, |
| "learning_rate": 7.220203354990273e-06, |
| "loss": 0.117, |
| "step": 63600 |
| }, |
| { |
| "epoch": 10.521968946151304, |
| "grad_norm": 0.7193006277084351, |
| "learning_rate": 7.214085575499517e-06, |
| "loss": 0.1229, |
| "step": 63700 |
| }, |
| { |
| "epoch": 10.538486950776345, |
| "grad_norm": 0.8279296159744263, |
| "learning_rate": 7.207967796008762e-06, |
| "loss": 0.1304, |
| "step": 63800 |
| }, |
| { |
| "epoch": 10.555004955401387, |
| "grad_norm": 0.9237922430038452, |
| "learning_rate": 7.201850016518005e-06, |
| "loss": 0.121, |
| "step": 63900 |
| }, |
| { |
| "epoch": 10.571522960026428, |
| "grad_norm": 0.6493191719055176, |
| "learning_rate": 7.1957322370272486e-06, |
| "loss": 0.1165, |
| "step": 64000 |
| }, |
| { |
| "epoch": 10.571522960026428, |
| "eval_cer": 0.04139860379166382, |
| "eval_loss": 0.12036388367414474, |
| "eval_runtime": 48.7181, |
| "eval_samples_per_second": 34.71, |
| "eval_steps_per_second": 8.683, |
| "eval_wer": 0.2271279585336914, |
| "step": 64000 |
| }, |
| { |
| "epoch": 10.58804096465147, |
| "grad_norm": 0.9154326319694519, |
| "learning_rate": 7.189614457536494e-06, |
| "loss": 0.1184, |
| "step": 64100 |
| }, |
| { |
| "epoch": 10.60455896927651, |
| "grad_norm": 0.8205140829086304, |
| "learning_rate": 7.183496678045737e-06, |
| "loss": 0.1243, |
| "step": 64200 |
| }, |
| { |
| "epoch": 10.621076973901552, |
| "grad_norm": 0.7464902400970459, |
| "learning_rate": 7.177378898554981e-06, |
| "loss": 0.1274, |
| "step": 64300 |
| }, |
| { |
| "epoch": 10.637594978526593, |
| "grad_norm": 0.6660764217376709, |
| "learning_rate": 7.171261119064224e-06, |
| "loss": 0.1348, |
| "step": 64400 |
| }, |
| { |
| "epoch": 10.654112983151634, |
| "grad_norm": 0.8469193577766418, |
| "learning_rate": 7.165143339573469e-06, |
| "loss": 0.1273, |
| "step": 64500 |
| }, |
| { |
| "epoch": 10.670630987776677, |
| "grad_norm": 0.7771629095077515, |
| "learning_rate": 7.159025560082713e-06, |
| "loss": 0.1202, |
| "step": 64600 |
| }, |
| { |
| "epoch": 10.687148992401719, |
| "grad_norm": 0.7291647791862488, |
| "learning_rate": 7.152907780591957e-06, |
| "loss": 0.12, |
| "step": 64700 |
| }, |
| { |
| "epoch": 10.70366699702676, |
| "grad_norm": 0.670477032661438, |
| "learning_rate": 7.1467900011012005e-06, |
| "loss": 0.1407, |
| "step": 64800 |
| }, |
| { |
| "epoch": 10.720185001651801, |
| "grad_norm": 0.7225449085235596, |
| "learning_rate": 7.140672221610445e-06, |
| "loss": 0.1207, |
| "step": 64900 |
| }, |
| { |
| "epoch": 10.736703006276842, |
| "grad_norm": 0.6769737601280212, |
| "learning_rate": 7.134554442119689e-06, |
| "loss": 0.1145, |
| "step": 65000 |
| }, |
| { |
| "epoch": 10.736703006276842, |
| "eval_cer": 0.04139004859352543, |
| "eval_loss": 0.12175419926643372, |
| "eval_runtime": 48.7387, |
| "eval_samples_per_second": 34.695, |
| "eval_steps_per_second": 8.679, |
| "eval_wer": 0.2271279585336914, |
| "step": 65000 |
| }, |
| { |
| "epoch": 10.753221010901884, |
| "grad_norm": 0.7679712772369385, |
| "learning_rate": 7.1284366626289326e-06, |
| "loss": 0.1201, |
| "step": 65100 |
| }, |
| { |
| "epoch": 10.769739015526925, |
| "grad_norm": 0.8149623274803162, |
| "learning_rate": 7.122318883138176e-06, |
| "loss": 0.1142, |
| "step": 65200 |
| }, |
| { |
| "epoch": 10.786257020151966, |
| "grad_norm": 0.6343280076980591, |
| "learning_rate": 7.116201103647421e-06, |
| "loss": 0.1392, |
| "step": 65300 |
| }, |
| { |
| "epoch": 10.802775024777008, |
| "grad_norm": 0.759663462638855, |
| "learning_rate": 7.110083324156665e-06, |
| "loss": 0.1168, |
| "step": 65400 |
| }, |
| { |
| "epoch": 10.819293029402049, |
| "grad_norm": 0.9799485206604004, |
| "learning_rate": 7.103965544665908e-06, |
| "loss": 0.122, |
| "step": 65500 |
| }, |
| { |
| "epoch": 10.83581103402709, |
| "grad_norm": 0.6957365274429321, |
| "learning_rate": 7.097847765175152e-06, |
| "loss": 0.1152, |
| "step": 65600 |
| }, |
| { |
| "epoch": 10.852329038652131, |
| "grad_norm": 0.8867782950401306, |
| "learning_rate": 7.091729985684397e-06, |
| "loss": 0.1175, |
| "step": 65700 |
| }, |
| { |
| "epoch": 10.868847043277173, |
| "grad_norm": 0.8960587382316589, |
| "learning_rate": 7.08561220619364e-06, |
| "loss": 0.1217, |
| "step": 65800 |
| }, |
| { |
| "epoch": 10.885365047902214, |
| "grad_norm": 0.6762132048606873, |
| "learning_rate": 7.0794944267028845e-06, |
| "loss": 0.1223, |
| "step": 65900 |
| }, |
| { |
| "epoch": 10.901883052527255, |
| "grad_norm": 0.835166335105896, |
| "learning_rate": 7.073376647212128e-06, |
| "loss": 0.119, |
| "step": 66000 |
| }, |
| { |
| "epoch": 10.901883052527255, |
| "eval_cer": 0.0415611525562932, |
| "eval_loss": 0.11837079375982285, |
| "eval_runtime": 48.8246, |
| "eval_samples_per_second": 34.634, |
| "eval_steps_per_second": 8.664, |
| "eval_wer": 0.22619121963404734, |
| "step": 66000 |
| }, |
| { |
| "epoch": 10.918401057152296, |
| "grad_norm": 0.6607236862182617, |
| "learning_rate": 7.067258867721373e-06, |
| "loss": 0.1195, |
| "step": 66100 |
| }, |
| { |
| "epoch": 10.934919061777338, |
| "grad_norm": 0.7014028429985046, |
| "learning_rate": 7.061141088230617e-06, |
| "loss": 0.1233, |
| "step": 66200 |
| }, |
| { |
| "epoch": 10.951437066402379, |
| "grad_norm": 0.8678550124168396, |
| "learning_rate": 7.05502330873986e-06, |
| "loss": 0.127, |
| "step": 66300 |
| }, |
| { |
| "epoch": 10.96795507102742, |
| "grad_norm": 0.676571786403656, |
| "learning_rate": 7.048905529249105e-06, |
| "loss": 0.1171, |
| "step": 66400 |
| }, |
| { |
| "epoch": 10.984473075652462, |
| "grad_norm": 0.8348824381828308, |
| "learning_rate": 7.042787749758349e-06, |
| "loss": 0.1209, |
| "step": 66500 |
| }, |
| { |
| "epoch": 11.000991080277503, |
| "grad_norm": 1.0055019855499268, |
| "learning_rate": 7.036669970267592e-06, |
| "loss": 0.1217, |
| "step": 66600 |
| }, |
| { |
| "epoch": 11.017509084902544, |
| "grad_norm": 0.8197912573814392, |
| "learning_rate": 7.030552190776836e-06, |
| "loss": 0.12, |
| "step": 66700 |
| }, |
| { |
| "epoch": 11.034027089527585, |
| "grad_norm": 0.8832284212112427, |
| "learning_rate": 7.024434411286081e-06, |
| "loss": 0.1175, |
| "step": 66800 |
| }, |
| { |
| "epoch": 11.050545094152627, |
| "grad_norm": 0.7222535014152527, |
| "learning_rate": 7.018316631795324e-06, |
| "loss": 0.1138, |
| "step": 66900 |
| }, |
| { |
| "epoch": 11.067063098777668, |
| "grad_norm": 0.6838215589523315, |
| "learning_rate": 7.012198852304568e-06, |
| "loss": 0.1155, |
| "step": 67000 |
| }, |
| { |
| "epoch": 11.067063098777668, |
| "eval_cer": 0.04099650947915954, |
| "eval_loss": 0.11776668578386307, |
| "eval_runtime": 48.4053, |
| "eval_samples_per_second": 34.934, |
| "eval_steps_per_second": 8.739, |
| "eval_wer": 0.22519203147442704, |
| "step": 67000 |
| }, |
| { |
| "epoch": 11.08358110340271, |
| "grad_norm": 0.8637756109237671, |
| "learning_rate": 7.006081072813812e-06, |
| "loss": 0.1202, |
| "step": 67100 |
| }, |
| { |
| "epoch": 11.10009910802775, |
| "grad_norm": 0.9718282222747803, |
| "learning_rate": 6.999963293323056e-06, |
| "loss": 0.1203, |
| "step": 67200 |
| }, |
| { |
| "epoch": 11.116617112652792, |
| "grad_norm": 0.531356930732727, |
| "learning_rate": 6.993845513832301e-06, |
| "loss": 0.1128, |
| "step": 67300 |
| }, |
| { |
| "epoch": 11.133135117277833, |
| "grad_norm": 0.5169577598571777, |
| "learning_rate": 6.987727734341544e-06, |
| "loss": 0.1232, |
| "step": 67400 |
| }, |
| { |
| "epoch": 11.149653121902874, |
| "grad_norm": 0.634283185005188, |
| "learning_rate": 6.9816099548507875e-06, |
| "loss": 0.1125, |
| "step": 67500 |
| }, |
| { |
| "epoch": 11.166171126527916, |
| "grad_norm": 0.7183799743652344, |
| "learning_rate": 6.975492175360033e-06, |
| "loss": 0.1187, |
| "step": 67600 |
| }, |
| { |
| "epoch": 11.182689131152957, |
| "grad_norm": 0.5369941592216492, |
| "learning_rate": 6.969374395869276e-06, |
| "loss": 0.1161, |
| "step": 67700 |
| }, |
| { |
| "epoch": 11.199207135777998, |
| "grad_norm": 0.5835019946098328, |
| "learning_rate": 6.96325661637852e-06, |
| "loss": 0.1137, |
| "step": 67800 |
| }, |
| { |
| "epoch": 11.21572514040304, |
| "grad_norm": 0.7346104383468628, |
| "learning_rate": 6.957138836887763e-06, |
| "loss": 0.1135, |
| "step": 67900 |
| }, |
| { |
| "epoch": 11.23224314502808, |
| "grad_norm": 0.6166725754737854, |
| "learning_rate": 6.951021057397008e-06, |
| "loss": 0.1224, |
| "step": 68000 |
| }, |
| { |
| "epoch": 11.23224314502808, |
| "eval_cer": 0.040833960714530146, |
| "eval_loss": 0.11831438541412354, |
| "eval_runtime": 48.7233, |
| "eval_samples_per_second": 34.706, |
| "eval_steps_per_second": 8.682, |
| "eval_wer": 0.22494223443452194, |
| "step": 68000 |
| }, |
| { |
| "epoch": 11.248761149653122, |
| "grad_norm": 0.9099162220954895, |
| "learning_rate": 6.944903277906252e-06, |
| "loss": 0.1248, |
| "step": 68100 |
| }, |
| { |
| "epoch": 11.265279154278163, |
| "grad_norm": 0.5954209566116333, |
| "learning_rate": 6.938785498415496e-06, |
| "loss": 0.1184, |
| "step": 68200 |
| }, |
| { |
| "epoch": 11.281797158903204, |
| "grad_norm": 0.765312910079956, |
| "learning_rate": 6.9326677189247395e-06, |
| "loss": 0.1422, |
| "step": 68300 |
| }, |
| { |
| "epoch": 11.298315163528246, |
| "grad_norm": 0.9866732954978943, |
| "learning_rate": 6.926549939433984e-06, |
| "loss": 0.1213, |
| "step": 68400 |
| }, |
| { |
| "epoch": 11.314833168153287, |
| "grad_norm": 0.6962186694145203, |
| "learning_rate": 6.920432159943228e-06, |
| "loss": 0.1178, |
| "step": 68500 |
| }, |
| { |
| "epoch": 11.331351172778328, |
| "grad_norm": 0.547361433506012, |
| "learning_rate": 6.9143143804524715e-06, |
| "loss": 0.1215, |
| "step": 68600 |
| }, |
| { |
| "epoch": 11.34786917740337, |
| "grad_norm": 0.5408198833465576, |
| "learning_rate": 6.908196600961715e-06, |
| "loss": 0.122, |
| "step": 68700 |
| }, |
| { |
| "epoch": 11.36438718202841, |
| "grad_norm": 0.7383239269256592, |
| "learning_rate": 6.90207882147096e-06, |
| "loss": 0.1232, |
| "step": 68800 |
| }, |
| { |
| "epoch": 11.380905186653452, |
| "grad_norm": 0.7225533127784729, |
| "learning_rate": 6.895961041980204e-06, |
| "loss": 0.1773, |
| "step": 68900 |
| }, |
| { |
| "epoch": 11.397423191278493, |
| "grad_norm": 0.7376521825790405, |
| "learning_rate": 6.889843262489447e-06, |
| "loss": 0.1146, |
| "step": 69000 |
| }, |
| { |
| "epoch": 11.397423191278493, |
| "eval_cer": 0.04105639586612826, |
| "eval_loss": 0.11837118864059448, |
| "eval_runtime": 48.8385, |
| "eval_samples_per_second": 34.624, |
| "eval_steps_per_second": 8.661, |
| "eval_wer": 0.22556672703428465, |
| "step": 69000 |
| }, |
| { |
| "epoch": 11.413941195903535, |
| "grad_norm": 0.6268288493156433, |
| "learning_rate": 6.883725482998691e-06, |
| "loss": 0.141, |
| "step": 69100 |
| }, |
| { |
| "epoch": 11.430459200528576, |
| "grad_norm": 0.9457260370254517, |
| "learning_rate": 6.877607703507936e-06, |
| "loss": 0.1174, |
| "step": 69200 |
| }, |
| { |
| "epoch": 11.446977205153617, |
| "grad_norm": 0.8935351371765137, |
| "learning_rate": 6.871489924017179e-06, |
| "loss": 0.1109, |
| "step": 69300 |
| }, |
| { |
| "epoch": 11.463495209778658, |
| "grad_norm": 0.6600612998008728, |
| "learning_rate": 6.8653721445264235e-06, |
| "loss": 0.1285, |
| "step": 69400 |
| }, |
| { |
| "epoch": 11.4800132144037, |
| "grad_norm": 0.6968724727630615, |
| "learning_rate": 6.859254365035667e-06, |
| "loss": 0.1327, |
| "step": 69500 |
| }, |
| { |
| "epoch": 11.496531219028741, |
| "grad_norm": 0.738458514213562, |
| "learning_rate": 6.853136585544912e-06, |
| "loss": 0.1089, |
| "step": 69600 |
| }, |
| { |
| "epoch": 11.513049223653782, |
| "grad_norm": 0.8320337533950806, |
| "learning_rate": 6.8470188060541556e-06, |
| "loss": 0.1131, |
| "step": 69700 |
| }, |
| { |
| "epoch": 11.529567228278824, |
| "grad_norm": 0.6417104005813599, |
| "learning_rate": 6.840901026563399e-06, |
| "loss": 0.1219, |
| "step": 69800 |
| }, |
| { |
| "epoch": 11.546085232903865, |
| "grad_norm": 0.7197741866111755, |
| "learning_rate": 6.8347832470726425e-06, |
| "loss": 0.1165, |
| "step": 69900 |
| }, |
| { |
| "epoch": 11.562603237528906, |
| "grad_norm": 0.8726572394371033, |
| "learning_rate": 6.828665467581888e-06, |
| "loss": 0.1137, |
| "step": 70000 |
| }, |
| { |
| "epoch": 11.562603237528906, |
| "eval_cer": 0.04100506467729793, |
| "eval_loss": 0.11831272393465042, |
| "eval_runtime": 48.5609, |
| "eval_samples_per_second": 34.822, |
| "eval_steps_per_second": 8.711, |
| "eval_wer": 0.22487978517454568, |
| "step": 70000 |
| }, |
| { |
| "epoch": 11.579121242153947, |
| "grad_norm": 0.6094586849212646, |
| "learning_rate": 6.822547688091131e-06, |
| "loss": 0.1233, |
| "step": 70100 |
| }, |
| { |
| "epoch": 11.595639246778989, |
| "grad_norm": 0.7053238749504089, |
| "learning_rate": 6.8164299086003746e-06, |
| "loss": 0.1207, |
| "step": 70200 |
| }, |
| { |
| "epoch": 11.61215725140403, |
| "grad_norm": 0.5145518183708191, |
| "learning_rate": 6.810312129109619e-06, |
| "loss": 0.1181, |
| "step": 70300 |
| }, |
| { |
| "epoch": 11.628675256029071, |
| "grad_norm": 1.1360536813735962, |
| "learning_rate": 6.804194349618863e-06, |
| "loss": 0.1227, |
| "step": 70400 |
| }, |
| { |
| "epoch": 11.645193260654112, |
| "grad_norm": 0.7354953289031982, |
| "learning_rate": 6.7980765701281075e-06, |
| "loss": 0.1151, |
| "step": 70500 |
| }, |
| { |
| "epoch": 11.661711265279154, |
| "grad_norm": 0.6327475309371948, |
| "learning_rate": 6.791958790637351e-06, |
| "loss": 0.1132, |
| "step": 70600 |
| }, |
| { |
| "epoch": 11.678229269904195, |
| "grad_norm": 0.7320681214332581, |
| "learning_rate": 6.785841011146594e-06, |
| "loss": 0.116, |
| "step": 70700 |
| }, |
| { |
| "epoch": 11.694747274529236, |
| "grad_norm": 0.7258247137069702, |
| "learning_rate": 6.7797232316558396e-06, |
| "loss": 0.1236, |
| "step": 70800 |
| }, |
| { |
| "epoch": 11.711265279154278, |
| "grad_norm": 0.7472134232521057, |
| "learning_rate": 6.773605452165083e-06, |
| "loss": 0.1148, |
| "step": 70900 |
| }, |
| { |
| "epoch": 11.727783283779319, |
| "grad_norm": 0.9377081394195557, |
| "learning_rate": 6.7674876726743265e-06, |
| "loss": 0.1173, |
| "step": 71000 |
| }, |
| { |
| "epoch": 11.727783283779319, |
| "eval_cer": 0.04082540551639176, |
| "eval_loss": 0.11736804246902466, |
| "eval_runtime": 48.644, |
| "eval_samples_per_second": 34.763, |
| "eval_steps_per_second": 8.696, |
| "eval_wer": 0.22294385811528133, |
| "step": 71000 |
| }, |
| { |
| "epoch": 11.74430128840436, |
| "grad_norm": 0.638346791267395, |
| "learning_rate": 6.76136989318357e-06, |
| "loss": 0.1333, |
| "step": 71100 |
| }, |
| { |
| "epoch": 11.760819293029401, |
| "grad_norm": 0.7357622981071472, |
| "learning_rate": 6.755252113692815e-06, |
| "loss": 0.1136, |
| "step": 71200 |
| }, |
| { |
| "epoch": 11.777337297654443, |
| "grad_norm": 0.801539957523346, |
| "learning_rate": 6.7491343342020586e-06, |
| "loss": 0.1162, |
| "step": 71300 |
| }, |
| { |
| "epoch": 11.793855302279484, |
| "grad_norm": 0.869429886341095, |
| "learning_rate": 6.743016554711302e-06, |
| "loss": 0.112, |
| "step": 71400 |
| }, |
| { |
| "epoch": 11.810373306904525, |
| "grad_norm": 0.649721622467041, |
| "learning_rate": 6.736898775220546e-06, |
| "loss": 0.1115, |
| "step": 71500 |
| }, |
| { |
| "epoch": 11.826891311529566, |
| "grad_norm": 0.8566005229949951, |
| "learning_rate": 6.730780995729791e-06, |
| "loss": 0.1178, |
| "step": 71600 |
| }, |
| { |
| "epoch": 11.84340931615461, |
| "grad_norm": 0.8232606649398804, |
| "learning_rate": 6.724663216239035e-06, |
| "loss": 0.1267, |
| "step": 71700 |
| }, |
| { |
| "epoch": 11.85992732077965, |
| "grad_norm": 0.7500156760215759, |
| "learning_rate": 6.7185454367482784e-06, |
| "loss": 0.1163, |
| "step": 71800 |
| }, |
| { |
| "epoch": 11.876445325404692, |
| "grad_norm": 0.635427713394165, |
| "learning_rate": 6.712427657257522e-06, |
| "loss": 0.1555, |
| "step": 71900 |
| }, |
| { |
| "epoch": 11.892963330029733, |
| "grad_norm": 0.807422399520874, |
| "learning_rate": 6.706309877766767e-06, |
| "loss": 0.1428, |
| "step": 72000 |
| }, |
| { |
| "epoch": 11.892963330029733, |
| "eval_cer": 0.04042331120388748, |
| "eval_loss": 0.11749948561191559, |
| "eval_runtime": 48.902, |
| "eval_samples_per_second": 34.579, |
| "eval_steps_per_second": 8.65, |
| "eval_wer": 0.2228189595953288, |
| "step": 72000 |
| }, |
| { |
| "epoch": 11.909481334654775, |
| "grad_norm": 0.6549407839775085, |
| "learning_rate": 6.7001920982760105e-06, |
| "loss": 0.1114, |
| "step": 72100 |
| }, |
| { |
| "epoch": 11.925999339279816, |
| "grad_norm": 1.0132852792739868, |
| "learning_rate": 6.694074318785254e-06, |
| "loss": 0.1153, |
| "step": 72200 |
| }, |
| { |
| "epoch": 11.942517343904857, |
| "grad_norm": 0.5365763306617737, |
| "learning_rate": 6.6879565392944974e-06, |
| "loss": 0.1109, |
| "step": 72300 |
| }, |
| { |
| "epoch": 11.959035348529898, |
| "grad_norm": 0.6037495732307434, |
| "learning_rate": 6.681838759803743e-06, |
| "loss": 0.1144, |
| "step": 72400 |
| }, |
| { |
| "epoch": 11.97555335315494, |
| "grad_norm": 0.4775562286376953, |
| "learning_rate": 6.675720980312986e-06, |
| "loss": 0.1178, |
| "step": 72500 |
| }, |
| { |
| "epoch": 11.992071357779981, |
| "grad_norm": 0.863073468208313, |
| "learning_rate": 6.66960320082223e-06, |
| "loss": 0.1168, |
| "step": 72600 |
| }, |
| { |
| "epoch": 12.008589362405022, |
| "grad_norm": 2.4738621711730957, |
| "learning_rate": 6.663485421331474e-06, |
| "loss": 0.1322, |
| "step": 72700 |
| }, |
| { |
| "epoch": 12.025107367030063, |
| "grad_norm": 0.6702748537063599, |
| "learning_rate": 6.657367641840718e-06, |
| "loss": 0.1702, |
| "step": 72800 |
| }, |
| { |
| "epoch": 12.041625371655105, |
| "grad_norm": 0.9668029546737671, |
| "learning_rate": 6.6512498623499624e-06, |
| "loss": 0.1358, |
| "step": 72900 |
| }, |
| { |
| "epoch": 12.058143376280146, |
| "grad_norm": 0.6446594595909119, |
| "learning_rate": 6.645132082859206e-06, |
| "loss": 0.1128, |
| "step": 73000 |
| }, |
| { |
| "epoch": 12.058143376280146, |
| "eval_cer": 0.040833960714530146, |
| "eval_loss": 0.11493762582540512, |
| "eval_runtime": 48.8939, |
| "eval_samples_per_second": 34.585, |
| "eval_steps_per_second": 8.651, |
| "eval_wer": 0.22231936551551865, |
| "step": 73000 |
| }, |
| { |
| "epoch": 12.074661380905187, |
| "grad_norm": 0.7115280032157898, |
| "learning_rate": 6.639014303368449e-06, |
| "loss": 0.1126, |
| "step": 73100 |
| }, |
| { |
| "epoch": 12.091179385530229, |
| "grad_norm": 0.6623009443283081, |
| "learning_rate": 6.6328965238776945e-06, |
| "loss": 0.1187, |
| "step": 73200 |
| }, |
| { |
| "epoch": 12.10769739015527, |
| "grad_norm": 0.7320263981819153, |
| "learning_rate": 6.626778744386938e-06, |
| "loss": 0.11, |
| "step": 73300 |
| }, |
| { |
| "epoch": 12.124215394780311, |
| "grad_norm": 0.7504459619522095, |
| "learning_rate": 6.6206609648961814e-06, |
| "loss": 0.1128, |
| "step": 73400 |
| }, |
| { |
| "epoch": 12.140733399405352, |
| "grad_norm": 0.6281275749206543, |
| "learning_rate": 6.614543185405426e-06, |
| "loss": 0.1169, |
| "step": 73500 |
| }, |
| { |
| "epoch": 12.157251404030394, |
| "grad_norm": 0.6658099889755249, |
| "learning_rate": 6.60842540591467e-06, |
| "loss": 0.1093, |
| "step": 73600 |
| }, |
| { |
| "epoch": 12.173769408655435, |
| "grad_norm": 0.8157078623771667, |
| "learning_rate": 6.6023076264239135e-06, |
| "loss": 0.1128, |
| "step": 73700 |
| }, |
| { |
| "epoch": 12.190287413280476, |
| "grad_norm": 0.7392610907554626, |
| "learning_rate": 6.596189846933158e-06, |
| "loss": 0.1168, |
| "step": 73800 |
| }, |
| { |
| "epoch": 12.206805417905517, |
| "grad_norm": 0.5370469689369202, |
| "learning_rate": 6.590072067442401e-06, |
| "loss": 0.115, |
| "step": 73900 |
| }, |
| { |
| "epoch": 12.223323422530559, |
| "grad_norm": 0.7587655782699585, |
| "learning_rate": 6.5839542879516465e-06, |
| "loss": 0.1063, |
| "step": 74000 |
| }, |
| { |
| "epoch": 12.223323422530559, |
| "eval_cer": 0.04020087605228937, |
| "eval_loss": 0.1159936785697937, |
| "eval_runtime": 48.8724, |
| "eval_samples_per_second": 34.6, |
| "eval_steps_per_second": 8.655, |
| "eval_wer": 0.22044588771623055, |
| "step": 74000 |
| }, |
| { |
| "epoch": 12.2398414271556, |
| "grad_norm": 0.6018242835998535, |
| "learning_rate": 6.57783650846089e-06, |
| "loss": 0.1118, |
| "step": 74100 |
| }, |
| { |
| "epoch": 12.256359431780641, |
| "grad_norm": 0.6748114228248596, |
| "learning_rate": 6.571718728970133e-06, |
| "loss": 0.1094, |
| "step": 74200 |
| }, |
| { |
| "epoch": 12.272877436405683, |
| "grad_norm": 0.6757166981697083, |
| "learning_rate": 6.565600949479377e-06, |
| "loss": 0.1097, |
| "step": 74300 |
| }, |
| { |
| "epoch": 12.289395441030724, |
| "grad_norm": 0.948271632194519, |
| "learning_rate": 6.559483169988622e-06, |
| "loss": 0.1186, |
| "step": 74400 |
| }, |
| { |
| "epoch": 12.305913445655765, |
| "grad_norm": 0.6468844413757324, |
| "learning_rate": 6.5533653904978655e-06, |
| "loss": 0.1206, |
| "step": 74500 |
| }, |
| { |
| "epoch": 12.322431450280806, |
| "grad_norm": 0.6049870848655701, |
| "learning_rate": 6.547247611007109e-06, |
| "loss": 0.1672, |
| "step": 74600 |
| }, |
| { |
| "epoch": 12.338949454905848, |
| "grad_norm": 1.080959677696228, |
| "learning_rate": 6.541129831516353e-06, |
| "loss": 0.136, |
| "step": 74700 |
| }, |
| { |
| "epoch": 12.355467459530889, |
| "grad_norm": 0.7225471138954163, |
| "learning_rate": 6.5350120520255975e-06, |
| "loss": 0.1095, |
| "step": 74800 |
| }, |
| { |
| "epoch": 12.37198546415593, |
| "grad_norm": 0.6947051286697388, |
| "learning_rate": 6.528894272534841e-06, |
| "loss": 0.1354, |
| "step": 74900 |
| }, |
| { |
| "epoch": 12.388503468780971, |
| "grad_norm": 0.6471466422080994, |
| "learning_rate": 6.522776493044085e-06, |
| "loss": 0.1051, |
| "step": 75000 |
| }, |
| { |
| "epoch": 12.388503468780971, |
| "eval_cer": 0.04020943125042776, |
| "eval_loss": 0.11492911726236343, |
| "eval_runtime": 48.7391, |
| "eval_samples_per_second": 34.695, |
| "eval_steps_per_second": 8.679, |
| "eval_wer": 0.2200087428963967, |
| "step": 75000 |
| }, |
| { |
| "epoch": 12.405021473406013, |
| "grad_norm": 0.6231672763824463, |
| "learning_rate": 6.516658713553329e-06, |
| "loss": 0.1331, |
| "step": 75100 |
| }, |
| { |
| "epoch": 12.421539478031054, |
| "grad_norm": 0.49103644490242004, |
| "learning_rate": 6.510540934062574e-06, |
| "loss": 0.11, |
| "step": 75200 |
| }, |
| { |
| "epoch": 12.438057482656095, |
| "grad_norm": 0.7189831733703613, |
| "learning_rate": 6.504423154571817e-06, |
| "loss": 0.115, |
| "step": 75300 |
| }, |
| { |
| "epoch": 12.454575487281137, |
| "grad_norm": 0.5822007060050964, |
| "learning_rate": 6.498305375081061e-06, |
| "loss": 0.112, |
| "step": 75400 |
| }, |
| { |
| "epoch": 12.471093491906178, |
| "grad_norm": 0.6000872254371643, |
| "learning_rate": 6.492187595590304e-06, |
| "loss": 0.1088, |
| "step": 75500 |
| }, |
| { |
| "epoch": 12.487611496531219, |
| "grad_norm": 0.6508600115776062, |
| "learning_rate": 6.4860698160995495e-06, |
| "loss": 0.1111, |
| "step": 75600 |
| }, |
| { |
| "epoch": 12.50412950115626, |
| "grad_norm": 0.6574178338050842, |
| "learning_rate": 6.479952036608793e-06, |
| "loss": 0.1095, |
| "step": 75700 |
| }, |
| { |
| "epoch": 12.520647505781302, |
| "grad_norm": 0.845613956451416, |
| "learning_rate": 6.473834257118036e-06, |
| "loss": 0.1139, |
| "step": 75800 |
| }, |
| { |
| "epoch": 12.537165510406343, |
| "grad_norm": 0.5848095417022705, |
| "learning_rate": 6.467716477627281e-06, |
| "loss": 0.1147, |
| "step": 75900 |
| }, |
| { |
| "epoch": 12.553683515031384, |
| "grad_norm": 0.9496851563453674, |
| "learning_rate": 6.461598698136525e-06, |
| "loss": 0.1128, |
| "step": 76000 |
| }, |
| { |
| "epoch": 12.553683515031384, |
| "eval_cer": 0.040106768872767096, |
| "eval_loss": 0.112928107380867, |
| "eval_runtime": 48.6721, |
| "eval_samples_per_second": 34.743, |
| "eval_steps_per_second": 8.691, |
| "eval_wer": 0.22025853993630176, |
| "step": 76000 |
| }, |
| { |
| "epoch": 12.570201519656425, |
| "grad_norm": 0.7724167108535767, |
| "learning_rate": 6.455480918645769e-06, |
| "loss": 0.1156, |
| "step": 76100 |
| }, |
| { |
| "epoch": 12.586719524281467, |
| "grad_norm": 0.753487765789032, |
| "learning_rate": 6.449363139155013e-06, |
| "loss": 0.1216, |
| "step": 76200 |
| }, |
| { |
| "epoch": 12.603237528906508, |
| "grad_norm": 0.7323099970817566, |
| "learning_rate": 6.443245359664256e-06, |
| "loss": 0.1163, |
| "step": 76300 |
| }, |
| { |
| "epoch": 12.61975553353155, |
| "grad_norm": 0.5276266932487488, |
| "learning_rate": 6.437127580173501e-06, |
| "loss": 0.1261, |
| "step": 76400 |
| }, |
| { |
| "epoch": 12.63627353815659, |
| "grad_norm": 0.7041454315185547, |
| "learning_rate": 6.431009800682745e-06, |
| "loss": 0.1097, |
| "step": 76500 |
| }, |
| { |
| "epoch": 12.652791542781632, |
| "grad_norm": 0.5830830931663513, |
| "learning_rate": 6.424892021191988e-06, |
| "loss": 0.1053, |
| "step": 76600 |
| }, |
| { |
| "epoch": 12.669309547406673, |
| "grad_norm": 0.8507035970687866, |
| "learning_rate": 6.418774241701232e-06, |
| "loss": 0.1157, |
| "step": 76700 |
| }, |
| { |
| "epoch": 12.685827552031714, |
| "grad_norm": 0.7934384942054749, |
| "learning_rate": 6.412656462210477e-06, |
| "loss": 0.1139, |
| "step": 76800 |
| }, |
| { |
| "epoch": 12.702345556656756, |
| "grad_norm": 0.8126075863838196, |
| "learning_rate": 6.40653868271972e-06, |
| "loss": 0.1382, |
| "step": 76900 |
| }, |
| { |
| "epoch": 12.718863561281797, |
| "grad_norm": 0.7506862282752991, |
| "learning_rate": 6.400420903228965e-06, |
| "loss": 0.1108, |
| "step": 77000 |
| }, |
| { |
| "epoch": 12.718863561281797, |
| "eval_cer": 0.04023509684484293, |
| "eval_loss": 0.11516769230365753, |
| "eval_runtime": 48.6282, |
| "eval_samples_per_second": 34.774, |
| "eval_steps_per_second": 8.699, |
| "eval_wer": 0.2200087428963967, |
| "step": 77000 |
| }, |
| { |
| "epoch": 12.735381565906838, |
| "grad_norm": 0.6928473114967346, |
| "learning_rate": 6.394303123738208e-06, |
| "loss": 0.1118, |
| "step": 77100 |
| }, |
| { |
| "epoch": 12.75189957053188, |
| "grad_norm": 0.7494087815284729, |
| "learning_rate": 6.3881853442474525e-06, |
| "loss": 0.1152, |
| "step": 77200 |
| }, |
| { |
| "epoch": 12.76841757515692, |
| "grad_norm": 0.7207498550415039, |
| "learning_rate": 6.382067564756697e-06, |
| "loss": 0.1074, |
| "step": 77300 |
| }, |
| { |
| "epoch": 12.784935579781962, |
| "grad_norm": 0.6607386469841003, |
| "learning_rate": 6.37594978526594e-06, |
| "loss": 0.1102, |
| "step": 77400 |
| }, |
| { |
| "epoch": 12.801453584407003, |
| "grad_norm": 0.5259993076324463, |
| "learning_rate": 6.369832005775184e-06, |
| "loss": 0.1067, |
| "step": 77500 |
| }, |
| { |
| "epoch": 12.817971589032044, |
| "grad_norm": 0.7667635679244995, |
| "learning_rate": 6.363714226284429e-06, |
| "loss": 0.1079, |
| "step": 77600 |
| }, |
| { |
| "epoch": 12.834489593657086, |
| "grad_norm": 0.676259458065033, |
| "learning_rate": 6.357596446793672e-06, |
| "loss": 0.1323, |
| "step": 77700 |
| }, |
| { |
| "epoch": 12.851007598282127, |
| "grad_norm": 0.6613221168518066, |
| "learning_rate": 6.351478667302916e-06, |
| "loss": 0.1104, |
| "step": 77800 |
| }, |
| { |
| "epoch": 12.867525602907168, |
| "grad_norm": 0.8658110499382019, |
| "learning_rate": 6.34536088781216e-06, |
| "loss": 0.1087, |
| "step": 77900 |
| }, |
| { |
| "epoch": 12.88404360753221, |
| "grad_norm": 0.5932702422142029, |
| "learning_rate": 6.3392431083214044e-06, |
| "loss": 0.1184, |
| "step": 78000 |
| }, |
| { |
| "epoch": 12.88404360753221, |
| "eval_cer": 0.04030353842995004, |
| "eval_loss": 0.11411629617214203, |
| "eval_runtime": 48.5234, |
| "eval_samples_per_second": 34.849, |
| "eval_steps_per_second": 8.717, |
| "eval_wer": 0.22019609067632548, |
| "step": 78000 |
| }, |
| { |
| "epoch": 12.90056161215725, |
| "grad_norm": 0.7417730689048767, |
| "learning_rate": 6.333125328830648e-06, |
| "loss": 0.1262, |
| "step": 78100 |
| }, |
| { |
| "epoch": 12.917079616782292, |
| "grad_norm": 0.625182032585144, |
| "learning_rate": 6.327007549339892e-06, |
| "loss": 0.1114, |
| "step": 78200 |
| }, |
| { |
| "epoch": 12.933597621407333, |
| "grad_norm": 0.9503306746482849, |
| "learning_rate": 6.320889769849136e-06, |
| "loss": 0.109, |
| "step": 78300 |
| }, |
| { |
| "epoch": 12.950115626032375, |
| "grad_norm": 0.4723988473415375, |
| "learning_rate": 6.314771990358381e-06, |
| "loss": 0.1346, |
| "step": 78400 |
| }, |
| { |
| "epoch": 12.966633630657416, |
| "grad_norm": 0.5400856137275696, |
| "learning_rate": 6.308654210867624e-06, |
| "loss": 0.1162, |
| "step": 78500 |
| }, |
| { |
| "epoch": 12.983151635282457, |
| "grad_norm": 0.9495701789855957, |
| "learning_rate": 6.302536431376868e-06, |
| "loss": 0.1116, |
| "step": 78600 |
| }, |
| { |
| "epoch": 12.999669639907498, |
| "grad_norm": 0.5586131811141968, |
| "learning_rate": 6.296418651886111e-06, |
| "loss": 0.1093, |
| "step": 78700 |
| }, |
| { |
| "epoch": 13.01618764453254, |
| "grad_norm": 0.7302865386009216, |
| "learning_rate": 6.290300872395356e-06, |
| "loss": 0.1095, |
| "step": 78800 |
| }, |
| { |
| "epoch": 13.032705649157581, |
| "grad_norm": 0.726801872253418, |
| "learning_rate": 6.2841830929046e-06, |
| "loss": 0.1144, |
| "step": 78900 |
| }, |
| { |
| "epoch": 13.049223653782622, |
| "grad_norm": 0.6335176825523376, |
| "learning_rate": 6.278065313413843e-06, |
| "loss": 0.1099, |
| "step": 79000 |
| }, |
| { |
| "epoch": 13.049223653782622, |
| "eval_cer": 0.04023509684484293, |
| "eval_loss": 0.11533664911985397, |
| "eval_runtime": 48.8975, |
| "eval_samples_per_second": 34.583, |
| "eval_steps_per_second": 8.651, |
| "eval_wer": 0.2199462936364204, |
| "step": 79000 |
| }, |
| { |
| "epoch": 13.065741658407664, |
| "grad_norm": 0.7183253765106201, |
| "learning_rate": 6.271947533923088e-06, |
| "loss": 0.1015, |
| "step": 79100 |
| }, |
| { |
| "epoch": 13.082259663032705, |
| "grad_norm": 0.8460133075714111, |
| "learning_rate": 6.265829754432332e-06, |
| "loss": 0.1147, |
| "step": 79200 |
| }, |
| { |
| "epoch": 13.098777667657746, |
| "grad_norm": 0.9035709500312805, |
| "learning_rate": 6.259711974941575e-06, |
| "loss": 0.105, |
| "step": 79300 |
| }, |
| { |
| "epoch": 13.115295672282787, |
| "grad_norm": 1.1149568557739258, |
| "learning_rate": 6.25359419545082e-06, |
| "loss": 0.1173, |
| "step": 79400 |
| }, |
| { |
| "epoch": 13.131813676907829, |
| "grad_norm": 0.746825635433197, |
| "learning_rate": 6.247476415960063e-06, |
| "loss": 0.1103, |
| "step": 79500 |
| }, |
| { |
| "epoch": 13.148331681532872, |
| "grad_norm": 0.5890305638313293, |
| "learning_rate": 6.241358636469308e-06, |
| "loss": 0.1075, |
| "step": 79600 |
| }, |
| { |
| "epoch": 13.164849686157913, |
| "grad_norm": 0.6706238985061646, |
| "learning_rate": 6.235240856978552e-06, |
| "loss": 0.1043, |
| "step": 79700 |
| }, |
| { |
| "epoch": 13.181367690782954, |
| "grad_norm": 0.7864231467247009, |
| "learning_rate": 6.229123077487795e-06, |
| "loss": 0.1105, |
| "step": 79800 |
| }, |
| { |
| "epoch": 13.197885695407995, |
| "grad_norm": 0.7406273484230042, |
| "learning_rate": 6.223005297997039e-06, |
| "loss": 0.1046, |
| "step": 79900 |
| }, |
| { |
| "epoch": 13.214403700033037, |
| "grad_norm": 0.7028843760490417, |
| "learning_rate": 6.216887518506284e-06, |
| "loss": 0.1119, |
| "step": 80000 |
| }, |
| { |
| "epoch": 13.214403700033037, |
| "eval_cer": 0.039867223324892204, |
| "eval_loss": 0.11367151141166687, |
| "eval_runtime": 48.9472, |
| "eval_samples_per_second": 34.547, |
| "eval_steps_per_second": 8.642, |
| "eval_wer": 0.21919690251670518, |
| "step": 80000 |
| }, |
| { |
| "epoch": 13.230921704658078, |
| "grad_norm": 0.6652178168296814, |
| "learning_rate": 6.210769739015527e-06, |
| "loss": 0.1118, |
| "step": 80100 |
| }, |
| { |
| "epoch": 13.24743970928312, |
| "grad_norm": 0.9752405285835266, |
| "learning_rate": 6.204651959524771e-06, |
| "loss": 0.1305, |
| "step": 80200 |
| }, |
| { |
| "epoch": 13.26395771390816, |
| "grad_norm": 0.6729234457015991, |
| "learning_rate": 6.198534180034015e-06, |
| "loss": 0.1102, |
| "step": 80300 |
| }, |
| { |
| "epoch": 13.280475718533202, |
| "grad_norm": 0.7551404237747192, |
| "learning_rate": 6.192416400543259e-06, |
| "loss": 0.1131, |
| "step": 80400 |
| }, |
| { |
| "epoch": 13.296993723158243, |
| "grad_norm": 0.5141217112541199, |
| "learning_rate": 6.186298621052504e-06, |
| "loss": 0.1041, |
| "step": 80500 |
| }, |
| { |
| "epoch": 13.313511727783284, |
| "grad_norm": 0.7362185716629028, |
| "learning_rate": 6.180180841561747e-06, |
| "loss": 0.1054, |
| "step": 80600 |
| }, |
| { |
| "epoch": 13.330029732408326, |
| "grad_norm": 0.6110237240791321, |
| "learning_rate": 6.174063062070991e-06, |
| "loss": 0.1067, |
| "step": 80700 |
| }, |
| { |
| "epoch": 13.346547737033367, |
| "grad_norm": 0.5987915992736816, |
| "learning_rate": 6.167945282580236e-06, |
| "loss": 0.1518, |
| "step": 80800 |
| }, |
| { |
| "epoch": 13.363065741658408, |
| "grad_norm": 0.7611739635467529, |
| "learning_rate": 6.161827503089479e-06, |
| "loss": 0.1036, |
| "step": 80900 |
| }, |
| { |
| "epoch": 13.37958374628345, |
| "grad_norm": 0.5500743389129639, |
| "learning_rate": 6.155709723598723e-06, |
| "loss": 0.1102, |
| "step": 81000 |
| }, |
| { |
| "epoch": 13.37958374628345, |
| "eval_cer": 0.039858668126753816, |
| "eval_loss": 0.11399171501398087, |
| "eval_runtime": 48.7532, |
| "eval_samples_per_second": 34.685, |
| "eval_steps_per_second": 8.676, |
| "eval_wer": 0.21869730843689503, |
| "step": 81000 |
| }, |
| { |
| "epoch": 13.39610175090849, |
| "grad_norm": 0.7734577059745789, |
| "learning_rate": 6.149591944107966e-06, |
| "loss": 0.109, |
| "step": 81100 |
| }, |
| { |
| "epoch": 13.412619755533532, |
| "grad_norm": 0.6689289808273315, |
| "learning_rate": 6.143474164617211e-06, |
| "loss": 0.1097, |
| "step": 81200 |
| }, |
| { |
| "epoch": 13.429137760158573, |
| "grad_norm": 0.7353644371032715, |
| "learning_rate": 6.137356385126455e-06, |
| "loss": 0.1084, |
| "step": 81300 |
| }, |
| { |
| "epoch": 13.445655764783615, |
| "grad_norm": 0.6356621384620667, |
| "learning_rate": 6.131238605635699e-06, |
| "loss": 0.115, |
| "step": 81400 |
| }, |
| { |
| "epoch": 13.462173769408656, |
| "grad_norm": 0.6484361290931702, |
| "learning_rate": 6.1251208261449426e-06, |
| "loss": 0.1125, |
| "step": 81500 |
| }, |
| { |
| "epoch": 13.478691774033697, |
| "grad_norm": 0.9929621815681458, |
| "learning_rate": 6.119003046654187e-06, |
| "loss": 0.1033, |
| "step": 81600 |
| }, |
| { |
| "epoch": 13.495209778658738, |
| "grad_norm": 0.7411353588104248, |
| "learning_rate": 6.112885267163431e-06, |
| "loss": 0.115, |
| "step": 81700 |
| }, |
| { |
| "epoch": 13.51172778328378, |
| "grad_norm": 0.7139526009559631, |
| "learning_rate": 6.106767487672675e-06, |
| "loss": 0.1023, |
| "step": 81800 |
| }, |
| { |
| "epoch": 13.528245787908821, |
| "grad_norm": 0.6597611904144287, |
| "learning_rate": 6.100649708181918e-06, |
| "loss": 0.1063, |
| "step": 81900 |
| }, |
| { |
| "epoch": 13.544763792533862, |
| "grad_norm": 0.8007270097732544, |
| "learning_rate": 6.094531928691163e-06, |
| "loss": 0.1086, |
| "step": 82000 |
| }, |
| { |
| "epoch": 13.544763792533862, |
| "eval_cer": 0.03977311614536993, |
| "eval_loss": 0.11309035122394562, |
| "eval_runtime": 48.6041, |
| "eval_samples_per_second": 34.791, |
| "eval_steps_per_second": 8.703, |
| "eval_wer": 0.2185724099169425, |
| "step": 82000 |
| }, |
| { |
| "epoch": 13.561281797158903, |
| "grad_norm": 0.8659864068031311, |
| "learning_rate": 6.088414149200407e-06, |
| "loss": 0.1084, |
| "step": 82100 |
| }, |
| { |
| "epoch": 13.577799801783945, |
| "grad_norm": 0.6871950030326843, |
| "learning_rate": 6.08229636970965e-06, |
| "loss": 0.1072, |
| "step": 82200 |
| }, |
| { |
| "epoch": 13.594317806408986, |
| "grad_norm": 0.5756420493125916, |
| "learning_rate": 6.0761785902188945e-06, |
| "loss": 0.1124, |
| "step": 82300 |
| }, |
| { |
| "epoch": 13.610835811034027, |
| "grad_norm": 1.0295737981796265, |
| "learning_rate": 6.070060810728139e-06, |
| "loss": 0.1162, |
| "step": 82400 |
| }, |
| { |
| "epoch": 13.627353815659069, |
| "grad_norm": 0.5129362940788269, |
| "learning_rate": 6.063943031237382e-06, |
| "loss": 0.1103, |
| "step": 82500 |
| }, |
| { |
| "epoch": 13.64387182028411, |
| "grad_norm": 0.7439867258071899, |
| "learning_rate": 6.0578252517466266e-06, |
| "loss": 0.1061, |
| "step": 82600 |
| }, |
| { |
| "epoch": 13.660389824909151, |
| "grad_norm": 0.4660612940788269, |
| "learning_rate": 6.05170747225587e-06, |
| "loss": 0.1143, |
| "step": 82700 |
| }, |
| { |
| "epoch": 13.676907829534192, |
| "grad_norm": 0.7765456438064575, |
| "learning_rate": 6.045589692765114e-06, |
| "loss": 0.1858, |
| "step": 82800 |
| }, |
| { |
| "epoch": 13.693425834159234, |
| "grad_norm": 0.793312132358551, |
| "learning_rate": 6.039471913274359e-06, |
| "loss": 0.1098, |
| "step": 82900 |
| }, |
| { |
| "epoch": 13.709943838784275, |
| "grad_norm": 0.6621662378311157, |
| "learning_rate": 6.033354133783602e-06, |
| "loss": 0.1151, |
| "step": 83000 |
| }, |
| { |
| "epoch": 13.709943838784275, |
| "eval_cer": 0.03952501539935665, |
| "eval_loss": 0.11215273290872574, |
| "eval_runtime": 49.1873, |
| "eval_samples_per_second": 34.379, |
| "eval_steps_per_second": 8.6, |
| "eval_wer": 0.21701117841753575, |
| "step": 83000 |
| }, |
| { |
| "epoch": 13.726461843409316, |
| "grad_norm": 0.6841396689414978, |
| "learning_rate": 6.0272363542928456e-06, |
| "loss": 0.1094, |
| "step": 83100 |
| }, |
| { |
| "epoch": 13.742979848034357, |
| "grad_norm": 0.7111786007881165, |
| "learning_rate": 6.021118574802091e-06, |
| "loss": 0.1072, |
| "step": 83200 |
| }, |
| { |
| "epoch": 13.759497852659399, |
| "grad_norm": 0.7815682291984558, |
| "learning_rate": 6.015000795311334e-06, |
| "loss": 0.1102, |
| "step": 83300 |
| }, |
| { |
| "epoch": 13.77601585728444, |
| "grad_norm": 0.8677568435668945, |
| "learning_rate": 6.008883015820578e-06, |
| "loss": 0.1062, |
| "step": 83400 |
| }, |
| { |
| "epoch": 13.792533861909481, |
| "grad_norm": 0.5680195689201355, |
| "learning_rate": 6.002765236329822e-06, |
| "loss": 0.106, |
| "step": 83500 |
| }, |
| { |
| "epoch": 13.809051866534523, |
| "grad_norm": 0.9129924178123474, |
| "learning_rate": 5.996647456839066e-06, |
| "loss": 0.0995, |
| "step": 83600 |
| }, |
| { |
| "epoch": 13.825569871159564, |
| "grad_norm": 0.662200927734375, |
| "learning_rate": 5.99052967734831e-06, |
| "loss": 0.1088, |
| "step": 83700 |
| }, |
| { |
| "epoch": 13.842087875784605, |
| "grad_norm": 0.887140691280365, |
| "learning_rate": 5.984411897857554e-06, |
| "loss": 0.1098, |
| "step": 83800 |
| }, |
| { |
| "epoch": 13.858605880409646, |
| "grad_norm": 0.9814369082450867, |
| "learning_rate": 5.978294118366798e-06, |
| "loss": 0.1068, |
| "step": 83900 |
| }, |
| { |
| "epoch": 13.875123885034688, |
| "grad_norm": 0.761234700679779, |
| "learning_rate": 5.972176338876043e-06, |
| "loss": 0.1033, |
| "step": 84000 |
| }, |
| { |
| "epoch": 13.875123885034688, |
| "eval_cer": 0.03929402504962015, |
| "eval_loss": 0.112494558095932, |
| "eval_runtime": 49.3853, |
| "eval_samples_per_second": 34.241, |
| "eval_steps_per_second": 8.565, |
| "eval_wer": 0.21626178729782053, |
| "step": 84000 |
| }, |
| { |
| "epoch": 13.891641889659729, |
| "grad_norm": 0.5570207238197327, |
| "learning_rate": 5.966058559385286e-06, |
| "loss": 0.1081, |
| "step": 84100 |
| }, |
| { |
| "epoch": 13.90815989428477, |
| "grad_norm": 0.5992655158042908, |
| "learning_rate": 5.95994077989453e-06, |
| "loss": 0.1362, |
| "step": 84200 |
| }, |
| { |
| "epoch": 13.924677898909811, |
| "grad_norm": 0.4389006197452545, |
| "learning_rate": 5.953823000403775e-06, |
| "loss": 0.1122, |
| "step": 84300 |
| }, |
| { |
| "epoch": 13.941195903534853, |
| "grad_norm": 0.6106426119804382, |
| "learning_rate": 5.947705220913018e-06, |
| "loss": 0.1065, |
| "step": 84400 |
| }, |
| { |
| "epoch": 13.957713908159894, |
| "grad_norm": 0.5008405447006226, |
| "learning_rate": 5.941587441422262e-06, |
| "loss": 0.1301, |
| "step": 84500 |
| }, |
| { |
| "epoch": 13.974231912784935, |
| "grad_norm": 20.616357803344727, |
| "learning_rate": 5.935469661931505e-06, |
| "loss": 0.1729, |
| "step": 84600 |
| }, |
| { |
| "epoch": 13.990749917409977, |
| "grad_norm": 0.7851992845535278, |
| "learning_rate": 5.92935188244075e-06, |
| "loss": 0.1043, |
| "step": 84700 |
| }, |
| { |
| "epoch": 14.007267922035018, |
| "grad_norm": 0.8801394104957581, |
| "learning_rate": 5.923234102949994e-06, |
| "loss": 0.1074, |
| "step": 84800 |
| }, |
| { |
| "epoch": 14.023785926660059, |
| "grad_norm": 0.5735670924186707, |
| "learning_rate": 5.917116323459238e-06, |
| "loss": 0.105, |
| "step": 84900 |
| }, |
| { |
| "epoch": 14.0403039312851, |
| "grad_norm": 0.6361643671989441, |
| "learning_rate": 5.9109985439684815e-06, |
| "loss": 0.1078, |
| "step": 85000 |
| }, |
| { |
| "epoch": 14.0403039312851, |
| "eval_cer": 0.03945657381424954, |
| "eval_loss": 0.1119338721036911, |
| "eval_runtime": 49.079, |
| "eval_samples_per_second": 34.455, |
| "eval_steps_per_second": 8.619, |
| "eval_wer": 0.2175107724973459, |
| "step": 85000 |
| }, |
| { |
| "epoch": 14.056821935910142, |
| "grad_norm": 0.6829052567481995, |
| "learning_rate": 5.904880764477726e-06, |
| "loss": 0.1007, |
| "step": 85100 |
| }, |
| { |
| "epoch": 14.073339940535183, |
| "grad_norm": 0.5998505353927612, |
| "learning_rate": 5.89876298498697e-06, |
| "loss": 0.1058, |
| "step": 85200 |
| }, |
| { |
| "epoch": 14.089857945160224, |
| "grad_norm": 0.7161391973495483, |
| "learning_rate": 5.892645205496214e-06, |
| "loss": 0.1096, |
| "step": 85300 |
| }, |
| { |
| "epoch": 14.106375949785265, |
| "grad_norm": 0.5567154288291931, |
| "learning_rate": 5.886527426005457e-06, |
| "loss": 0.1078, |
| "step": 85400 |
| }, |
| { |
| "epoch": 14.122893954410307, |
| "grad_norm": 0.9288133978843689, |
| "learning_rate": 5.880409646514702e-06, |
| "loss": 0.1075, |
| "step": 85500 |
| }, |
| { |
| "epoch": 14.139411959035348, |
| "grad_norm": 0.7576249837875366, |
| "learning_rate": 5.874291867023946e-06, |
| "loss": 0.1135, |
| "step": 85600 |
| }, |
| { |
| "epoch": 14.15592996366039, |
| "grad_norm": 0.7857004404067993, |
| "learning_rate": 5.868174087533189e-06, |
| "loss": 0.1045, |
| "step": 85700 |
| }, |
| { |
| "epoch": 14.17244796828543, |
| "grad_norm": 0.962145984172821, |
| "learning_rate": 5.8620563080424335e-06, |
| "loss": 0.1, |
| "step": 85800 |
| }, |
| { |
| "epoch": 14.188965972910472, |
| "grad_norm": 0.7464323043823242, |
| "learning_rate": 5.855938528551678e-06, |
| "loss": 0.1034, |
| "step": 85900 |
| }, |
| { |
| "epoch": 14.205483977535513, |
| "grad_norm": 0.8271916508674622, |
| "learning_rate": 5.849820749060921e-06, |
| "loss": 0.1082, |
| "step": 86000 |
| }, |
| { |
| "epoch": 14.205483977535513, |
| "eval_cer": 0.039174252275682706, |
| "eval_loss": 0.1129402220249176, |
| "eval_runtime": 48.9184, |
| "eval_samples_per_second": 34.568, |
| "eval_steps_per_second": 8.647, |
| "eval_wer": 0.21513770061824766, |
| "step": 86000 |
| }, |
| { |
| "epoch": 14.222001982160554, |
| "grad_norm": 0.5619252324104309, |
| "learning_rate": 5.8437029695701655e-06, |
| "loss": 0.1072, |
| "step": 86100 |
| }, |
| { |
| "epoch": 14.238519986785596, |
| "grad_norm": 0.5619592070579529, |
| "learning_rate": 5.837585190079409e-06, |
| "loss": 0.1038, |
| "step": 86200 |
| }, |
| { |
| "epoch": 14.255037991410637, |
| "grad_norm": 1.2644349336624146, |
| "learning_rate": 5.831467410588653e-06, |
| "loss": 0.102, |
| "step": 86300 |
| }, |
| { |
| "epoch": 14.271555996035678, |
| "grad_norm": 0.7374313473701477, |
| "learning_rate": 5.825349631097898e-06, |
| "loss": 0.1008, |
| "step": 86400 |
| }, |
| { |
| "epoch": 14.28807400066072, |
| "grad_norm": 0.8285679221153259, |
| "learning_rate": 5.819231851607141e-06, |
| "loss": 0.1024, |
| "step": 86500 |
| }, |
| { |
| "epoch": 14.30459200528576, |
| "grad_norm": 0.5749133825302124, |
| "learning_rate": 5.8131140721163845e-06, |
| "loss": 0.1078, |
| "step": 86600 |
| }, |
| { |
| "epoch": 14.321110009910802, |
| "grad_norm": 0.6757526397705078, |
| "learning_rate": 5.80699629262563e-06, |
| "loss": 0.1298, |
| "step": 86700 |
| }, |
| { |
| "epoch": 14.337628014535843, |
| "grad_norm": 0.4636983275413513, |
| "learning_rate": 5.800878513134873e-06, |
| "loss": 0.1045, |
| "step": 86800 |
| }, |
| { |
| "epoch": 14.354146019160885, |
| "grad_norm": 0.6189342737197876, |
| "learning_rate": 5.794760733644117e-06, |
| "loss": 0.1335, |
| "step": 86900 |
| }, |
| { |
| "epoch": 14.370664023785928, |
| "grad_norm": 0.7641118764877319, |
| "learning_rate": 5.788642954153361e-06, |
| "loss": 0.102, |
| "step": 87000 |
| }, |
| { |
| "epoch": 14.370664023785928, |
| "eval_cer": 0.038857709944562314, |
| "eval_loss": 0.11137784272432327, |
| "eval_runtime": 49.1649, |
| "eval_samples_per_second": 34.394, |
| "eval_steps_per_second": 8.604, |
| "eval_wer": 0.21457565727846126, |
| "step": 87000 |
| }, |
| { |
| "epoch": 14.387182028410969, |
| "grad_norm": 0.8745734095573425, |
| "learning_rate": 5.782525174662605e-06, |
| "loss": 0.1056, |
| "step": 87100 |
| }, |
| { |
| "epoch": 14.40370003303601, |
| "grad_norm": 0.4426126182079315, |
| "learning_rate": 5.776407395171849e-06, |
| "loss": 0.1294, |
| "step": 87200 |
| }, |
| { |
| "epoch": 14.420218037661051, |
| "grad_norm": 0.7525532841682434, |
| "learning_rate": 5.770289615681093e-06, |
| "loss": 0.1139, |
| "step": 87300 |
| }, |
| { |
| "epoch": 14.436736042286093, |
| "grad_norm": 0.6336373686790466, |
| "learning_rate": 5.7641718361903365e-06, |
| "loss": 0.1023, |
| "step": 87400 |
| }, |
| { |
| "epoch": 14.453254046911134, |
| "grad_norm": 0.6930210590362549, |
| "learning_rate": 5.758054056699582e-06, |
| "loss": 0.1336, |
| "step": 87500 |
| }, |
| { |
| "epoch": 14.469772051536175, |
| "grad_norm": 0.7454831004142761, |
| "learning_rate": 5.751936277208825e-06, |
| "loss": 0.1032, |
| "step": 87600 |
| }, |
| { |
| "epoch": 14.486290056161216, |
| "grad_norm": 0.7100419998168945, |
| "learning_rate": 5.7458184977180686e-06, |
| "loss": 0.1034, |
| "step": 87700 |
| }, |
| { |
| "epoch": 14.502808060786258, |
| "grad_norm": 0.6206198334693909, |
| "learning_rate": 5.739700718227312e-06, |
| "loss": 0.1073, |
| "step": 87800 |
| }, |
| { |
| "epoch": 14.519326065411299, |
| "grad_norm": 0.5653363466262817, |
| "learning_rate": 5.733582938736557e-06, |
| "loss": 0.1034, |
| "step": 87900 |
| }, |
| { |
| "epoch": 14.53584407003634, |
| "grad_norm": 0.6938855051994324, |
| "learning_rate": 5.727465159245801e-06, |
| "loss": 0.1065, |
| "step": 88000 |
| }, |
| { |
| "epoch": 14.53584407003634, |
| "eval_cer": 0.03898603791663815, |
| "eval_loss": 0.11170890182256699, |
| "eval_runtime": 62.1432, |
| "eval_samples_per_second": 27.211, |
| "eval_steps_per_second": 6.807, |
| "eval_wer": 0.21582464247798663, |
| "step": 88000 |
| }, |
| { |
| "epoch": 14.552362074661382, |
| "grad_norm": 0.9062691330909729, |
| "learning_rate": 5.721347379755044e-06, |
| "loss": 0.0999, |
| "step": 88100 |
| }, |
| { |
| "epoch": 14.568880079286423, |
| "grad_norm": 0.6869949102401733, |
| "learning_rate": 5.715229600264288e-06, |
| "loss": 0.1033, |
| "step": 88200 |
| }, |
| { |
| "epoch": 14.585398083911464, |
| "grad_norm": 0.6004628539085388, |
| "learning_rate": 5.709111820773533e-06, |
| "loss": 0.108, |
| "step": 88300 |
| }, |
| { |
| "epoch": 14.601916088536505, |
| "grad_norm": 0.6582931876182556, |
| "learning_rate": 5.702994041282777e-06, |
| "loss": 0.1034, |
| "step": 88400 |
| }, |
| { |
| "epoch": 14.618434093161547, |
| "grad_norm": 0.5958510637283325, |
| "learning_rate": 5.6968762617920205e-06, |
| "loss": 0.1082, |
| "step": 88500 |
| }, |
| { |
| "epoch": 14.634952097786588, |
| "grad_norm": 0.8877278566360474, |
| "learning_rate": 5.690758482301264e-06, |
| "loss": 0.1051, |
| "step": 88600 |
| }, |
| { |
| "epoch": 14.65147010241163, |
| "grad_norm": 0.46800002455711365, |
| "learning_rate": 5.684640702810509e-06, |
| "loss": 0.1032, |
| "step": 88700 |
| }, |
| { |
| "epoch": 14.66798810703667, |
| "grad_norm": 0.6601079106330872, |
| "learning_rate": 5.6785229233197526e-06, |
| "loss": 0.108, |
| "step": 88800 |
| }, |
| { |
| "epoch": 14.684506111661712, |
| "grad_norm": 0.6476488709449768, |
| "learning_rate": 5.672405143828996e-06, |
| "loss": 0.1049, |
| "step": 88900 |
| }, |
| { |
| "epoch": 14.701024116286753, |
| "grad_norm": 0.7255818843841553, |
| "learning_rate": 5.6662873643382395e-06, |
| "loss": 0.1322, |
| "step": 89000 |
| }, |
| { |
| "epoch": 14.701024116286753, |
| "eval_cer": 0.039174252275682706, |
| "eval_loss": 0.11011859029531479, |
| "eval_runtime": 52.7786, |
| "eval_samples_per_second": 32.04, |
| "eval_steps_per_second": 8.015, |
| "eval_wer": 0.21588709173796292, |
| "step": 89000 |
| }, |
| { |
| "epoch": 14.717542120911794, |
| "grad_norm": 0.7102627754211426, |
| "learning_rate": 5.660169584847485e-06, |
| "loss": 0.1046, |
| "step": 89100 |
| }, |
| { |
| "epoch": 14.734060125536836, |
| "grad_norm": 0.6122440099716187, |
| "learning_rate": 5.654051805356728e-06, |
| "loss": 0.1012, |
| "step": 89200 |
| }, |
| { |
| "epoch": 14.750578130161877, |
| "grad_norm": 0.6586080193519592, |
| "learning_rate": 5.6479340258659724e-06, |
| "loss": 0.1021, |
| "step": 89300 |
| }, |
| { |
| "epoch": 14.767096134786918, |
| "grad_norm": 0.9857539534568787, |
| "learning_rate": 5.641816246375216e-06, |
| "loss": 0.158, |
| "step": 89400 |
| }, |
| { |
| "epoch": 14.78361413941196, |
| "grad_norm": 0.8294028043746948, |
| "learning_rate": 5.63569846688446e-06, |
| "loss": 0.1115, |
| "step": 89500 |
| }, |
| { |
| "epoch": 14.800132144037, |
| "grad_norm": 0.6861185431480408, |
| "learning_rate": 5.6295806873937045e-06, |
| "loss": 0.1043, |
| "step": 89600 |
| }, |
| { |
| "epoch": 14.816650148662042, |
| "grad_norm": 0.6036092042922974, |
| "learning_rate": 5.623462907902948e-06, |
| "loss": 0.1105, |
| "step": 89700 |
| }, |
| { |
| "epoch": 14.833168153287083, |
| "grad_norm": 0.778626561164856, |
| "learning_rate": 5.6173451284121914e-06, |
| "loss": 0.1032, |
| "step": 89800 |
| }, |
| { |
| "epoch": 14.849686157912124, |
| "grad_norm": 0.5784227252006531, |
| "learning_rate": 5.611227348921437e-06, |
| "loss": 0.1122, |
| "step": 89900 |
| }, |
| { |
| "epoch": 14.866204162537166, |
| "grad_norm": 0.6248123645782471, |
| "learning_rate": 5.60510956943068e-06, |
| "loss": 0.1027, |
| "step": 90000 |
| }, |
| { |
| "epoch": 14.866204162537166, |
| "eval_cer": 0.0388662651427007, |
| "eval_loss": 0.11089600622653961, |
| "eval_runtime": 53.1033, |
| "eval_samples_per_second": 31.844, |
| "eval_steps_per_second": 7.966, |
| "eval_wer": 0.2147005557984138, |
| "step": 90000 |
| }, |
| { |
| "epoch": 14.882722167162207, |
| "grad_norm": 0.5955941081047058, |
| "learning_rate": 5.5989917899399235e-06, |
| "loss": 0.117, |
| "step": 90100 |
| }, |
| { |
| "epoch": 14.899240171787248, |
| "grad_norm": 0.7445477247238159, |
| "learning_rate": 5.592874010449168e-06, |
| "loss": 0.1035, |
| "step": 90200 |
| }, |
| { |
| "epoch": 14.91575817641229, |
| "grad_norm": 0.4745796024799347, |
| "learning_rate": 5.586756230958412e-06, |
| "loss": 0.1042, |
| "step": 90300 |
| }, |
| { |
| "epoch": 14.93227618103733, |
| "grad_norm": 0.7581929564476013, |
| "learning_rate": 5.580638451467656e-06, |
| "loss": 0.1047, |
| "step": 90400 |
| }, |
| { |
| "epoch": 14.948794185662372, |
| "grad_norm": 1.0136349201202393, |
| "learning_rate": 5.5745206719769e-06, |
| "loss": 0.105, |
| "step": 90500 |
| }, |
| { |
| "epoch": 14.965312190287413, |
| "grad_norm": 0.7604655623435974, |
| "learning_rate": 5.568402892486143e-06, |
| "loss": 0.1091, |
| "step": 90600 |
| }, |
| { |
| "epoch": 14.981830194912455, |
| "grad_norm": 0.7419881224632263, |
| "learning_rate": 5.562285112995388e-06, |
| "loss": 0.1009, |
| "step": 90700 |
| }, |
| { |
| "epoch": 14.998348199537496, |
| "grad_norm": 0.571348249912262, |
| "learning_rate": 5.556167333504632e-06, |
| "loss": 0.099, |
| "step": 90800 |
| }, |
| { |
| "epoch": 15.014866204162537, |
| "grad_norm": 0.7786069512367249, |
| "learning_rate": 5.5500495540138754e-06, |
| "loss": 0.1141, |
| "step": 90900 |
| }, |
| { |
| "epoch": 15.031384208787578, |
| "grad_norm": 0.6933959722518921, |
| "learning_rate": 5.543931774523119e-06, |
| "loss": 0.1029, |
| "step": 91000 |
| }, |
| { |
| "epoch": 15.031384208787578, |
| "eval_cer": 0.03914003148312915, |
| "eval_loss": 0.110771544277668, |
| "eval_runtime": 55.563, |
| "eval_samples_per_second": 30.434, |
| "eval_steps_per_second": 7.613, |
| "eval_wer": 0.21532504839817648, |
| "step": 91000 |
| }, |
| { |
| "epoch": 15.04790221341262, |
| "grad_norm": 0.5616199374198914, |
| "learning_rate": 5.537813995032364e-06, |
| "loss": 0.0993, |
| "step": 91100 |
| }, |
| { |
| "epoch": 15.064420218037661, |
| "grad_norm": 0.6372345089912415, |
| "learning_rate": 5.5316962155416075e-06, |
| "loss": 0.104, |
| "step": 91200 |
| }, |
| { |
| "epoch": 15.080938222662702, |
| "grad_norm": 0.7811592817306519, |
| "learning_rate": 5.525578436050851e-06, |
| "loss": 0.1002, |
| "step": 91300 |
| }, |
| { |
| "epoch": 15.097456227287744, |
| "grad_norm": 0.974866509437561, |
| "learning_rate": 5.519460656560095e-06, |
| "loss": 0.1252, |
| "step": 91400 |
| }, |
| { |
| "epoch": 15.113974231912785, |
| "grad_norm": 0.705301821231842, |
| "learning_rate": 5.51334287706934e-06, |
| "loss": 0.1028, |
| "step": 91500 |
| }, |
| { |
| "epoch": 15.130492236537826, |
| "grad_norm": 0.6617374420166016, |
| "learning_rate": 5.507225097578583e-06, |
| "loss": 0.1029, |
| "step": 91600 |
| }, |
| { |
| "epoch": 15.147010241162867, |
| "grad_norm": 0.6031976342201233, |
| "learning_rate": 5.501107318087827e-06, |
| "loss": 0.0994, |
| "step": 91700 |
| }, |
| { |
| "epoch": 15.163528245787909, |
| "grad_norm": 0.8132845163345337, |
| "learning_rate": 5.494989538597071e-06, |
| "loss": 0.1013, |
| "step": 91800 |
| }, |
| { |
| "epoch": 15.18004625041295, |
| "grad_norm": 0.4518735110759735, |
| "learning_rate": 5.488871759106316e-06, |
| "loss": 0.1086, |
| "step": 91900 |
| }, |
| { |
| "epoch": 15.196564255037991, |
| "grad_norm": 0.6119063496589661, |
| "learning_rate": 5.4827539796155595e-06, |
| "loss": 0.0978, |
| "step": 92000 |
| }, |
| { |
| "epoch": 15.196564255037991, |
| "eval_cer": 0.03872082677434809, |
| "eval_loss": 0.11010745912790298, |
| "eval_runtime": 56.6751, |
| "eval_samples_per_second": 29.837, |
| "eval_steps_per_second": 7.464, |
| "eval_wer": 0.21351401985886467, |
| "step": 92000 |
| }, |
| { |
| "epoch": 15.213082259663032, |
| "grad_norm": 1.2135928869247437, |
| "learning_rate": 5.476636200124803e-06, |
| "loss": 0.126, |
| "step": 92100 |
| }, |
| { |
| "epoch": 15.229600264288074, |
| "grad_norm": 0.7273651361465454, |
| "learning_rate": 5.470518420634046e-06, |
| "loss": 0.1026, |
| "step": 92200 |
| }, |
| { |
| "epoch": 15.246118268913115, |
| "grad_norm": 0.5206860303878784, |
| "learning_rate": 5.4644006411432915e-06, |
| "loss": 0.1061, |
| "step": 92300 |
| }, |
| { |
| "epoch": 15.262636273538156, |
| "grad_norm": 0.5830551981925964, |
| "learning_rate": 5.458282861652535e-06, |
| "loss": 0.1049, |
| "step": 92400 |
| }, |
| { |
| "epoch": 15.279154278163197, |
| "grad_norm": 0.5370995402336121, |
| "learning_rate": 5.4521650821617785e-06, |
| "loss": 0.1022, |
| "step": 92500 |
| }, |
| { |
| "epoch": 15.295672282788239, |
| "grad_norm": 0.6254607439041138, |
| "learning_rate": 5.446047302671023e-06, |
| "loss": 0.1076, |
| "step": 92600 |
| }, |
| { |
| "epoch": 15.31219028741328, |
| "grad_norm": 0.7650060057640076, |
| "learning_rate": 5.439929523180267e-06, |
| "loss": 0.1061, |
| "step": 92700 |
| }, |
| { |
| "epoch": 15.328708292038321, |
| "grad_norm": 0.786281168460846, |
| "learning_rate": 5.433811743689511e-06, |
| "loss": 0.1059, |
| "step": 92800 |
| }, |
| { |
| "epoch": 15.345226296663363, |
| "grad_norm": 0.7369528412818909, |
| "learning_rate": 5.427693964198755e-06, |
| "loss": 0.1025, |
| "step": 92900 |
| }, |
| { |
| "epoch": 15.361744301288404, |
| "grad_norm": 0.8376733660697937, |
| "learning_rate": 5.421576184707998e-06, |
| "loss": 0.1048, |
| "step": 93000 |
| }, |
| { |
| "epoch": 15.361744301288404, |
| "eval_cer": 0.038275956471151874, |
| "eval_loss": 0.1115972101688385, |
| "eval_runtime": 60.1921, |
| "eval_samples_per_second": 28.093, |
| "eval_steps_per_second": 7.028, |
| "eval_wer": 0.21095360019983764, |
| "step": 93000 |
| }, |
| { |
| "epoch": 15.378262305913445, |
| "grad_norm": 0.6252397298812866, |
| "learning_rate": 5.4154584052172435e-06, |
| "loss": 0.1, |
| "step": 93100 |
| }, |
| { |
| "epoch": 15.394780310538486, |
| "grad_norm": 0.7326919436454773, |
| "learning_rate": 5.409340625726487e-06, |
| "loss": 0.11, |
| "step": 93200 |
| }, |
| { |
| "epoch": 15.411298315163528, |
| "grad_norm": 0.6019201874732971, |
| "learning_rate": 5.40322284623573e-06, |
| "loss": 0.1065, |
| "step": 93300 |
| }, |
| { |
| "epoch": 15.427816319788569, |
| "grad_norm": 0.7445711493492126, |
| "learning_rate": 5.397105066744974e-06, |
| "loss": 0.105, |
| "step": 93400 |
| }, |
| { |
| "epoch": 15.44433432441361, |
| "grad_norm": 1.068389892578125, |
| "learning_rate": 5.390987287254219e-06, |
| "loss": 0.1038, |
| "step": 93500 |
| }, |
| { |
| "epoch": 15.460852329038651, |
| "grad_norm": 0.672622561454773, |
| "learning_rate": 5.3848695077634625e-06, |
| "loss": 0.104, |
| "step": 93600 |
| }, |
| { |
| "epoch": 15.477370333663693, |
| "grad_norm": 0.6717888712882996, |
| "learning_rate": 5.378751728272707e-06, |
| "loss": 0.0985, |
| "step": 93700 |
| }, |
| { |
| "epoch": 15.493888338288734, |
| "grad_norm": 1.2381566762924194, |
| "learning_rate": 5.37263394878195e-06, |
| "loss": 0.1078, |
| "step": 93800 |
| }, |
| { |
| "epoch": 15.510406342913775, |
| "grad_norm": 0.6967211365699768, |
| "learning_rate": 5.3665161692911946e-06, |
| "loss": 0.1017, |
| "step": 93900 |
| }, |
| { |
| "epoch": 15.526924347538817, |
| "grad_norm": 0.7272515892982483, |
| "learning_rate": 5.360398389800439e-06, |
| "loss": 0.1027, |
| "step": 94000 |
| }, |
| { |
| "epoch": 15.526924347538817, |
| "eval_cer": 0.038635274792964205, |
| "eval_loss": 0.10984691232442856, |
| "eval_runtime": 58.0727, |
| "eval_samples_per_second": 29.119, |
| "eval_steps_per_second": 7.284, |
| "eval_wer": 0.21320177355898332, |
| "step": 94000 |
| }, |
| { |
| "epoch": 15.543442352163858, |
| "grad_norm": 0.7049939036369324, |
| "learning_rate": 5.354280610309682e-06, |
| "loss": 0.1035, |
| "step": 94100 |
| }, |
| { |
| "epoch": 15.559960356788899, |
| "grad_norm": 1.3722845315933228, |
| "learning_rate": 5.348162830818926e-06, |
| "loss": 0.1002, |
| "step": 94200 |
| }, |
| { |
| "epoch": 15.57647836141394, |
| "grad_norm": 0.7943611145019531, |
| "learning_rate": 5.342045051328171e-06, |
| "loss": 0.0976, |
| "step": 94300 |
| }, |
| { |
| "epoch": 15.592996366038982, |
| "grad_norm": 0.6992027163505554, |
| "learning_rate": 5.335927271837414e-06, |
| "loss": 0.1038, |
| "step": 94400 |
| }, |
| { |
| "epoch": 15.609514370664023, |
| "grad_norm": 0.9091536998748779, |
| "learning_rate": 5.329809492346658e-06, |
| "loss": 0.1059, |
| "step": 94500 |
| }, |
| { |
| "epoch": 15.626032375289064, |
| "grad_norm": 0.6817540526390076, |
| "learning_rate": 5.323691712855902e-06, |
| "loss": 0.1099, |
| "step": 94600 |
| }, |
| { |
| "epoch": 15.642550379914105, |
| "grad_norm": 0.6020603775978088, |
| "learning_rate": 5.3175739333651465e-06, |
| "loss": 0.0986, |
| "step": 94700 |
| }, |
| { |
| "epoch": 15.659068384539147, |
| "grad_norm": 0.6270213723182678, |
| "learning_rate": 5.31145615387439e-06, |
| "loss": 0.1034, |
| "step": 94800 |
| }, |
| { |
| "epoch": 15.67558638916419, |
| "grad_norm": 0.7782559990882874, |
| "learning_rate": 5.305338374383634e-06, |
| "loss": 0.0969, |
| "step": 94900 |
| }, |
| { |
| "epoch": 15.692104393789231, |
| "grad_norm": 0.6843757629394531, |
| "learning_rate": 5.299220594892878e-06, |
| "loss": 0.0996, |
| "step": 95000 |
| }, |
| { |
| "epoch": 15.692104393789231, |
| "eval_cer": 0.03852405721716515, |
| "eval_loss": 0.11085934937000275, |
| "eval_runtime": 60.5345, |
| "eval_samples_per_second": 27.934, |
| "eval_steps_per_second": 6.988, |
| "eval_wer": 0.21270217947917316, |
| "step": 95000 |
| }, |
| { |
| "epoch": 15.708622398414272, |
| "grad_norm": 0.5856791138648987, |
| "learning_rate": 5.293102815402122e-06, |
| "loss": 0.1043, |
| "step": 95100 |
| }, |
| { |
| "epoch": 15.725140403039314, |
| "grad_norm": 1.0118597745895386, |
| "learning_rate": 5.286985035911366e-06, |
| "loss": 0.1207, |
| "step": 95200 |
| }, |
| { |
| "epoch": 15.741658407664355, |
| "grad_norm": 0.4559677541255951, |
| "learning_rate": 5.28086725642061e-06, |
| "loss": 0.0953, |
| "step": 95300 |
| }, |
| { |
| "epoch": 15.758176412289396, |
| "grad_norm": 0.7937479615211487, |
| "learning_rate": 5.274749476929853e-06, |
| "loss": 0.1018, |
| "step": 95400 |
| }, |
| { |
| "epoch": 15.774694416914437, |
| "grad_norm": 0.7912297248840332, |
| "learning_rate": 5.2686316974390984e-06, |
| "loss": 0.1013, |
| "step": 95500 |
| }, |
| { |
| "epoch": 15.791212421539479, |
| "grad_norm": 0.9011877775192261, |
| "learning_rate": 5.262513917948342e-06, |
| "loss": 0.1085, |
| "step": 95600 |
| }, |
| { |
| "epoch": 15.80773042616452, |
| "grad_norm": 0.7926939129829407, |
| "learning_rate": 5.256396138457585e-06, |
| "loss": 0.0993, |
| "step": 95700 |
| }, |
| { |
| "epoch": 15.824248430789561, |
| "grad_norm": 0.9147284626960754, |
| "learning_rate": 5.25027835896683e-06, |
| "loss": 0.106, |
| "step": 95800 |
| }, |
| { |
| "epoch": 15.840766435414602, |
| "grad_norm": 0.6496513485908508, |
| "learning_rate": 5.244160579476074e-06, |
| "loss": 0.101, |
| "step": 95900 |
| }, |
| { |
| "epoch": 15.857284440039644, |
| "grad_norm": 0.5024608969688416, |
| "learning_rate": 5.2380427999853174e-06, |
| "loss": 0.0959, |
| "step": 96000 |
| }, |
| { |
| "epoch": 15.857284440039644, |
| "eval_cer": 0.03859249880227226, |
| "eval_loss": 0.1082993894815445, |
| "eval_runtime": 61.2314, |
| "eval_samples_per_second": 27.617, |
| "eval_steps_per_second": 6.908, |
| "eval_wer": 0.21288952725910198, |
| "step": 96000 |
| }, |
| { |
| "epoch": 15.873802444664685, |
| "grad_norm": 0.9131097197532654, |
| "learning_rate": 5.231925020494562e-06, |
| "loss": 0.1023, |
| "step": 96100 |
| }, |
| { |
| "epoch": 15.890320449289726, |
| "grad_norm": 0.7231972813606262, |
| "learning_rate": 5.225807241003805e-06, |
| "loss": 0.1057, |
| "step": 96200 |
| }, |
| { |
| "epoch": 15.906838453914768, |
| "grad_norm": 0.7179155349731445, |
| "learning_rate": 5.21968946151305e-06, |
| "loss": 0.1054, |
| "step": 96300 |
| }, |
| { |
| "epoch": 15.923356458539809, |
| "grad_norm": 0.6966670751571655, |
| "learning_rate": 5.213571682022294e-06, |
| "loss": 0.0992, |
| "step": 96400 |
| }, |
| { |
| "epoch": 15.93987446316485, |
| "grad_norm": 0.7580718398094177, |
| "learning_rate": 5.207453902531537e-06, |
| "loss": 0.0978, |
| "step": 96500 |
| }, |
| { |
| "epoch": 15.956392467789891, |
| "grad_norm": 0.6020950675010681, |
| "learning_rate": 5.201336123040781e-06, |
| "loss": 0.1, |
| "step": 96600 |
| }, |
| { |
| "epoch": 15.972910472414933, |
| "grad_norm": 0.7800185680389404, |
| "learning_rate": 5.195218343550026e-06, |
| "loss": 0.1041, |
| "step": 96700 |
| }, |
| { |
| "epoch": 15.989428477039974, |
| "grad_norm": 0.6527479290962219, |
| "learning_rate": 5.189100564059269e-06, |
| "loss": 0.1504, |
| "step": 96800 |
| }, |
| { |
| "epoch": 16.005946481665013, |
| "grad_norm": 2.855896472930908, |
| "learning_rate": 5.182982784568513e-06, |
| "loss": 0.1247, |
| "step": 96900 |
| }, |
| { |
| "epoch": 16.022464486290055, |
| "grad_norm": 0.6793861985206604, |
| "learning_rate": 5.176865005077757e-06, |
| "loss": 0.1015, |
| "step": 97000 |
| }, |
| { |
| "epoch": 16.022464486290055, |
| "eval_cer": 0.038601054000410646, |
| "eval_loss": 0.10963103175163269, |
| "eval_runtime": 57.4338, |
| "eval_samples_per_second": 29.443, |
| "eval_steps_per_second": 7.365, |
| "eval_wer": 0.21257728095922063, |
| "step": 97000 |
| }, |
| { |
| "epoch": 16.038982490915096, |
| "grad_norm": 0.5223618149757385, |
| "learning_rate": 5.1707472255870015e-06, |
| "loss": 0.0952, |
| "step": 97100 |
| }, |
| { |
| "epoch": 16.055500495540137, |
| "grad_norm": 0.6774017810821533, |
| "learning_rate": 5.164629446096246e-06, |
| "loss": 0.101, |
| "step": 97200 |
| }, |
| { |
| "epoch": 16.07201850016518, |
| "grad_norm": 0.6046952605247498, |
| "learning_rate": 5.158511666605489e-06, |
| "loss": 0.0986, |
| "step": 97300 |
| }, |
| { |
| "epoch": 16.08853650479022, |
| "grad_norm": 0.5968722701072693, |
| "learning_rate": 5.152393887114733e-06, |
| "loss": 0.1172, |
| "step": 97400 |
| }, |
| { |
| "epoch": 16.10505450941526, |
| "grad_norm": 0.49890223145484924, |
| "learning_rate": 5.146276107623978e-06, |
| "loss": 0.095, |
| "step": 97500 |
| }, |
| { |
| "epoch": 16.121572514040302, |
| "grad_norm": 0.5506992936134338, |
| "learning_rate": 5.140158328133221e-06, |
| "loss": 0.0983, |
| "step": 97600 |
| }, |
| { |
| "epoch": 16.138090518665344, |
| "grad_norm": 0.8042443990707397, |
| "learning_rate": 5.134040548642465e-06, |
| "loss": 0.2112, |
| "step": 97700 |
| }, |
| { |
| "epoch": 16.154608523290385, |
| "grad_norm": 0.8264985680580139, |
| "learning_rate": 5.127922769151708e-06, |
| "loss": 0.1013, |
| "step": 97800 |
| }, |
| { |
| "epoch": 16.171126527915426, |
| "grad_norm": 0.5965238809585571, |
| "learning_rate": 5.121804989660953e-06, |
| "loss": 0.1457, |
| "step": 97900 |
| }, |
| { |
| "epoch": 16.187644532540467, |
| "grad_norm": 0.8089606761932373, |
| "learning_rate": 5.115687210170197e-06, |
| "loss": 0.1058, |
| "step": 98000 |
| }, |
| { |
| "epoch": 16.187644532540467, |
| "eval_cer": 0.03804496612141537, |
| "eval_loss": 0.10823166370391846, |
| "eval_runtime": 58.9445, |
| "eval_samples_per_second": 28.688, |
| "eval_steps_per_second": 7.176, |
| "eval_wer": 0.21132829575969525, |
| "step": 98000 |
| }, |
| { |
| "epoch": 16.20416253716551, |
| "grad_norm": 0.5970620512962341, |
| "learning_rate": 5.109569430679441e-06, |
| "loss": 0.1046, |
| "step": 98100 |
| }, |
| { |
| "epoch": 16.22068054179055, |
| "grad_norm": 0.45412981510162354, |
| "learning_rate": 5.103451651188685e-06, |
| "loss": 0.1035, |
| "step": 98200 |
| }, |
| { |
| "epoch": 16.23719854641559, |
| "grad_norm": 0.5827893018722534, |
| "learning_rate": 5.097333871697929e-06, |
| "loss": 0.0992, |
| "step": 98300 |
| }, |
| { |
| "epoch": 16.253716551040633, |
| "grad_norm": 0.6516451239585876, |
| "learning_rate": 5.091216092207173e-06, |
| "loss": 0.1023, |
| "step": 98400 |
| }, |
| { |
| "epoch": 16.270234555665677, |
| "grad_norm": 0.731946587562561, |
| "learning_rate": 5.085098312716417e-06, |
| "loss": 0.0969, |
| "step": 98500 |
| }, |
| { |
| "epoch": 16.28675256029072, |
| "grad_norm": 0.70552659034729, |
| "learning_rate": 5.07898053322566e-06, |
| "loss": 0.1478, |
| "step": 98600 |
| }, |
| { |
| "epoch": 16.30327056491576, |
| "grad_norm": 0.7130141258239746, |
| "learning_rate": 5.072862753734905e-06, |
| "loss": 0.1049, |
| "step": 98700 |
| }, |
| { |
| "epoch": 16.3197885695408, |
| "grad_norm": 0.9122040867805481, |
| "learning_rate": 5.066744974244149e-06, |
| "loss": 0.0976, |
| "step": 98800 |
| }, |
| { |
| "epoch": 16.336306574165842, |
| "grad_norm": 0.7150751948356628, |
| "learning_rate": 5.060627194753392e-06, |
| "loss": 0.0938, |
| "step": 98900 |
| }, |
| { |
| "epoch": 16.352824578790884, |
| "grad_norm": 0.571891188621521, |
| "learning_rate": 5.0545094152626366e-06, |
| "loss": 0.0966, |
| "step": 99000 |
| }, |
| { |
| "epoch": 16.352824578790884, |
| "eval_cer": 0.03804496612141537, |
| "eval_loss": 0.1098564937710762, |
| "eval_runtime": 59.3833, |
| "eval_samples_per_second": 28.476, |
| "eval_steps_per_second": 7.123, |
| "eval_wer": 0.21089115093986135, |
| "step": 99000 |
| }, |
| { |
| "epoch": 16.369342583415925, |
| "grad_norm": 0.5009652376174927, |
| "learning_rate": 5.048391635771881e-06, |
| "loss": 0.0956, |
| "step": 99100 |
| }, |
| { |
| "epoch": 16.385860588040966, |
| "grad_norm": 0.6411744952201843, |
| "learning_rate": 5.042273856281124e-06, |
| "loss": 0.1109, |
| "step": 99200 |
| }, |
| { |
| "epoch": 16.402378592666008, |
| "grad_norm": 0.7724633812904358, |
| "learning_rate": 5.036156076790369e-06, |
| "loss": 0.0996, |
| "step": 99300 |
| }, |
| { |
| "epoch": 16.41889659729105, |
| "grad_norm": 0.5513240694999695, |
| "learning_rate": 5.030038297299612e-06, |
| "loss": 0.0972, |
| "step": 99400 |
| }, |
| { |
| "epoch": 16.43541460191609, |
| "grad_norm": 0.685674786567688, |
| "learning_rate": 5.023920517808856e-06, |
| "loss": 0.1259, |
| "step": 99500 |
| }, |
| { |
| "epoch": 16.45193260654113, |
| "grad_norm": 0.7051562070846558, |
| "learning_rate": 5.017802738318101e-06, |
| "loss": 0.1076, |
| "step": 99600 |
| }, |
| { |
| "epoch": 16.468450611166173, |
| "grad_norm": 0.6196284890174866, |
| "learning_rate": 5.011684958827344e-06, |
| "loss": 0.1033, |
| "step": 99700 |
| }, |
| { |
| "epoch": 16.484968615791214, |
| "grad_norm": 0.6664172410964966, |
| "learning_rate": 5.005567179336588e-06, |
| "loss": 0.0988, |
| "step": 99800 |
| }, |
| { |
| "epoch": 16.501486620416255, |
| "grad_norm": 0.8101247549057007, |
| "learning_rate": 4.999449399845832e-06, |
| "loss": 0.1005, |
| "step": 99900 |
| }, |
| { |
| "epoch": 16.518004625041296, |
| "grad_norm": 0.5494738817214966, |
| "learning_rate": 4.993331620355076e-06, |
| "loss": 0.0988, |
| "step": 100000 |
| }, |
| { |
| "epoch": 16.518004625041296, |
| "eval_cer": 0.03866949558551776, |
| "eval_loss": 0.10886727273464203, |
| "eval_runtime": 57.6452, |
| "eval_samples_per_second": 29.335, |
| "eval_steps_per_second": 7.338, |
| "eval_wer": 0.21388871541872229, |
| "step": 100000 |
| }, |
| { |
| "epoch": 16.534522629666338, |
| "grad_norm": 0.752741813659668, |
| "learning_rate": 4.98721384086432e-06, |
| "loss": 0.0958, |
| "step": 100100 |
| }, |
| { |
| "epoch": 16.55104063429138, |
| "grad_norm": 0.6410621404647827, |
| "learning_rate": 4.981096061373564e-06, |
| "loss": 0.1042, |
| "step": 100200 |
| }, |
| { |
| "epoch": 16.56755863891642, |
| "grad_norm": 0.5088207125663757, |
| "learning_rate": 4.974978281882808e-06, |
| "loss": 0.0953, |
| "step": 100300 |
| }, |
| { |
| "epoch": 16.58407664354146, |
| "grad_norm": 0.7134143114089966, |
| "learning_rate": 4.968860502392052e-06, |
| "loss": 0.1021, |
| "step": 100400 |
| }, |
| { |
| "epoch": 16.600594648166503, |
| "grad_norm": 0.9732416272163391, |
| "learning_rate": 4.962742722901296e-06, |
| "loss": 0.1004, |
| "step": 100500 |
| }, |
| { |
| "epoch": 16.617112652791544, |
| "grad_norm": 0.5259725451469421, |
| "learning_rate": 4.95662494341054e-06, |
| "loss": 0.1008, |
| "step": 100600 |
| }, |
| { |
| "epoch": 16.633630657416585, |
| "grad_norm": 0.7298964262008667, |
| "learning_rate": 4.950507163919784e-06, |
| "loss": 0.1216, |
| "step": 100700 |
| }, |
| { |
| "epoch": 16.650148662041627, |
| "grad_norm": 0.6519650816917419, |
| "learning_rate": 4.944389384429028e-06, |
| "loss": 0.0989, |
| "step": 100800 |
| }, |
| { |
| "epoch": 16.666666666666668, |
| "grad_norm": 0.5744999647140503, |
| "learning_rate": 4.938271604938272e-06, |
| "loss": 0.1034, |
| "step": 100900 |
| }, |
| { |
| "epoch": 16.68318467129171, |
| "grad_norm": 0.8439196944236755, |
| "learning_rate": 4.932153825447516e-06, |
| "loss": 0.0983, |
| "step": 101000 |
| }, |
| { |
| "epoch": 16.68318467129171, |
| "eval_cer": 0.03823318048045993, |
| "eval_loss": 0.10998154431581497, |
| "eval_runtime": 57.0437, |
| "eval_samples_per_second": 29.644, |
| "eval_steps_per_second": 7.415, |
| "eval_wer": 0.21026665834009867, |
| "step": 101000 |
| }, |
| { |
| "epoch": 16.69970267591675, |
| "grad_norm": 0.7444531917572021, |
| "learning_rate": 4.9260360459567594e-06, |
| "loss": 0.0986, |
| "step": 101100 |
| }, |
| { |
| "epoch": 16.71622068054179, |
| "grad_norm": 0.64404296875, |
| "learning_rate": 4.919918266466004e-06, |
| "loss": 0.0977, |
| "step": 101200 |
| }, |
| { |
| "epoch": 16.732738685166833, |
| "grad_norm": 0.5869942903518677, |
| "learning_rate": 4.913800486975248e-06, |
| "loss": 0.0943, |
| "step": 101300 |
| }, |
| { |
| "epoch": 16.749256689791874, |
| "grad_norm": 0.7566863894462585, |
| "learning_rate": 4.9076827074844915e-06, |
| "loss": 0.0932, |
| "step": 101400 |
| }, |
| { |
| "epoch": 16.765774694416915, |
| "grad_norm": 0.6613245010375977, |
| "learning_rate": 4.901564927993736e-06, |
| "loss": 0.094, |
| "step": 101500 |
| }, |
| { |
| "epoch": 16.782292699041957, |
| "grad_norm": 0.5942184925079346, |
| "learning_rate": 4.89544714850298e-06, |
| "loss": 0.1035, |
| "step": 101600 |
| }, |
| { |
| "epoch": 16.798810703666998, |
| "grad_norm": 0.6501281261444092, |
| "learning_rate": 4.889329369012224e-06, |
| "loss": 0.0991, |
| "step": 101700 |
| }, |
| { |
| "epoch": 16.81532870829204, |
| "grad_norm": 0.5310657024383545, |
| "learning_rate": 4.883211589521468e-06, |
| "loss": 0.1031, |
| "step": 101800 |
| }, |
| { |
| "epoch": 16.83184671291708, |
| "grad_norm": 0.7567028403282166, |
| "learning_rate": 4.877093810030712e-06, |
| "loss": 0.1248, |
| "step": 101900 |
| }, |
| { |
| "epoch": 16.848364717542122, |
| "grad_norm": 0.5749494433403015, |
| "learning_rate": 4.870976030539956e-06, |
| "loss": 0.1394, |
| "step": 102000 |
| }, |
| { |
| "epoch": 16.848364717542122, |
| "eval_cer": 0.03819040448976798, |
| "eval_loss": 0.10844554007053375, |
| "eval_runtime": 55.7352, |
| "eval_samples_per_second": 30.34, |
| "eval_steps_per_second": 7.589, |
| "eval_wer": 0.21051645538000374, |
| "step": 102000 |
| }, |
| { |
| "epoch": 16.864882722167163, |
| "grad_norm": 0.6915029883384705, |
| "learning_rate": 4.8648582510492e-06, |
| "loss": 0.0999, |
| "step": 102100 |
| }, |
| { |
| "epoch": 16.881400726792204, |
| "grad_norm": 0.6687765121459961, |
| "learning_rate": 4.8587404715584434e-06, |
| "loss": 0.147, |
| "step": 102200 |
| }, |
| { |
| "epoch": 16.897918731417246, |
| "grad_norm": 0.6900584697723389, |
| "learning_rate": 4.852622692067688e-06, |
| "loss": 0.1014, |
| "step": 102300 |
| }, |
| { |
| "epoch": 16.914436736042287, |
| "grad_norm": 0.5827245712280273, |
| "learning_rate": 4.846504912576931e-06, |
| "loss": 0.1018, |
| "step": 102400 |
| }, |
| { |
| "epoch": 16.930954740667328, |
| "grad_norm": 0.6611018180847168, |
| "learning_rate": 4.8403871330861755e-06, |
| "loss": 0.0986, |
| "step": 102500 |
| }, |
| { |
| "epoch": 16.94747274529237, |
| "grad_norm": 0.7171707153320312, |
| "learning_rate": 4.834269353595419e-06, |
| "loss": 0.1365, |
| "step": 102600 |
| }, |
| { |
| "epoch": 16.96399074991741, |
| "grad_norm": 0.7955174446105957, |
| "learning_rate": 4.828151574104663e-06, |
| "loss": 0.1115, |
| "step": 102700 |
| }, |
| { |
| "epoch": 16.980508754542452, |
| "grad_norm": 0.6726603507995605, |
| "learning_rate": 4.822033794613908e-06, |
| "loss": 0.0998, |
| "step": 102800 |
| }, |
| { |
| "epoch": 16.997026759167493, |
| "grad_norm": 0.6470670104026794, |
| "learning_rate": 4.815916015123152e-06, |
| "loss": 0.1008, |
| "step": 102900 |
| }, |
| { |
| "epoch": 17.013544763792535, |
| "grad_norm": 0.8556126952171326, |
| "learning_rate": 4.809798235632395e-06, |
| "loss": 0.1134, |
| "step": 103000 |
| }, |
| { |
| "epoch": 17.013544763792535, |
| "eval_cer": 0.03776264458284854, |
| "eval_loss": 0.1081945076584816, |
| "eval_runtime": 57.2963, |
| "eval_samples_per_second": 29.513, |
| "eval_steps_per_second": 7.383, |
| "eval_wer": 0.20945481796040716, |
| "step": 103000 |
| }, |
| { |
| "epoch": 17.030062768417576, |
| "grad_norm": 0.8898919224739075, |
| "learning_rate": 4.80368045614164e-06, |
| "loss": 0.1023, |
| "step": 103100 |
| }, |
| { |
| "epoch": 17.046580773042617, |
| "grad_norm": 0.4918752908706665, |
| "learning_rate": 4.797562676650883e-06, |
| "loss": 0.1012, |
| "step": 103200 |
| }, |
| { |
| "epoch": 17.06309877766766, |
| "grad_norm": 0.5613105297088623, |
| "learning_rate": 4.7914448971601275e-06, |
| "loss": 0.1064, |
| "step": 103300 |
| }, |
| { |
| "epoch": 17.0796167822927, |
| "grad_norm": 0.6109268665313721, |
| "learning_rate": 4.785327117669371e-06, |
| "loss": 0.1048, |
| "step": 103400 |
| }, |
| { |
| "epoch": 17.09613478691774, |
| "grad_norm": 0.6237761974334717, |
| "learning_rate": 4.779209338178615e-06, |
| "loss": 0.1044, |
| "step": 103500 |
| }, |
| { |
| "epoch": 17.112652791542782, |
| "grad_norm": 0.6789395213127136, |
| "learning_rate": 4.773091558687859e-06, |
| "loss": 0.0973, |
| "step": 103600 |
| }, |
| { |
| "epoch": 17.129170796167823, |
| "grad_norm": 0.7461550831794739, |
| "learning_rate": 4.766973779197103e-06, |
| "loss": 0.0965, |
| "step": 103700 |
| }, |
| { |
| "epoch": 17.145688800792865, |
| "grad_norm": 0.655927836894989, |
| "learning_rate": 4.760855999706347e-06, |
| "loss": 0.0962, |
| "step": 103800 |
| }, |
| { |
| "epoch": 17.162206805417906, |
| "grad_norm": 0.6233280897140503, |
| "learning_rate": 4.754738220215591e-06, |
| "loss": 0.0949, |
| "step": 103900 |
| }, |
| { |
| "epoch": 17.178724810042947, |
| "grad_norm": 0.6471518874168396, |
| "learning_rate": 4.748620440724835e-06, |
| "loss": 0.0916, |
| "step": 104000 |
| }, |
| { |
| "epoch": 17.178724810042947, |
| "eval_cer": 0.038019300527000206, |
| "eval_loss": 0.10819939523935318, |
| "eval_runtime": 61.0456, |
| "eval_samples_per_second": 27.701, |
| "eval_steps_per_second": 6.929, |
| "eval_wer": 0.21051645538000374, |
| "step": 104000 |
| }, |
| { |
| "epoch": 17.19524281466799, |
| "grad_norm": 0.6647598147392273, |
| "learning_rate": 4.742502661234079e-06, |
| "loss": 0.0941, |
| "step": 104100 |
| }, |
| { |
| "epoch": 17.21176081929303, |
| "grad_norm": 0.6160650849342346, |
| "learning_rate": 4.736384881743323e-06, |
| "loss": 0.0958, |
| "step": 104200 |
| }, |
| { |
| "epoch": 17.22827882391807, |
| "grad_norm": 0.7830073833465576, |
| "learning_rate": 4.730267102252567e-06, |
| "loss": 0.0988, |
| "step": 104300 |
| }, |
| { |
| "epoch": 17.244796828543112, |
| "grad_norm": 0.5620446801185608, |
| "learning_rate": 4.724149322761811e-06, |
| "loss": 0.0981, |
| "step": 104400 |
| }, |
| { |
| "epoch": 17.261314833168154, |
| "grad_norm": 0.5912889838218689, |
| "learning_rate": 4.718031543271055e-06, |
| "loss": 0.1015, |
| "step": 104500 |
| }, |
| { |
| "epoch": 17.277832837793195, |
| "grad_norm": 0.8370086550712585, |
| "learning_rate": 4.711913763780298e-06, |
| "loss": 0.0953, |
| "step": 104600 |
| }, |
| { |
| "epoch": 17.294350842418236, |
| "grad_norm": 0.5910390615463257, |
| "learning_rate": 4.705795984289543e-06, |
| "loss": 0.1, |
| "step": 104700 |
| }, |
| { |
| "epoch": 17.310868847043277, |
| "grad_norm": 0.6430118083953857, |
| "learning_rate": 4.699678204798786e-06, |
| "loss": 0.1022, |
| "step": 104800 |
| }, |
| { |
| "epoch": 17.32738685166832, |
| "grad_norm": 0.6246772408485413, |
| "learning_rate": 4.6935604253080305e-06, |
| "loss": 0.0989, |
| "step": 104900 |
| }, |
| { |
| "epoch": 17.34390485629336, |
| "grad_norm": 0.598013699054718, |
| "learning_rate": 4.687442645817275e-06, |
| "loss": 0.1074, |
| "step": 105000 |
| }, |
| { |
| "epoch": 17.34390485629336, |
| "eval_cer": 0.038062076517692146, |
| "eval_loss": 0.11045213788747787, |
| "eval_runtime": 57.9821, |
| "eval_samples_per_second": 29.164, |
| "eval_steps_per_second": 7.295, |
| "eval_wer": 0.20883032536064447, |
| "step": 105000 |
| }, |
| { |
| "epoch": 17.3604228609184, |
| "grad_norm": 1.371077060699463, |
| "learning_rate": 4.681324866326519e-06, |
| "loss": 0.0984, |
| "step": 105100 |
| }, |
| { |
| "epoch": 17.376940865543443, |
| "grad_norm": 0.9299737811088562, |
| "learning_rate": 4.6752070868357626e-06, |
| "loss": 0.1229, |
| "step": 105200 |
| }, |
| { |
| "epoch": 17.393458870168484, |
| "grad_norm": 0.7221639156341553, |
| "learning_rate": 4.669089307345007e-06, |
| "loss": 0.104, |
| "step": 105300 |
| }, |
| { |
| "epoch": 17.409976874793525, |
| "grad_norm": 1.0506755113601685, |
| "learning_rate": 4.66297152785425e-06, |
| "loss": 0.0978, |
| "step": 105400 |
| }, |
| { |
| "epoch": 17.426494879418566, |
| "grad_norm": 0.6425598859786987, |
| "learning_rate": 4.656853748363495e-06, |
| "loss": 0.0983, |
| "step": 105500 |
| }, |
| { |
| "epoch": 17.443012884043608, |
| "grad_norm": 0.7174783945083618, |
| "learning_rate": 4.650735968872738e-06, |
| "loss": 0.0997, |
| "step": 105600 |
| }, |
| { |
| "epoch": 17.45953088866865, |
| "grad_norm": 0.5940792560577393, |
| "learning_rate": 4.644618189381982e-06, |
| "loss": 0.1225, |
| "step": 105700 |
| }, |
| { |
| "epoch": 17.47604889329369, |
| "grad_norm": 0.47687768936157227, |
| "learning_rate": 4.638500409891226e-06, |
| "loss": 0.0972, |
| "step": 105800 |
| }, |
| { |
| "epoch": 17.49256689791873, |
| "grad_norm": 0.7543063759803772, |
| "learning_rate": 4.63238263040047e-06, |
| "loss": 0.095, |
| "step": 105900 |
| }, |
| { |
| "epoch": 17.509084902543773, |
| "grad_norm": 0.6112996339797974, |
| "learning_rate": 4.6262648509097145e-06, |
| "loss": 0.095, |
| "step": 106000 |
| }, |
| { |
| "epoch": 17.509084902543773, |
| "eval_cer": 0.038027855725138594, |
| "eval_loss": 0.10872569680213928, |
| "eval_runtime": 58.3098, |
| "eval_samples_per_second": 29.0, |
| "eval_steps_per_second": 7.254, |
| "eval_wer": 0.20964216574033598, |
| "step": 106000 |
| }, |
| { |
| "epoch": 17.525602907168814, |
| "grad_norm": 0.7269030809402466, |
| "learning_rate": 4.620147071418958e-06, |
| "loss": 0.1022, |
| "step": 106100 |
| }, |
| { |
| "epoch": 17.542120911793855, |
| "grad_norm": 0.6887866258621216, |
| "learning_rate": 4.614029291928202e-06, |
| "loss": 0.1003, |
| "step": 106200 |
| }, |
| { |
| "epoch": 17.558638916418897, |
| "grad_norm": 0.7257598638534546, |
| "learning_rate": 4.6079115124374466e-06, |
| "loss": 0.096, |
| "step": 106300 |
| }, |
| { |
| "epoch": 17.575156921043938, |
| "grad_norm": 0.8789656162261963, |
| "learning_rate": 4.60179373294669e-06, |
| "loss": 0.0986, |
| "step": 106400 |
| }, |
| { |
| "epoch": 17.59167492566898, |
| "grad_norm": 0.6779934167861938, |
| "learning_rate": 4.595675953455934e-06, |
| "loss": 0.1376, |
| "step": 106500 |
| }, |
| { |
| "epoch": 17.60819293029402, |
| "grad_norm": 0.63017737865448, |
| "learning_rate": 4.589558173965178e-06, |
| "loss": 0.1013, |
| "step": 106600 |
| }, |
| { |
| "epoch": 17.62471093491906, |
| "grad_norm": 0.6014170050621033, |
| "learning_rate": 4.583440394474422e-06, |
| "loss": 0.093, |
| "step": 106700 |
| }, |
| { |
| "epoch": 17.641228939544103, |
| "grad_norm": 0.7778664231300354, |
| "learning_rate": 4.577322614983666e-06, |
| "loss": 0.1045, |
| "step": 106800 |
| }, |
| { |
| "epoch": 17.657746944169144, |
| "grad_norm": 0.6275627017021179, |
| "learning_rate": 4.57120483549291e-06, |
| "loss": 0.0915, |
| "step": 106900 |
| }, |
| { |
| "epoch": 17.674264948794185, |
| "grad_norm": 0.5987668633460999, |
| "learning_rate": 4.565087056002153e-06, |
| "loss": 0.0973, |
| "step": 107000 |
| }, |
| { |
| "epoch": 17.674264948794185, |
| "eval_cer": 0.03785675176237081, |
| "eval_loss": 0.10704370588064194, |
| "eval_runtime": 57.1479, |
| "eval_samples_per_second": 29.59, |
| "eval_steps_per_second": 7.402, |
| "eval_wer": 0.20826828202085806, |
| "step": 107000 |
| }, |
| { |
| "epoch": 17.690782953419227, |
| "grad_norm": 0.6655980944633484, |
| "learning_rate": 4.558969276511398e-06, |
| "loss": 0.097, |
| "step": 107100 |
| }, |
| { |
| "epoch": 17.707300958044268, |
| "grad_norm": 0.6261619925498962, |
| "learning_rate": 4.552851497020642e-06, |
| "loss": 0.0994, |
| "step": 107200 |
| }, |
| { |
| "epoch": 17.72381896266931, |
| "grad_norm": 0.6233117580413818, |
| "learning_rate": 4.546733717529886e-06, |
| "loss": 0.0967, |
| "step": 107300 |
| }, |
| { |
| "epoch": 17.74033696729435, |
| "grad_norm": 0.5294709205627441, |
| "learning_rate": 4.54061593803913e-06, |
| "loss": 0.0941, |
| "step": 107400 |
| }, |
| { |
| "epoch": 17.75685497191939, |
| "grad_norm": 0.6875845193862915, |
| "learning_rate": 4.534498158548374e-06, |
| "loss": 0.1, |
| "step": 107500 |
| }, |
| { |
| "epoch": 17.773372976544433, |
| "grad_norm": 0.7154032588005066, |
| "learning_rate": 4.5283803790576175e-06, |
| "loss": 0.094, |
| "step": 107600 |
| }, |
| { |
| "epoch": 17.789890981169474, |
| "grad_norm": 0.672591507434845, |
| "learning_rate": 4.522262599566862e-06, |
| "loss": 0.0971, |
| "step": 107700 |
| }, |
| { |
| "epoch": 17.806408985794516, |
| "grad_norm": 0.744452178478241, |
| "learning_rate": 4.516144820076105e-06, |
| "loss": 0.092, |
| "step": 107800 |
| }, |
| { |
| "epoch": 17.822926990419557, |
| "grad_norm": 0.8155786991119385, |
| "learning_rate": 4.51002704058535e-06, |
| "loss": 0.0919, |
| "step": 107900 |
| }, |
| { |
| "epoch": 17.839444995044598, |
| "grad_norm": 0.7288583517074585, |
| "learning_rate": 4.503909261094593e-06, |
| "loss": 0.0984, |
| "step": 108000 |
| }, |
| { |
| "epoch": 17.839444995044598, |
| "eval_cer": 0.03769420299774143, |
| "eval_loss": 0.10798373073339462, |
| "eval_runtime": 57.3331, |
| "eval_samples_per_second": 29.494, |
| "eval_steps_per_second": 7.378, |
| "eval_wer": 0.20833073128083432, |
| "step": 108000 |
| }, |
| { |
| "epoch": 17.85596299966964, |
| "grad_norm": 0.6306447386741638, |
| "learning_rate": 4.497791481603837e-06, |
| "loss": 0.0945, |
| "step": 108100 |
| }, |
| { |
| "epoch": 17.87248100429468, |
| "grad_norm": 0.7502180933952332, |
| "learning_rate": 4.491673702113082e-06, |
| "loss": 0.0991, |
| "step": 108200 |
| }, |
| { |
| "epoch": 17.888999008919722, |
| "grad_norm": 0.6571519374847412, |
| "learning_rate": 4.485555922622325e-06, |
| "loss": 0.1046, |
| "step": 108300 |
| }, |
| { |
| "epoch": 17.905517013544763, |
| "grad_norm": 0.6141965389251709, |
| "learning_rate": 4.4794381431315694e-06, |
| "loss": 0.0982, |
| "step": 108400 |
| }, |
| { |
| "epoch": 17.922035018169804, |
| "grad_norm": 0.4874700903892517, |
| "learning_rate": 4.473320363640814e-06, |
| "loss": 0.0984, |
| "step": 108500 |
| }, |
| { |
| "epoch": 17.938553022794846, |
| "grad_norm": 0.8021253347396851, |
| "learning_rate": 4.467202584150057e-06, |
| "loss": 0.0955, |
| "step": 108600 |
| }, |
| { |
| "epoch": 17.955071027419887, |
| "grad_norm": 0.6977095603942871, |
| "learning_rate": 4.4610848046593015e-06, |
| "loss": 0.1019, |
| "step": 108700 |
| }, |
| { |
| "epoch": 17.97158903204493, |
| "grad_norm": 0.649456262588501, |
| "learning_rate": 4.454967025168545e-06, |
| "loss": 0.0992, |
| "step": 108800 |
| }, |
| { |
| "epoch": 17.98810703666997, |
| "grad_norm": 0.6027668714523315, |
| "learning_rate": 4.448849245677789e-06, |
| "loss": 0.0958, |
| "step": 108900 |
| }, |
| { |
| "epoch": 18.00462504129501, |
| "grad_norm": 0.7994409203529358, |
| "learning_rate": 4.442731466187033e-06, |
| "loss": 0.0972, |
| "step": 109000 |
| }, |
| { |
| "epoch": 18.00462504129501, |
| "eval_cer": 0.03761720621449593, |
| "eval_loss": 0.10664419084787369, |
| "eval_runtime": 61.6933, |
| "eval_samples_per_second": 27.41, |
| "eval_steps_per_second": 6.856, |
| "eval_wer": 0.20845562980078686, |
| "step": 109000 |
| }, |
| { |
| "epoch": 18.021143045920052, |
| "grad_norm": 0.5892287492752075, |
| "learning_rate": 4.436613686696277e-06, |
| "loss": 0.1006, |
| "step": 109100 |
| }, |
| { |
| "epoch": 18.037661050545093, |
| "grad_norm": 0.5561397075653076, |
| "learning_rate": 4.4304959072055205e-06, |
| "loss": 0.1027, |
| "step": 109200 |
| }, |
| { |
| "epoch": 18.054179055170135, |
| "grad_norm": 0.6827639937400818, |
| "learning_rate": 4.424378127714765e-06, |
| "loss": 0.0924, |
| "step": 109300 |
| }, |
| { |
| "epoch": 18.070697059795176, |
| "grad_norm": 0.4659579396247864, |
| "learning_rate": 4.418260348224009e-06, |
| "loss": 0.0955, |
| "step": 109400 |
| }, |
| { |
| "epoch": 18.087215064420217, |
| "grad_norm": 0.5949708223342896, |
| "learning_rate": 4.4121425687332535e-06, |
| "loss": 0.1026, |
| "step": 109500 |
| }, |
| { |
| "epoch": 18.10373306904526, |
| "grad_norm": 0.7214411497116089, |
| "learning_rate": 4.406024789242497e-06, |
| "loss": 0.0951, |
| "step": 109600 |
| }, |
| { |
| "epoch": 18.1202510736703, |
| "grad_norm": 0.7198790311813354, |
| "learning_rate": 4.399907009751741e-06, |
| "loss": 0.0958, |
| "step": 109700 |
| }, |
| { |
| "epoch": 18.13676907829534, |
| "grad_norm": 0.6852260828018188, |
| "learning_rate": 4.393789230260985e-06, |
| "loss": 0.0946, |
| "step": 109800 |
| }, |
| { |
| "epoch": 18.153287082920382, |
| "grad_norm": 0.6294082403182983, |
| "learning_rate": 4.387671450770229e-06, |
| "loss": 0.0956, |
| "step": 109900 |
| }, |
| { |
| "epoch": 18.169805087545424, |
| "grad_norm": 0.6702645421028137, |
| "learning_rate": 4.3815536712794725e-06, |
| "loss": 0.1124, |
| "step": 110000 |
| }, |
| { |
| "epoch": 18.169805087545424, |
| "eval_cer": 0.037873862158647596, |
| "eval_loss": 0.10725517570972443, |
| "eval_runtime": 57.4138, |
| "eval_samples_per_second": 29.453, |
| "eval_steps_per_second": 7.368, |
| "eval_wer": 0.20920502092050208, |
| "step": 110000 |
| }, |
| { |
| "epoch": 18.186323092170465, |
| "grad_norm": 0.8165464997291565, |
| "learning_rate": 4.375435891788717e-06, |
| "loss": 0.1, |
| "step": 110100 |
| }, |
| { |
| "epoch": 18.202841096795506, |
| "grad_norm": 0.5550252199172974, |
| "learning_rate": 4.36931811229796e-06, |
| "loss": 0.1161, |
| "step": 110200 |
| }, |
| { |
| "epoch": 18.219359101420547, |
| "grad_norm": 0.6268564462661743, |
| "learning_rate": 4.3632003328072045e-06, |
| "loss": 0.0868, |
| "step": 110300 |
| }, |
| { |
| "epoch": 18.23587710604559, |
| "grad_norm": 0.9998523592948914, |
| "learning_rate": 4.357082553316449e-06, |
| "loss": 0.0888, |
| "step": 110400 |
| }, |
| { |
| "epoch": 18.25239511067063, |
| "grad_norm": 0.7963108420372009, |
| "learning_rate": 4.350964773825692e-06, |
| "loss": 0.0976, |
| "step": 110500 |
| }, |
| { |
| "epoch": 18.26891311529567, |
| "grad_norm": 0.6764956712722778, |
| "learning_rate": 4.344846994334937e-06, |
| "loss": 0.0945, |
| "step": 110600 |
| }, |
| { |
| "epoch": 18.285431119920712, |
| "grad_norm": 0.845342218875885, |
| "learning_rate": 4.338729214844181e-06, |
| "loss": 0.1018, |
| "step": 110700 |
| }, |
| { |
| "epoch": 18.301949124545754, |
| "grad_norm": 0.519926130771637, |
| "learning_rate": 4.332611435353424e-06, |
| "loss": 0.1012, |
| "step": 110800 |
| }, |
| { |
| "epoch": 18.318467129170795, |
| "grad_norm": 0.5771397352218628, |
| "learning_rate": 4.326493655862669e-06, |
| "loss": 0.0963, |
| "step": 110900 |
| }, |
| { |
| "epoch": 18.334985133795836, |
| "grad_norm": 0.6836599707603455, |
| "learning_rate": 4.320375876371912e-06, |
| "loss": 0.0961, |
| "step": 111000 |
| }, |
| { |
| "epoch": 18.334985133795836, |
| "eval_cer": 0.03749743344055848, |
| "eval_loss": 0.10766017436981201, |
| "eval_runtime": 57.0005, |
| "eval_samples_per_second": 29.666, |
| "eval_steps_per_second": 7.421, |
| "eval_wer": 0.20683194904140387, |
| "step": 111000 |
| }, |
| { |
| "epoch": 18.351503138420878, |
| "grad_norm": 0.655540943145752, |
| "learning_rate": 4.3142580968811565e-06, |
| "loss": 0.0937, |
| "step": 111100 |
| }, |
| { |
| "epoch": 18.36802114304592, |
| "grad_norm": 0.7102084755897522, |
| "learning_rate": 4.3081403173904e-06, |
| "loss": 0.0982, |
| "step": 111200 |
| }, |
| { |
| "epoch": 18.38453914767096, |
| "grad_norm": 0.5947690010070801, |
| "learning_rate": 4.302022537899644e-06, |
| "loss": 0.0984, |
| "step": 111300 |
| }, |
| { |
| "epoch": 18.401057152296, |
| "grad_norm": 0.5899379253387451, |
| "learning_rate": 4.295904758408888e-06, |
| "loss": 0.0897, |
| "step": 111400 |
| }, |
| { |
| "epoch": 18.417575156921043, |
| "grad_norm": 0.48331010341644287, |
| "learning_rate": 4.289786978918132e-06, |
| "loss": 0.0971, |
| "step": 111500 |
| }, |
| { |
| "epoch": 18.434093161546084, |
| "grad_norm": 0.6152350306510925, |
| "learning_rate": 4.283669199427376e-06, |
| "loss": 0.1001, |
| "step": 111600 |
| }, |
| { |
| "epoch": 18.450611166171125, |
| "grad_norm": 0.5093644857406616, |
| "learning_rate": 4.277551419936621e-06, |
| "loss": 0.099, |
| "step": 111700 |
| }, |
| { |
| "epoch": 18.467129170796166, |
| "grad_norm": 0.6065688133239746, |
| "learning_rate": 4.271433640445864e-06, |
| "loss": 0.1007, |
| "step": 111800 |
| }, |
| { |
| "epoch": 18.483647175421208, |
| "grad_norm": 0.7295845746994019, |
| "learning_rate": 4.265315860955108e-06, |
| "loss": 0.1, |
| "step": 111900 |
| }, |
| { |
| "epoch": 18.50016518004625, |
| "grad_norm": 1.8832347393035889, |
| "learning_rate": 4.259198081464352e-06, |
| "loss": 0.0975, |
| "step": 112000 |
| }, |
| { |
| "epoch": 18.50016518004625, |
| "eval_cer": 0.03751454383683526, |
| "eval_loss": 0.10801618546247482, |
| "eval_runtime": 57.4664, |
| "eval_samples_per_second": 29.426, |
| "eval_steps_per_second": 7.361, |
| "eval_wer": 0.2083931805408106, |
| "step": 112000 |
| }, |
| { |
| "epoch": 18.51668318467129, |
| "grad_norm": 0.7237940430641174, |
| "learning_rate": 4.253080301973596e-06, |
| "loss": 0.0929, |
| "step": 112100 |
| }, |
| { |
| "epoch": 18.53320118929633, |
| "grad_norm": 0.8659229278564453, |
| "learning_rate": 4.24696252248284e-06, |
| "loss": 0.0956, |
| "step": 112200 |
| }, |
| { |
| "epoch": 18.549719193921373, |
| "grad_norm": 0.6070505380630493, |
| "learning_rate": 4.240844742992084e-06, |
| "loss": 0.0942, |
| "step": 112300 |
| }, |
| { |
| "epoch": 18.566237198546414, |
| "grad_norm": 0.5244882702827454, |
| "learning_rate": 4.234726963501327e-06, |
| "loss": 0.0917, |
| "step": 112400 |
| }, |
| { |
| "epoch": 18.582755203171455, |
| "grad_norm": 0.7137103080749512, |
| "learning_rate": 4.228609184010572e-06, |
| "loss": 0.0949, |
| "step": 112500 |
| }, |
| { |
| "epoch": 18.599273207796497, |
| "grad_norm": 0.5891650319099426, |
| "learning_rate": 4.222491404519816e-06, |
| "loss": 0.0953, |
| "step": 112600 |
| }, |
| { |
| "epoch": 18.615791212421538, |
| "grad_norm": 0.5612820386886597, |
| "learning_rate": 4.2163736250290595e-06, |
| "loss": 0.0966, |
| "step": 112700 |
| }, |
| { |
| "epoch": 18.63230921704658, |
| "grad_norm": 0.7165923714637756, |
| "learning_rate": 4.210255845538304e-06, |
| "loss": 0.0908, |
| "step": 112800 |
| }, |
| { |
| "epoch": 18.64882722167162, |
| "grad_norm": 0.6711476445198059, |
| "learning_rate": 4.204138066047548e-06, |
| "loss": 0.1005, |
| "step": 112900 |
| }, |
| { |
| "epoch": 18.66534522629666, |
| "grad_norm": 0.5342008471488953, |
| "learning_rate": 4.198020286556792e-06, |
| "loss": 0.089, |
| "step": 113000 |
| }, |
| { |
| "epoch": 18.66534522629666, |
| "eval_cer": 0.03749743344055848, |
| "eval_loss": 0.10710610449314117, |
| "eval_runtime": 62.8493, |
| "eval_samples_per_second": 26.906, |
| "eval_steps_per_second": 6.73, |
| "eval_wer": 0.20783113720102417, |
| "step": 113000 |
| }, |
| { |
| "epoch": 18.681863230921703, |
| "grad_norm": 0.49494898319244385, |
| "learning_rate": 4.191902507066036e-06, |
| "loss": 0.098, |
| "step": 113100 |
| }, |
| { |
| "epoch": 18.698381235546744, |
| "grad_norm": 0.8655831217765808, |
| "learning_rate": 4.185784727575279e-06, |
| "loss": 0.1006, |
| "step": 113200 |
| }, |
| { |
| "epoch": 18.71489924017179, |
| "grad_norm": 0.7897951006889343, |
| "learning_rate": 4.179666948084524e-06, |
| "loss": 0.0914, |
| "step": 113300 |
| }, |
| { |
| "epoch": 18.73141724479683, |
| "grad_norm": 0.5988522171974182, |
| "learning_rate": 4.173549168593767e-06, |
| "loss": 0.1146, |
| "step": 113400 |
| }, |
| { |
| "epoch": 18.74793524942187, |
| "grad_norm": 0.690118134021759, |
| "learning_rate": 4.1674313891030114e-06, |
| "loss": 0.096, |
| "step": 113500 |
| }, |
| { |
| "epoch": 18.764453254046913, |
| "grad_norm": 0.7711309790611267, |
| "learning_rate": 4.161313609612255e-06, |
| "loss": 0.0935, |
| "step": 113600 |
| }, |
| { |
| "epoch": 18.780971258671954, |
| "grad_norm": 0.596615195274353, |
| "learning_rate": 4.155195830121499e-06, |
| "loss": 0.0973, |
| "step": 113700 |
| }, |
| { |
| "epoch": 18.797489263296995, |
| "grad_norm": 0.7073595523834229, |
| "learning_rate": 4.1490780506307435e-06, |
| "loss": 0.0941, |
| "step": 113800 |
| }, |
| { |
| "epoch": 18.814007267922037, |
| "grad_norm": 0.7061730027198792, |
| "learning_rate": 4.142960271139988e-06, |
| "loss": 0.0935, |
| "step": 113900 |
| }, |
| { |
| "epoch": 18.830525272547078, |
| "grad_norm": 0.7775730490684509, |
| "learning_rate": 4.136842491649231e-06, |
| "loss": 0.0902, |
| "step": 114000 |
| }, |
| { |
| "epoch": 18.830525272547078, |
| "eval_cer": 0.03754876462938882, |
| "eval_loss": 0.10710606724023819, |
| "eval_runtime": 57.0219, |
| "eval_samples_per_second": 29.655, |
| "eval_steps_per_second": 7.418, |
| "eval_wer": 0.20683194904140387, |
| "step": 114000 |
| }, |
| { |
| "epoch": 18.84704327717212, |
| "grad_norm": 0.6550432443618774, |
| "learning_rate": 4.130724712158476e-06, |
| "loss": 0.0954, |
| "step": 114100 |
| }, |
| { |
| "epoch": 18.86356128179716, |
| "grad_norm": 0.5847755670547485, |
| "learning_rate": 4.124606932667719e-06, |
| "loss": 0.0996, |
| "step": 114200 |
| }, |
| { |
| "epoch": 18.880079286422202, |
| "grad_norm": 0.6805459260940552, |
| "learning_rate": 4.118489153176963e-06, |
| "loss": 0.0948, |
| "step": 114300 |
| }, |
| { |
| "epoch": 18.896597291047243, |
| "grad_norm": 0.6957184076309204, |
| "learning_rate": 4.112371373686207e-06, |
| "loss": 0.0981, |
| "step": 114400 |
| }, |
| { |
| "epoch": 18.913115295672284, |
| "grad_norm": 0.6642732620239258, |
| "learning_rate": 4.106253594195451e-06, |
| "loss": 0.0929, |
| "step": 114500 |
| }, |
| { |
| "epoch": 18.929633300297326, |
| "grad_norm": 0.6945010423660278, |
| "learning_rate": 4.100135814704695e-06, |
| "loss": 0.1019, |
| "step": 114600 |
| }, |
| { |
| "epoch": 18.946151304922367, |
| "grad_norm": 0.7043463587760925, |
| "learning_rate": 4.094018035213939e-06, |
| "loss": 0.0999, |
| "step": 114700 |
| }, |
| { |
| "epoch": 18.962669309547408, |
| "grad_norm": 0.5579137206077576, |
| "learning_rate": 4.087900255723183e-06, |
| "loss": 0.0944, |
| "step": 114800 |
| }, |
| { |
| "epoch": 18.97918731417245, |
| "grad_norm": 0.6382579803466797, |
| "learning_rate": 4.081782476232427e-06, |
| "loss": 0.0975, |
| "step": 114900 |
| }, |
| { |
| "epoch": 18.99570531879749, |
| "grad_norm": 0.6321828365325928, |
| "learning_rate": 4.075664696741671e-06, |
| "loss": 0.101, |
| "step": 115000 |
| }, |
| { |
| "epoch": 18.99570531879749, |
| "eval_cer": 0.03747176784614332, |
| "eval_loss": 0.10826370120048523, |
| "eval_runtime": 56.9853, |
| "eval_samples_per_second": 29.674, |
| "eval_steps_per_second": 7.423, |
| "eval_wer": 0.20770623868107163, |
| "step": 115000 |
| }, |
| { |
| "epoch": 19.012223323422532, |
| "grad_norm": 0.6674085855484009, |
| "learning_rate": 4.069546917250915e-06, |
| "loss": 0.0945, |
| "step": 115100 |
| }, |
| { |
| "epoch": 19.028741328047573, |
| "grad_norm": 0.7357644438743591, |
| "learning_rate": 4.063429137760159e-06, |
| "loss": 0.0907, |
| "step": 115200 |
| }, |
| { |
| "epoch": 19.045259332672615, |
| "grad_norm": 0.6607419848442078, |
| "learning_rate": 4.057311358269403e-06, |
| "loss": 0.1015, |
| "step": 115300 |
| }, |
| { |
| "epoch": 19.061777337297656, |
| "grad_norm": 0.7660622000694275, |
| "learning_rate": 4.0511935787786465e-06, |
| "loss": 0.0989, |
| "step": 115400 |
| }, |
| { |
| "epoch": 19.078295341922697, |
| "grad_norm": 0.5479562282562256, |
| "learning_rate": 4.045075799287891e-06, |
| "loss": 0.0942, |
| "step": 115500 |
| }, |
| { |
| "epoch": 19.09481334654774, |
| "grad_norm": 0.9880116581916809, |
| "learning_rate": 4.038958019797134e-06, |
| "loss": 0.0926, |
| "step": 115600 |
| }, |
| { |
| "epoch": 19.11133135117278, |
| "grad_norm": 0.9058769941329956, |
| "learning_rate": 4.032840240306379e-06, |
| "loss": 0.0956, |
| "step": 115700 |
| }, |
| { |
| "epoch": 19.12784935579782, |
| "grad_norm": 0.6099743247032166, |
| "learning_rate": 4.026722460815622e-06, |
| "loss": 0.0999, |
| "step": 115800 |
| }, |
| { |
| "epoch": 19.144367360422862, |
| "grad_norm": 0.7211606502532959, |
| "learning_rate": 4.020604681324866e-06, |
| "loss": 0.093, |
| "step": 115900 |
| }, |
| { |
| "epoch": 19.160885365047903, |
| "grad_norm": 0.7449648380279541, |
| "learning_rate": 4.014486901834111e-06, |
| "loss": 0.0957, |
| "step": 116000 |
| }, |
| { |
| "epoch": 19.160885365047903, |
| "eval_cer": 0.03714667031688454, |
| "eval_loss": 0.10729096084833145, |
| "eval_runtime": 57.487, |
| "eval_samples_per_second": 29.415, |
| "eval_steps_per_second": 7.358, |
| "eval_wer": 0.20583276088178354, |
| "step": 116000 |
| }, |
| { |
| "epoch": 19.177403369672945, |
| "grad_norm": 0.7683896422386169, |
| "learning_rate": 4.008369122343355e-06, |
| "loss": 0.0959, |
| "step": 116100 |
| }, |
| { |
| "epoch": 19.193921374297986, |
| "grad_norm": 0.6918036341667175, |
| "learning_rate": 4.0022513428525985e-06, |
| "loss": 0.0922, |
| "step": 116200 |
| }, |
| { |
| "epoch": 19.210439378923027, |
| "grad_norm": 0.9067769050598145, |
| "learning_rate": 3.996133563361843e-06, |
| "loss": 0.1088, |
| "step": 116300 |
| }, |
| { |
| "epoch": 19.22695738354807, |
| "grad_norm": 0.5687412023544312, |
| "learning_rate": 3.990015783871086e-06, |
| "loss": 0.0955, |
| "step": 116400 |
| }, |
| { |
| "epoch": 19.24347538817311, |
| "grad_norm": 0.6453192830085754, |
| "learning_rate": 3.9838980043803305e-06, |
| "loss": 0.0942, |
| "step": 116500 |
| }, |
| { |
| "epoch": 19.25999339279815, |
| "grad_norm": 0.6212557554244995, |
| "learning_rate": 3.977780224889574e-06, |
| "loss": 0.0972, |
| "step": 116600 |
| }, |
| { |
| "epoch": 19.276511397423192, |
| "grad_norm": 0.7683126926422119, |
| "learning_rate": 3.971662445398818e-06, |
| "loss": 0.0943, |
| "step": 116700 |
| }, |
| { |
| "epoch": 19.293029402048234, |
| "grad_norm": 0.9485934972763062, |
| "learning_rate": 3.965544665908062e-06, |
| "loss": 0.0954, |
| "step": 116800 |
| }, |
| { |
| "epoch": 19.309547406673275, |
| "grad_norm": 0.5345008373260498, |
| "learning_rate": 3.959426886417306e-06, |
| "loss": 0.0951, |
| "step": 116900 |
| }, |
| { |
| "epoch": 19.326065411298316, |
| "grad_norm": 0.5996564626693726, |
| "learning_rate": 3.95330910692655e-06, |
| "loss": 0.094, |
| "step": 117000 |
| }, |
| { |
| "epoch": 19.326065411298316, |
| "eval_cer": 0.03736910546848265, |
| "eval_loss": 0.10748081654310226, |
| "eval_runtime": 57.0717, |
| "eval_samples_per_second": 29.629, |
| "eval_steps_per_second": 7.412, |
| "eval_wer": 0.20670705052145133, |
| "step": 117000 |
| }, |
| { |
| "epoch": 19.342583415923357, |
| "grad_norm": 0.7661871910095215, |
| "learning_rate": 3.947191327435794e-06, |
| "loss": 0.092, |
| "step": 117100 |
| }, |
| { |
| "epoch": 19.3591014205484, |
| "grad_norm": 0.5788329839706421, |
| "learning_rate": 3.941073547945038e-06, |
| "loss": 0.0949, |
| "step": 117200 |
| }, |
| { |
| "epoch": 19.37561942517344, |
| "grad_norm": 0.6844035983085632, |
| "learning_rate": 3.9349557684542825e-06, |
| "loss": 0.0948, |
| "step": 117300 |
| }, |
| { |
| "epoch": 19.39213742979848, |
| "grad_norm": 0.5855575203895569, |
| "learning_rate": 3.928837988963526e-06, |
| "loss": 0.0932, |
| "step": 117400 |
| }, |
| { |
| "epoch": 19.408655434423522, |
| "grad_norm": 0.500566840171814, |
| "learning_rate": 3.92272020947277e-06, |
| "loss": 0.0984, |
| "step": 117500 |
| }, |
| { |
| "epoch": 19.425173439048564, |
| "grad_norm": 0.7234964370727539, |
| "learning_rate": 3.916602429982014e-06, |
| "loss": 0.1, |
| "step": 117600 |
| }, |
| { |
| "epoch": 19.441691443673605, |
| "grad_norm": 0.6670413613319397, |
| "learning_rate": 3.910484650491258e-06, |
| "loss": 0.0903, |
| "step": 117700 |
| }, |
| { |
| "epoch": 19.458209448298646, |
| "grad_norm": 1.1672645807266235, |
| "learning_rate": 3.9043668710005015e-06, |
| "loss": 0.0941, |
| "step": 117800 |
| }, |
| { |
| "epoch": 19.474727452923688, |
| "grad_norm": 0.8242597579956055, |
| "learning_rate": 3.898249091509746e-06, |
| "loss": 0.1007, |
| "step": 117900 |
| }, |
| { |
| "epoch": 19.49124545754873, |
| "grad_norm": 0.6898741722106934, |
| "learning_rate": 3.892131312018989e-06, |
| "loss": 0.094, |
| "step": 118000 |
| }, |
| { |
| "epoch": 19.49124545754873, |
| "eval_cer": 0.03757443022380398, |
| "eval_loss": 0.10757853835821152, |
| "eval_runtime": 59.6316, |
| "eval_samples_per_second": 28.357, |
| "eval_steps_per_second": 7.094, |
| "eval_wer": 0.2075188909011428, |
| "step": 118000 |
| }, |
| { |
| "epoch": 19.50776346217377, |
| "grad_norm": 1.0092437267303467, |
| "learning_rate": 3.8860135325282336e-06, |
| "loss": 0.0977, |
| "step": 118100 |
| }, |
| { |
| "epoch": 19.52428146679881, |
| "grad_norm": 0.5592585802078247, |
| "learning_rate": 3.879895753037478e-06, |
| "loss": 0.0908, |
| "step": 118200 |
| }, |
| { |
| "epoch": 19.540799471423853, |
| "grad_norm": 0.7661089301109314, |
| "learning_rate": 3.873777973546722e-06, |
| "loss": 0.0894, |
| "step": 118300 |
| }, |
| { |
| "epoch": 19.557317476048894, |
| "grad_norm": 0.6303833723068237, |
| "learning_rate": 3.867660194055966e-06, |
| "loss": 0.0938, |
| "step": 118400 |
| }, |
| { |
| "epoch": 19.573835480673935, |
| "grad_norm": 0.6270598769187927, |
| "learning_rate": 3.86154241456521e-06, |
| "loss": 0.0963, |
| "step": 118500 |
| }, |
| { |
| "epoch": 19.590353485298976, |
| "grad_norm": 0.7540850639343262, |
| "learning_rate": 3.855424635074453e-06, |
| "loss": 0.0941, |
| "step": 118600 |
| }, |
| { |
| "epoch": 19.606871489924018, |
| "grad_norm": 0.6837806701660156, |
| "learning_rate": 3.849306855583698e-06, |
| "loss": 0.0965, |
| "step": 118700 |
| }, |
| { |
| "epoch": 19.62338949454906, |
| "grad_norm": 0.5979442000389099, |
| "learning_rate": 3.843189076092942e-06, |
| "loss": 0.0934, |
| "step": 118800 |
| }, |
| { |
| "epoch": 19.6399074991741, |
| "grad_norm": 0.5547999143600464, |
| "learning_rate": 3.8370712966021855e-06, |
| "loss": 0.0954, |
| "step": 118900 |
| }, |
| { |
| "epoch": 19.65642550379914, |
| "grad_norm": 0.7073753476142883, |
| "learning_rate": 3.83095351711143e-06, |
| "loss": 0.0912, |
| "step": 119000 |
| }, |
| { |
| "epoch": 19.65642550379914, |
| "eval_cer": 0.037488878242420094, |
| "eval_loss": 0.10558834671974182, |
| "eval_runtime": 57.5545, |
| "eval_samples_per_second": 29.381, |
| "eval_steps_per_second": 7.35, |
| "eval_wer": 0.20639480422156997, |
| "step": 119000 |
| }, |
| { |
| "epoch": 19.672943508424183, |
| "grad_norm": 0.6621033549308777, |
| "learning_rate": 3.824835737620673e-06, |
| "loss": 0.096, |
| "step": 119100 |
| }, |
| { |
| "epoch": 19.689461513049224, |
| "grad_norm": 0.6240584254264832, |
| "learning_rate": 3.818717958129918e-06, |
| "loss": 0.0925, |
| "step": 119200 |
| }, |
| { |
| "epoch": 19.705979517674265, |
| "grad_norm": 0.834823489189148, |
| "learning_rate": 3.8126001786391615e-06, |
| "loss": 0.0904, |
| "step": 119300 |
| }, |
| { |
| "epoch": 19.722497522299307, |
| "grad_norm": 0.5534527897834778, |
| "learning_rate": 3.8064823991484058e-06, |
| "loss": 0.0864, |
| "step": 119400 |
| }, |
| { |
| "epoch": 19.739015526924348, |
| "grad_norm": 0.5191404819488525, |
| "learning_rate": 3.8003646196576492e-06, |
| "loss": 0.0943, |
| "step": 119500 |
| }, |
| { |
| "epoch": 19.75553353154939, |
| "grad_norm": 0.7800885438919067, |
| "learning_rate": 3.7942468401668936e-06, |
| "loss": 0.096, |
| "step": 119600 |
| }, |
| { |
| "epoch": 19.77205153617443, |
| "grad_norm": 0.624622106552124, |
| "learning_rate": 3.7881290606761374e-06, |
| "loss": 0.1021, |
| "step": 119700 |
| }, |
| { |
| "epoch": 19.78856954079947, |
| "grad_norm": 0.448397159576416, |
| "learning_rate": 3.7820112811853813e-06, |
| "loss": 0.0934, |
| "step": 119800 |
| }, |
| { |
| "epoch": 19.805087545424513, |
| "grad_norm": 0.6804871559143066, |
| "learning_rate": 3.775893501694625e-06, |
| "loss": 0.0907, |
| "step": 119900 |
| }, |
| { |
| "epoch": 19.821605550049554, |
| "grad_norm": 0.8749545216560364, |
| "learning_rate": 3.7697757222038695e-06, |
| "loss": 0.0914, |
| "step": 120000 |
| }, |
| { |
| "epoch": 19.821605550049554, |
| "eval_cer": 0.03736910546848265, |
| "eval_loss": 0.1068761870265007, |
| "eval_runtime": 61.4436, |
| "eval_samples_per_second": 27.521, |
| "eval_steps_per_second": 6.884, |
| "eval_wer": 0.20639480422156997, |
| "step": 120000 |
| }, |
| { |
| "epoch": 19.838123554674596, |
| "grad_norm": 0.6852620244026184, |
| "learning_rate": 3.763657942713113e-06, |
| "loss": 0.0893, |
| "step": 120100 |
| }, |
| { |
| "epoch": 19.854641559299637, |
| "grad_norm": 0.48279762268066406, |
| "learning_rate": 3.7575401632223573e-06, |
| "loss": 0.0929, |
| "step": 120200 |
| }, |
| { |
| "epoch": 19.871159563924678, |
| "grad_norm": 0.9051792025566101, |
| "learning_rate": 3.751422383731601e-06, |
| "loss": 0.1167, |
| "step": 120300 |
| }, |
| { |
| "epoch": 19.88767756854972, |
| "grad_norm": 0.5648931264877319, |
| "learning_rate": 3.7453046042408455e-06, |
| "loss": 0.0946, |
| "step": 120400 |
| }, |
| { |
| "epoch": 19.90419557317476, |
| "grad_norm": 0.6829082369804382, |
| "learning_rate": 3.739186824750089e-06, |
| "loss": 0.0914, |
| "step": 120500 |
| }, |
| { |
| "epoch": 19.920713577799802, |
| "grad_norm": 0.8707834482192993, |
| "learning_rate": 3.7330690452593333e-06, |
| "loss": 0.0919, |
| "step": 120600 |
| }, |
| { |
| "epoch": 19.937231582424843, |
| "grad_norm": 0.6333926916122437, |
| "learning_rate": 3.7269512657685767e-06, |
| "loss": 0.0991, |
| "step": 120700 |
| }, |
| { |
| "epoch": 19.953749587049884, |
| "grad_norm": 0.5039823055267334, |
| "learning_rate": 3.720833486277821e-06, |
| "loss": 0.0899, |
| "step": 120800 |
| }, |
| { |
| "epoch": 19.970267591674926, |
| "grad_norm": 0.5696250200271606, |
| "learning_rate": 3.714715706787065e-06, |
| "loss": 0.0934, |
| "step": 120900 |
| }, |
| { |
| "epoch": 19.986785596299967, |
| "grad_norm": 0.6956700682640076, |
| "learning_rate": 3.7085979272963092e-06, |
| "loss": 0.0933, |
| "step": 121000 |
| }, |
| { |
| "epoch": 19.986785596299967, |
| "eval_cer": 0.03744610225172815, |
| "eval_loss": 0.10595033317804337, |
| "eval_runtime": 57.0726, |
| "eval_samples_per_second": 29.629, |
| "eval_steps_per_second": 7.412, |
| "eval_wer": 0.20664460126147505, |
| "step": 121000 |
| }, |
| { |
| "epoch": 20.00330360092501, |
| "grad_norm": 0.615598201751709, |
| "learning_rate": 3.7024801478055527e-06, |
| "loss": 0.0954, |
| "step": 121100 |
| }, |
| { |
| "epoch": 20.01982160555005, |
| "grad_norm": 0.4726826250553131, |
| "learning_rate": 3.696362368314797e-06, |
| "loss": 0.1088, |
| "step": 121200 |
| }, |
| { |
| "epoch": 20.03633961017509, |
| "grad_norm": 0.554155170917511, |
| "learning_rate": 3.690244588824041e-06, |
| "loss": 0.0934, |
| "step": 121300 |
| }, |
| { |
| "epoch": 20.052857614800132, |
| "grad_norm": 0.8273882269859314, |
| "learning_rate": 3.6841268093332848e-06, |
| "loss": 0.0982, |
| "step": 121400 |
| }, |
| { |
| "epoch": 20.069375619425173, |
| "grad_norm": 0.6084610819816589, |
| "learning_rate": 3.6780090298425287e-06, |
| "loss": 0.096, |
| "step": 121500 |
| }, |
| { |
| "epoch": 20.085893624050215, |
| "grad_norm": 0.42655861377716064, |
| "learning_rate": 3.671891250351773e-06, |
| "loss": 0.0929, |
| "step": 121600 |
| }, |
| { |
| "epoch": 20.102411628675256, |
| "grad_norm": 0.7716320753097534, |
| "learning_rate": 3.6657734708610164e-06, |
| "loss": 0.0925, |
| "step": 121700 |
| }, |
| { |
| "epoch": 20.118929633300297, |
| "grad_norm": 0.5255216360092163, |
| "learning_rate": 3.6596556913702607e-06, |
| "loss": 0.0929, |
| "step": 121800 |
| }, |
| { |
| "epoch": 20.13544763792534, |
| "grad_norm": 0.8503526449203491, |
| "learning_rate": 3.6535379118795046e-06, |
| "loss": 0.0963, |
| "step": 121900 |
| }, |
| { |
| "epoch": 20.15196564255038, |
| "grad_norm": 0.5264951586723328, |
| "learning_rate": 3.6474201323887485e-06, |
| "loss": 0.1132, |
| "step": 122000 |
| }, |
| { |
| "epoch": 20.15196564255038, |
| "eval_cer": 0.03729210868523715, |
| "eval_loss": 0.10676946491003036, |
| "eval_runtime": 57.951, |
| "eval_samples_per_second": 29.18, |
| "eval_steps_per_second": 7.299, |
| "eval_wer": 0.20701929682133266, |
| "step": 122000 |
| }, |
| { |
| "epoch": 20.16848364717542, |
| "grad_norm": 0.6232183575630188, |
| "learning_rate": 3.6413023528979924e-06, |
| "loss": 0.0964, |
| "step": 122100 |
| }, |
| { |
| "epoch": 20.185001651800462, |
| "grad_norm": 0.6945717334747314, |
| "learning_rate": 3.6351845734072367e-06, |
| "loss": 0.0967, |
| "step": 122200 |
| }, |
| { |
| "epoch": 20.201519656425504, |
| "grad_norm": 0.7937995195388794, |
| "learning_rate": 3.62906679391648e-06, |
| "loss": 0.0896, |
| "step": 122300 |
| }, |
| { |
| "epoch": 20.218037661050545, |
| "grad_norm": 0.7246369123458862, |
| "learning_rate": 3.6229490144257245e-06, |
| "loss": 0.0867, |
| "step": 122400 |
| }, |
| { |
| "epoch": 20.234555665675586, |
| "grad_norm": 0.5831708908081055, |
| "learning_rate": 3.6168312349349684e-06, |
| "loss": 0.091, |
| "step": 122500 |
| }, |
| { |
| "epoch": 20.251073670300627, |
| "grad_norm": 0.7024865746498108, |
| "learning_rate": 3.6107134554442127e-06, |
| "loss": 0.0935, |
| "step": 122600 |
| }, |
| { |
| "epoch": 20.26759167492567, |
| "grad_norm": 0.8907782435417175, |
| "learning_rate": 3.604595675953456e-06, |
| "loss": 0.0956, |
| "step": 122700 |
| }, |
| { |
| "epoch": 20.28410967955071, |
| "grad_norm": 0.6609148383140564, |
| "learning_rate": 3.5984778964627004e-06, |
| "loss": 0.0933, |
| "step": 122800 |
| }, |
| { |
| "epoch": 20.30062768417575, |
| "grad_norm": 0.8460065722465515, |
| "learning_rate": 3.592360116971944e-06, |
| "loss": 0.0891, |
| "step": 122900 |
| }, |
| { |
| "epoch": 20.317145688800792, |
| "grad_norm": 0.6879094839096069, |
| "learning_rate": 3.586242337481188e-06, |
| "loss": 0.0888, |
| "step": 123000 |
| }, |
| { |
| "epoch": 20.317145688800792, |
| "eval_cer": 0.03731777427965232, |
| "eval_loss": 0.10576903820037842, |
| "eval_runtime": 55.0332, |
| "eval_samples_per_second": 30.727, |
| "eval_steps_per_second": 7.686, |
| "eval_wer": 0.20670705052145133, |
| "step": 123000 |
| }, |
| { |
| "epoch": 20.333663693425834, |
| "grad_norm": 0.7055560946464539, |
| "learning_rate": 3.580124557990432e-06, |
| "loss": 0.098, |
| "step": 123100 |
| }, |
| { |
| "epoch": 20.350181698050875, |
| "grad_norm": 0.5633586049079895, |
| "learning_rate": 3.5740067784996764e-06, |
| "loss": 0.0926, |
| "step": 123200 |
| }, |
| { |
| "epoch": 20.366699702675916, |
| "grad_norm": 0.6035296320915222, |
| "learning_rate": 3.56788899900892e-06, |
| "loss": 0.0939, |
| "step": 123300 |
| }, |
| { |
| "epoch": 20.383217707300957, |
| "grad_norm": 0.6581436395645142, |
| "learning_rate": 3.561771219518164e-06, |
| "loss": 0.094, |
| "step": 123400 |
| }, |
| { |
| "epoch": 20.399735711926, |
| "grad_norm": 0.6265963912010193, |
| "learning_rate": 3.555653440027408e-06, |
| "loss": 0.0901, |
| "step": 123500 |
| }, |
| { |
| "epoch": 20.41625371655104, |
| "grad_norm": 0.6421579718589783, |
| "learning_rate": 3.549535660536652e-06, |
| "loss": 0.0916, |
| "step": 123600 |
| }, |
| { |
| "epoch": 20.43277172117608, |
| "grad_norm": 0.5874105095863342, |
| "learning_rate": 3.543417881045896e-06, |
| "loss": 0.0899, |
| "step": 123700 |
| }, |
| { |
| "epoch": 20.449289725801123, |
| "grad_norm": 0.7892938256263733, |
| "learning_rate": 3.53730010155514e-06, |
| "loss": 0.0943, |
| "step": 123800 |
| }, |
| { |
| "epoch": 20.465807730426164, |
| "grad_norm": 0.5755423903465271, |
| "learning_rate": 3.5311823220643836e-06, |
| "loss": 0.0895, |
| "step": 123900 |
| }, |
| { |
| "epoch": 20.482325735051205, |
| "grad_norm": 0.5386433005332947, |
| "learning_rate": 3.525064542573628e-06, |
| "loss": 0.0942, |
| "step": 124000 |
| }, |
| { |
| "epoch": 20.482325735051205, |
| "eval_cer": 0.03739477106289782, |
| "eval_loss": 0.10733035951852798, |
| "eval_runtime": 53.425, |
| "eval_samples_per_second": 31.652, |
| "eval_steps_per_second": 7.918, |
| "eval_wer": 0.205707862361831, |
| "step": 124000 |
| }, |
| { |
| "epoch": 20.498843739676246, |
| "grad_norm": 0.6741786003112793, |
| "learning_rate": 3.518946763082872e-06, |
| "loss": 0.1323, |
| "step": 124100 |
| }, |
| { |
| "epoch": 20.515361744301288, |
| "grad_norm": 0.5971143245697021, |
| "learning_rate": 3.5128289835921157e-06, |
| "loss": 0.1021, |
| "step": 124200 |
| }, |
| { |
| "epoch": 20.53187974892633, |
| "grad_norm": 0.6608400344848633, |
| "learning_rate": 3.5067112041013596e-06, |
| "loss": 0.0947, |
| "step": 124300 |
| }, |
| { |
| "epoch": 20.54839775355137, |
| "grad_norm": 0.7231072187423706, |
| "learning_rate": 3.500593424610604e-06, |
| "loss": 0.1009, |
| "step": 124400 |
| }, |
| { |
| "epoch": 20.56491575817641, |
| "grad_norm": 0.6929687857627869, |
| "learning_rate": 3.4944756451198473e-06, |
| "loss": 0.0947, |
| "step": 124500 |
| }, |
| { |
| "epoch": 20.581433762801453, |
| "grad_norm": 0.7624922394752502, |
| "learning_rate": 3.4883578656290917e-06, |
| "loss": 0.1048, |
| "step": 124600 |
| }, |
| { |
| "epoch": 20.597951767426494, |
| "grad_norm": 0.6456249356269836, |
| "learning_rate": 3.4822400861383355e-06, |
| "loss": 0.0898, |
| "step": 124700 |
| }, |
| { |
| "epoch": 20.614469772051535, |
| "grad_norm": 0.5414004921913147, |
| "learning_rate": 3.47612230664758e-06, |
| "loss": 0.0919, |
| "step": 124800 |
| }, |
| { |
| "epoch": 20.630987776676577, |
| "grad_norm": 0.6581851840019226, |
| "learning_rate": 3.4700045271568233e-06, |
| "loss": 0.0933, |
| "step": 124900 |
| }, |
| { |
| "epoch": 20.647505781301618, |
| "grad_norm": 0.7606977820396423, |
| "learning_rate": 3.4638867476660676e-06, |
| "loss": 0.09, |
| "step": 125000 |
| }, |
| { |
| "epoch": 20.647505781301618, |
| "eval_cer": 0.037086783929915816, |
| "eval_loss": 0.10901934653520584, |
| "eval_runtime": 53.6092, |
| "eval_samples_per_second": 31.543, |
| "eval_steps_per_second": 7.89, |
| "eval_wer": 0.2050833697620683, |
| "step": 125000 |
| }, |
| { |
| "epoch": 20.66402378592666, |
| "grad_norm": 0.8043733239173889, |
| "learning_rate": 3.457768968175311e-06, |
| "loss": 0.0946, |
| "step": 125100 |
| }, |
| { |
| "epoch": 20.6805417905517, |
| "grad_norm": 0.5810668468475342, |
| "learning_rate": 3.4516511886845554e-06, |
| "loss": 0.1174, |
| "step": 125200 |
| }, |
| { |
| "epoch": 20.69705979517674, |
| "grad_norm": 0.5117190480232239, |
| "learning_rate": 3.4455334091937993e-06, |
| "loss": 0.087, |
| "step": 125300 |
| }, |
| { |
| "epoch": 20.713577799801783, |
| "grad_norm": 0.6740157604217529, |
| "learning_rate": 3.4394156297030436e-06, |
| "loss": 0.0974, |
| "step": 125400 |
| }, |
| { |
| "epoch": 20.730095804426824, |
| "grad_norm": 0.7044069170951843, |
| "learning_rate": 3.433297850212287e-06, |
| "loss": 0.0983, |
| "step": 125500 |
| }, |
| { |
| "epoch": 20.746613809051865, |
| "grad_norm": 0.7274563908576965, |
| "learning_rate": 3.4271800707215314e-06, |
| "loss": 0.0893, |
| "step": 125600 |
| }, |
| { |
| "epoch": 20.763131813676907, |
| "grad_norm": 0.6939309239387512, |
| "learning_rate": 3.4210622912307752e-06, |
| "loss": 0.0905, |
| "step": 125700 |
| }, |
| { |
| "epoch": 20.779649818301948, |
| "grad_norm": 0.841923713684082, |
| "learning_rate": 3.414944511740019e-06, |
| "loss": 0.0919, |
| "step": 125800 |
| }, |
| { |
| "epoch": 20.79616782292699, |
| "grad_norm": 0.6695510149002075, |
| "learning_rate": 3.408826732249263e-06, |
| "loss": 0.0996, |
| "step": 125900 |
| }, |
| { |
| "epoch": 20.81268582755203, |
| "grad_norm": 0.5963577628135681, |
| "learning_rate": 3.4027089527585073e-06, |
| "loss": 0.1104, |
| "step": 126000 |
| }, |
| { |
| "epoch": 20.81268582755203, |
| "eval_cer": 0.03736910546848265, |
| "eval_loss": 0.10715510696172714, |
| "eval_runtime": 53.3765, |
| "eval_samples_per_second": 31.681, |
| "eval_steps_per_second": 7.925, |
| "eval_wer": 0.20608255792168864, |
| "step": 126000 |
| }, |
| { |
| "epoch": 20.829203832177072, |
| "grad_norm": 0.5884077548980713, |
| "learning_rate": 3.396591173267751e-06, |
| "loss": 0.0941, |
| "step": 126100 |
| }, |
| { |
| "epoch": 20.845721836802113, |
| "grad_norm": 0.7187851071357727, |
| "learning_rate": 3.390473393776995e-06, |
| "loss": 0.0951, |
| "step": 126200 |
| }, |
| { |
| "epoch": 20.862239841427154, |
| "grad_norm": 0.8274132609367371, |
| "learning_rate": 3.384355614286239e-06, |
| "loss": 0.0926, |
| "step": 126300 |
| }, |
| { |
| "epoch": 20.878757846052196, |
| "grad_norm": 0.7908247113227844, |
| "learning_rate": 3.378237834795483e-06, |
| "loss": 0.0929, |
| "step": 126400 |
| }, |
| { |
| "epoch": 20.895275850677237, |
| "grad_norm": 0.7135681509971619, |
| "learning_rate": 3.3721200553047268e-06, |
| "loss": 0.0896, |
| "step": 126500 |
| }, |
| { |
| "epoch": 20.911793855302278, |
| "grad_norm": 0.5181264877319336, |
| "learning_rate": 3.366002275813971e-06, |
| "loss": 0.0903, |
| "step": 126600 |
| }, |
| { |
| "epoch": 20.92831185992732, |
| "grad_norm": 0.6559743285179138, |
| "learning_rate": 3.3598844963232145e-06, |
| "loss": 0.091, |
| "step": 126700 |
| }, |
| { |
| "epoch": 20.94482986455236, |
| "grad_norm": 0.498858243227005, |
| "learning_rate": 3.353766716832459e-06, |
| "loss": 0.0873, |
| "step": 126800 |
| }, |
| { |
| "epoch": 20.961347869177402, |
| "grad_norm": 0.6528595089912415, |
| "learning_rate": 3.3476489373417027e-06, |
| "loss": 0.0944, |
| "step": 126900 |
| }, |
| { |
| "epoch": 20.977865873802443, |
| "grad_norm": 0.4162144064903259, |
| "learning_rate": 3.341531157850947e-06, |
| "loss": 0.0865, |
| "step": 127000 |
| }, |
| { |
| "epoch": 20.977865873802443, |
| "eval_cer": 0.037052563137362264, |
| "eval_loss": 0.10633628815412521, |
| "eval_runtime": 53.5789, |
| "eval_samples_per_second": 31.561, |
| "eval_steps_per_second": 7.895, |
| "eval_wer": 0.2053331668019734, |
| "step": 127000 |
| }, |
| { |
| "epoch": 20.994383878427485, |
| "grad_norm": 0.8712024092674255, |
| "learning_rate": 3.3354133783601905e-06, |
| "loss": 0.0937, |
| "step": 127100 |
| }, |
| { |
| "epoch": 21.010901883052526, |
| "grad_norm": 0.8483607172966003, |
| "learning_rate": 3.329295598869435e-06, |
| "loss": 0.0879, |
| "step": 127200 |
| }, |
| { |
| "epoch": 21.027419887677567, |
| "grad_norm": 0.9952839016914368, |
| "learning_rate": 3.3231778193786783e-06, |
| "loss": 0.095, |
| "step": 127300 |
| }, |
| { |
| "epoch": 21.04393789230261, |
| "grad_norm": 0.4832421541213989, |
| "learning_rate": 3.3170600398879226e-06, |
| "loss": 0.109, |
| "step": 127400 |
| }, |
| { |
| "epoch": 21.06045589692765, |
| "grad_norm": 0.6822460889816284, |
| "learning_rate": 3.3109422603971665e-06, |
| "loss": 0.0897, |
| "step": 127500 |
| }, |
| { |
| "epoch": 21.07697390155269, |
| "grad_norm": 0.6260835528373718, |
| "learning_rate": 3.3048244809064108e-06, |
| "loss": 0.0892, |
| "step": 127600 |
| }, |
| { |
| "epoch": 21.093491906177732, |
| "grad_norm": 0.6604743003845215, |
| "learning_rate": 3.2987067014156542e-06, |
| "loss": 0.0957, |
| "step": 127700 |
| }, |
| { |
| "epoch": 21.110009910802773, |
| "grad_norm": 0.5889437794685364, |
| "learning_rate": 3.2925889219248985e-06, |
| "loss": 0.0923, |
| "step": 127800 |
| }, |
| { |
| "epoch": 21.126527915427815, |
| "grad_norm": 0.6197744011878967, |
| "learning_rate": 3.2864711424341424e-06, |
| "loss": 0.0936, |
| "step": 127900 |
| }, |
| { |
| "epoch": 21.143045920052856, |
| "grad_norm": 0.6159409880638123, |
| "learning_rate": 3.2803533629433863e-06, |
| "loss": 0.0901, |
| "step": 128000 |
| }, |
| { |
| "epoch": 21.143045920052856, |
| "eval_cer": 0.03699267675039354, |
| "eval_loss": 0.10548759251832962, |
| "eval_runtime": 53.1647, |
| "eval_samples_per_second": 31.807, |
| "eval_steps_per_second": 7.956, |
| "eval_wer": 0.20320989196278025, |
| "step": 128000 |
| }, |
| { |
| "epoch": 21.159563924677897, |
| "grad_norm": 0.4305579960346222, |
| "learning_rate": 3.27423558345263e-06, |
| "loss": 0.0962, |
| "step": 128100 |
| }, |
| { |
| "epoch": 21.17608192930294, |
| "grad_norm": 0.8560436367988586, |
| "learning_rate": 3.2681178039618745e-06, |
| "loss": 0.1129, |
| "step": 128200 |
| }, |
| { |
| "epoch": 21.19259993392798, |
| "grad_norm": 0.824738085269928, |
| "learning_rate": 3.262000024471118e-06, |
| "loss": 0.095, |
| "step": 128300 |
| }, |
| { |
| "epoch": 21.20911793855302, |
| "grad_norm": 0.7285254597663879, |
| "learning_rate": 3.2558822449803623e-06, |
| "loss": 0.0967, |
| "step": 128400 |
| }, |
| { |
| "epoch": 21.225635943178062, |
| "grad_norm": 0.8130694627761841, |
| "learning_rate": 3.249764465489606e-06, |
| "loss": 0.0952, |
| "step": 128500 |
| }, |
| { |
| "epoch": 21.242153947803104, |
| "grad_norm": 0.640154242515564, |
| "learning_rate": 3.24364668599885e-06, |
| "loss": 0.0911, |
| "step": 128600 |
| }, |
| { |
| "epoch": 21.258671952428145, |
| "grad_norm": 0.5193492770195007, |
| "learning_rate": 3.237528906508094e-06, |
| "loss": 0.085, |
| "step": 128700 |
| }, |
| { |
| "epoch": 21.27518995705319, |
| "grad_norm": 0.7556692957878113, |
| "learning_rate": 3.2314111270173382e-06, |
| "loss": 0.0904, |
| "step": 128800 |
| }, |
| { |
| "epoch": 21.29170796167823, |
| "grad_norm": 0.6025686860084534, |
| "learning_rate": 3.2252933475265817e-06, |
| "loss": 0.0969, |
| "step": 128900 |
| }, |
| { |
| "epoch": 21.308225966303272, |
| "grad_norm": 0.6533762812614441, |
| "learning_rate": 3.219175568035826e-06, |
| "loss": 0.0962, |
| "step": 129000 |
| }, |
| { |
| "epoch": 21.308225966303272, |
| "eval_cer": 0.037095339128054204, |
| "eval_loss": 0.10510192811489105, |
| "eval_runtime": 53.7455, |
| "eval_samples_per_second": 31.463, |
| "eval_steps_per_second": 7.87, |
| "eval_wer": 0.20539561606194967, |
| "step": 129000 |
| }, |
| { |
| "epoch": 21.324743970928314, |
| "grad_norm": 0.7926602363586426, |
| "learning_rate": 3.21305778854507e-06, |
| "loss": 0.0847, |
| "step": 129100 |
| }, |
| { |
| "epoch": 21.341261975553355, |
| "grad_norm": 0.5735198855400085, |
| "learning_rate": 3.2069400090543142e-06, |
| "loss": 0.0949, |
| "step": 129200 |
| }, |
| { |
| "epoch": 21.357779980178396, |
| "grad_norm": 0.4958854615688324, |
| "learning_rate": 3.2008222295635577e-06, |
| "loss": 0.0916, |
| "step": 129300 |
| }, |
| { |
| "epoch": 21.374297984803437, |
| "grad_norm": 0.9454948306083679, |
| "learning_rate": 3.194704450072802e-06, |
| "loss": 0.1177, |
| "step": 129400 |
| }, |
| { |
| "epoch": 21.39081598942848, |
| "grad_norm": 0.6658288240432739, |
| "learning_rate": 3.1885866705820454e-06, |
| "loss": 0.0931, |
| "step": 129500 |
| }, |
| { |
| "epoch": 21.40733399405352, |
| "grad_norm": 0.6774040460586548, |
| "learning_rate": 3.1824688910912898e-06, |
| "loss": 0.0971, |
| "step": 129600 |
| }, |
| { |
| "epoch": 21.42385199867856, |
| "grad_norm": 0.5878866910934448, |
| "learning_rate": 3.1763511116005336e-06, |
| "loss": 0.1129, |
| "step": 129700 |
| }, |
| { |
| "epoch": 21.440370003303602, |
| "grad_norm": 0.6971415281295776, |
| "learning_rate": 3.170233332109778e-06, |
| "loss": 0.0937, |
| "step": 129800 |
| }, |
| { |
| "epoch": 21.456888007928644, |
| "grad_norm": 0.7083709239959717, |
| "learning_rate": 3.1641155526190214e-06, |
| "loss": 0.088, |
| "step": 129900 |
| }, |
| { |
| "epoch": 21.473406012553685, |
| "grad_norm": 0.7533379197120667, |
| "learning_rate": 3.1579977731282657e-06, |
| "loss": 0.0979, |
| "step": 130000 |
| }, |
| { |
| "epoch": 21.473406012553685, |
| "eval_cer": 0.036872903976456095, |
| "eval_loss": 0.10504589229822159, |
| "eval_runtime": 53.1803, |
| "eval_samples_per_second": 31.797, |
| "eval_steps_per_second": 7.954, |
| "eval_wer": 0.20427152938237683, |
| "step": 130000 |
| }, |
| { |
| "epoch": 21.489924017178726, |
| "grad_norm": 0.7101976275444031, |
| "learning_rate": 3.1518799936375096e-06, |
| "loss": 0.0909, |
| "step": 130100 |
| }, |
| { |
| "epoch": 21.506442021803768, |
| "grad_norm": 0.6949977874755859, |
| "learning_rate": 3.1457622141467535e-06, |
| "loss": 0.0878, |
| "step": 130200 |
| }, |
| { |
| "epoch": 21.52296002642881, |
| "grad_norm": 0.584557831287384, |
| "learning_rate": 3.1396444346559974e-06, |
| "loss": 0.101, |
| "step": 130300 |
| }, |
| { |
| "epoch": 21.53947803105385, |
| "grad_norm": 1.0865356922149658, |
| "learning_rate": 3.1335266551652417e-06, |
| "loss": 0.0925, |
| "step": 130400 |
| }, |
| { |
| "epoch": 21.55599603567889, |
| "grad_norm": 0.6468126177787781, |
| "learning_rate": 3.127408875674485e-06, |
| "loss": 0.0855, |
| "step": 130500 |
| }, |
| { |
| "epoch": 21.572514040303933, |
| "grad_norm": 0.6762518286705017, |
| "learning_rate": 3.1212910961837295e-06, |
| "loss": 0.1107, |
| "step": 130600 |
| }, |
| { |
| "epoch": 21.589032044928974, |
| "grad_norm": 0.7978318333625793, |
| "learning_rate": 3.1151733166929734e-06, |
| "loss": 0.0897, |
| "step": 130700 |
| }, |
| { |
| "epoch": 21.605550049554015, |
| "grad_norm": 0.8376022577285767, |
| "learning_rate": 3.1090555372022172e-06, |
| "loss": 0.0902, |
| "step": 130800 |
| }, |
| { |
| "epoch": 21.622068054179056, |
| "grad_norm": 0.702617347240448, |
| "learning_rate": 3.102937757711461e-06, |
| "loss": 0.0937, |
| "step": 130900 |
| }, |
| { |
| "epoch": 21.638586058804098, |
| "grad_norm": 0.5407445430755615, |
| "learning_rate": 3.0968199782207054e-06, |
| "loss": 0.0912, |
| "step": 131000 |
| }, |
| { |
| "epoch": 21.638586058804098, |
| "eval_cer": 0.03681301758948737, |
| "eval_loss": 0.10601798444986343, |
| "eval_runtime": 52.6946, |
| "eval_samples_per_second": 32.091, |
| "eval_steps_per_second": 8.027, |
| "eval_wer": 0.20352213826266158, |
| "step": 131000 |
| }, |
| { |
| "epoch": 21.65510406342914, |
| "grad_norm": 0.48049646615982056, |
| "learning_rate": 3.090702198729949e-06, |
| "loss": 0.1007, |
| "step": 131100 |
| }, |
| { |
| "epoch": 21.67162206805418, |
| "grad_norm": 0.8997465372085571, |
| "learning_rate": 3.084584419239193e-06, |
| "loss": 0.0901, |
| "step": 131200 |
| }, |
| { |
| "epoch": 21.68814007267922, |
| "grad_norm": 0.6192366480827332, |
| "learning_rate": 3.078466639748437e-06, |
| "loss": 0.0892, |
| "step": 131300 |
| }, |
| { |
| "epoch": 21.704658077304263, |
| "grad_norm": 0.7299876809120178, |
| "learning_rate": 3.0723488602576814e-06, |
| "loss": 0.0929, |
| "step": 131400 |
| }, |
| { |
| "epoch": 21.721176081929304, |
| "grad_norm": 0.6832283735275269, |
| "learning_rate": 3.066231080766925e-06, |
| "loss": 0.0936, |
| "step": 131500 |
| }, |
| { |
| "epoch": 21.737694086554345, |
| "grad_norm": 0.5136446952819824, |
| "learning_rate": 3.060113301276169e-06, |
| "loss": 0.0911, |
| "step": 131600 |
| }, |
| { |
| "epoch": 21.754212091179387, |
| "grad_norm": 0.6710427403450012, |
| "learning_rate": 3.0539955217854126e-06, |
| "loss": 0.0833, |
| "step": 131700 |
| }, |
| { |
| "epoch": 21.770730095804428, |
| "grad_norm": 0.6596719026565552, |
| "learning_rate": 3.047877742294657e-06, |
| "loss": 0.0921, |
| "step": 131800 |
| }, |
| { |
| "epoch": 21.78724810042947, |
| "grad_norm": 0.5548281669616699, |
| "learning_rate": 3.041759962803901e-06, |
| "loss": 0.0903, |
| "step": 131900 |
| }, |
| { |
| "epoch": 21.80376610505451, |
| "grad_norm": 0.5049402713775635, |
| "learning_rate": 3.035642183313145e-06, |
| "loss": 0.1321, |
| "step": 132000 |
| }, |
| { |
| "epoch": 21.80376610505451, |
| "eval_cer": 0.03675313120251865, |
| "eval_loss": 0.10501556098461151, |
| "eval_runtime": 52.4304, |
| "eval_samples_per_second": 32.252, |
| "eval_steps_per_second": 8.068, |
| "eval_wer": 0.20283519640292264, |
| "step": 132000 |
| }, |
| { |
| "epoch": 21.82028410967955, |
| "grad_norm": 0.7295696139335632, |
| "learning_rate": 3.0295244038223886e-06, |
| "loss": 0.0888, |
| "step": 132100 |
| }, |
| { |
| "epoch": 21.836802114304593, |
| "grad_norm": 0.5694403648376465, |
| "learning_rate": 3.023406624331633e-06, |
| "loss": 0.1097, |
| "step": 132200 |
| }, |
| { |
| "epoch": 21.853320118929634, |
| "grad_norm": 0.5931413769721985, |
| "learning_rate": 3.017288844840877e-06, |
| "loss": 0.0895, |
| "step": 132300 |
| }, |
| { |
| "epoch": 21.869838123554675, |
| "grad_norm": 0.715791642665863, |
| "learning_rate": 3.0111710653501207e-06, |
| "loss": 0.0926, |
| "step": 132400 |
| }, |
| { |
| "epoch": 21.886356128179717, |
| "grad_norm": 0.7110834717750549, |
| "learning_rate": 3.0050532858593646e-06, |
| "loss": 0.0892, |
| "step": 132500 |
| }, |
| { |
| "epoch": 21.902874132804758, |
| "grad_norm": 0.8973935842514038, |
| "learning_rate": 2.998935506368609e-06, |
| "loss": 0.0917, |
| "step": 132600 |
| }, |
| { |
| "epoch": 21.9193921374298, |
| "grad_norm": 0.6259893178939819, |
| "learning_rate": 2.9928177268778523e-06, |
| "loss": 0.0919, |
| "step": 132700 |
| }, |
| { |
| "epoch": 21.93591014205484, |
| "grad_norm": 0.6321418881416321, |
| "learning_rate": 2.9866999473870966e-06, |
| "loss": 0.0985, |
| "step": 132800 |
| }, |
| { |
| "epoch": 21.952428146679882, |
| "grad_norm": 0.690564751625061, |
| "learning_rate": 2.9805821678963405e-06, |
| "loss": 0.0857, |
| "step": 132900 |
| }, |
| { |
| "epoch": 21.968946151304923, |
| "grad_norm": 0.5872605443000793, |
| "learning_rate": 2.9744643884055844e-06, |
| "loss": 0.0954, |
| "step": 133000 |
| }, |
| { |
| "epoch": 21.968946151304923, |
| "eval_cer": 0.03696701115597837, |
| "eval_loss": 0.10601279884576797, |
| "eval_runtime": 52.5874, |
| "eval_samples_per_second": 32.156, |
| "eval_steps_per_second": 8.044, |
| "eval_wer": 0.20445887716230562, |
| "step": 133000 |
| }, |
| { |
| "epoch": 21.985464155929964, |
| "grad_norm": 0.5473292469978333, |
| "learning_rate": 2.9683466089148283e-06, |
| "loss": 0.0858, |
| "step": 133100 |
| }, |
| { |
| "epoch": 22.001982160555006, |
| "grad_norm": 0.7367832660675049, |
| "learning_rate": 2.9622288294240726e-06, |
| "loss": 0.0909, |
| "step": 133200 |
| }, |
| { |
| "epoch": 22.018500165180047, |
| "grad_norm": 1.0184003114700317, |
| "learning_rate": 2.956111049933316e-06, |
| "loss": 0.0901, |
| "step": 133300 |
| }, |
| { |
| "epoch": 22.035018169805088, |
| "grad_norm": 0.7270667552947998, |
| "learning_rate": 2.9499932704425604e-06, |
| "loss": 0.0942, |
| "step": 133400 |
| }, |
| { |
| "epoch": 22.05153617443013, |
| "grad_norm": 0.6220849752426147, |
| "learning_rate": 2.9438754909518043e-06, |
| "loss": 0.0892, |
| "step": 133500 |
| }, |
| { |
| "epoch": 22.06805417905517, |
| "grad_norm": 0.6055799126625061, |
| "learning_rate": 2.9377577114610486e-06, |
| "loss": 0.0895, |
| "step": 133600 |
| }, |
| { |
| "epoch": 22.084572183680212, |
| "grad_norm": 0.5487551689147949, |
| "learning_rate": 2.931639931970292e-06, |
| "loss": 0.0894, |
| "step": 133700 |
| }, |
| { |
| "epoch": 22.101090188305253, |
| "grad_norm": 0.6704040765762329, |
| "learning_rate": 2.9255221524795364e-06, |
| "loss": 0.0945, |
| "step": 133800 |
| }, |
| { |
| "epoch": 22.117608192930295, |
| "grad_norm": 0.5721579194068909, |
| "learning_rate": 2.91940437298878e-06, |
| "loss": 0.0959, |
| "step": 133900 |
| }, |
| { |
| "epoch": 22.134126197555336, |
| "grad_norm": 0.6543858051300049, |
| "learning_rate": 2.913286593498024e-06, |
| "loss": 0.1333, |
| "step": 134000 |
| }, |
| { |
| "epoch": 22.134126197555336, |
| "eval_cer": 0.03689001437273287, |
| "eval_loss": 0.10688560456037521, |
| "eval_runtime": 52.1166, |
| "eval_samples_per_second": 32.446, |
| "eval_steps_per_second": 8.116, |
| "eval_wer": 0.2038968338225192, |
| "step": 134000 |
| }, |
| { |
| "epoch": 22.150644202180377, |
| "grad_norm": 0.6130584478378296, |
| "learning_rate": 2.907168814007268e-06, |
| "loss": 0.0962, |
| "step": 134100 |
| }, |
| { |
| "epoch": 22.16716220680542, |
| "grad_norm": 0.7324750423431396, |
| "learning_rate": 2.9010510345165123e-06, |
| "loss": 0.0903, |
| "step": 134200 |
| }, |
| { |
| "epoch": 22.18368021143046, |
| "grad_norm": 0.6277410984039307, |
| "learning_rate": 2.8949332550257558e-06, |
| "loss": 0.0818, |
| "step": 134300 |
| }, |
| { |
| "epoch": 22.2001982160555, |
| "grad_norm": 0.5178551077842712, |
| "learning_rate": 2.888815475535e-06, |
| "loss": 0.1053, |
| "step": 134400 |
| }, |
| { |
| "epoch": 22.216716220680542, |
| "grad_norm": 0.6540612578392029, |
| "learning_rate": 2.8826976960442436e-06, |
| "loss": 0.0866, |
| "step": 134500 |
| }, |
| { |
| "epoch": 22.233234225305583, |
| "grad_norm": 0.5932282209396362, |
| "learning_rate": 2.876579916553488e-06, |
| "loss": 0.0927, |
| "step": 134600 |
| }, |
| { |
| "epoch": 22.249752229930625, |
| "grad_norm": 0.6185062527656555, |
| "learning_rate": 2.8704621370627317e-06, |
| "loss": 0.089, |
| "step": 134700 |
| }, |
| { |
| "epoch": 22.266270234555666, |
| "grad_norm": 0.8983421921730042, |
| "learning_rate": 2.864344357571976e-06, |
| "loss": 0.0861, |
| "step": 134800 |
| }, |
| { |
| "epoch": 22.282788239180707, |
| "grad_norm": 0.3891274034976959, |
| "learning_rate": 2.8582265780812195e-06, |
| "loss": 0.0944, |
| "step": 134900 |
| }, |
| { |
| "epoch": 22.29930624380575, |
| "grad_norm": 0.7119171023368835, |
| "learning_rate": 2.852108798590464e-06, |
| "loss": 0.089, |
| "step": 135000 |
| }, |
| { |
| "epoch": 22.29930624380575, |
| "eval_cer": 0.03690712476900965, |
| "eval_loss": 0.10521671921014786, |
| "eval_runtime": 52.7579, |
| "eval_samples_per_second": 32.052, |
| "eval_steps_per_second": 8.018, |
| "eval_wer": 0.20402173234247173, |
| "step": 135000 |
| }, |
| { |
| "epoch": 22.31582424843079, |
| "grad_norm": 0.5368226766586304, |
| "learning_rate": 2.8459910190997077e-06, |
| "loss": 0.0905, |
| "step": 135100 |
| }, |
| { |
| "epoch": 22.33234225305583, |
| "grad_norm": 0.6488823890686035, |
| "learning_rate": 2.8398732396089516e-06, |
| "loss": 0.1179, |
| "step": 135200 |
| }, |
| { |
| "epoch": 22.348860257680872, |
| "grad_norm": 0.6369620561599731, |
| "learning_rate": 2.8337554601181955e-06, |
| "loss": 0.0939, |
| "step": 135300 |
| }, |
| { |
| "epoch": 22.365378262305914, |
| "grad_norm": 0.6993893384933472, |
| "learning_rate": 2.82763768062744e-06, |
| "loss": 0.0944, |
| "step": 135400 |
| }, |
| { |
| "epoch": 22.381896266930955, |
| "grad_norm": 0.8022906184196472, |
| "learning_rate": 2.8215199011366833e-06, |
| "loss": 0.0832, |
| "step": 135500 |
| }, |
| { |
| "epoch": 22.398414271555996, |
| "grad_norm": 0.5833423733711243, |
| "learning_rate": 2.8154021216459276e-06, |
| "loss": 0.0944, |
| "step": 135600 |
| }, |
| { |
| "epoch": 22.414932276181037, |
| "grad_norm": 0.72309410572052, |
| "learning_rate": 2.8092843421551715e-06, |
| "loss": 0.093, |
| "step": 135700 |
| }, |
| { |
| "epoch": 22.43145028080608, |
| "grad_norm": 0.5882470011711121, |
| "learning_rate": 2.8031665626644158e-06, |
| "loss": 0.1, |
| "step": 135800 |
| }, |
| { |
| "epoch": 22.44796828543112, |
| "grad_norm": 0.6774691343307495, |
| "learning_rate": 2.7970487831736592e-06, |
| "loss": 0.0954, |
| "step": 135900 |
| }, |
| { |
| "epoch": 22.46448629005616, |
| "grad_norm": 0.9647297263145447, |
| "learning_rate": 2.7909310036829035e-06, |
| "loss": 0.094, |
| "step": 136000 |
| }, |
| { |
| "epoch": 22.46448629005616, |
| "eval_cer": 0.03667613441927315, |
| "eval_loss": 0.10520410537719727, |
| "eval_runtime": 52.5832, |
| "eval_samples_per_second": 32.159, |
| "eval_steps_per_second": 8.044, |
| "eval_wer": 0.20314744270280397, |
| "step": 136000 |
| }, |
| { |
| "epoch": 22.481004294681203, |
| "grad_norm": 0.6736404895782471, |
| "learning_rate": 2.784813224192147e-06, |
| "loss": 0.0912, |
| "step": 136100 |
| }, |
| { |
| "epoch": 22.497522299306244, |
| "grad_norm": 0.4658312499523163, |
| "learning_rate": 2.7786954447013913e-06, |
| "loss": 0.0874, |
| "step": 136200 |
| }, |
| { |
| "epoch": 22.514040303931285, |
| "grad_norm": 0.8794094920158386, |
| "learning_rate": 2.7725776652106356e-06, |
| "loss": 0.1005, |
| "step": 136300 |
| }, |
| { |
| "epoch": 22.530558308556326, |
| "grad_norm": 0.6956797242164612, |
| "learning_rate": 2.7664598857198795e-06, |
| "loss": 0.0895, |
| "step": 136400 |
| }, |
| { |
| "epoch": 22.547076313181368, |
| "grad_norm": 0.4646037220954895, |
| "learning_rate": 2.7603421062291234e-06, |
| "loss": 0.0869, |
| "step": 136500 |
| }, |
| { |
| "epoch": 22.56359431780641, |
| "grad_norm": 0.8446247577667236, |
| "learning_rate": 2.7542243267383673e-06, |
| "loss": 0.0958, |
| "step": 136600 |
| }, |
| { |
| "epoch": 22.58011232243145, |
| "grad_norm": 0.47825750708580017, |
| "learning_rate": 2.7481065472476116e-06, |
| "loss": 0.0918, |
| "step": 136700 |
| }, |
| { |
| "epoch": 22.59663032705649, |
| "grad_norm": 0.8411787152290344, |
| "learning_rate": 2.741988767756855e-06, |
| "loss": 0.0886, |
| "step": 136800 |
| }, |
| { |
| "epoch": 22.613148331681533, |
| "grad_norm": 0.5080142021179199, |
| "learning_rate": 2.7358709882660994e-06, |
| "loss": 0.0982, |
| "step": 136900 |
| }, |
| { |
| "epoch": 22.629666336306574, |
| "grad_norm": 0.7875675559043884, |
| "learning_rate": 2.7297532087753432e-06, |
| "loss": 0.0909, |
| "step": 137000 |
| }, |
| { |
| "epoch": 22.629666336306574, |
| "eval_cer": 0.03681301758948737, |
| "eval_loss": 0.10519874840974808, |
| "eval_runtime": 52.8842, |
| "eval_samples_per_second": 31.976, |
| "eval_steps_per_second": 7.999, |
| "eval_wer": 0.20264784862299381, |
| "step": 137000 |
| }, |
| { |
| "epoch": 22.646184340931615, |
| "grad_norm": 0.7804688215255737, |
| "learning_rate": 2.7236354292845875e-06, |
| "loss": 0.1158, |
| "step": 137100 |
| }, |
| { |
| "epoch": 22.662702345556657, |
| "grad_norm": 0.49170786142349243, |
| "learning_rate": 2.717517649793831e-06, |
| "loss": 0.0981, |
| "step": 137200 |
| }, |
| { |
| "epoch": 22.679220350181698, |
| "grad_norm": 0.649940550327301, |
| "learning_rate": 2.7113998703030753e-06, |
| "loss": 0.0892, |
| "step": 137300 |
| }, |
| { |
| "epoch": 22.69573835480674, |
| "grad_norm": 0.7027512192726135, |
| "learning_rate": 2.7052820908123188e-06, |
| "loss": 0.1086, |
| "step": 137400 |
| }, |
| { |
| "epoch": 22.71225635943178, |
| "grad_norm": 0.7389455437660217, |
| "learning_rate": 2.699164311321563e-06, |
| "loss": 0.0899, |
| "step": 137500 |
| }, |
| { |
| "epoch": 22.72877436405682, |
| "grad_norm": 0.7065523862838745, |
| "learning_rate": 2.693046531830807e-06, |
| "loss": 0.0851, |
| "step": 137600 |
| }, |
| { |
| "epoch": 22.745292368681863, |
| "grad_norm": 0.768282949924469, |
| "learning_rate": 2.6869287523400513e-06, |
| "loss": 0.0869, |
| "step": 137700 |
| }, |
| { |
| "epoch": 22.761810373306904, |
| "grad_norm": 0.6381931900978088, |
| "learning_rate": 2.6808109728492948e-06, |
| "loss": 0.0894, |
| "step": 137800 |
| }, |
| { |
| "epoch": 22.778328377931945, |
| "grad_norm": 0.6711616516113281, |
| "learning_rate": 2.674693193358539e-06, |
| "loss": 0.0932, |
| "step": 137900 |
| }, |
| { |
| "epoch": 22.794846382556987, |
| "grad_norm": 0.8620249629020691, |
| "learning_rate": 2.668575413867783e-06, |
| "loss": 0.0946, |
| "step": 138000 |
| }, |
| { |
| "epoch": 22.794846382556987, |
| "eval_cer": 0.036624803230442815, |
| "eval_loss": 0.1052507609128952, |
| "eval_runtime": 52.4513, |
| "eval_samples_per_second": 32.239, |
| "eval_steps_per_second": 8.065, |
| "eval_wer": 0.20314744270280397, |
| "step": 138000 |
| }, |
| { |
| "epoch": 22.811364387182028, |
| "grad_norm": 0.4814409911632538, |
| "learning_rate": 2.662457634377027e-06, |
| "loss": 0.0948, |
| "step": 138100 |
| }, |
| { |
| "epoch": 22.82788239180707, |
| "grad_norm": 1.151419997215271, |
| "learning_rate": 2.6563398548862707e-06, |
| "loss": 0.1138, |
| "step": 138200 |
| }, |
| { |
| "epoch": 22.84440039643211, |
| "grad_norm": 0.6814967393875122, |
| "learning_rate": 2.650222075395515e-06, |
| "loss": 0.114, |
| "step": 138300 |
| }, |
| { |
| "epoch": 22.860918401057152, |
| "grad_norm": 0.8873021602630615, |
| "learning_rate": 2.6441042959047585e-06, |
| "loss": 0.0866, |
| "step": 138400 |
| }, |
| { |
| "epoch": 22.877436405682193, |
| "grad_norm": 0.6129996180534363, |
| "learning_rate": 2.637986516414003e-06, |
| "loss": 0.0902, |
| "step": 138500 |
| }, |
| { |
| "epoch": 22.893954410307234, |
| "grad_norm": 0.8606892228126526, |
| "learning_rate": 2.6318687369232467e-06, |
| "loss": 0.0953, |
| "step": 138600 |
| }, |
| { |
| "epoch": 22.910472414932276, |
| "grad_norm": 0.6854122281074524, |
| "learning_rate": 2.6257509574324906e-06, |
| "loss": 0.0963, |
| "step": 138700 |
| }, |
| { |
| "epoch": 22.926990419557317, |
| "grad_norm": 0.7230859398841858, |
| "learning_rate": 2.6196331779417345e-06, |
| "loss": 0.0866, |
| "step": 138800 |
| }, |
| { |
| "epoch": 22.943508424182358, |
| "grad_norm": 0.4967285692691803, |
| "learning_rate": 2.6135153984509788e-06, |
| "loss": 0.0875, |
| "step": 138900 |
| }, |
| { |
| "epoch": 22.9600264288074, |
| "grad_norm": 0.6331928372383118, |
| "learning_rate": 2.6073976189602222e-06, |
| "loss": 0.0897, |
| "step": 139000 |
| }, |
| { |
| "epoch": 22.9600264288074, |
| "eval_cer": 0.03681301758948737, |
| "eval_loss": 0.10469213128089905, |
| "eval_runtime": 52.8869, |
| "eval_samples_per_second": 31.974, |
| "eval_steps_per_second": 7.998, |
| "eval_wer": 0.20383438456254294, |
| "step": 139000 |
| }, |
| { |
| "epoch": 22.97654443343244, |
| "grad_norm": 0.68625807762146, |
| "learning_rate": 2.6012798394694665e-06, |
| "loss": 0.103, |
| "step": 139100 |
| }, |
| { |
| "epoch": 22.993062438057482, |
| "grad_norm": 0.7166194915771484, |
| "learning_rate": 2.5951620599787104e-06, |
| "loss": 0.0937, |
| "step": 139200 |
| }, |
| { |
| "epoch": 23.009580442682523, |
| "grad_norm": 0.7146703600883484, |
| "learning_rate": 2.5890442804879547e-06, |
| "loss": 0.0936, |
| "step": 139300 |
| }, |
| { |
| "epoch": 23.026098447307564, |
| "grad_norm": 0.5112409591674805, |
| "learning_rate": 2.582926500997198e-06, |
| "loss": 0.0863, |
| "step": 139400 |
| }, |
| { |
| "epoch": 23.042616451932606, |
| "grad_norm": 0.5813011527061462, |
| "learning_rate": 2.5768087215064425e-06, |
| "loss": 0.0827, |
| "step": 139500 |
| }, |
| { |
| "epoch": 23.059134456557647, |
| "grad_norm": 0.6480150818824768, |
| "learning_rate": 2.570690942015686e-06, |
| "loss": 0.0866, |
| "step": 139600 |
| }, |
| { |
| "epoch": 23.07565246118269, |
| "grad_norm": 1.00325608253479, |
| "learning_rate": 2.5645731625249303e-06, |
| "loss": 0.0927, |
| "step": 139700 |
| }, |
| { |
| "epoch": 23.09217046580773, |
| "grad_norm": 0.5901710391044617, |
| "learning_rate": 2.558455383034174e-06, |
| "loss": 0.0978, |
| "step": 139800 |
| }, |
| { |
| "epoch": 23.10868847043277, |
| "grad_norm": 0.6397861242294312, |
| "learning_rate": 2.5523376035434185e-06, |
| "loss": 0.0869, |
| "step": 139900 |
| }, |
| { |
| "epoch": 23.125206475057812, |
| "grad_norm": 0.4879724085330963, |
| "learning_rate": 2.546219824052662e-06, |
| "loss": 0.0876, |
| "step": 140000 |
| }, |
| { |
| "epoch": 23.125206475057812, |
| "eval_cer": 0.03693279036342482, |
| "eval_loss": 0.10442952066659927, |
| "eval_runtime": 52.9301, |
| "eval_samples_per_second": 31.948, |
| "eval_steps_per_second": 7.992, |
| "eval_wer": 0.2038968338225192, |
| "step": 140000 |
| }, |
| { |
| "epoch": 23.141724479682853, |
| "grad_norm": 0.7894465327262878, |
| "learning_rate": 2.5401020445619062e-06, |
| "loss": 0.1086, |
| "step": 140100 |
| }, |
| { |
| "epoch": 23.158242484307895, |
| "grad_norm": 0.7804042100906372, |
| "learning_rate": 2.53398426507115e-06, |
| "loss": 0.0881, |
| "step": 140200 |
| }, |
| { |
| "epoch": 23.174760488932936, |
| "grad_norm": 0.5835601091384888, |
| "learning_rate": 2.527866485580394e-06, |
| "loss": 0.0909, |
| "step": 140300 |
| }, |
| { |
| "epoch": 23.191278493557977, |
| "grad_norm": 0.7063116431236267, |
| "learning_rate": 2.521748706089638e-06, |
| "loss": 0.0875, |
| "step": 140400 |
| }, |
| { |
| "epoch": 23.20779649818302, |
| "grad_norm": 0.66155606508255, |
| "learning_rate": 2.515630926598882e-06, |
| "loss": 0.0859, |
| "step": 140500 |
| }, |
| { |
| "epoch": 23.22431450280806, |
| "grad_norm": 0.5779556035995483, |
| "learning_rate": 2.5095131471081257e-06, |
| "loss": 0.0879, |
| "step": 140600 |
| }, |
| { |
| "epoch": 23.2408325074331, |
| "grad_norm": 0.5715177655220032, |
| "learning_rate": 2.50339536761737e-06, |
| "loss": 0.0914, |
| "step": 140700 |
| }, |
| { |
| "epoch": 23.257350512058142, |
| "grad_norm": 0.5225812792778015, |
| "learning_rate": 2.497277588126614e-06, |
| "loss": 0.0902, |
| "step": 140800 |
| }, |
| { |
| "epoch": 23.273868516683184, |
| "grad_norm": 0.8125872015953064, |
| "learning_rate": 2.4911598086358578e-06, |
| "loss": 0.1079, |
| "step": 140900 |
| }, |
| { |
| "epoch": 23.290386521308225, |
| "grad_norm": 0.7094987034797668, |
| "learning_rate": 2.4850420291451016e-06, |
| "loss": 0.0863, |
| "step": 141000 |
| }, |
| { |
| "epoch": 23.290386521308225, |
| "eval_cer": 0.03659913763602765, |
| "eval_loss": 0.10513997077941895, |
| "eval_runtime": 52.6735, |
| "eval_samples_per_second": 32.103, |
| "eval_steps_per_second": 8.031, |
| "eval_wer": 0.20246050084306502, |
| "step": 141000 |
| }, |
| { |
| "epoch": 23.306904525933266, |
| "grad_norm": 0.5953539609909058, |
| "learning_rate": 2.4789242496543455e-06, |
| "loss": 0.0941, |
| "step": 141100 |
| }, |
| { |
| "epoch": 23.323422530558307, |
| "grad_norm": 0.6508031487464905, |
| "learning_rate": 2.4728064701635894e-06, |
| "loss": 0.0951, |
| "step": 141200 |
| }, |
| { |
| "epoch": 23.33994053518335, |
| "grad_norm": 0.6292299032211304, |
| "learning_rate": 2.4666886906728333e-06, |
| "loss": 0.0838, |
| "step": 141300 |
| }, |
| { |
| "epoch": 23.35645853980839, |
| "grad_norm": 0.7818630337715149, |
| "learning_rate": 2.4605709111820776e-06, |
| "loss": 0.0869, |
| "step": 141400 |
| }, |
| { |
| "epoch": 23.37297654443343, |
| "grad_norm": 0.8426799774169922, |
| "learning_rate": 2.4544531316913215e-06, |
| "loss": 0.0852, |
| "step": 141500 |
| }, |
| { |
| "epoch": 23.389494549058472, |
| "grad_norm": 0.5545341968536377, |
| "learning_rate": 2.4483353522005654e-06, |
| "loss": 0.0827, |
| "step": 141600 |
| }, |
| { |
| "epoch": 23.406012553683514, |
| "grad_norm": 1.2653288841247559, |
| "learning_rate": 2.4422175727098093e-06, |
| "loss": 0.0841, |
| "step": 141700 |
| }, |
| { |
| "epoch": 23.422530558308555, |
| "grad_norm": 0.7402147650718689, |
| "learning_rate": 2.436099793219053e-06, |
| "loss": 0.0836, |
| "step": 141800 |
| }, |
| { |
| "epoch": 23.439048562933596, |
| "grad_norm": 0.5832458734512329, |
| "learning_rate": 2.4299820137282975e-06, |
| "loss": 0.1247, |
| "step": 141900 |
| }, |
| { |
| "epoch": 23.455566567558638, |
| "grad_norm": 0.6517156958580017, |
| "learning_rate": 2.4238642342375413e-06, |
| "loss": 0.0871, |
| "step": 142000 |
| }, |
| { |
| "epoch": 23.455566567558638, |
| "eval_cer": 0.03671035521182671, |
| "eval_loss": 0.10508172959089279, |
| "eval_runtime": 52.7407, |
| "eval_samples_per_second": 32.063, |
| "eval_steps_per_second": 8.02, |
| "eval_wer": 0.20246050084306502, |
| "step": 142000 |
| }, |
| { |
| "epoch": 23.47208457218368, |
| "grad_norm": 0.6041878461837769, |
| "learning_rate": 2.4177464547467852e-06, |
| "loss": 0.0912, |
| "step": 142100 |
| }, |
| { |
| "epoch": 23.48860257680872, |
| "grad_norm": 0.5178912878036499, |
| "learning_rate": 2.411628675256029e-06, |
| "loss": 0.0925, |
| "step": 142200 |
| }, |
| { |
| "epoch": 23.50512058143376, |
| "grad_norm": 0.6299303770065308, |
| "learning_rate": 2.405510895765273e-06, |
| "loss": 0.088, |
| "step": 142300 |
| }, |
| { |
| "epoch": 23.521638586058803, |
| "grad_norm": 0.6988112926483154, |
| "learning_rate": 2.399393116274517e-06, |
| "loss": 0.1043, |
| "step": 142400 |
| }, |
| { |
| "epoch": 23.538156590683844, |
| "grad_norm": 0.5607922077178955, |
| "learning_rate": 2.393275336783761e-06, |
| "loss": 0.089, |
| "step": 142500 |
| }, |
| { |
| "epoch": 23.554674595308885, |
| "grad_norm": 0.4817243218421936, |
| "learning_rate": 2.387157557293005e-06, |
| "loss": 0.0874, |
| "step": 142600 |
| }, |
| { |
| "epoch": 23.571192599933926, |
| "grad_norm": 0.6620100140571594, |
| "learning_rate": 2.381039777802249e-06, |
| "loss": 0.0878, |
| "step": 142700 |
| }, |
| { |
| "epoch": 23.587710604558968, |
| "grad_norm": 0.9131438732147217, |
| "learning_rate": 2.374921998311493e-06, |
| "loss": 0.0879, |
| "step": 142800 |
| }, |
| { |
| "epoch": 23.60422860918401, |
| "grad_norm": 0.5091140270233154, |
| "learning_rate": 2.3688042188207367e-06, |
| "loss": 0.0941, |
| "step": 142900 |
| }, |
| { |
| "epoch": 23.62074661380905, |
| "grad_norm": 0.6191192865371704, |
| "learning_rate": 2.362686439329981e-06, |
| "loss": 0.0932, |
| "step": 143000 |
| }, |
| { |
| "epoch": 23.62074661380905, |
| "eval_cer": 0.036659024022996374, |
| "eval_loss": 0.10472416132688522, |
| "eval_runtime": 52.4751, |
| "eval_samples_per_second": 32.225, |
| "eval_steps_per_second": 8.061, |
| "eval_wer": 0.2030849934428277, |
| "step": 143000 |
| }, |
| { |
| "epoch": 23.63726461843409, |
| "grad_norm": 0.5774977803230286, |
| "learning_rate": 2.356568659839225e-06, |
| "loss": 0.1072, |
| "step": 143100 |
| }, |
| { |
| "epoch": 23.653782623059133, |
| "grad_norm": 0.6127384901046753, |
| "learning_rate": 2.350450880348469e-06, |
| "loss": 0.0856, |
| "step": 143200 |
| }, |
| { |
| "epoch": 23.670300627684174, |
| "grad_norm": 0.5244102478027344, |
| "learning_rate": 2.3443331008577127e-06, |
| "loss": 0.0845, |
| "step": 143300 |
| }, |
| { |
| "epoch": 23.686818632309215, |
| "grad_norm": 0.8045458197593689, |
| "learning_rate": 2.3382153213669566e-06, |
| "loss": 0.0887, |
| "step": 143400 |
| }, |
| { |
| "epoch": 23.703336636934257, |
| "grad_norm": 0.5768733024597168, |
| "learning_rate": 2.3320975418762005e-06, |
| "loss": 0.0997, |
| "step": 143500 |
| }, |
| { |
| "epoch": 23.7198546415593, |
| "grad_norm": 0.7417640089988708, |
| "learning_rate": 2.3259797623854448e-06, |
| "loss": 0.0952, |
| "step": 143600 |
| }, |
| { |
| "epoch": 23.736372646184343, |
| "grad_norm": 0.6068658232688904, |
| "learning_rate": 2.3198619828946887e-06, |
| "loss": 0.0928, |
| "step": 143700 |
| }, |
| { |
| "epoch": 23.752890650809384, |
| "grad_norm": 0.8562188148498535, |
| "learning_rate": 2.3137442034039326e-06, |
| "loss": 0.0868, |
| "step": 143800 |
| }, |
| { |
| "epoch": 23.769408655434425, |
| "grad_norm": 0.6002250909805298, |
| "learning_rate": 2.3076264239131764e-06, |
| "loss": 0.0883, |
| "step": 143900 |
| }, |
| { |
| "epoch": 23.785926660059467, |
| "grad_norm": 0.6457869410514832, |
| "learning_rate": 2.3015086444224203e-06, |
| "loss": 0.0871, |
| "step": 144000 |
| }, |
| { |
| "epoch": 23.785926660059467, |
| "eval_cer": 0.03666757922113476, |
| "eval_loss": 0.10333551466464996, |
| "eval_runtime": 53.1345, |
| "eval_samples_per_second": 31.825, |
| "eval_steps_per_second": 7.961, |
| "eval_wer": 0.20296009492287517, |
| "step": 144000 |
| }, |
| { |
| "epoch": 23.802444664684508, |
| "grad_norm": 0.6609480381011963, |
| "learning_rate": 2.2953908649316646e-06, |
| "loss": 0.0935, |
| "step": 144100 |
| }, |
| { |
| "epoch": 23.81896266930955, |
| "grad_norm": 0.5107303261756897, |
| "learning_rate": 2.2892730854409085e-06, |
| "loss": 0.0887, |
| "step": 144200 |
| }, |
| { |
| "epoch": 23.83548067393459, |
| "grad_norm": 0.6314355134963989, |
| "learning_rate": 2.2831553059501524e-06, |
| "loss": 0.0903, |
| "step": 144300 |
| }, |
| { |
| "epoch": 23.85199867855963, |
| "grad_norm": 0.49561741948127747, |
| "learning_rate": 2.2770375264593963e-06, |
| "loss": 0.0859, |
| "step": 144400 |
| }, |
| { |
| "epoch": 23.868516683184673, |
| "grad_norm": 0.7324890494346619, |
| "learning_rate": 2.27091974696864e-06, |
| "loss": 0.0924, |
| "step": 144500 |
| }, |
| { |
| "epoch": 23.885034687809714, |
| "grad_norm": 0.5809805393218994, |
| "learning_rate": 2.264801967477884e-06, |
| "loss": 0.0917, |
| "step": 144600 |
| }, |
| { |
| "epoch": 23.901552692434755, |
| "grad_norm": 0.6561674475669861, |
| "learning_rate": 2.2586841879871284e-06, |
| "loss": 0.0921, |
| "step": 144700 |
| }, |
| { |
| "epoch": 23.918070697059797, |
| "grad_norm": 0.618030846118927, |
| "learning_rate": 2.2525664084963723e-06, |
| "loss": 0.0954, |
| "step": 144800 |
| }, |
| { |
| "epoch": 23.934588701684838, |
| "grad_norm": 0.6414436101913452, |
| "learning_rate": 2.2464486290056166e-06, |
| "loss": 0.0881, |
| "step": 144900 |
| }, |
| { |
| "epoch": 23.95110670630988, |
| "grad_norm": 0.9026370644569397, |
| "learning_rate": 2.2403308495148605e-06, |
| "loss": 0.091, |
| "step": 145000 |
| }, |
| { |
| "epoch": 23.95110670630988, |
| "eval_cer": 0.036701800013688314, |
| "eval_loss": 0.1043509840965271, |
| "eval_runtime": 52.6333, |
| "eval_samples_per_second": 32.128, |
| "eval_steps_per_second": 8.037, |
| "eval_wer": 0.20320989196278025, |
| "step": 145000 |
| }, |
| { |
| "epoch": 23.96762471093492, |
| "grad_norm": 0.4682947099208832, |
| "learning_rate": 2.2342130700241043e-06, |
| "loss": 0.0892, |
| "step": 145100 |
| }, |
| { |
| "epoch": 23.984142715559962, |
| "grad_norm": 0.6425852179527283, |
| "learning_rate": 2.2280952905333482e-06, |
| "loss": 0.0877, |
| "step": 145200 |
| }, |
| { |
| "epoch": 24.000660720185003, |
| "grad_norm": 0.4977991282939911, |
| "learning_rate": 2.221977511042592e-06, |
| "loss": 0.0855, |
| "step": 145300 |
| }, |
| { |
| "epoch": 24.017178724810044, |
| "grad_norm": 0.68033766746521, |
| "learning_rate": 2.2158597315518364e-06, |
| "loss": 0.0887, |
| "step": 145400 |
| }, |
| { |
| "epoch": 24.033696729435086, |
| "grad_norm": 0.8233726024627686, |
| "learning_rate": 2.2097419520610803e-06, |
| "loss": 0.0926, |
| "step": 145500 |
| }, |
| { |
| "epoch": 24.050214734060127, |
| "grad_norm": 0.6886569261550903, |
| "learning_rate": 2.203624172570324e-06, |
| "loss": 0.0959, |
| "step": 145600 |
| }, |
| { |
| "epoch": 24.066732738685168, |
| "grad_norm": 0.5320963263511658, |
| "learning_rate": 2.197506393079568e-06, |
| "loss": 0.0928, |
| "step": 145700 |
| }, |
| { |
| "epoch": 24.08325074331021, |
| "grad_norm": 0.6369372010231018, |
| "learning_rate": 2.191388613588812e-06, |
| "loss": 0.0952, |
| "step": 145800 |
| }, |
| { |
| "epoch": 24.09976874793525, |
| "grad_norm": 0.6117287874221802, |
| "learning_rate": 2.1852708340980563e-06, |
| "loss": 0.1055, |
| "step": 145900 |
| }, |
| { |
| "epoch": 24.116286752560292, |
| "grad_norm": 0.7260856032371521, |
| "learning_rate": 2.1791530546073e-06, |
| "loss": 0.0978, |
| "step": 146000 |
| }, |
| { |
| "epoch": 24.116286752560292, |
| "eval_cer": 0.03682157278762576, |
| "eval_loss": 0.10561419278383255, |
| "eval_runtime": 52.1637, |
| "eval_samples_per_second": 32.417, |
| "eval_steps_per_second": 8.109, |
| "eval_wer": 0.20333479048273279, |
| "step": 146000 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 181620, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 30, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.861432368096291e+20, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|