{
  "best_global_step": 110000,
  "best_metric": 0.07368261883895177,
  "best_model_checkpoint": "./TrOCR_SigLIP2_Aranizer_41K_AND_COMBINED/stage2/checkpoint-110000",
  "epoch": 50.0,
  "eval_steps": 10000,
  "global_step": 115050,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.021733224667209998,
      "grad_norm": 190.25732421875,
      "learning_rate": 4.900000000000001e-07,
      "loss": 17.665,
      "step": 50
    },
    {
      "epoch": 0.043466449334419996,
      "grad_norm": 101.98078918457031,
      "learning_rate": 9.9e-07,
      "loss": 17.0,
      "step": 100
    },
    {
      "epoch": 0.06519967400162999,
      "grad_norm": 61.98678970336914,
      "learning_rate": 1.4900000000000001e-06,
      "loss": 16.6227,
      "step": 150
    },
    {
      "epoch": 0.08693289866883999,
      "grad_norm": 77.35042572021484,
      "learning_rate": 1.9900000000000004e-06,
      "loss": 16.5637,
      "step": 200
    },
    {
      "epoch": 0.10866612333604998,
      "grad_norm": 91.11700439453125,
      "learning_rate": 2.4900000000000003e-06,
      "loss": 16.7798,
      "step": 250
    },
    {
      "epoch": 0.13039934800325997,
      "grad_norm": 95.30789184570312,
      "learning_rate": 2.99e-06,
      "loss": 16.4945,
      "step": 300
    },
    {
      "epoch": 0.15213257267046998,
      "grad_norm": 81.99232482910156,
      "learning_rate": 3.49e-06,
      "loss": 16.2094,
      "step": 350
    },
    {
      "epoch": 0.17386579733767998,
      "grad_norm": 65.64993286132812,
      "learning_rate": 3.990000000000001e-06,
      "loss": 16.4501,
      "step": 400
    },
    {
      "epoch": 0.19559902200489,
      "grad_norm": 98.58848571777344,
      "learning_rate": 4.49e-06,
      "loss": 16.386,
      "step": 450
    },
    {
      "epoch": 0.21733224667209997,
      "grad_norm": 85.1087646484375,
      "learning_rate": 4.9900000000000005e-06,
      "loss": 16.453,
      "step": 500
    },
    {
      "epoch": 0.23906547133930997,
      "grad_norm": 68.43016052246094,
      "learning_rate": 5.490000000000001e-06,
      "loss": 16.245,
      "step": 550
    },
    {
      "epoch": 0.26079869600651995,
      "grad_norm": 71.09822082519531,
      "learning_rate": 5.99e-06,
      "loss": 16.0378,
      "step": 600
    },
    {
      "epoch": 0.28253192067372995,
      "grad_norm": 71.08029174804688,
      "learning_rate": 6.4900000000000005e-06,
      "loss": 16.7085,
      "step": 650
    },
    {
      "epoch": 0.30426514534093996,
      "grad_norm": 66.14205169677734,
      "learning_rate": 6.99e-06,
      "loss": 16.3454,
      "step": 700
    },
    {
      "epoch": 0.32599837000814996,
      "grad_norm": 93.4423599243164,
      "learning_rate": 7.49e-06,
      "loss": 16.4424,
      "step": 750
    },
    {
      "epoch": 0.34773159467535997,
      "grad_norm": 80.68280029296875,
      "learning_rate": 7.990000000000001e-06,
      "loss": 16.5934,
      "step": 800
    },
    {
      "epoch": 0.36946481934257,
      "grad_norm": 50.99578094482422,
      "learning_rate": 8.49e-06,
      "loss": 16.4154,
      "step": 850
    },
    {
      "epoch": 0.39119804400978,
      "grad_norm": 73.9505386352539,
      "learning_rate": 8.99e-06,
      "loss": 16.4743,
      "step": 900
    },
    {
      "epoch": 0.4129312686769899,
      "grad_norm": 53.72673034667969,
      "learning_rate": 9.49e-06,
      "loss": 16.5231,
      "step": 950
    },
    {
      "epoch": 0.43466449334419993,
      "grad_norm": 80.76425170898438,
      "learning_rate": 9.990000000000001e-06,
      "loss": 16.3367,
      "step": 1000
    },
    {
      "epoch": 0.45639771801140994,
      "grad_norm": 88.06133270263672,
      "learning_rate": 9.995703638754933e-06,
      "loss": 16.7444,
      "step": 1050
    },
    {
      "epoch": 0.47813094267861994,
      "grad_norm": 49.88163757324219,
      "learning_rate": 9.991319596668128e-06,
      "loss": 16.4815,
      "step": 1100
    },
    {
      "epoch": 0.49986416734582995,
      "grad_norm": 66.88523864746094,
      "learning_rate": 9.986935554581326e-06,
      "loss": 16.467,
      "step": 1150
    },
    {
      "epoch": 0.5215973920130399,
      "grad_norm": 65.38529968261719,
      "learning_rate": 9.982551512494521e-06,
      "loss": 16.4785,
      "step": 1200
    },
    {
      "epoch": 0.54333061668025,
      "grad_norm": 54.72123336791992,
      "learning_rate": 9.978167470407716e-06,
      "loss": 16.5669,
      "step": 1250
    },
    {
      "epoch": 0.5650638413474599,
      "grad_norm": 52.83816909790039,
      "learning_rate": 9.973783428320912e-06,
      "loss": 16.3767,
      "step": 1300
    },
    {
      "epoch": 0.58679706601467,
      "grad_norm": 72.39130401611328,
      "learning_rate": 9.969399386234109e-06,
      "loss": 16.2847,
      "step": 1350
    },
    {
      "epoch": 0.6085302906818799,
      "grad_norm": 52.192020416259766,
      "learning_rate": 9.965015344147304e-06,
      "loss": 16.3576,
      "step": 1400
    },
    {
      "epoch": 0.63026351534909,
      "grad_norm": 50.59873962402344,
      "learning_rate": 9.960631302060501e-06,
      "loss": 15.9625,
      "step": 1450
    },
    {
      "epoch": 0.6519967400162999,
      "grad_norm": 253.75856018066406,
      "learning_rate": 9.956247259973697e-06,
      "loss": 16.3934,
      "step": 1500
    },
    {
      "epoch": 0.6737299646835099,
      "grad_norm": 43.8497314453125,
      "learning_rate": 9.951863217886892e-06,
      "loss": 16.686,
      "step": 1550
    },
    {
      "epoch": 0.6954631893507199,
      "grad_norm": 48.21563720703125,
      "learning_rate": 9.947479175800089e-06,
      "loss": 16.4491,
      "step": 1600
    },
    {
      "epoch": 0.7171964140179299,
      "grad_norm": 59.72194290161133,
      "learning_rate": 9.943095133713284e-06,
      "loss": 16.1068,
      "step": 1650
    },
    {
      "epoch": 0.73892963868514,
      "grad_norm": 117.0466079711914,
      "learning_rate": 9.93871109162648e-06,
      "loss": 16.3469,
      "step": 1700
    },
    {
      "epoch": 0.7606628633523499,
      "grad_norm": 48.334346771240234,
      "learning_rate": 9.934327049539675e-06,
      "loss": 16.3334,
      "step": 1750
    },
    {
      "epoch": 0.78239608801956,
      "grad_norm": 54.45792007446289,
      "learning_rate": 9.929943007452872e-06,
      "loss": 16.2452,
      "step": 1800
    },
    {
      "epoch": 0.8041293126867699,
      "grad_norm": 39.005428314208984,
      "learning_rate": 9.925558965366068e-06,
      "loss": 16.0999,
      "step": 1850
    },
    {
      "epoch": 0.8258625373539799,
      "grad_norm": 67.72175598144531,
      "learning_rate": 9.921174923279265e-06,
      "loss": 16.2755,
      "step": 1900
    },
    {
      "epoch": 0.8475957620211899,
      "grad_norm": 72.75259399414062,
      "learning_rate": 9.91679088119246e-06,
      "loss": 16.5987,
      "step": 1950
    },
    {
      "epoch": 0.8693289866883999,
      "grad_norm": 58.86764144897461,
      "learning_rate": 9.912406839105656e-06,
      "loss": 16.4092,
      "step": 2000
    },
    {
      "epoch": 0.8910622113556099,
      "grad_norm": 73.46177673339844,
      "learning_rate": 9.908022797018853e-06,
      "loss": 16.637,
      "step": 2050
    },
    {
      "epoch": 0.9127954360228199,
      "grad_norm": 62.41428756713867,
      "learning_rate": 9.903638754932048e-06,
      "loss": 16.528,
      "step": 2100
    },
    {
      "epoch": 0.9345286606900299,
      "grad_norm": 65.01278686523438,
      "learning_rate": 9.899254712845245e-06,
      "loss": 16.5003,
      "step": 2150
    },
    {
      "epoch": 0.9562618853572399,
      "grad_norm": 57.43757247924805,
      "learning_rate": 9.894870670758439e-06,
      "loss": 16.3343,
      "step": 2200
    },
    {
      "epoch": 0.9779951100244498,
      "grad_norm": 55.26877975463867,
      "learning_rate": 9.890486628671636e-06,
      "loss": 16.4804,
      "step": 2250
    },
    {
      "epoch": 0.9997283346916599,
      "grad_norm": 59.66270446777344,
      "learning_rate": 9.886102586584833e-06,
      "loss": 16.1125,
      "step": 2300
    },
    {
      "epoch": 1.0212985601738658,
      "grad_norm": 65.677490234375,
      "learning_rate": 9.881718544498028e-06,
      "loss": 15.5081,
      "step": 2350
    },
    {
      "epoch": 1.0430317848410757,
      "grad_norm": 46.62354278564453,
      "learning_rate": 9.877334502411224e-06,
      "loss": 15.5581,
      "step": 2400
    },
    {
      "epoch": 1.0647650095082857,
      "grad_norm": 47.7025032043457,
      "learning_rate": 9.87295046032442e-06,
      "loss": 15.2401,
      "step": 2450
    },
    {
      "epoch": 1.0864982341754958,
      "grad_norm": 57.22602081298828,
      "learning_rate": 9.868566418237616e-06,
      "loss": 15.5116,
      "step": 2500
    },
    {
      "epoch": 1.1082314588427058,
      "grad_norm": 51.149818420410156,
      "learning_rate": 9.864182376150812e-06,
      "loss": 15.5938,
      "step": 2550
    },
    {
      "epoch": 1.1299646835099157,
      "grad_norm": 80.70169067382812,
      "learning_rate": 9.859798334064009e-06,
      "loss": 15.5891,
      "step": 2600
    },
    {
      "epoch": 1.1516979081771257,
      "grad_norm": 59.62293243408203,
      "learning_rate": 9.855414291977204e-06,
      "loss": 15.6038,
      "step": 2650
    },
    {
      "epoch": 1.1734311328443359,
      "grad_norm": 109.22635650634766,
      "learning_rate": 9.8510302498904e-06,
      "loss": 15.4956,
      "step": 2700
    },
    {
      "epoch": 1.1951643575115458,
      "grad_norm": 54.90534591674805,
      "learning_rate": 9.846646207803597e-06,
      "loss": 15.5951,
      "step": 2750
    },
    {
      "epoch": 1.2168975821787558,
      "grad_norm": 130.99798583984375,
      "learning_rate": 9.842262165716792e-06,
      "loss": 15.5201,
      "step": 2800
    },
    {
      "epoch": 1.2386308068459657,
      "grad_norm": 45.625389099121094,
      "learning_rate": 9.837878123629987e-06,
      "loss": 15.4576,
      "step": 2850
    },
    {
      "epoch": 1.2603640315131757,
      "grad_norm": 36.033836364746094,
      "learning_rate": 9.833494081543183e-06,
      "loss": 15.4927,
      "step": 2900
    },
    {
      "epoch": 1.2820972561803858,
      "grad_norm": 52.81075668334961,
      "learning_rate": 9.82911003945638e-06,
      "loss": 15.4669,
      "step": 2950
    },
    {
      "epoch": 1.3038304808475958,
      "grad_norm": 43.44648361206055,
      "learning_rate": 9.824725997369575e-06,
      "loss": 15.5326,
      "step": 3000
    },
    {
      "epoch": 1.3255637055148057,
      "grad_norm": 40.79172134399414,
      "learning_rate": 9.820341955282772e-06,
      "loss": 15.6252,
      "step": 3050
    },
    {
      "epoch": 1.3472969301820157,
      "grad_norm": 54.189910888671875,
      "learning_rate": 9.815957913195968e-06,
      "loss": 15.5897,
      "step": 3100
    },
    {
      "epoch": 1.3690301548492259,
      "grad_norm": 55.73503112792969,
      "learning_rate": 9.811573871109163e-06,
      "loss": 15.6081,
      "step": 3150
    },
    {
      "epoch": 1.3907633795164358,
      "grad_norm": 67.98750305175781,
      "learning_rate": 9.80718982902236e-06,
      "loss": 15.5154,
      "step": 3200
    },
    {
      "epoch": 1.4124966041836458,
      "grad_norm": 61.99040222167969,
      "learning_rate": 9.802805786935556e-06,
      "loss": 15.464,
      "step": 3250
    },
    {
      "epoch": 1.4342298288508557,
      "grad_norm": 42.3632926940918,
      "learning_rate": 9.798421744848751e-06,
      "loss": 15.3425,
      "step": 3300
    },
    {
      "epoch": 1.4559630535180657,
      "grad_norm": 46.5098991394043,
      "learning_rate": 9.794037702761946e-06,
      "loss": 15.5891,
      "step": 3350
    },
    {
      "epoch": 1.4776962781852756,
      "grad_norm": 59.43826675415039,
      "learning_rate": 9.789653660675143e-06,
      "loss": 15.6337,
      "step": 3400
    },
    {
      "epoch": 1.4994295028524858,
      "grad_norm": 76.57585906982422,
      "learning_rate": 9.785269618588339e-06,
      "loss": 15.4892,
      "step": 3450
    },
    {
      "epoch": 1.5211627275196957,
      "grad_norm": 65.46538543701172,
      "learning_rate": 9.780885576501536e-06,
      "loss": 15.6873,
      "step": 3500
    },
    {
      "epoch": 1.542895952186906,
      "grad_norm": 65.29698181152344,
      "learning_rate": 9.776501534414731e-06,
      "loss": 15.5051,
      "step": 3550
    },
    {
      "epoch": 1.5646291768541158,
      "grad_norm": 46.745784759521484,
      "learning_rate": 9.772117492327927e-06,
      "loss": 15.6602,
      "step": 3600
    },
    {
      "epoch": 1.5863624015213258,
      "grad_norm": 44.605228424072266,
      "learning_rate": 9.767733450241124e-06,
      "loss": 15.5229,
      "step": 3650
    },
    {
      "epoch": 1.6080956261885357,
      "grad_norm": 47.4207649230957,
      "learning_rate": 9.763349408154319e-06,
      "loss": 15.5634,
      "step": 3700
    },
    {
      "epoch": 1.6298288508557457,
      "grad_norm": 43.18611145019531,
      "learning_rate": 9.758965366067516e-06,
      "loss": 15.5222,
      "step": 3750
    },
    {
      "epoch": 1.6515620755229556,
      "grad_norm": 39.6898078918457,
      "learning_rate": 9.754581323980712e-06,
      "loss": 15.5259,
      "step": 3800
    },
    {
      "epoch": 1.6732953001901656,
      "grad_norm": 71.0409164428711,
      "learning_rate": 9.750197281893907e-06,
      "loss": 15.5971,
      "step": 3850
    },
    {
      "epoch": 1.6950285248573758,
      "grad_norm": 53.462467193603516,
      "learning_rate": 9.745813239807102e-06,
      "loss": 15.4515,
      "step": 3900
    },
    {
      "epoch": 1.7167617495245857,
      "grad_norm": 40.28457260131836,
      "learning_rate": 9.7414291977203e-06,
      "loss": 15.4006,
      "step": 3950
    },
    {
      "epoch": 1.7384949741917957,
      "grad_norm": 50.27900695800781,
      "learning_rate": 9.737045155633495e-06,
      "loss": 15.3051,
      "step": 4000
    },
    {
      "epoch": 1.7602281988590058,
      "grad_norm": 44.33418655395508,
      "learning_rate": 9.73266111354669e-06,
      "loss": 15.7606,
      "step": 4050
    },
    {
      "epoch": 1.7819614235262158,
      "grad_norm": 65.12041473388672,
      "learning_rate": 9.728277071459887e-06,
      "loss": 15.4747,
      "step": 4100
    },
    {
      "epoch": 1.8036946481934257,
      "grad_norm": 50.64781951904297,
      "learning_rate": 9.723893029373083e-06,
      "loss": 15.5251,
      "step": 4150
    },
    {
      "epoch": 1.8254278728606357,
      "grad_norm": 37.71573257446289,
      "learning_rate": 9.71950898728628e-06,
      "loss": 15.4422,
      "step": 4200
    },
    {
      "epoch": 1.8471610975278456,
      "grad_norm": 53.08781433105469,
      "learning_rate": 9.715124945199475e-06,
      "loss": 15.6055,
      "step": 4250
    },
    {
      "epoch": 1.8688943221950556,
      "grad_norm": 40.412384033203125,
      "learning_rate": 9.71074090311267e-06,
      "loss": 15.3275,
      "step": 4300
    },
    {
      "epoch": 1.8906275468622655,
      "grad_norm": 81.10236358642578,
      "learning_rate": 9.706356861025866e-06,
      "loss": 15.391,
      "step": 4350
    },
    {
      "epoch": 1.9123607715294757,
      "grad_norm": 73.39491271972656,
      "learning_rate": 9.701972818939063e-06,
      "loss": 15.7053,
      "step": 4400
    },
    {
      "epoch": 1.9340939961966856,
      "grad_norm": 42.71440124511719,
      "learning_rate": 9.697588776852258e-06,
      "loss": 15.484,
      "step": 4450
    },
    {
      "epoch": 1.9558272208638958,
      "grad_norm": 73.08609008789062,
      "learning_rate": 9.693204734765454e-06,
      "loss": 15.5822,
      "step": 4500
    },
    {
      "epoch": 1.9775604455311058,
      "grad_norm": 66.59615325927734,
      "learning_rate": 9.68882069267865e-06,
      "loss": 15.6825,
      "step": 4550
    },
    {
      "epoch": 1.9992936701983157,
      "grad_norm": 63.10333251953125,
      "learning_rate": 9.684436650591846e-06,
      "loss": 15.5203,
      "step": 4600
    },
    {
      "epoch": 2.0208638956805216,
      "grad_norm": 57.844970703125,
      "learning_rate": 9.680052608505043e-06,
      "loss": 14.5109,
      "step": 4650
    },
    {
      "epoch": 2.0425971203477316,
      "grad_norm": 36.37318420410156,
      "learning_rate": 9.675668566418239e-06,
      "loss": 14.7488,
      "step": 4700
    },
    {
      "epoch": 2.0643303450149415,
      "grad_norm": 72.80779266357422,
      "learning_rate": 9.671284524331434e-06,
      "loss": 14.9111,
      "step": 4750
    },
    {
      "epoch": 2.0860635696821515,
      "grad_norm": 71.37971496582031,
      "learning_rate": 9.66690048224463e-06,
      "loss": 14.6878,
      "step": 4800
    },
    {
      "epoch": 2.1077967943493614,
      "grad_norm": 42.20883560180664,
      "learning_rate": 9.662516440157827e-06,
      "loss": 14.7469,
      "step": 4850
    },
    {
      "epoch": 2.1295300190165714,
      "grad_norm": 53.63486862182617,
      "learning_rate": 9.658132398071022e-06,
      "loss": 14.6422,
      "step": 4900
    },
    {
      "epoch": 2.1512632436837817,
      "grad_norm": 54.38608932495117,
      "learning_rate": 9.653748355984219e-06,
      "loss": 14.6238,
      "step": 4950
    },
    {
      "epoch": 2.1729964683509917,
      "grad_norm": 44.58712387084961,
      "learning_rate": 9.649364313897414e-06,
      "loss": 14.6619,
      "step": 5000
    },
    {
      "epoch": 2.1947296930182016,
      "grad_norm": 46.281524658203125,
      "learning_rate": 9.64498027181061e-06,
      "loss": 14.8443,
      "step": 5050
    },
    {
      "epoch": 2.2164629176854116,
      "grad_norm": 38.51953887939453,
      "learning_rate": 9.640596229723807e-06,
      "loss": 14.9273,
      "step": 5100
    },
    {
      "epoch": 2.2381961423526215,
      "grad_norm": 53.27817153930664,
      "learning_rate": 9.636212187637002e-06,
      "loss": 14.6411,
      "step": 5150
    },
    {
      "epoch": 2.2599293670198315,
      "grad_norm": 43.47584533691406,
      "learning_rate": 9.631828145550198e-06,
      "loss": 14.7459,
      "step": 5200
    },
    {
      "epoch": 2.2816625916870414,
      "grad_norm": 37.26194381713867,
      "learning_rate": 9.627444103463393e-06,
      "loss": 14.9103,
      "step": 5250
    },
    {
      "epoch": 2.3033958163542514,
      "grad_norm": 38.729373931884766,
      "learning_rate": 9.62306006137659e-06,
      "loss": 14.5397,
      "step": 5300
    },
    {
      "epoch": 2.3251290410214613,
      "grad_norm": 33.352901458740234,
      "learning_rate": 9.618676019289785e-06,
      "loss": 14.9044,
      "step": 5350
    },
    {
      "epoch": 2.3468622656886717,
      "grad_norm": 47.63081359863281,
      "learning_rate": 9.614291977202983e-06,
      "loss": 14.5471,
      "step": 5400
    },
    {
      "epoch": 2.3685954903558817,
      "grad_norm": 125.63179016113281,
      "learning_rate": 9.609907935116178e-06,
      "loss": 14.8267,
      "step": 5450
    },
    {
      "epoch": 2.3903287150230916,
      "grad_norm": 49.1522216796875,
      "learning_rate": 9.605523893029373e-06,
      "loss": 14.7433,
      "step": 5500
    },
    {
      "epoch": 2.4120619396903016,
      "grad_norm": 43.327091217041016,
      "learning_rate": 9.60113985094257e-06,
      "loss": 14.908,
      "step": 5550
    },
    {
      "epoch": 2.4337951643575115,
      "grad_norm": 30.76859474182129,
      "learning_rate": 9.596755808855766e-06,
      "loss": 14.7341,
      "step": 5600
    },
    {
      "epoch": 2.4555283890247215,
      "grad_norm": 42.72526550292969,
      "learning_rate": 9.592371766768961e-06,
      "loss": 15.0516,
      "step": 5650
    },
    {
      "epoch": 2.4772616136919314,
      "grad_norm": 58.9193000793457,
      "learning_rate": 9.587987724682157e-06,
      "loss": 14.8745,
      "step": 5700
    },
    {
      "epoch": 2.4989948383591414,
      "grad_norm": 53.90520095825195,
      "learning_rate": 9.583603682595354e-06,
      "loss": 14.8219,
      "step": 5750
    },
    {
      "epoch": 2.5207280630263513,
      "grad_norm": 61.370452880859375,
      "learning_rate": 9.579219640508549e-06,
      "loss": 14.6925,
      "step": 5800
    },
    {
      "epoch": 2.5424612876935617,
      "grad_norm": 47.58317184448242,
      "learning_rate": 9.574835598421746e-06,
      "loss": 14.8654,
      "step": 5850
    },
    {
      "epoch": 2.5641945123607717,
      "grad_norm": 51.90703582763672,
      "learning_rate": 9.570451556334942e-06,
      "loss": 14.8152,
      "step": 5900
    },
    {
      "epoch": 2.5859277370279816,
      "grad_norm": 42.62101364135742,
      "learning_rate": 9.566067514248137e-06,
      "loss": 15.0139,
      "step": 5950
    },
    {
      "epoch": 2.6076609616951916,
      "grad_norm": 58.69119644165039,
      "learning_rate": 9.561683472161334e-06,
      "loss": 14.8962,
      "step": 6000
    },
    {
      "epoch": 2.6293941863624015,
      "grad_norm": 58.02621841430664,
      "learning_rate": 9.55729943007453e-06,
      "loss": 15.0422,
      "step": 6050
    },
    {
      "epoch": 2.6511274110296115,
      "grad_norm": 45.985225677490234,
      "learning_rate": 9.552915387987726e-06,
      "loss": 14.8361,
      "step": 6100
    },
    {
      "epoch": 2.6728606356968214,
      "grad_norm": 58.74437713623047,
      "learning_rate": 9.548531345900922e-06,
      "loss": 14.9231,
      "step": 6150
    },
    {
      "epoch": 2.6945938603640314,
      "grad_norm": 54.490962982177734,
      "learning_rate": 9.544147303814117e-06,
      "loss": 14.7987,
      "step": 6200
    },
    {
      "epoch": 2.7163270850312413,
      "grad_norm": 44.067710876464844,
      "learning_rate": 9.539763261727313e-06,
      "loss": 14.8596,
      "step": 6250
    },
    {
      "epoch": 2.7380603096984517,
      "grad_norm": 56.0435676574707,
      "learning_rate": 9.53537921964051e-06,
      "loss": 14.7602,
      "step": 6300
    },
    {
      "epoch": 2.7597935343656617,
      "grad_norm": 68.08670806884766,
      "learning_rate": 9.530995177553705e-06,
      "loss": 14.78,
      "step": 6350
    },
    {
      "epoch": 2.7815267590328716,
      "grad_norm": 55.21569061279297,
      "learning_rate": 9.5266111354669e-06,
      "loss": 14.908,
      "step": 6400
    },
    {
      "epoch": 2.8032599837000816,
      "grad_norm": 48.79258346557617,
      "learning_rate": 9.522227093380098e-06,
      "loss": 14.8478,
      "step": 6450
    },
    {
      "epoch": 2.8249932083672915,
      "grad_norm": 61.38957214355469,
      "learning_rate": 9.517843051293293e-06,
      "loss": 14.9716,
      "step": 6500
    },
    {
      "epoch": 2.8467264330345015,
      "grad_norm": 53.00950622558594,
      "learning_rate": 9.51345900920649e-06,
      "loss": 14.7508,
      "step": 6550
    },
    {
      "epoch": 2.8684596577017114,
      "grad_norm": 43.13687515258789,
      "learning_rate": 9.509074967119685e-06,
      "loss": 14.8466,
      "step": 6600
    },
    {
      "epoch": 2.8901928823689214,
      "grad_norm": 54.39565658569336,
      "learning_rate": 9.50469092503288e-06,
      "loss": 14.8025,
      "step": 6650
    },
    {
      "epoch": 2.9119261070361313,
      "grad_norm": 47.21046447753906,
      "learning_rate": 9.500306882946078e-06,
      "loss": 14.8096,
      "step": 6700
    },
    {
      "epoch": 2.9336593317033417,
      "grad_norm": 51.13401412963867,
      "learning_rate": 9.495922840859273e-06,
      "loss": 14.9482,
      "step": 6750
    },
    {
      "epoch": 2.955392556370551,
      "grad_norm": 47.619503021240234,
      "learning_rate": 9.491538798772469e-06,
      "loss": 14.9805,
      "step": 6800
    },
    {
      "epoch": 2.9771257810377616,
      "grad_norm": 40.876808166503906,
      "learning_rate": 9.487154756685664e-06,
      "loss": 14.9048,
      "step": 6850
    },
    {
      "epoch": 2.9988590057049715,
      "grad_norm": 52.037567138671875,
      "learning_rate": 9.482770714598861e-06,
      "loss": 14.9026,
      "step": 6900
    },
    {
      "epoch": 3.0204292311871774,
      "grad_norm": 41.4274787902832,
      "learning_rate": 9.478386672512057e-06,
      "loss": 14.1481,
      "step": 6950
    },
    {
      "epoch": 3.0421624558543874,
      "grad_norm": 51.49604797363281,
      "learning_rate": 9.474002630425254e-06,
      "loss": 14.1383,
      "step": 7000
    },
    {
      "epoch": 3.0638956805215973,
      "grad_norm": 53.052005767822266,
      "learning_rate": 9.469618588338449e-06,
      "loss": 14.2031,
      "step": 7050
    },
    {
      "epoch": 3.0856289051888073,
      "grad_norm": 29.748735427856445,
      "learning_rate": 9.465234546251644e-06,
      "loss": 14.2273,
      "step": 7100
    },
    {
      "epoch": 3.1073621298560172,
      "grad_norm": 41.33003616333008,
      "learning_rate": 9.460850504164841e-06,
      "loss": 14.2338,
      "step": 7150
    },
    {
      "epoch": 3.129095354523227,
      "grad_norm": 39.78664779663086,
      "learning_rate": 9.456466462078037e-06,
      "loss": 14.1309,
      "step": 7200
    },
    {
      "epoch": 3.1508285791904376,
      "grad_norm": 35.99256896972656,
      "learning_rate": 9.452082419991234e-06,
      "loss": 14.2261,
      "step": 7250
    },
    {
      "epoch": 3.1725618038576475,
      "grad_norm": 40.001197814941406,
      "learning_rate": 9.44769837790443e-06,
      "loss": 14.1562,
      "step": 7300
    },
    {
      "epoch": 3.1942950285248575,
      "grad_norm": 48.2380256652832,
      "learning_rate": 9.443314335817625e-06,
      "loss": 14.0423,
      "step": 7350
    },
    {
      "epoch": 3.2160282531920674,
      "grad_norm": 44.41048812866211,
      "learning_rate": 9.43893029373082e-06,
      "loss": 14.0881,
      "step": 7400
    },
    {
      "epoch": 3.2377614778592774,
      "grad_norm": 29.655723571777344,
      "learning_rate": 9.434546251644017e-06,
      "loss": 14.2163,
      "step": 7450
    },
    {
      "epoch": 3.2594947025264873,
      "grad_norm": 40.9448356628418,
      "learning_rate": 9.430162209557213e-06,
      "loss": 14.2057,
      "step": 7500
    },
    {
      "epoch": 3.2812279271936973,
      "grad_norm": 50.84467315673828,
      "learning_rate": 9.425778167470408e-06,
      "loss": 14.2386,
      "step": 7550
    },
    {
      "epoch": 3.302961151860907,
      "grad_norm": 46.98764419555664,
      "learning_rate": 9.421394125383605e-06,
      "loss": 14.2241,
      "step": 7600
    },
    {
      "epoch": 3.324694376528117,
      "grad_norm": 46.322166442871094,
      "learning_rate": 9.4170100832968e-06,
      "loss": 14.1992,
      "step": 7650
    },
    {
      "epoch": 3.3464276011953276,
      "grad_norm": 45.123985290527344,
      "learning_rate": 9.412626041209997e-06,
      "loss": 14.2106,
      "step": 7700
    },
    {
      "epoch": 3.3681608258625375,
      "grad_norm": 50.508583068847656,
      "learning_rate": 9.408241999123193e-06,
      "loss": 14.0418,
      "step": 7750
    },
    {
      "epoch": 3.3898940505297475,
      "grad_norm": 42.03702926635742,
      "learning_rate": 9.403857957036388e-06,
      "loss": 14.298,
      "step": 7800
    },
    {
      "epoch": 3.4116272751969574,
      "grad_norm": 49.16743469238281,
      "learning_rate": 9.399473914949584e-06,
      "loss": 14.2654,
      "step": 7850
    },
    {
      "epoch": 3.4333604998641674,
      "grad_norm": 47.92793273925781,
      "learning_rate": 9.39508987286278e-06,
      "loss": 14.3558,
      "step": 7900
    },
    {
      "epoch": 3.4550937245313773,
      "grad_norm": 37.79042434692383,
      "learning_rate": 9.390705830775976e-06,
      "loss": 14.2301,
      "step": 7950
    },
    {
      "epoch": 3.4768269491985873,
      "grad_norm": 41.851051330566406,
      "learning_rate": 9.386321788689171e-06,
      "loss": 14.2025,
      "step": 8000
    },
    {
      "epoch": 3.498560173865797,
      "grad_norm": 58.03968811035156,
      "learning_rate": 9.381937746602369e-06,
      "loss": 14.3552,
      "step": 8050
    },
    {
      "epoch": 3.520293398533007,
      "grad_norm": 34.9418830871582,
      "learning_rate": 9.377553704515564e-06,
      "loss": 14.3336,
      "step": 8100
    },
    {
      "epoch": 3.5420266232002175,
      "grad_norm": 41.05316162109375,
      "learning_rate": 9.373169662428761e-06,
      "loss": 14.2819,
      "step": 8150
    },
    {
      "epoch": 3.563759847867427,
      "grad_norm": 45.65940856933594,
      "learning_rate": 9.368785620341956e-06,
      "loss": 14.2237,
      "step": 8200
    },
    {
      "epoch": 3.5854930725346374,
      "grad_norm": 37.271751403808594,
      "learning_rate": 9.364401578255152e-06,
      "loss": 14.0594,
      "step": 8250
    },
    {
      "epoch": 3.6072262972018474,
      "grad_norm": 41.95325469970703,
      "learning_rate": 9.360017536168347e-06,
      "loss": 14.1691,
      "step": 8300
    },
    {
      "epoch": 3.6289595218690573,
      "grad_norm": 91.28557586669922,
      "learning_rate": 9.355633494081544e-06,
      "loss": 14.1016,
      "step": 8350
    },
    {
      "epoch": 3.6506927465362673,
      "grad_norm": 56.508670806884766,
      "learning_rate": 9.35124945199474e-06,
      "loss": 14.2686,
      "step": 8400
    },
    {
      "epoch": 3.6724259712034772,
      "grad_norm": 69.8916015625,
      "learning_rate": 9.346865409907935e-06,
      "loss": 14.3426,
      "step": 8450
    },
    {
      "epoch": 3.694159195870687,
      "grad_norm": 51.414215087890625,
      "learning_rate": 9.342481367821132e-06,
      "loss": 14.3489,
      "step": 8500
    },
    {
      "epoch": 3.715892420537897,
      "grad_norm": 51.891639709472656,
      "learning_rate": 9.338097325734328e-06,
      "loss": 14.2614,
      "step": 8550
    },
    {
      "epoch": 3.7376256452051075,
      "grad_norm": 55.276275634765625,
      "learning_rate": 9.333713283647525e-06,
      "loss": 14.1835,
      "step": 8600
    },
    {
      "epoch": 3.759358869872317,
      "grad_norm": 38.33846664428711,
      "learning_rate": 9.32932924156072e-06,
      "loss": 14.5973,
      "step": 8650
    },
    {
      "epoch": 3.7810920945395274,
      "grad_norm": 46.052513122558594,
      "learning_rate": 9.324945199473915e-06,
      "loss": 14.3903,
      "step": 8700
    },
    {
      "epoch": 3.8028253192067374,
      "grad_norm": 35.333560943603516,
      "learning_rate": 9.32056115738711e-06,
      "loss": 14.3516,
      "step": 8750
    },
    {
      "epoch": 3.8245585438739473,
      "grad_norm": 52.49406051635742,
      "learning_rate": 9.316177115300308e-06,
      "loss": 14.371,
      "step": 8800
    },
    {
      "epoch": 3.8462917685411573,
      "grad_norm": 48.86211013793945,
      "learning_rate": 9.311793073213503e-06,
      "loss": 14.2632,
      "step": 8850
    },
    {
      "epoch": 3.8680249932083672,
      "grad_norm": 48.95231628417969,
      "learning_rate": 9.3074090311267e-06,
      "loss": 14.1847,
      "step": 8900
    },
    {
      "epoch": 3.889758217875577,
      "grad_norm": 37.594696044921875,
      "learning_rate": 9.303024989039896e-06,
      "loss": 14.3286,
      "step": 8950
    },
    {
      "epoch": 3.911491442542787,
      "grad_norm": 47.66452407836914,
      "learning_rate": 9.298640946953091e-06,
      "loss": 14.3358,
      "step": 9000
    },
    {
      "epoch": 3.9332246672099975,
      "grad_norm": 40.61109161376953,
      "learning_rate": 9.294256904866288e-06,
      "loss": 14.4558,
      "step": 9050
    },
    {
      "epoch": 3.954957891877207,
      "grad_norm": 34.296836853027344,
      "learning_rate": 9.289872862779484e-06,
      "loss": 14.3049,
      "step": 9100
    },
    {
      "epoch": 3.9766911165444174,
      "grad_norm": 43.91560363769531,
      "learning_rate": 9.285488820692679e-06,
      "loss": 14.3231,
      "step": 9150
    },
    {
      "epoch": 3.9984243412116274,
      "grad_norm": 37.4168586730957,
      "learning_rate": 9.281104778605874e-06,
      "loss": 14.2418,
      "step": 9200
    },
    {
      "epoch": 4.019994566693833,
      "grad_norm": 34.46104049682617,
      "learning_rate": 9.276720736519071e-06,
      "loss": 13.6035,
      "step": 9250
    },
    {
      "epoch": 4.041727791361043,
      "grad_norm": 38.560298919677734,
      "learning_rate": 9.272336694432267e-06,
      "loss": 13.6357,
      "step": 9300
    },
    {
      "epoch": 4.063461016028254,
      "grad_norm": 35.547752380371094,
      "learning_rate": 9.267952652345464e-06,
      "loss": 13.798,
      "step": 9350
    },
    {
      "epoch": 4.085194240695463,
      "grad_norm": 36.332298278808594,
      "learning_rate": 9.26356861025866e-06,
      "loss": 13.6992,
      "step": 9400
    },
    {
      "epoch": 4.1069274653626735,
      "grad_norm": 40.322715759277344,
      "learning_rate": 9.259184568171855e-06,
      "loss": 13.7247,
      "step": 9450
    },
    {
      "epoch": 4.128660690029883,
      "grad_norm": 27.05885887145996,
      "learning_rate": 9.254800526085052e-06,
      "loss": 13.801,
      "step": 9500
    },
    {
      "epoch": 4.150393914697093,
      "grad_norm": 38.66703414916992,
      "learning_rate": 9.250416483998247e-06,
      "loss": 13.7814,
      "step": 9550
    },
    {
      "epoch": 4.172127139364303,
      "grad_norm": 37.8776969909668,
      "learning_rate": 9.246032441911443e-06,
      "loss": 13.7403,
      "step": 9600
    },
    {
      "epoch": 4.193860364031513,
      "grad_norm": 36.977317810058594,
      "learning_rate": 9.241648399824638e-06,
      "loss": 13.8831,
      "step": 9650
    },
    {
      "epoch": 4.215593588698723,
      "grad_norm": 43.09788131713867,
      "learning_rate": 9.237264357737835e-06,
      "loss": 13.7397,
      "step": 9700
    },
    {
      "epoch": 4.237326813365933,
      "grad_norm": 33.9801139831543,
      "learning_rate": 9.23288031565103e-06,
      "loss": 13.8114,
      "step": 9750
    },
    {
      "epoch": 4.259060038033143,
      "grad_norm": 32.65711212158203,
      "learning_rate": 9.228496273564227e-06,
      "loss": 13.7081,
      "step": 9800
    },
    {
      "epoch": 4.280793262700353,
      "grad_norm": 71.91608428955078,
      "learning_rate": 9.224112231477423e-06,
      "loss": 13.7953,
      "step": 9850
    },
    {
      "epoch": 4.3025264873675635,
      "grad_norm": 28.490583419799805,
      "learning_rate": 9.219728189390618e-06,
      "loss": 13.7322,
      "step": 9900
    },
    {
      "epoch": 4.324259712034773,
      "grad_norm": 49.53886795043945,
      "learning_rate": 9.215344147303815e-06,
      "loss": 13.8046,
      "step": 9950
    },
    {
      "epoch": 4.345992936701983,
      "grad_norm": 40.42410659790039,
      "learning_rate": 9.21096010521701e-06,
      "loss": 13.735,
      "step": 10000
    },
    {
      "epoch": 4.345992936701983,
      "eval_cer": 0.07540147877501142,
      "eval_loss": 2.2973904609680176,
      "eval_runtime": 396.0823,
      "eval_samples_per_second": 13.649,
      "eval_steps_per_second": 3.413,
      "eval_wer": 0.22808918197519235,
      "step": 10000
    },
    {
      "epoch": 4.367726161369193,
      "grad_norm": 32.75251388549805,
      "learning_rate": 9.206576063130208e-06,
      "loss": 13.8058,
      "step": 10050
    },
    {
      "epoch": 4.389459386036403,
      "grad_norm": 35.6936149597168,
      "learning_rate": 9.202192021043403e-06,
      "loss": 13.7489,
      "step": 10100
    },
    {
      "epoch": 4.411192610703613,
      "grad_norm": 39.304100036621094,
      "learning_rate": 9.197807978956599e-06,
      "loss": 13.8124,
      "step": 10150
    },
    {
      "epoch": 4.432925835370823,
      "grad_norm": 39.43230438232422,
      "learning_rate": 9.193423936869794e-06,
      "loss": 13.9531,
      "step": 10200
    },
    {
      "epoch": 4.454659060038033,
      "grad_norm": 37.89631652832031,
      "learning_rate": 9.189039894782991e-06,
      "loss": 13.7975,
      "step": 10250
    },
    {
      "epoch": 4.476392284705243,
      "grad_norm": 36.32379150390625,
      "learning_rate": 9.184655852696186e-06,
      "loss": 13.9208,
      "step": 10300
    },
    {
      "epoch": 4.4981255093724535,
      "grad_norm": 39.24440002441406,
      "learning_rate": 9.180271810609382e-06,
      "loss": 13.88,
      "step": 10350
    },
    {
      "epoch": 4.519858734039663,
      "grad_norm": 32.791900634765625,
      "learning_rate": 9.175887768522579e-06,
      "loss": 13.8944,
      "step": 10400
    },
    {
      "epoch": 4.541591958706873,
      "grad_norm": 33.695865631103516,
      "learning_rate": 9.171503726435774e-06,
      "loss": 13.8637,
      "step": 10450
    },
    {
      "epoch": 4.563325183374083,
      "grad_norm": 33.961647033691406,
      "learning_rate": 9.167119684348971e-06,
      "loss": 13.7873,
      "step": 10500
    },
    {
      "epoch": 4.585058408041293,
      "grad_norm": 101.09957122802734,
      "learning_rate": 9.162735642262167e-06,
      "loss": 13.848,
      "step": 10550
    },
    {
      "epoch": 4.606791632708503,
      "grad_norm": 42.666595458984375,
      "learning_rate": 9.158351600175362e-06,
      "loss": 13.9049,
      "step": 10600
    },
    {
      "epoch": 4.628524857375713,
      "grad_norm": 44.05756378173828,
      "learning_rate": 9.153967558088558e-06,
      "loss": 13.9251,
      "step": 10650
    },
    {
      "epoch": 4.650258082042923,
      "grad_norm": 44.468162536621094,
      "learning_rate": 9.149583516001755e-06,
      "loss": 13.7975,
      "step": 10700
    },
    {
      "epoch": 4.671991306710133,
      "grad_norm": 35.0707893371582,
      "learning_rate": 9.14519947391495e-06,
      "loss": 13.9261,
      "step": 10750
    },
    {
      "epoch": 4.6937245313773435,
      "grad_norm": 40.042274475097656,
      "learning_rate": 9.140815431828145e-06,
      "loss": 13.8594,
      "step": 10800
    },
    {
      "epoch": 4.715457756044553,
      "grad_norm": 29.434371948242188,
      "learning_rate": 9.136431389741342e-06,
      "loss": 13.8838,
      "step": 10850
    },
    {
      "epoch": 4.737190980711763,
      "grad_norm": 37.34782409667969,
      "learning_rate": 9.132047347654538e-06,
      "loss": 13.7635,
      "step": 10900
    },
    {
      "epoch": 4.758924205378973,
      "grad_norm": 221.30532836914062,
      "learning_rate": 9.127663305567735e-06,
      "loss": 13.76,
      "step": 10950
    },
    {
      "epoch": 4.780657430046183,
      "grad_norm": 35.67972946166992,
      "learning_rate": 9.12327926348093e-06,
      "loss": 13.8596,
      "step": 11000
    },
    {
      "epoch": 4.802390654713393,
      "grad_norm": 52.167972564697266,
      "learning_rate": 9.118895221394126e-06,
      "loss": 13.8706,
      "step": 11050
    },
    {
      "epoch": 4.824123879380603,
      "grad_norm": 47.18834686279297,
      "learning_rate": 9.114511179307321e-06,
      "loss": 13.8763,
      "step": 11100
    },
    {
      "epoch": 4.845857104047813,
      "grad_norm": 42.99448776245117,
      "learning_rate": 9.110127137220518e-06,
      "loss": 13.9622,
      "step": 11150
    },
    {
      "epoch": 4.867590328715023,
      "grad_norm": 55.08070755004883,
      "learning_rate": 9.105743095133715e-06,
      "loss": 13.8151,
      "step": 11200
    },
    {
      "epoch": 4.8893235533822335,
      "grad_norm": 32.91100311279297,
      "learning_rate": 9.10135905304691e-06,
      "loss": 13.8262,
      "step": 11250
    },
    {
      "epoch": 4.911056778049443,
      "grad_norm": 34.78753662109375,
      "learning_rate": 9.096975010960106e-06,
      "loss": 13.928,
      "step": 11300
    },
    {
      "epoch": 4.932790002716653,
      "grad_norm": 40.1533203125,
      "learning_rate": 9.092590968873301e-06,
      "loss": 13.8169,
      "step": 11350
    },
    {
      "epoch": 4.954523227383863,
      "grad_norm": 41.97115707397461,
      "learning_rate": 9.088206926786498e-06,
      "loss": 13.8194,
      "step": 11400
    },
    {
      "epoch": 4.976256452051073,
      "grad_norm": 30.406110763549805,
      "learning_rate": 9.083822884699694e-06,
      "loss": 13.9941,
      "step": 11450
    },
    {
      "epoch": 4.997989676718283,
      "grad_norm": 44.02429962158203,
      "learning_rate": 9.07943884261289e-06,
      "loss": 13.9272,
      "step": 11500
    },
    {
      "epoch": 5.019559902200489,
      "grad_norm": 42.15421676635742,
      "learning_rate": 9.075054800526086e-06,
      "loss": 13.386,
      "step": 11550
    },
    {
      "epoch": 5.041293126867699,
      "grad_norm": 28.951597213745117,
      "learning_rate": 9.070670758439282e-06,
      "loss": 13.4325,
      "step": 11600
    },
    {
      "epoch": 5.0630263515349085,
      "grad_norm": 59.380374908447266,
      "learning_rate": 9.066286716352479e-06,
      "loss": 13.409,
      "step": 11650
    },
    {
      "epoch": 5.084759576202119,
      "grad_norm": 56.05976867675781,
      "learning_rate": 9.061902674265674e-06,
      "loss": 13.5315,
      "step": 11700
    },
    {
      "epoch": 5.106492800869329,
      "grad_norm": 36.069583892822266,
      "learning_rate": 9.05751863217887e-06,
      "loss": 13.3532,
      "step": 11750
    },
    {
      "epoch": 5.128226025536539,
      "grad_norm": 39.289833068847656,
      "learning_rate": 9.053134590092065e-06,
      "loss": 13.3686,
      "step": 11800
    },
    {
      "epoch": 5.149959250203749,
      "grad_norm": 37.062931060791016,
      "learning_rate": 9.048750548005262e-06,
      "loss": 13.4362,
      "step": 11850
    },
    {
      "epoch": 5.171692474870959,
      "grad_norm": 56.210750579833984,
      "learning_rate": 9.044366505918457e-06,
      "loss": 13.4053,
      "step": 11900
    },
    {
      "epoch": 5.193425699538169,
      "grad_norm": 26.70563507080078,
      "learning_rate": 9.039982463831653e-06,
      "loss": 13.3191,
      "step": 11950
    },
    {
      "epoch": 5.215158924205379,
      "grad_norm": 39.95426940917969,
      "learning_rate": 9.03559842174485e-06,
      "loss": 13.3161,
      "step": 12000
    },
    {
      "epoch": 5.236892148872589,
      "grad_norm": 29.761014938354492,
      "learning_rate": 9.031214379658045e-06,
      "loss": 13.5076,
      "step": 12050
    },
    {
      "epoch": 5.2586253735397985,
      "grad_norm": 32.707786560058594,
      "learning_rate": 9.026830337571242e-06,
      "loss": 13.4865,
      "step": 12100
    },
    {
      "epoch": 5.280358598207009,
      "grad_norm": 30.934314727783203,
      "learning_rate": 9.022446295484438e-06,
      "loss": 13.3348,
      "step": 12150
    },
    {
      "epoch": 5.302091822874219,
      "grad_norm": 38.97114562988281,
      "learning_rate": 9.018062253397633e-06,
      "loss": 13.4458,
      "step": 12200
    },
    {
      "epoch": 5.323825047541429,
      "grad_norm": 52.749507904052734,
      "learning_rate": 9.013678211310829e-06,
      "loss": 13.5492,
      "step": 12250
    },
    {
      "epoch": 5.345558272208639,
      "grad_norm": 37.54782485961914,
      "learning_rate": 9.009294169224026e-06,
      "loss": 13.3087,
      "step": 12300
    },
    {
      "epoch": 5.367291496875849,
      "grad_norm": 40.16310501098633,
      "learning_rate": 9.004910127137221e-06,
      "loss": 13.4691,
      "step": 12350
    },
    {
      "epoch": 5.389024721543059,
      "grad_norm": 58.52961349487305,
      "learning_rate": 9.000526085050418e-06,
      "loss": 13.5101,
      "step": 12400
    },
    {
      "epoch": 5.410757946210269,
      "grad_norm": 31.150737762451172,
      "learning_rate": 8.996142042963613e-06,
      "loss": 13.3933,
      "step": 12450
    },
    {
      "epoch": 5.432491170877479,
      "grad_norm": 31.380889892578125,
      "learning_rate": 8.991758000876809e-06,
      "loss": 13.5171,
      "step": 12500
    },
    {
      "epoch": 5.4542243955446885,
      "grad_norm": 45.46767044067383,
      "learning_rate": 8.987373958790006e-06,
      "loss": 13.4807,
      "step": 12550
    },
    {
      "epoch": 5.475957620211899,
      "grad_norm": 42.970542907714844,
      "learning_rate": 8.982989916703201e-06,
      "loss": 13.4787,
      "step": 12600
    },
    {
      "epoch": 5.497690844879109,
      "grad_norm": 51.134578704833984,
      "learning_rate": 8.978605874616397e-06,
      "loss": 13.4804,
      "step": 12650
    },
    {
      "epoch": 5.519424069546319,
      "grad_norm": 37.2877082824707,
      "learning_rate": 8.974221832529592e-06,
      "loss": 13.5335,
      "step": 12700
    },
    {
      "epoch": 5.541157294213529,
      "grad_norm": 41.327144622802734,
      "learning_rate": 8.96983779044279e-06,
      "loss": 13.5202,
      "step": 12750
    },
    {
      "epoch": 5.562890518880739,
      "grad_norm": 41.8232421875,
      "learning_rate": 8.965453748355985e-06,
      "loss": 13.4273,
      "step": 12800
    },
    {
      "epoch": 5.584623743547949,
      "grad_norm": 34.09703063964844,
      "learning_rate": 8.961069706269182e-06,
      "loss": 13.5441,
      "step": 12850
    },
    {
      "epoch": 5.606356968215159,
      "grad_norm": 34.51966094970703,
      "learning_rate": 8.956685664182377e-06,
      "loss": 13.5746,
      "step": 12900
    },
    {
      "epoch": 5.628090192882369,
      "grad_norm": 44.580360412597656,
      "learning_rate": 8.952301622095572e-06,
      "loss": 13.5962,
      "step": 12950
    },
    {
      "epoch": 5.6498234175495785,
      "grad_norm": 49.46404266357422,
      "learning_rate": 8.94791758000877e-06,
      "loss": 13.5788,
      "step": 13000
    },
    {
      "epoch": 5.671556642216789,
      "grad_norm": 33.00864028930664,
      "learning_rate": 8.943533537921965e-06,
      "loss": 13.4571,
      "step": 13050
    },
    {
      "epoch": 5.693289866883999,
      "grad_norm": 31.570575714111328,
      "learning_rate": 8.93914949583516e-06,
      "loss": 13.512,
      "step": 13100
    },
    {
      "epoch": 5.715023091551209,
      "grad_norm": 31.16398048400879,
      "learning_rate": 8.934765453748356e-06,
      "loss": 13.5388,
      "step": 13150
    },
    {
      "epoch": 5.736756316218419,
      "grad_norm": 31.840078353881836,
      "learning_rate": 8.930381411661553e-06,
      "loss": 13.5697,
      "step": 13200
    },
    {
      "epoch": 5.758489540885629,
      "grad_norm": 41.02314376831055,
      "learning_rate": 8.925997369574748e-06,
      "loss": 13.5952,
      "step": 13250
    },
    {
      "epoch": 5.780222765552839,
      "grad_norm": 38.16290283203125,
      "learning_rate": 8.921613327487945e-06,
      "loss": 13.6544,
      "step": 13300
    },
    {
      "epoch": 5.801955990220049,
      "grad_norm": 34.18564224243164,
      "learning_rate": 8.91722928540114e-06,
      "loss": 13.6995,
      "step": 13350
    },
    {
      "epoch": 5.823689214887259,
      "grad_norm": 27.264175415039062,
      "learning_rate": 8.912845243314336e-06,
      "loss": 13.5539,
      "step": 13400
    },
    {
      "epoch": 5.8454224395544685,
      "grad_norm": 39.271888732910156,
      "learning_rate": 8.908461201227533e-06,
      "loss": 13.6108,
      "step": 13450
    },
    {
      "epoch": 5.867155664221679,
      "grad_norm": 25.51955223083496,
      "learning_rate": 8.904077159140728e-06,
      "loss": 13.5095,
      "step": 13500
    },
    {
      "epoch": 5.888888888888889,
      "grad_norm": 37.255367279052734,
      "learning_rate": 8.899693117053926e-06,
      "loss": 13.5916,
      "step": 13550
    },
    {
      "epoch": 5.910622113556099,
      "grad_norm": 36.901702880859375,
      "learning_rate": 8.89530907496712e-06,
      "loss": 13.5283,
      "step": 13600
    },
    {
      "epoch": 5.932355338223309,
      "grad_norm": 36.892799377441406,
      "learning_rate": 8.890925032880316e-06,
      "loss": 13.6032,
      "step": 13650
    },
    {
      "epoch": 5.954088562890519,
      "grad_norm": 36.8080940246582,
      "learning_rate": 8.886540990793512e-06,
      "loss": 13.7407,
      "step": 13700
    },
    {
      "epoch": 5.975821787557729,
      "grad_norm": 41.102657318115234,
      "learning_rate": 8.882156948706709e-06,
      "loss": 13.5335,
      "step": 13750
    },
    {
      "epoch": 5.997555012224939,
      "grad_norm": 31.643165588378906,
      "learning_rate": 8.877772906619904e-06,
      "loss": 13.6137,
      "step": 13800
    },
    {
      "epoch": 6.0191252377071445,
      "grad_norm": 35.148006439208984,
      "learning_rate": 8.8733888645331e-06,
      "loss": 13.1715,
      "step": 13850
    },
    {
      "epoch": 6.040858462374355,
      "grad_norm": 34.13616943359375,
      "learning_rate": 8.869004822446297e-06,
      "loss": 13.1644,
      "step": 13900
    },
    {
      "epoch": 6.062591687041564,
      "grad_norm": 43.90581512451172,
      "learning_rate": 8.864620780359492e-06,
      "loss": 13.0996,
      "step": 13950
    },
    {
      "epoch": 6.084324911708775,
      "grad_norm": 36.725379943847656,
      "learning_rate": 8.860236738272689e-06,
      "loss": 13.1415,
      "step": 14000
    },
    {
      "epoch": 6.106058136375985,
      "grad_norm": 32.847129821777344,
      "learning_rate": 8.855852696185884e-06,
      "loss": 13.1429,
      "step": 14050
    },
    {
      "epoch": 6.127791361043195,
      "grad_norm": 27.32487678527832,
      "learning_rate": 8.85146865409908e-06,
      "loss": 13.2287,
      "step": 14100
    },
    {
      "epoch": 6.149524585710405,
      "grad_norm": 38.18893051147461,
      "learning_rate": 8.847084612012275e-06,
      "loss": 13.1909,
      "step": 14150
    },
    {
      "epoch": 6.171257810377615,
      "grad_norm": 29.566404342651367,
      "learning_rate": 8.842700569925472e-06,
      "loss": 13.1921,
      "step": 14200
    },
    {
      "epoch": 6.192991035044825,
      "grad_norm": 27.988677978515625,
      "learning_rate": 8.838316527838668e-06,
      "loss": 13.1361,
      "step": 14250
    },
    {
      "epoch": 6.2147242597120345,
      "grad_norm": 36.260833740234375,
      "learning_rate": 8.833932485751863e-06,
      "loss": 13.1959,
      "step": 14300
    },
    {
      "epoch": 6.236457484379245,
      "grad_norm": 37.56959533691406,
      "learning_rate": 8.82954844366506e-06,
      "loss": 13.0934,
      "step": 14350
    },
    {
      "epoch": 6.258190709046454,
      "grad_norm": 37.16026306152344,
      "learning_rate": 8.825164401578256e-06,
      "loss": 13.3774,
      "step": 14400
    },
    {
      "epoch": 6.279923933713665,
      "grad_norm": 51.96893310546875,
      "learning_rate": 8.820780359491453e-06,
      "loss": 13.2566,
      "step": 14450
    },
    {
      "epoch": 6.301657158380875,
      "grad_norm": 31.46018409729004,
      "learning_rate": 8.816396317404648e-06,
      "loss": 13.242,
      "step": 14500
    },
    {
      "epoch": 6.323390383048085,
      "grad_norm": 40.38423538208008,
      "learning_rate": 8.812012275317843e-06,
      "loss": 13.2649,
      "step": 14550
    },
    {
      "epoch": 6.345123607715295,
      "grad_norm": 33.40611267089844,
      "learning_rate": 8.807628233231039e-06,
      "loss": 13.2143,
      "step": 14600
    },
    {
      "epoch": 6.3668568323825045,
      "grad_norm": 32.6546745300293,
      "learning_rate": 8.803244191144236e-06,
      "loss": 13.2293,
      "step": 14650
    },
    {
      "epoch": 6.388590057049715,
      "grad_norm": 30.99147605895996,
      "learning_rate": 8.798860149057433e-06,
      "loss": 13.1595,
      "step": 14700
    },
    {
      "epoch": 6.4103232817169244,
      "grad_norm": 49.923667907714844,
      "learning_rate": 8.794476106970627e-06,
      "loss": 13.1624,
      "step": 14750
    },
    {
      "epoch": 6.432056506384135,
      "grad_norm": 27.526941299438477,
      "learning_rate": 8.790092064883824e-06,
      "loss": 13.245,
      "step": 14800
    },
    {
      "epoch": 6.453789731051344,
      "grad_norm": 41.09890365600586,
      "learning_rate": 8.785708022797019e-06,
      "loss": 13.1945,
      "step": 14850
    },
    {
      "epoch": 6.475522955718555,
      "grad_norm": 36.0584831237793,
      "learning_rate": 8.781323980710216e-06,
      "loss": 13.207,
      "step": 14900
    },
    {
      "epoch": 6.497256180385765,
      "grad_norm": 30.85024642944336,
      "learning_rate": 8.776939938623412e-06,
      "loss": 13.2022,
      "step": 14950
    },
    {
      "epoch": 6.518989405052975,
      "grad_norm": 34.92485427856445,
      "learning_rate": 8.772555896536607e-06,
      "loss": 13.3696,
      "step": 15000
    },
    {
      "epoch": 6.540722629720185,
      "grad_norm": 33.38056564331055,
      "learning_rate": 8.768171854449802e-06,
      "loss": 13.2597,
      "step": 15050
    },
    {
      "epoch": 6.5624558543873945,
      "grad_norm": 29.834815979003906,
      "learning_rate": 8.763787812363e-06,
      "loss": 13.2281,
      "step": 15100
    },
    {
      "epoch": 6.584189079054605,
      "grad_norm": 30.077539443969727,
      "learning_rate": 8.759403770276197e-06,
      "loss": 13.2554,
      "step": 15150
    },
    {
      "epoch": 6.605922303721814,
      "grad_norm": 43.224586486816406,
      "learning_rate": 8.755019728189392e-06,
      "loss": 13.1997,
      "step": 15200
    },
    {
      "epoch": 6.627655528389025,
      "grad_norm": 48.51641082763672,
      "learning_rate": 8.750635686102587e-06,
      "loss": 13.2243,
      "step": 15250
    },
    {
      "epoch": 6.649388753056234,
      "grad_norm": 29.839174270629883,
      "learning_rate": 8.746251644015783e-06,
      "loss": 13.3737,
      "step": 15300
    },
    {
      "epoch": 6.671121977723445,
      "grad_norm": 44.47172546386719,
      "learning_rate": 8.74186760192898e-06,
      "loss": 13.1659,
      "step": 15350
    },
    {
      "epoch": 6.692855202390655,
      "grad_norm": 27.568334579467773,
      "learning_rate": 8.737483559842175e-06,
      "loss": 13.2285,
      "step": 15400
    },
    {
      "epoch": 6.714588427057865,
      "grad_norm": 31.159231185913086,
      "learning_rate": 8.73309951775537e-06,
      "loss": 13.3102,
      "step": 15450
    },
    {
      "epoch": 6.736321651725075,
      "grad_norm": 30.869430541992188,
      "learning_rate": 8.728715475668566e-06,
      "loss": 13.2286,
      "step": 15500
    },
    {
      "epoch": 6.7580548763922845,
      "grad_norm": 51.48735427856445,
      "learning_rate": 8.724331433581763e-06,
      "loss": 13.3347,
      "step": 15550
    },
    {
      "epoch": 6.779788101059495,
      "grad_norm": 35.06986999511719,
      "learning_rate": 8.71994739149496e-06,
      "loss": 13.1973,
      "step": 15600
    },
    {
      "epoch": 6.801521325726704,
      "grad_norm": 27.670289993286133,
      "learning_rate": 8.715563349408155e-06,
      "loss": 13.162,
      "step": 15650
    },
    {
      "epoch": 6.823254550393915,
      "grad_norm": 34.26895523071289,
      "learning_rate": 8.711179307321351e-06,
      "loss": 13.3011,
      "step": 15700
    },
    {
      "epoch": 6.844987775061124,
      "grad_norm": 41.056182861328125,
      "learning_rate": 8.706795265234546e-06,
      "loss": 13.2564,
      "step": 15750
    },
    {
      "epoch": 6.866720999728335,
      "grad_norm": 47.23772048950195,
      "learning_rate": 8.702411223147743e-06,
      "loss": 13.3127,
      "step": 15800
    },
    {
      "epoch": 6.888454224395545,
      "grad_norm": 65.80028533935547,
      "learning_rate": 8.698027181060939e-06,
      "loss": 13.2797,
      "step": 15850
    },
    {
      "epoch": 6.910187449062755,
      "grad_norm": 40.93989562988281,
      "learning_rate": 8.693643138974134e-06,
      "loss": 13.3707,
      "step": 15900
    },
    {
      "epoch": 6.931920673729965,
      "grad_norm": 83.51680755615234,
      "learning_rate": 8.689259096887331e-06,
      "loss": 13.217,
      "step": 15950
    },
    {
      "epoch": 6.9536538983971745,
      "grad_norm": 32.16157150268555,
      "learning_rate": 8.684875054800527e-06,
      "loss": 13.274,
      "step": 16000
    },
    {
      "epoch": 6.975387123064385,
      "grad_norm": 31.57478904724121,
      "learning_rate": 8.680491012713724e-06,
      "loss": 13.2057,
      "step": 16050
    },
    {
      "epoch": 6.997120347731594,
      "grad_norm": 37.837303161621094,
      "learning_rate": 8.676106970626919e-06,
      "loss": 13.262,
      "step": 16100
    },
    {
      "epoch": 7.0186905732138,
      "grad_norm": 24.430326461791992,
      "learning_rate": 8.671722928540114e-06,
      "loss": 12.9144,
      "step": 16150
    },
    {
      "epoch": 7.040423797881011,
      "grad_norm": 45.298194885253906,
      "learning_rate": 8.66733888645331e-06,
      "loss": 12.8617,
      "step": 16200
    },
    {
      "epoch": 7.06215702254822,
      "grad_norm": 52.39512252807617,
      "learning_rate": 8.662954844366507e-06,
      "loss": 12.9514,
      "step": 16250
    },
    {
      "epoch": 7.083890247215431,
      "grad_norm": 35.9492073059082,
      "learning_rate": 8.658570802279702e-06,
      "loss": 12.9577,
      "step": 16300
    },
    {
      "epoch": 7.105623471882641,
      "grad_norm": 31.363454818725586,
      "learning_rate": 8.6541867601929e-06,
      "loss": 12.9849,
      "step": 16350
    },
    {
      "epoch": 7.1273566965498505,
      "grad_norm": 24.993553161621094,
      "learning_rate": 8.649802718106095e-06,
      "loss": 12.9269,
      "step": 16400
    },
    {
      "epoch": 7.149089921217061,
      "grad_norm": 28.327381134033203,
      "learning_rate": 8.64541867601929e-06,
      "loss": 12.941,
      "step": 16450
    },
    {
      "epoch": 7.17082314588427,
      "grad_norm": 30.908496856689453,
      "learning_rate": 8.641034633932487e-06,
      "loss": 13.0525,
      "step": 16500
    },
    {
      "epoch": 7.192556370551481,
      "grad_norm": 41.53740310668945,
      "learning_rate": 8.636650591845683e-06,
      "loss": 13.0038,
      "step": 16550
    },
    {
      "epoch": 7.21428959521869,
      "grad_norm": 34.16611862182617,
      "learning_rate": 8.632266549758878e-06,
      "loss": 12.9893,
      "step": 16600
    },
    {
      "epoch": 7.236022819885901,
      "grad_norm": 28.183107376098633,
      "learning_rate": 8.627882507672073e-06,
      "loss": 13.0103,
      "step": 16650
    },
    {
      "epoch": 7.25775604455311,
      "grad_norm": 28.345674514770508,
      "learning_rate": 8.62349846558527e-06,
      "loss": 12.9886,
      "step": 16700
    },
    {
      "epoch": 7.279489269220321,
      "grad_norm": 36.2637825012207,
      "learning_rate": 8.619114423498466e-06,
      "loss": 12.9905,
      "step": 16750
    },
    {
      "epoch": 7.301222493887531,
      "grad_norm": 32.89162826538086,
      "learning_rate": 8.614730381411663e-06,
      "loss": 12.9033,
      "step": 16800
    },
    {
      "epoch": 7.3229557185547405,
      "grad_norm": 31.151569366455078,
| "learning_rate": 8.610346339324858e-06, | |
| "loss": 13.026, | |
| "step": 16850 | |
| }, | |
| { | |
| "epoch": 7.344688943221951, | |
| "grad_norm": 32.4716682434082, | |
| "learning_rate": 8.605962297238054e-06, | |
| "loss": 12.9932, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 7.36642216788916, | |
| "grad_norm": 28.446046829223633, | |
| "learning_rate": 8.60157825515125e-06, | |
| "loss": 13.0201, | |
| "step": 16950 | |
| }, | |
| { | |
| "epoch": 7.388155392556371, | |
| "grad_norm": 27.000221252441406, | |
| "learning_rate": 8.597194213064446e-06, | |
| "loss": 13.0463, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 7.40988861722358, | |
| "grad_norm": 35.49698257446289, | |
| "learning_rate": 8.592810170977642e-06, | |
| "loss": 12.9461, | |
| "step": 17050 | |
| }, | |
| { | |
| "epoch": 7.431621841890791, | |
| "grad_norm": 48.70148849487305, | |
| "learning_rate": 8.588426128890837e-06, | |
| "loss": 13.0921, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 7.453355066558, | |
| "grad_norm": 28.99524688720703, | |
| "learning_rate": 8.584042086804034e-06, | |
| "loss": 12.9729, | |
| "step": 17150 | |
| }, | |
| { | |
| "epoch": 7.475088291225211, | |
| "grad_norm": 28.51788902282715, | |
| "learning_rate": 8.57965804471723e-06, | |
| "loss": 13.0311, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 7.496821515892421, | |
| "grad_norm": 48.5558967590332, | |
| "learning_rate": 8.575274002630427e-06, | |
| "loss": 13.0931, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 7.5185547405596305, | |
| "grad_norm": 35.883365631103516, | |
| "learning_rate": 8.570889960543622e-06, | |
| "loss": 13.0601, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 7.540287965226841, | |
| "grad_norm": 30.609474182128906, | |
| "learning_rate": 8.566505918456817e-06, | |
| "loss": 13.0277, | |
| "step": 17350 | |
| }, | |
| { | |
| "epoch": 7.56202118989405, | |
| "grad_norm": 31.2172794342041, | |
| "learning_rate": 8.562121876370014e-06, | |
| "loss": 12.9501, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 7.583754414561261, | |
| "grad_norm": 42.7708740234375, | |
| "learning_rate": 8.55773783428321e-06, | |
| "loss": 13.0667, | |
| "step": 17450 | |
| }, | |
| { | |
| "epoch": 7.60548763922847, | |
| "grad_norm": 30.39897346496582, | |
| "learning_rate": 8.553353792196407e-06, | |
| "loss": 13.0591, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 7.627220863895681, | |
| "grad_norm": 26.951528549194336, | |
| "learning_rate": 8.548969750109602e-06, | |
| "loss": 12.9949, | |
| "step": 17550 | |
| }, | |
| { | |
| "epoch": 7.64895408856289, | |
| "grad_norm": 33.658206939697266, | |
| "learning_rate": 8.544585708022798e-06, | |
| "loss": 13.0532, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 7.670687313230101, | |
| "grad_norm": 34.114768981933594, | |
| "learning_rate": 8.540201665935993e-06, | |
| "loss": 13.1035, | |
| "step": 17650 | |
| }, | |
| { | |
| "epoch": 7.692420537897311, | |
| "grad_norm": 29.691999435424805, | |
| "learning_rate": 8.53581762384919e-06, | |
| "loss": 13.0645, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 7.7141537625645205, | |
| "grad_norm": 39.269493103027344, | |
| "learning_rate": 8.531433581762385e-06, | |
| "loss": 13.112, | |
| "step": 17750 | |
| }, | |
| { | |
| "epoch": 7.735886987231731, | |
| "grad_norm": 37.816837310791016, | |
| "learning_rate": 8.527049539675581e-06, | |
| "loss": 13.0634, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 7.75762021189894, | |
| "grad_norm": 36.515132904052734, | |
| "learning_rate": 8.522665497588778e-06, | |
| "loss": 13.0395, | |
| "step": 17850 | |
| }, | |
| { | |
| "epoch": 7.779353436566151, | |
| "grad_norm": 22.76226043701172, | |
| "learning_rate": 8.518281455501973e-06, | |
| "loss": 13.0559, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 7.80108666123336, | |
| "grad_norm": 28.64872169494629, | |
| "learning_rate": 8.51389741341517e-06, | |
| "loss": 13.0638, | |
| "step": 17950 | |
| }, | |
| { | |
| "epoch": 7.822819885900571, | |
| "grad_norm": 41.4809684753418, | |
| "learning_rate": 8.509513371328366e-06, | |
| "loss": 13.0299, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 7.84455311056778, | |
| "grad_norm": 25.84028434753418, | |
| "learning_rate": 8.505129329241561e-06, | |
| "loss": 13.0003, | |
| "step": 18050 | |
| }, | |
| { | |
| "epoch": 7.8662863352349905, | |
| "grad_norm": 36.24126434326172, | |
| "learning_rate": 8.500745287154757e-06, | |
| "loss": 13.0231, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 7.888019559902201, | |
| "grad_norm": 19.62076187133789, | |
| "learning_rate": 8.496361245067954e-06, | |
| "loss": 13.006, | |
| "step": 18150 | |
| }, | |
| { | |
| "epoch": 7.9097527845694104, | |
| "grad_norm": 28.422643661499023, | |
| "learning_rate": 8.491977202981149e-06, | |
| "loss": 12.9981, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 7.931486009236621, | |
| "grad_norm": 36.77701187133789, | |
| "learning_rate": 8.487593160894344e-06, | |
| "loss": 13.1429, | |
| "step": 18250 | |
| }, | |
| { | |
| "epoch": 7.95321923390383, | |
| "grad_norm": 36.51480484008789, | |
| "learning_rate": 8.483209118807542e-06, | |
| "loss": 12.9689, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 7.974952458571041, | |
| "grad_norm": 30.303489685058594, | |
| "learning_rate": 8.478825076720737e-06, | |
| "loss": 13.0392, | |
| "step": 18350 | |
| }, | |
| { | |
| "epoch": 7.99668568323825, | |
| "grad_norm": 41.148353576660156, | |
| "learning_rate": 8.474441034633934e-06, | |
| "loss": 13.0697, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 8.018255908720457, | |
| "grad_norm": 30.144062042236328, | |
| "learning_rate": 8.47005699254713e-06, | |
| "loss": 12.7092, | |
| "step": 18450 | |
| }, | |
| { | |
| "epoch": 8.039989133387666, | |
| "grad_norm": 33.70432662963867, | |
| "learning_rate": 8.465672950460325e-06, | |
| "loss": 12.8033, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 8.061722358054876, | |
| "grad_norm": 25.66695785522461, | |
| "learning_rate": 8.46128890837352e-06, | |
| "loss": 12.7704, | |
| "step": 18550 | |
| }, | |
| { | |
| "epoch": 8.083455582722086, | |
| "grad_norm": 38.33973693847656, | |
| "learning_rate": 8.456904866286717e-06, | |
| "loss": 12.8512, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 8.105188807389297, | |
| "grad_norm": 25.794679641723633, | |
| "learning_rate": 8.452520824199914e-06, | |
| "loss": 12.7138, | |
| "step": 18650 | |
| }, | |
| { | |
| "epoch": 8.126922032056507, | |
| "grad_norm": 39.2582893371582, | |
| "learning_rate": 8.44813678211311e-06, | |
| "loss": 12.7657, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 8.148655256723716, | |
| "grad_norm": 30.886682510375977, | |
| "learning_rate": 8.443752740026305e-06, | |
| "loss": 12.7667, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 8.170388481390926, | |
| "grad_norm": 39.30559158325195, | |
| "learning_rate": 8.4393686979395e-06, | |
| "loss": 12.7548, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 8.192121706058137, | |
| "grad_norm": 22.945003509521484, | |
| "learning_rate": 8.434984655852698e-06, | |
| "loss": 12.8788, | |
| "step": 18850 | |
| }, | |
| { | |
| "epoch": 8.213854930725347, | |
| "grad_norm": 30.998369216918945, | |
| "learning_rate": 8.430600613765893e-06, | |
| "loss": 12.8048, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 8.235588155392556, | |
| "grad_norm": 29.44565773010254, | |
| "learning_rate": 8.426216571679088e-06, | |
| "loss": 12.7907, | |
| "step": 18950 | |
| }, | |
| { | |
| "epoch": 8.257321380059766, | |
| "grad_norm": 29.368488311767578, | |
| "learning_rate": 8.421832529592284e-06, | |
| "loss": 12.8157, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 8.279054604726976, | |
| "grad_norm": 28.4185791015625, | |
| "learning_rate": 8.41744848750548e-06, | |
| "loss": 12.8382, | |
| "step": 19050 | |
| }, | |
| { | |
| "epoch": 8.300787829394187, | |
| "grad_norm": 45.91888427734375, | |
| "learning_rate": 8.413064445418678e-06, | |
| "loss": 12.9013, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 8.322521054061397, | |
| "grad_norm": 36.90361022949219, | |
| "learning_rate": 8.408680403331873e-06, | |
| "loss": 12.8076, | |
| "step": 19150 | |
| }, | |
| { | |
| "epoch": 8.344254278728606, | |
| "grad_norm": 54.692935943603516, | |
| "learning_rate": 8.404296361245069e-06, | |
| "loss": 12.8288, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 8.365987503395816, | |
| "grad_norm": 27.947093963623047, | |
| "learning_rate": 8.399912319158264e-06, | |
| "loss": 12.8577, | |
| "step": 19250 | |
| }, | |
| { | |
| "epoch": 8.387720728063027, | |
| "grad_norm": 28.992555618286133, | |
| "learning_rate": 8.395528277071461e-06, | |
| "loss": 12.882, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 8.409453952730237, | |
| "grad_norm": 22.34044647216797, | |
| "learning_rate": 8.391144234984656e-06, | |
| "loss": 12.8171, | |
| "step": 19350 | |
| }, | |
| { | |
| "epoch": 8.431187177397446, | |
| "grad_norm": 50.96314239501953, | |
| "learning_rate": 8.386760192897852e-06, | |
| "loss": 12.8761, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 8.452920402064656, | |
| "grad_norm": Infinity, | |
| "learning_rate": 8.382376150811047e-06, | |
| "loss": 12.8613, | |
| "step": 19450 | |
| }, | |
| { | |
| "epoch": 8.474653626731866, | |
| "grad_norm": 25.97089195251465, | |
| "learning_rate": 8.377992108724244e-06, | |
| "loss": 12.8875, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 8.496386851399077, | |
| "grad_norm": 30.094532012939453, | |
| "learning_rate": 8.373608066637441e-06, | |
| "loss": 12.8784, | |
| "step": 19550 | |
| }, | |
| { | |
| "epoch": 8.518120076066285, | |
| "grad_norm": 37.806156158447266, | |
| "learning_rate": 8.369224024550637e-06, | |
| "loss": 12.8339, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 8.539853300733496, | |
| "grad_norm": 38.92607498168945, | |
| "learning_rate": 8.364839982463832e-06, | |
| "loss": 12.8541, | |
| "step": 19650 | |
| }, | |
| { | |
| "epoch": 8.561586525400706, | |
| "grad_norm": 31.54934310913086, | |
| "learning_rate": 8.360455940377028e-06, | |
| "loss": 12.8991, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 8.583319750067917, | |
| "grad_norm": 37.04362869262695, | |
| "learning_rate": 8.356071898290225e-06, | |
| "loss": 12.9116, | |
| "step": 19750 | |
| }, | |
| { | |
| "epoch": 8.605052974735127, | |
| "grad_norm": 38.93299865722656, | |
| "learning_rate": 8.35168785620342e-06, | |
| "loss": 12.8499, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 8.626786199402336, | |
| "grad_norm": 28.214290618896484, | |
| "learning_rate": 8.347303814116615e-06, | |
| "loss": 12.8512, | |
| "step": 19850 | |
| }, | |
| { | |
| "epoch": 8.648519424069546, | |
| "grad_norm": 27.576839447021484, | |
| "learning_rate": 8.34291977202981e-06, | |
| "loss": 12.8824, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 8.670252648736756, | |
| "grad_norm": 25.321149826049805, | |
| "learning_rate": 8.338535729943008e-06, | |
| "loss": 12.842, | |
| "step": 19950 | |
| }, | |
| { | |
| "epoch": 8.691985873403967, | |
| "grad_norm": 36.43674087524414, | |
| "learning_rate": 8.334151687856205e-06, | |
| "loss": 12.9156, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 8.691985873403967, | |
| "eval_cer": 0.07732709565131522, | |
| "eval_loss": 2.3227267265319824, | |
| "eval_runtime": 401.1503, | |
| "eval_samples_per_second": 13.476, | |
| "eval_steps_per_second": 3.37, | |
| "eval_wer": 0.23097817553776104, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 8.713719098071177, | |
| "grad_norm": 30.650314331054688, | |
| "learning_rate": 8.3297676457694e-06, | |
| "loss": 12.9089, | |
| "step": 20050 | |
| }, | |
| { | |
| "epoch": 8.735452322738386, | |
| "grad_norm": 27.448633193969727, | |
| "learning_rate": 8.325383603682596e-06, | |
| "loss": 12.8572, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 8.757185547405596, | |
| "grad_norm": 25.665332794189453, | |
| "learning_rate": 8.320999561595791e-06, | |
| "loss": 12.8087, | |
| "step": 20150 | |
| }, | |
| { | |
| "epoch": 8.778918772072807, | |
| "grad_norm": 43.74554443359375, | |
| "learning_rate": 8.316615519508988e-06, | |
| "loss": 12.915, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 8.800651996740017, | |
| "grad_norm": 31.74461555480957, | |
| "learning_rate": 8.312231477422184e-06, | |
| "loss": 12.8676, | |
| "step": 20250 | |
| }, | |
| { | |
| "epoch": 8.822385221407226, | |
| "grad_norm": 28.51342010498047, | |
| "learning_rate": 8.30784743533538e-06, | |
| "loss": 12.8645, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 8.844118446074436, | |
| "grad_norm": 27.660497665405273, | |
| "learning_rate": 8.303463393248576e-06, | |
| "loss": 12.9217, | |
| "step": 20350 | |
| }, | |
| { | |
| "epoch": 8.865851670741646, | |
| "grad_norm": 41.046485900878906, | |
| "learning_rate": 8.299079351161771e-06, | |
| "loss": 12.8472, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 8.887584895408857, | |
| "grad_norm": 50.21107482910156, | |
| "learning_rate": 8.294695309074969e-06, | |
| "loss": 12.8141, | |
| "step": 20450 | |
| }, | |
| { | |
| "epoch": 8.909318120076065, | |
| "grad_norm": 42.08512878417969, | |
| "learning_rate": 8.290311266988164e-06, | |
| "loss": 12.9162, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 8.931051344743276, | |
| "grad_norm": 22.199024200439453, | |
| "learning_rate": 8.28592722490136e-06, | |
| "loss": 12.8649, | |
| "step": 20550 | |
| }, | |
| { | |
| "epoch": 8.952784569410486, | |
| "grad_norm": 38.15290451049805, | |
| "learning_rate": 8.281543182814555e-06, | |
| "loss": 12.8547, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 8.974517794077697, | |
| "grad_norm": 35.076698303222656, | |
| "learning_rate": 8.277159140727752e-06, | |
| "loss": 12.8954, | |
| "step": 20650 | |
| }, | |
| { | |
| "epoch": 8.996251018744907, | |
| "grad_norm": 26.742168426513672, | |
| "learning_rate": 8.272775098640947e-06, | |
| "loss": 12.8845, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 9.017821244227113, | |
| "grad_norm": 18.43798828125, | |
| "learning_rate": 8.268391056554144e-06, | |
| "loss": 12.6111, | |
| "step": 20750 | |
| }, | |
| { | |
| "epoch": 9.039554468894321, | |
| "grad_norm": 22.483016967773438, | |
| "learning_rate": 8.26400701446734e-06, | |
| "loss": 12.6938, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 9.061287693561532, | |
| "grad_norm": 22.414525985717773, | |
| "learning_rate": 8.259622972380535e-06, | |
| "loss": 12.6499, | |
| "step": 20850 | |
| }, | |
| { | |
| "epoch": 9.083020918228742, | |
| "grad_norm": 33.88186264038086, | |
| "learning_rate": 8.255238930293732e-06, | |
| "loss": 12.5987, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 9.104754142895953, | |
| "grad_norm": 34.6947021484375, | |
| "learning_rate": 8.250854888206928e-06, | |
| "loss": 12.6804, | |
| "step": 20950 | |
| }, | |
| { | |
| "epoch": 9.126487367563163, | |
| "grad_norm": 22.22621726989746, | |
| "learning_rate": 8.246470846120123e-06, | |
| "loss": 12.7388, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 9.148220592230372, | |
| "grad_norm": 30.4085693359375, | |
| "learning_rate": 8.242086804033318e-06, | |
| "loss": 12.7085, | |
| "step": 21050 | |
| }, | |
| { | |
| "epoch": 9.169953816897582, | |
| "grad_norm": 131.27008056640625, | |
| "learning_rate": 8.237702761946515e-06, | |
| "loss": 12.7142, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 9.191687041564792, | |
| "grad_norm": 28.05132293701172, | |
| "learning_rate": 8.23331871985971e-06, | |
| "loss": 12.698, | |
| "step": 21150 | |
| }, | |
| { | |
| "epoch": 9.213420266232003, | |
| "grad_norm": 157.52548217773438, | |
| "learning_rate": 8.228934677772908e-06, | |
| "loss": 12.7275, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 9.235153490899211, | |
| "grad_norm": 29.362707138061523, | |
| "learning_rate": 8.224550635686103e-06, | |
| "loss": 12.648, | |
| "step": 21250 | |
| }, | |
| { | |
| "epoch": 9.256886715566422, | |
| "grad_norm": 27.221683502197266, | |
| "learning_rate": 8.220166593599299e-06, | |
| "loss": 12.7306, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 9.278619940233632, | |
| "grad_norm": 18.6680850982666, | |
| "learning_rate": 8.215782551512496e-06, | |
| "loss": 12.6896, | |
| "step": 21350 | |
| }, | |
| { | |
| "epoch": 9.300353164900843, | |
| "grad_norm": 35.81766128540039, | |
| "learning_rate": 8.211398509425691e-06, | |
| "loss": 12.6838, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 9.322086389568053, | |
| "grad_norm": 24.64043426513672, | |
| "learning_rate": 8.207014467338888e-06, | |
| "loss": 12.7201, | |
| "step": 21450 | |
| }, | |
| { | |
| "epoch": 9.343819614235262, | |
| "grad_norm": 41.39848327636719, | |
| "learning_rate": 8.202630425252084e-06, | |
| "loss": 12.7289, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 9.365552838902472, | |
| "grad_norm": 23.982431411743164, | |
| "learning_rate": 8.198246383165279e-06, | |
| "loss": 12.7145, | |
| "step": 21550 | |
| }, | |
| { | |
| "epoch": 9.387286063569682, | |
| "grad_norm": 25.513904571533203, | |
| "learning_rate": 8.193862341078474e-06, | |
| "loss": 12.6646, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 9.409019288236893, | |
| "grad_norm": 28.16943359375, | |
| "learning_rate": 8.189478298991671e-06, | |
| "loss": 12.7157, | |
| "step": 21650 | |
| }, | |
| { | |
| "epoch": 9.430752512904101, | |
| "grad_norm": 31.33350944519043, | |
| "learning_rate": 8.185094256904867e-06, | |
| "loss": 12.7245, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 9.452485737571312, | |
| "grad_norm": 22.30205726623535, | |
| "learning_rate": 8.180710214818062e-06, | |
| "loss": 12.7082, | |
| "step": 21750 | |
| }, | |
| { | |
| "epoch": 9.474218962238522, | |
| "grad_norm": 31.175230026245117, | |
| "learning_rate": 8.17632617273126e-06, | |
| "loss": 12.7716, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 9.495952186905733, | |
| "grad_norm": 24.61014747619629, | |
| "learning_rate": 8.171942130644455e-06, | |
| "loss": 12.7153, | |
| "step": 21850 | |
| }, | |
| { | |
| "epoch": 9.517685411572941, | |
| "grad_norm": 37.26193618774414, | |
| "learning_rate": 8.167558088557652e-06, | |
| "loss": 12.7623, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 9.539418636240152, | |
| "grad_norm": 29.6248779296875, | |
| "learning_rate": 8.163174046470847e-06, | |
| "loss": 12.7862, | |
| "step": 21950 | |
| }, | |
| { | |
| "epoch": 9.561151860907362, | |
| "grad_norm": 37.52980422973633, | |
| "learning_rate": 8.158790004384042e-06, | |
| "loss": 12.6912, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 9.582885085574572, | |
| "grad_norm": 35.345035552978516, | |
| "learning_rate": 8.154405962297238e-06, | |
| "loss": 12.677, | |
| "step": 22050 | |
| }, | |
| { | |
| "epoch": 9.604618310241783, | |
| "grad_norm": 32.45883560180664, | |
| "learning_rate": 8.150021920210435e-06, | |
| "loss": 12.6662, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 9.626351534908991, | |
| "grad_norm": 46.35236358642578, | |
| "learning_rate": 8.14563787812363e-06, | |
| "loss": 12.7472, | |
| "step": 22150 | |
| }, | |
| { | |
| "epoch": 9.648084759576202, | |
| "grad_norm": 26.202049255371094, | |
| "learning_rate": 8.141253836036826e-06, | |
| "loss": 12.7174, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 9.669817984243412, | |
| "grad_norm": 27.350576400756836, | |
| "learning_rate": 8.136869793950023e-06, | |
| "loss": 12.6917, | |
| "step": 22250 | |
| }, | |
| { | |
| "epoch": 9.691551208910623, | |
| "grad_norm": 32.96540451049805, | |
| "learning_rate": 8.132485751863218e-06, | |
| "loss": 12.7865, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 9.713284433577833, | |
| "grad_norm": 33.34325408935547, | |
| "learning_rate": 8.128101709776415e-06, | |
| "loss": 12.8177, | |
| "step": 22350 | |
| }, | |
| { | |
| "epoch": 9.735017658245042, | |
| "grad_norm": 24.0529727935791, | |
| "learning_rate": 8.12371766768961e-06, | |
| "loss": 12.7816, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 9.756750882912252, | |
| "grad_norm": 31.504335403442383, | |
| "learning_rate": 8.119333625602806e-06, | |
| "loss": 12.7014, | |
| "step": 22450 | |
| }, | |
| { | |
| "epoch": 9.778484107579462, | |
| "grad_norm": 37.35165023803711, | |
| "learning_rate": 8.114949583516001e-06, | |
| "loss": 12.6674, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 9.800217332246673, | |
| "grad_norm": 22.923002243041992, | |
| "learning_rate": 8.110565541429199e-06, | |
| "loss": 12.7619, | |
| "step": 22550 | |
| }, | |
| { | |
| "epoch": 9.821950556913881, | |
| "grad_norm": 29.871366500854492, | |
| "learning_rate": 8.106181499342396e-06, | |
| "loss": 12.7368, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 9.843683781581092, | |
| "grad_norm": 40.105369567871094, | |
| "learning_rate": 8.101797457255591e-06, | |
| "loss": 12.6962, | |
| "step": 22650 | |
| }, | |
| { | |
| "epoch": 9.865417006248302, | |
| "grad_norm": 25.92096710205078, | |
| "learning_rate": 8.097413415168786e-06, | |
| "loss": 12.686, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 9.887150230915513, | |
| "grad_norm": 42.663368225097656, | |
| "learning_rate": 8.093029373081982e-06, | |
| "loss": 12.7663, | |
| "step": 22750 | |
| }, | |
| { | |
| "epoch": 9.908883455582721, | |
| "grad_norm": 30.958925247192383, | |
| "learning_rate": 8.088645330995179e-06, | |
| "loss": 12.7574, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 9.930616680249932, | |
| "grad_norm": 32.973209381103516, | |
| "learning_rate": 8.084261288908374e-06, | |
| "loss": 12.7376, | |
| "step": 22850 | |
| }, | |
| { | |
| "epoch": 9.952349904917142, | |
| "grad_norm": 24.848648071289062, | |
| "learning_rate": 8.07987724682157e-06, | |
| "loss": 12.7988, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 9.974083129584352, | |
| "grad_norm": 38.90625762939453, | |
| "learning_rate": 8.075493204734765e-06, | |
| "loss": 12.7848, | |
| "step": 22950 | |
| }, | |
| { | |
| "epoch": 9.995816354251563, | |
| "grad_norm": 169.55076599121094, | |
| "learning_rate": 8.071109162647962e-06, | |
| "loss": 12.7591, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 10.017386579733769, | |
| "grad_norm": 25.580976486206055, | |
| "learning_rate": 8.06672512056116e-06, | |
| "loss": 12.4225, | |
| "step": 23050 | |
| }, | |
| { | |
| "epoch": 10.039119804400977, | |
| "grad_norm": 35.71001434326172, | |
| "learning_rate": 8.062341078474355e-06, | |
| "loss": 12.6339, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 10.060853029068188, | |
| "grad_norm": 27.853500366210938, | |
| "learning_rate": 8.05795703638755e-06, | |
| "loss": 12.5467, | |
| "step": 23150 | |
| }, | |
| { | |
| "epoch": 10.082586253735398, | |
| "grad_norm": 25.689022064208984, | |
| "learning_rate": 8.053572994300745e-06, | |
| "loss": 12.6073, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 10.104319478402608, | |
| "grad_norm": 19.449281692504883, | |
| "learning_rate": 8.049188952213942e-06, | |
| "loss": 12.65, | |
| "step": 23250 | |
| }, | |
| { | |
| "epoch": 10.126052703069817, | |
| "grad_norm": 50.91756820678711, | |
| "learning_rate": 8.044804910127138e-06, | |
| "loss": 12.6245, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 10.147785927737027, | |
| "grad_norm": 30.20039939880371, | |
| "learning_rate": 8.040420868040333e-06, | |
| "loss": 12.5309, | |
| "step": 23350 | |
| }, | |
| { | |
| "epoch": 10.169519152404238, | |
| "grad_norm": 19.78704071044922, | |
| "learning_rate": 8.036036825953529e-06, | |
| "loss": 12.5593, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 10.191252377071448, | |
| "grad_norm": 19.870885848999023, | |
| "learning_rate": 8.031652783866726e-06, | |
| "loss": 12.5285, | |
| "step": 23450 | |
| }, | |
| { | |
| "epoch": 10.212985601738659, | |
| "grad_norm": 28.326723098754883, | |
| "learning_rate": 8.027268741779923e-06, | |
| "loss": 12.5193, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 10.234718826405867, | |
| "grad_norm": 27.501436233520508, | |
| "learning_rate": 8.022884699693118e-06, | |
| "loss": 12.5663, | |
| "step": 23550 | |
| }, | |
| { | |
| "epoch": 10.256452051073078, | |
| "grad_norm": 28.51038932800293, | |
| "learning_rate": 8.018500657606314e-06, | |
| "loss": 12.6105, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 10.278185275740288, | |
| "grad_norm": 38.11888885498047, | |
| "learning_rate": 8.014116615519509e-06, | |
| "loss": 12.6369, | |
| "step": 23650 | |
| }, | |
| { | |
| "epoch": 10.299918500407498, | |
| "grad_norm": 56.63121032714844, | |
| "learning_rate": 8.009732573432706e-06, | |
| "loss": 12.5986, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 10.321651725074709, | |
| "grad_norm": 30.95232582092285, | |
| "learning_rate": 8.005348531345901e-06, | |
| "loss": 12.6104, | |
| "step": 23750 | |
| }, | |
| { | |
| "epoch": 10.343384949741917, | |
| "grad_norm": 46.855831146240234, | |
| "learning_rate": 8.000964489259098e-06, | |
| "loss": 12.6277, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 10.365118174409128, | |
| "grad_norm": 38.9176139831543, | |
| "learning_rate": 7.996580447172292e-06, | |
| "loss": 12.5793, | |
| "step": 23850 | |
| }, | |
| { | |
| "epoch": 10.386851399076338, | |
| "grad_norm": 20.209339141845703, | |
| "learning_rate": 7.99219640508549e-06, | |
| "loss": 12.5781, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 10.408584623743549, | |
| "grad_norm": 34.40525817871094, | |
| "learning_rate": 7.987812362998686e-06, | |
| "loss": 12.6018, | |
| "step": 23950 | |
| }, | |
| { | |
| "epoch": 10.430317848410757, | |
| "grad_norm": 44.757041931152344, | |
| "learning_rate": 7.983428320911882e-06, | |
| "loss": 12.6543, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 10.452051073077968, | |
| "grad_norm": 40.83699035644531, | |
| "learning_rate": 7.979044278825077e-06, | |
| "loss": 12.6135, | |
| "step": 24050 | |
| }, | |
| { | |
| "epoch": 10.473784297745178, | |
| "grad_norm": 31.089038848876953, | |
| "learning_rate": 7.974660236738272e-06, | |
| "loss": 12.6269, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 10.495517522412388, | |
| "grad_norm": 33.82300567626953, | |
| "learning_rate": 7.97027619465147e-06, | |
| "loss": 12.622, | |
| "step": 24150 | |
| }, | |
| { | |
| "epoch": 10.517250747079597, | |
| "grad_norm": 25.88127899169922, | |
| "learning_rate": 7.965892152564665e-06, | |
| "loss": 12.6332, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 10.538983971746807, | |
| "grad_norm": 29.95918083190918, | |
| "learning_rate": 7.961508110477862e-06, | |
| "loss": 12.6166, | |
| "step": 24250 | |
| }, | |
| { | |
| "epoch": 10.560717196414018, | |
| "grad_norm": 34.399444580078125, | |
| "learning_rate": 7.957124068391057e-06, | |
| "loss": 12.5997, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 10.582450421081228, | |
| "grad_norm": 26.007383346557617, | |
| "learning_rate": 7.952740026304253e-06, | |
| "loss": 12.5829, | |
| "step": 24350 | |
| }, | |
| { | |
| "epoch": 10.604183645748439, | |
| "grad_norm": 14.864594459533691, | |
| "learning_rate": 7.94835598421745e-06, | |
| "loss": 12.6532, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 10.625916870415647, | |
| "grad_norm": 31.178630828857422, | |
| "learning_rate": 7.943971942130645e-06, | |
| "loss": 12.5909, | |
| "step": 24450 | |
| }, | |
| { | |
| "epoch": 10.647650095082858, | |
| "grad_norm": 31.065549850463867, | |
| "learning_rate": 7.93958790004384e-06, | |
| "loss": 12.5674, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 10.669383319750068, | |
| "grad_norm": 28.21125030517578, | |
| "learning_rate": 7.935203857957036e-06, | |
| "loss": 12.6476, | |
| "step": 24550 | |
| }, | |
| { | |
| "epoch": 10.691116544417278, | |
| "grad_norm": 31.474586486816406, | |
| "learning_rate": 7.930819815870233e-06, | |
| "loss": 12.5938, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 10.712849769084489, | |
| "grad_norm": 26.097501754760742, | |
| "learning_rate": 7.926435773783428e-06, | |
| "loss": 12.6483, | |
| "step": 24650 | |
| }, | |
| { | |
| "epoch": 10.734582993751697, | |
| "grad_norm": 40.45956039428711, | |
| "learning_rate": 7.922051731696626e-06, | |
| "loss": 12.6591, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 10.756316218418908, | |
| "grad_norm": 23.737592697143555, | |
| "learning_rate": 7.917667689609821e-06, | |
| "loss": 12.5698, | |
| "step": 24750 | |
| }, | |
| { | |
| "epoch": 10.778049443086118, | |
| "grad_norm": 32.13654708862305, | |
| "learning_rate": 7.913283647523016e-06, | |
| "loss": 12.5617, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 10.799782667753329, | |
| "grad_norm": 28.451892852783203, | |
| "learning_rate": 7.908899605436213e-06, | |
| "loss": 12.6304, | |
| "step": 24850 | |
| }, | |
| { | |
| "epoch": 10.821515892420537, | |
| "grad_norm": 37.13362121582031, | |
| "learning_rate": 7.904515563349409e-06, | |
| "loss": 12.6649, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 10.843249117087748, | |
| "grad_norm": 45.161277770996094, | |
| "learning_rate": 7.900131521262606e-06, | |
| "loss": 12.6335, | |
| "step": 24950 | |
| }, | |
| { | |
| "epoch": 10.864982341754958, | |
| "grad_norm": 25.36030387878418, | |
| "learning_rate": 7.8957474791758e-06, | |
| "loss": 12.7371, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 10.886715566422168, | |
| "grad_norm": 38.44227981567383, | |
| "learning_rate": 7.891363437088997e-06, | |
| "loss": 12.6187, | |
| "step": 25050 | |
| }, | |
| { | |
| "epoch": 10.908448791089377, | |
| "grad_norm": 46.692874908447266, | |
| "learning_rate": 7.886979395002192e-06, | |
| "loss": 12.6517, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 10.930182015756587, | |
| "grad_norm": 28.845399856567383, | |
| "learning_rate": 7.882595352915389e-06, | |
| "loss": 12.5677, | |
| "step": 25150 | |
| }, | |
| { | |
| "epoch": 10.951915240423798, | |
| "grad_norm": 31.64191436767578, | |
| "learning_rate": 7.878211310828585e-06, | |
| "loss": 12.6347, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 10.973648465091008, | |
| "grad_norm": 32.57988357543945, | |
| "learning_rate": 7.87382726874178e-06, | |
| "loss": 12.5652, | |
| "step": 25250 | |
| }, | |
| { | |
| "epoch": 10.995381689758219, | |
| "grad_norm": 28.151342391967773, | |
| "learning_rate": 7.869443226654977e-06, | |
| "loss": 12.5981, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 11.016951915240424, | |
| "grad_norm": 29.2868595123291, | |
| "learning_rate": 7.865059184568172e-06, | |
| "loss": 12.4366, | |
| "step": 25350 | |
| }, | |
| { | |
| "epoch": 11.038685139907633, | |
| "grad_norm": 31.722579956054688, | |
| "learning_rate": 7.86067514248137e-06, | |
| "loss": 12.4879, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 11.060418364574844, | |
| "grad_norm": 29.232097625732422, | |
| "learning_rate": 7.856291100394565e-06, | |
| "loss": 12.4597, | |
| "step": 25450 | |
| }, | |
| { | |
| "epoch": 11.082151589242054, | |
| "grad_norm": 18.49676513671875, | |
| "learning_rate": 7.85190705830776e-06, | |
| "loss": 12.4631, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 11.103884813909264, | |
| "grad_norm": 27.89682388305664, | |
| "learning_rate": 7.847523016220956e-06, | |
| "loss": 12.4507, | |
| "step": 25550 | |
| }, | |
| { | |
| "epoch": 11.125618038576473, | |
| "grad_norm": 30.45709800720215, | |
| "learning_rate": 7.843138974134153e-06, | |
| "loss": 12.5008, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 11.147351263243683, | |
| "grad_norm": 62.570823669433594, | |
| "learning_rate": 7.838754932047348e-06, | |
| "loss": 12.5107, | |
| "step": 25650 | |
| }, | |
| { | |
| "epoch": 11.169084487910894, | |
| "grad_norm": 24.397315979003906, | |
| "learning_rate": 7.834370889960543e-06, | |
| "loss": 12.5059, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 11.190817712578104, | |
| "grad_norm": 18.074167251586914, | |
| "learning_rate": 7.82998684787374e-06, | |
| "loss": 12.5071, | |
| "step": 25750 | |
| }, | |
| { | |
| "epoch": 11.212550937245314, | |
| "grad_norm": 20.450908660888672, | |
| "learning_rate": 7.825602805786936e-06, | |
| "loss": 12.5048, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 11.234284161912523, | |
| "grad_norm": 19.00213623046875, | |
| "learning_rate": 7.821218763700133e-06, | |
| "loss": 12.4887, | |
| "step": 25850 | |
| }, | |
| { | |
| "epoch": 11.256017386579733, | |
| "grad_norm": 23.276472091674805, | |
| "learning_rate": 7.816834721613328e-06, | |
| "loss": 12.5311, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 11.277750611246944, | |
| "grad_norm": 33.67416763305664, | |
| "learning_rate": 7.812450679526524e-06, | |
| "loss": 12.5503, | |
| "step": 25950 | |
| }, | |
| { | |
| "epoch": 11.299483835914154, | |
| "grad_norm": 17.561626434326172, | |
| "learning_rate": 7.80806663743972e-06, | |
| "loss": 12.4831, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 11.321217060581363, | |
| "grad_norm": 24.35294532775879, | |
| "learning_rate": 7.803682595352916e-06, | |
| "loss": 12.4869, | |
| "step": 26050 | |
| }, | |
| { | |
| "epoch": 11.342950285248573, | |
| "grad_norm": 16.80247688293457, | |
| "learning_rate": 7.799298553266113e-06, | |
| "loss": 12.5581, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 11.364683509915784, | |
| "grad_norm": 22.540014266967773, | |
| "learning_rate": 7.794914511179307e-06, | |
| "loss": 12.5552, | |
| "step": 26150 | |
| }, | |
| { | |
| "epoch": 11.386416734582994, | |
| "grad_norm": 23.270639419555664, | |
| "learning_rate": 7.790530469092504e-06, | |
| "loss": 12.5005, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 11.408149959250204, | |
| "grad_norm": 27.789560317993164, | |
| "learning_rate": 7.7861464270057e-06, | |
| "loss": 12.5405, | |
| "step": 26250 | |
| }, | |
| { | |
| "epoch": 11.429883183917413, | |
| "grad_norm": 24.1334285736084, | |
| "learning_rate": 7.781762384918897e-06, | |
| "loss": 12.5056, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 11.451616408584623, | |
| "grad_norm": 35.342288970947266, | |
| "learning_rate": 7.777378342832092e-06, | |
| "loss": 12.501, | |
| "step": 26350 | |
| }, | |
| { | |
| "epoch": 11.473349633251834, | |
| "grad_norm": 27.646997451782227, | |
| "learning_rate": 7.772994300745287e-06, | |
| "loss": 12.4571, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 11.495082857919044, | |
| "grad_norm": 43.06098937988281, | |
| "learning_rate": 7.768610258658483e-06, | |
| "loss": 12.5856, | |
| "step": 26450 | |
| }, | |
| { | |
| "epoch": 11.516816082586253, | |
| "grad_norm": 21.487150192260742, | |
| "learning_rate": 7.76422621657168e-06, | |
| "loss": 12.4849, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 11.538549307253463, | |
| "grad_norm": 21.75229835510254, | |
| "learning_rate": 7.759842174484877e-06, | |
| "loss": 12.5192, | |
| "step": 26550 | |
| }, | |
| { | |
| "epoch": 11.560282531920674, | |
| "grad_norm": 23.02396011352539, | |
| "learning_rate": 7.755458132398072e-06, | |
| "loss": 12.5011, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 11.582015756587884, | |
| "grad_norm": 21.738445281982422, | |
| "learning_rate": 7.751074090311268e-06, | |
| "loss": 12.5525, | |
| "step": 26650 | |
| }, | |
| { | |
| "epoch": 11.603748981255094, | |
| "grad_norm": 38.93478775024414, | |
| "learning_rate": 7.746690048224463e-06, | |
| "loss": 12.4925, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 11.625482205922303, | |
| "grad_norm": 30.070697784423828, | |
| "learning_rate": 7.74230600613766e-06, | |
| "loss": 12.5598, | |
| "step": 26750 | |
| }, | |
| { | |
| "epoch": 11.647215430589513, | |
| "grad_norm": 44.55253982543945, | |
| "learning_rate": 7.737921964050856e-06, | |
| "loss": 12.4896, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 11.668948655256724, | |
| "grad_norm": 23.052288055419922, | |
| "learning_rate": 7.733537921964051e-06, | |
| "loss": 12.5198, | |
| "step": 26850 | |
| }, | |
| { | |
| "epoch": 11.690681879923934, | |
| "grad_norm": 24.383729934692383, | |
| "learning_rate": 7.729153879877246e-06, | |
| "loss": 12.5321, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 11.712415104591145, | |
| "grad_norm": 23.777788162231445, | |
| "learning_rate": 7.724769837790443e-06, | |
| "loss": 12.5403, | |
| "step": 26950 | |
| }, | |
| { | |
| "epoch": 11.734148329258353, | |
| "grad_norm": 22.8085994720459, | |
| "learning_rate": 7.72038579570364e-06, | |
| "loss": 12.5297, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 11.755881553925564, | |
| "grad_norm": 28.690683364868164, | |
| "learning_rate": 7.716001753616836e-06, | |
| "loss": 12.5626, | |
| "step": 27050 | |
| }, | |
| { | |
| "epoch": 11.777614778592774, | |
| "grad_norm": 23.2988338470459, | |
| "learning_rate": 7.711617711530031e-06, | |
| "loss": 12.4646, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 11.799348003259984, | |
| "grad_norm": 24.85117530822754, | |
| "learning_rate": 7.707233669443227e-06, | |
| "loss": 12.4886, | |
| "step": 27150 | |
| }, | |
| { | |
| "epoch": 11.821081227927193, | |
| "grad_norm": 34.84917449951172, | |
| "learning_rate": 7.702849627356424e-06, | |
| "loss": 12.578, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 11.842814452594403, | |
| "grad_norm": 27.57342529296875, | |
| "learning_rate": 7.698465585269619e-06, | |
| "loss": 12.5423, | |
| "step": 27250 | |
| }, | |
| { | |
| "epoch": 11.864547677261614, | |
| "grad_norm": 21.665023803710938, | |
| "learning_rate": 7.694081543182815e-06, | |
| "loss": 12.4848, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 11.886280901928824, | |
| "grad_norm": 20.787555694580078, | |
| "learning_rate": 7.68969750109601e-06, | |
| "loss": 12.4976, | |
| "step": 27350 | |
| }, | |
| { | |
| "epoch": 11.908014126596033, | |
| "grad_norm": 42.406837463378906, | |
| "learning_rate": 7.685313459009207e-06, | |
| "loss": 12.5549, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 11.929747351263243, | |
| "grad_norm": 23.60106658935547, | |
| "learning_rate": 7.680929416922404e-06, | |
| "loss": 12.5492, | |
| "step": 27450 | |
| }, | |
| { | |
| "epoch": 11.951480575930454, | |
| "grad_norm": 21.591079711914062, | |
| "learning_rate": 7.6765453748356e-06, | |
| "loss": 12.5018, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 11.973213800597664, | |
| "grad_norm": 32.685333251953125, | |
| "learning_rate": 7.672161332748795e-06, | |
| "loss": 12.5378, | |
| "step": 27550 | |
| }, | |
| { | |
| "epoch": 11.994947025264874, | |
| "grad_norm": 26.88076400756836, | |
| "learning_rate": 7.66777729066199e-06, | |
| "loss": 12.5529, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 12.01651725074708, | |
| "grad_norm": 19.660898208618164, | |
| "learning_rate": 7.663393248575187e-06, | |
| "loss": 12.3944, | |
| "step": 27650 | |
| }, | |
| { | |
| "epoch": 12.038250475414289, | |
| "grad_norm": 36.72605514526367, | |
| "learning_rate": 7.659009206488383e-06, | |
| "loss": 12.359, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 12.0599837000815, | |
| "grad_norm": 27.864477157592773, | |
| "learning_rate": 7.65462516440158e-06, | |
| "loss": 12.3951, | |
| "step": 27750 | |
| }, | |
| { | |
| "epoch": 12.08171692474871, | |
| "grad_norm": 34.72395324707031, | |
| "learning_rate": 7.650241122314775e-06, | |
| "loss": 12.4259, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 12.10345014941592, | |
| "grad_norm": 20.68131446838379, | |
| "learning_rate": 7.64585708022797e-06, | |
| "loss": 12.4737, | |
| "step": 27850 | |
| }, | |
| { | |
| "epoch": 12.125183374083129, | |
| "grad_norm": 27.369903564453125, | |
| "learning_rate": 7.641473038141168e-06, | |
| "loss": 12.4838, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 12.14691659875034, | |
| "grad_norm": 14.568199157714844, | |
| "learning_rate": 7.637088996054363e-06, | |
| "loss": 12.3812, | |
| "step": 27950 | |
| }, | |
| { | |
| "epoch": 12.16864982341755, | |
| "grad_norm": 20.099998474121094, | |
| "learning_rate": 7.632704953967558e-06, | |
| "loss": 12.4168, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 12.19038304808476, | |
| "grad_norm": 21.41561508178711, | |
| "learning_rate": 7.628320911880755e-06, | |
| "loss": 12.3799, | |
| "step": 28050 | |
| }, | |
| { | |
| "epoch": 12.21211627275197, | |
| "grad_norm": 23.49574851989746, | |
| "learning_rate": 7.623936869793951e-06, | |
| "loss": 12.4527, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 12.233849497419179, | |
| "grad_norm": 30.164730072021484, | |
| "learning_rate": 7.619552827707146e-06, | |
| "loss": 12.4353, | |
| "step": 28150 | |
| }, | |
| { | |
| "epoch": 12.25558272208639, | |
| "grad_norm": 32.27763748168945, | |
| "learning_rate": 7.6151687856203425e-06, | |
| "loss": 12.4808, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 12.2773159467536, | |
| "grad_norm": 36.46564483642578, | |
| "learning_rate": 7.610784743533538e-06, | |
| "loss": 12.3987, | |
| "step": 28250 | |
| }, | |
| { | |
| "epoch": 12.29904917142081, | |
| "grad_norm": 19.888980865478516, | |
| "learning_rate": 7.606400701446734e-06, | |
| "loss": 12.3889, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 12.320782396088019, | |
| "grad_norm": 20.877737045288086, | |
| "learning_rate": 7.602016659359931e-06, | |
| "loss": 12.4598, | |
| "step": 28350 | |
| }, | |
| { | |
| "epoch": 12.34251562075523, | |
| "grad_norm": 20.208404541015625, | |
| "learning_rate": 7.5976326172731266e-06, | |
| "loss": 12.3682, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 12.36424884542244, | |
| "grad_norm": 48.63652801513672, | |
| "learning_rate": 7.593248575186323e-06, | |
| "loss": 12.3874, | |
| "step": 28450 | |
| }, | |
| { | |
| "epoch": 12.38598207008965, | |
| "grad_norm": 23.263282775878906, | |
| "learning_rate": 7.588864533099518e-06, | |
| "loss": 12.4161, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 12.40771529475686, | |
| "grad_norm": 23.76000213623047, | |
| "learning_rate": 7.5844804910127144e-06, | |
| "loss": 12.4247, | |
| "step": 28550 | |
| }, | |
| { | |
| "epoch": 12.429448519424069, | |
| "grad_norm": 62.45661544799805, | |
| "learning_rate": 7.58009644892591e-06, | |
| "loss": 12.4226, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 12.45118174409128, | |
| "grad_norm": 33.05659484863281, | |
| "learning_rate": 7.575712406839106e-06, | |
| "loss": 12.493, | |
| "step": 28650 | |
| }, | |
| { | |
| "epoch": 12.47291496875849, | |
| "grad_norm": 23.853660583496094, | |
| "learning_rate": 7.5713283647523014e-06, | |
| "loss": 12.4388, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 12.4946481934257, | |
| "grad_norm": 30.970672607421875, | |
| "learning_rate": 7.5669443226654985e-06, | |
| "loss": 12.4721, | |
| "step": 28750 | |
| }, | |
| { | |
| "epoch": 12.516381418092909, | |
| "grad_norm": 20.660356521606445, | |
| "learning_rate": 7.562560280578695e-06, | |
| "loss": 12.4463, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 12.538114642760119, | |
| "grad_norm": 27.25446319580078, | |
| "learning_rate": 7.55817623849189e-06, | |
| "loss": 12.4359, | |
| "step": 28850 | |
| }, | |
| { | |
| "epoch": 12.55984786742733, | |
| "grad_norm": 19.96375274658203, | |
| "learning_rate": 7.553792196405086e-06, | |
| "loss": 12.4274, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 12.58158109209454, | |
| "grad_norm": 25.133895874023438, | |
| "learning_rate": 7.549408154318282e-06, | |
| "loss": 12.4288, | |
| "step": 28950 | |
| }, | |
| { | |
| "epoch": 12.60331431676175, | |
| "grad_norm": 55.64627456665039, | |
| "learning_rate": 7.545024112231478e-06, | |
| "loss": 12.4451, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 12.625047541428959, | |
| "grad_norm": 70.62721252441406, | |
| "learning_rate": 7.540640070144673e-06, | |
| "loss": 12.451, | |
| "step": 29050 | |
| }, | |
| { | |
| "epoch": 12.64678076609617, | |
| "grad_norm": 22.789186477661133, | |
| "learning_rate": 7.5362560280578705e-06, | |
| "loss": 12.43, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 12.66851399076338, | |
| "grad_norm": 25.138248443603516, | |
| "learning_rate": 7.531871985971065e-06, | |
| "loss": 12.4, | |
| "step": 29150 | |
| }, | |
| { | |
| "epoch": 12.69024721543059, | |
| "grad_norm": 18.74398422241211, | |
| "learning_rate": 7.527487943884262e-06, | |
| "loss": 12.4338, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 12.711980440097799, | |
| "grad_norm": 28.796159744262695, | |
| "learning_rate": 7.523103901797458e-06, | |
| "loss": 12.473, | |
| "step": 29250 | |
| }, | |
| { | |
| "epoch": 12.733713664765009, | |
| "grad_norm": 28.044872283935547, | |
| "learning_rate": 7.518719859710654e-06, | |
| "loss": 12.4155, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 12.75544688943222, | |
| "grad_norm": 21.100650787353516, | |
| "learning_rate": 7.51433581762385e-06, | |
| "loss": 12.4329, | |
| "step": 29350 | |
| }, | |
| { | |
| "epoch": 12.77718011409943, | |
| "grad_norm": 24.12652015686035, | |
| "learning_rate": 7.509951775537045e-06, | |
| "loss": 12.4504, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 12.79891333876664, | |
| "grad_norm": 18.889480590820312, | |
| "learning_rate": 7.5055677334502416e-06, | |
| "loss": 12.3981, | |
| "step": 29450 | |
| }, | |
| { | |
| "epoch": 12.820646563433849, | |
| "grad_norm": 20.395387649536133, | |
| "learning_rate": 7.501183691363437e-06, | |
| "loss": 12.4834, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 12.84237978810106, | |
| "grad_norm": 34.01985168457031, | |
| "learning_rate": 7.496799649276634e-06, | |
| "loss": 12.4485, | |
| "step": 29550 | |
| }, | |
| { | |
| "epoch": 12.86411301276827, | |
| "grad_norm": 36.57313537597656, | |
| "learning_rate": 7.49241560718983e-06, | |
| "loss": 12.5063, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 12.88584623743548, | |
| "grad_norm": 21.946285247802734, | |
| "learning_rate": 7.488031565103026e-06, | |
| "loss": 12.501, | |
| "step": 29650 | |
| }, | |
| { | |
| "epoch": 12.907579462102689, | |
| "grad_norm": 26.948814392089844, | |
| "learning_rate": 7.483647523016222e-06, | |
| "loss": 12.5122, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 12.929312686769899, | |
| "grad_norm": 31.4482364654541, | |
| "learning_rate": 7.479263480929417e-06, | |
| "loss": 12.4543, | |
| "step": 29750 | |
| }, | |
| { | |
| "epoch": 12.95104591143711, | |
| "grad_norm": 35.19594192504883, | |
| "learning_rate": 7.4748794388426135e-06, | |
| "loss": 12.4657, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 12.97277913610432, | |
| "grad_norm": 23.498001098632812, | |
| "learning_rate": 7.470495396755809e-06, | |
| "loss": 12.4462, | |
| "step": 29850 | |
| }, | |
| { | |
| "epoch": 12.99451236077153, | |
| "grad_norm": 48.50201416015625, | |
| "learning_rate": 7.466111354669006e-06, | |
| "loss": 12.4134, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 13.016082586253736, | |
| "grad_norm": 25.189435958862305, | |
| "learning_rate": 7.461727312582201e-06, | |
| "loss": 12.3134, | |
| "step": 29950 | |
| }, | |
| { | |
| "epoch": 13.037815810920945, | |
| "grad_norm": 21.985063552856445, | |
| "learning_rate": 7.457343270495398e-06, | |
| "loss": 12.3299, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 13.037815810920945, | |
| "eval_cer": 0.0770617061459272, | |
| "eval_loss": 2.334705352783203, | |
| "eval_runtime": 399.4375, | |
| "eval_samples_per_second": 13.534, | |
| "eval_steps_per_second": 3.385, | |
| "eval_wer": 0.23019312293923694, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 13.059549035588155, | |
| "grad_norm": 15.290221214294434, | |
| "learning_rate": 7.452959228408594e-06, | |
| "loss": 12.3108, | |
| "step": 30050 | |
| }, | |
| { | |
| "epoch": 13.081282260255366, | |
| "grad_norm": 26.75568389892578, | |
| "learning_rate": 7.448575186321789e-06, | |
| "loss": 12.3347, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 13.103015484922576, | |
| "grad_norm": 28.02945327758789, | |
| "learning_rate": 7.4441911442349854e-06, | |
| "loss": 12.3172, | |
| "step": 30150 | |
| }, | |
| { | |
| "epoch": 13.124748709589785, | |
| "grad_norm": 17.39537811279297, | |
| "learning_rate": 7.439807102148181e-06, | |
| "loss": 12.3004, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 13.146481934256995, | |
| "grad_norm": 21.168519973754883, | |
| "learning_rate": 7.435423060061377e-06, | |
| "loss": 12.3882, | |
| "step": 30250 | |
| }, | |
| { | |
| "epoch": 13.168215158924205, | |
| "grad_norm": 24.02804946899414, | |
| "learning_rate": 7.4310390179745725e-06, | |
| "loss": 12.3512, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 13.189948383591416, | |
| "grad_norm": 25.33257484436035, | |
| "learning_rate": 7.4266549758877695e-06, | |
| "loss": 12.3121, | |
| "step": 30350 | |
| }, | |
| { | |
| "epoch": 13.211681608258626, | |
| "grad_norm": 20.40574073791504, | |
| "learning_rate": 7.422270933800965e-06, | |
| "loss": 12.3737, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 13.233414832925835, | |
| "grad_norm": 25.527008056640625, | |
| "learning_rate": 7.417886891714161e-06, | |
| "loss": 12.3575, | |
| "step": 30450 | |
| }, | |
| { | |
| "epoch": 13.255148057593045, | |
| "grad_norm": 23.7490291595459, | |
| "learning_rate": 7.413502849627357e-06, | |
| "loss": 12.3835, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 13.276881282260256, | |
| "grad_norm": 23.39885139465332, | |
| "learning_rate": 7.409118807540553e-06, | |
| "loss": 12.3056, | |
| "step": 30550 | |
| }, | |
| { | |
| "epoch": 13.298614506927466, | |
| "grad_norm": 21.89725112915039, | |
| "learning_rate": 7.404734765453749e-06, | |
| "loss": 12.3262, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 13.320347731594675, | |
| "grad_norm": 20.838117599487305, | |
| "learning_rate": 7.400350723366944e-06, | |
| "loss": 12.3879, | |
| "step": 30650 | |
| }, | |
| { | |
| "epoch": 13.342080956261885, | |
| "grad_norm": 17.388107299804688, | |
| "learning_rate": 7.3959666812801415e-06, | |
| "loss": 12.3611, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 13.363814180929095, | |
| "grad_norm": 19.158178329467773, | |
| "learning_rate": 7.391582639193337e-06, | |
| "loss": 12.3782, | |
| "step": 30750 | |
| }, | |
| { | |
| "epoch": 13.385547405596306, | |
| "grad_norm": 28.794353485107422, | |
| "learning_rate": 7.387198597106533e-06, | |
| "loss": 12.4156, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 13.407280630263516, | |
| "grad_norm": 24.086498260498047, | |
| "learning_rate": 7.3828145550197285e-06, | |
| "loss": 12.3903, | |
| "step": 30850 | |
| }, | |
| { | |
| "epoch": 13.429013854930725, | |
| "grad_norm": 24.688875198364258, | |
| "learning_rate": 7.378430512932925e-06, | |
| "loss": 12.3829, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 13.450747079597935, | |
| "grad_norm": 54.69606018066406, | |
| "learning_rate": 7.374046470846121e-06, | |
| "loss": 12.3398, | |
| "step": 30950 | |
| }, | |
| { | |
| "epoch": 13.472480304265146, | |
| "grad_norm": 25.10434341430664, | |
| "learning_rate": 7.369662428759316e-06, | |
| "loss": 12.3753, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 13.494213528932356, | |
| "grad_norm": 35.44208908081055, | |
| "learning_rate": 7.3652783866725134e-06, | |
| "loss": 12.3937, | |
| "step": 31050 | |
| }, | |
| { | |
| "epoch": 13.515946753599565, | |
| "grad_norm": 19.743236541748047, | |
| "learning_rate": 7.360894344585709e-06, | |
| "loss": 12.439, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 13.537679978266775, | |
| "grad_norm": 29.914348602294922, | |
| "learning_rate": 7.356510302498905e-06, | |
| "loss": 12.3594, | |
| "step": 31150 | |
| }, | |
| { | |
| "epoch": 13.559413202933985, | |
| "grad_norm": 105.84856414794922, | |
| "learning_rate": 7.3521262604121004e-06, | |
| "loss": 12.352, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 13.581146427601196, | |
| "grad_norm": 23.331436157226562, | |
| "learning_rate": 7.347742218325297e-06, | |
| "loss": 12.355, | |
| "step": 31250 | |
| }, | |
| { | |
| "epoch": 13.602879652268406, | |
| "grad_norm": 18.46331214904785, | |
| "learning_rate": 7.343358176238492e-06, | |
| "loss": 12.3481, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 13.624612876935615, | |
| "grad_norm": 22.384254455566406, | |
| "learning_rate": 7.338974134151688e-06, | |
| "loss": 12.4047, | |
| "step": 31350 | |
| }, | |
| { | |
| "epoch": 13.646346101602825, | |
| "grad_norm": 34.16387176513672, | |
| "learning_rate": 7.3345900920648845e-06, | |
| "loss": 12.4131, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 13.668079326270036, | |
| "grad_norm": 59.95965576171875, | |
| "learning_rate": 7.33020604997808e-06, | |
| "loss": 12.3539, | |
| "step": 31450 | |
| }, | |
| { | |
| "epoch": 13.689812550937246, | |
| "grad_norm": 21.647342681884766, | |
| "learning_rate": 7.325822007891277e-06, | |
| "loss": 12.3936, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 13.711545775604455, | |
| "grad_norm": 20.892303466796875, | |
| "learning_rate": 7.321437965804472e-06, | |
| "loss": 12.4042, | |
| "step": 31550 | |
| }, | |
| { | |
| "epoch": 13.733279000271665, | |
| "grad_norm": 25.085771560668945, | |
| "learning_rate": 7.317053923717669e-06, | |
| "loss": 12.4563, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 13.755012224938875, | |
| "grad_norm": 29.819766998291016, | |
| "learning_rate": 7.312669881630864e-06, | |
| "loss": 12.3417, | |
| "step": 31650 | |
| }, | |
| { | |
| "epoch": 13.776745449606086, | |
| "grad_norm": 23.446327209472656, | |
| "learning_rate": 7.30828583954406e-06, | |
| "loss": 12.4085, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 13.798478674273296, | |
| "grad_norm": 30.441680908203125, | |
| "learning_rate": 7.303901797457256e-06, | |
| "loss": 12.3269, | |
| "step": 31750 | |
| }, | |
| { | |
| "epoch": 13.820211898940505, | |
| "grad_norm": 46.045162200927734, | |
| "learning_rate": 7.299517755370452e-06, | |
| "loss": 12.3459, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 13.841945123607715, | |
| "grad_norm": 20.486669540405273, | |
| "learning_rate": 7.295133713283649e-06, | |
| "loss": 12.4457, | |
| "step": 31850 | |
| }, | |
| { | |
| "epoch": 13.863678348274926, | |
| "grad_norm": 18.060197830200195, | |
| "learning_rate": 7.290749671196844e-06, | |
| "loss": 12.4123, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 13.885411572942136, | |
| "grad_norm": 29.656959533691406, | |
| "learning_rate": 7.2863656291100406e-06, | |
| "loss": 12.3888, | |
| "step": 31950 | |
| }, | |
| { | |
| "epoch": 13.907144797609345, | |
| "grad_norm": 16.68509864807129, | |
| "learning_rate": 7.281981587023236e-06, | |
| "loss": 12.3878, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 13.928878022276555, | |
| "grad_norm": 27.963064193725586, | |
| "learning_rate": 7.277597544936432e-06, | |
| "loss": 12.3907, | |
| "step": 32050 | |
| }, | |
| { | |
| "epoch": 13.950611246943765, | |
| "grad_norm": 27.46925163269043, | |
| "learning_rate": 7.2732135028496276e-06, | |
| "loss": 12.4483, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 13.972344471610976, | |
| "grad_norm": 23.631675720214844, | |
| "learning_rate": 7.268829460762824e-06, | |
| "loss": 12.4326, | |
| "step": 32150 | |
| }, | |
| { | |
| "epoch": 13.994077696278186, | |
| "grad_norm": 44.30888748168945, | |
| "learning_rate": 7.264445418676019e-06, | |
| "loss": 12.4026, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 14.015647921760392, | |
| "grad_norm": 17.614269256591797, | |
| "learning_rate": 7.260061376589215e-06, | |
| "loss": 12.2351, | |
| "step": 32250 | |
| }, | |
| { | |
| "epoch": 14.0373811464276, | |
| "grad_norm": 16.82352638244629, | |
| "learning_rate": 7.2556773345024125e-06, | |
| "loss": 12.2887, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 14.059114371094811, | |
| "grad_norm": 22.863889694213867, | |
| "learning_rate": 7.251293292415608e-06, | |
| "loss": 12.2874, | |
| "step": 32350 | |
| }, | |
| { | |
| "epoch": 14.080847595762021, | |
| "grad_norm": 22.543703079223633, | |
| "learning_rate": 7.246909250328804e-06, | |
| "loss": 12.2726, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 14.102580820429232, | |
| "grad_norm": 19.95811653137207, | |
| "learning_rate": 7.2425252082419995e-06, | |
| "loss": 12.2948, | |
| "step": 32450 | |
| }, | |
| { | |
| "epoch": 14.12431404509644, | |
| "grad_norm": 11.412972450256348, | |
| "learning_rate": 7.238141166155196e-06, | |
| "loss": 12.2971, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 14.14604726976365, | |
| "grad_norm": 30.869230270385742, | |
| "learning_rate": 7.233757124068391e-06, | |
| "loss": 12.3222, | |
| "step": 32550 | |
| }, | |
| { | |
| "epoch": 14.167780494430861, | |
| "grad_norm": 37.976741790771484, | |
| "learning_rate": 7.229373081981587e-06, | |
| "loss": 12.3079, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 14.189513719098072, | |
| "grad_norm": 23.526809692382812, | |
| "learning_rate": 7.224989039894783e-06, | |
| "loss": 12.3085, | |
| "step": 32650 | |
| }, | |
| { | |
| "epoch": 14.211246943765282, | |
| "grad_norm": 19.888294219970703, | |
| "learning_rate": 7.22060499780798e-06, | |
| "loss": 12.3141, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 14.23298016843249, | |
| "grad_norm": 16.727022171020508, | |
| "learning_rate": 7.216220955721176e-06, | |
| "loss": 12.275, | |
| "step": 32750 | |
| }, | |
| { | |
| "epoch": 14.254713393099701, | |
| "grad_norm": 14.18730640411377, | |
| "learning_rate": 7.2118369136343715e-06, | |
| "loss": 12.3144, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 14.276446617766911, | |
| "grad_norm": 20.451278686523438, | |
| "learning_rate": 7.207452871547568e-06, | |
| "loss": 12.2727, | |
| "step": 32850 | |
| }, | |
| { | |
| "epoch": 14.298179842434122, | |
| "grad_norm": 16.769447326660156, | |
| "learning_rate": 7.203068829460763e-06, | |
| "loss": 12.3384, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 14.31991306710133, | |
| "grad_norm": 27.05632781982422, | |
| "learning_rate": 7.198684787373959e-06, | |
| "loss": 12.3492, | |
| "step": 32950 | |
| }, | |
| { | |
| "epoch": 14.34164629176854, | |
| "grad_norm": 38.1939582824707, | |
| "learning_rate": 7.194300745287155e-06, | |
| "loss": 12.284, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 14.363379516435751, | |
| "grad_norm": 32.06970977783203, | |
| "learning_rate": 7.189916703200352e-06, | |
| "loss": 12.3158, | |
| "step": 33050 | |
| }, | |
| { | |
| "epoch": 14.385112741102962, | |
| "grad_norm": 25.079200744628906, | |
| "learning_rate": 7.185532661113547e-06, | |
| "loss": 12.3486, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 14.406845965770172, | |
| "grad_norm": 21.099042892456055, | |
| "learning_rate": 7.181148619026743e-06, | |
| "loss": 12.2961, | |
| "step": 33150 | |
| }, | |
| { | |
| "epoch": 14.42857919043738, | |
| "grad_norm": 18.112712860107422, | |
| "learning_rate": 7.17676457693994e-06, | |
| "loss": 12.2852, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 14.450312415104591, | |
| "grad_norm": 18.887737274169922, | |
| "learning_rate": 7.172380534853135e-06, | |
| "loss": 12.276, | |
| "step": 33250 | |
| }, | |
| { | |
| "epoch": 14.472045639771801, | |
| "grad_norm": 21.17413902282715, | |
| "learning_rate": 7.167996492766331e-06, | |
| "loss": 12.3109, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 14.493778864439012, | |
| "grad_norm": 67.79141998291016, | |
| "learning_rate": 7.163612450679527e-06, | |
| "loss": 12.3326, | |
| "step": 33350 | |
| }, | |
| { | |
| "epoch": 14.51551208910622, | |
| "grad_norm": 18.88022232055664, | |
| "learning_rate": 7.159228408592723e-06, | |
| "loss": 12.355, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 14.53724531377343, | |
| "grad_norm": 14.09670639038086, | |
| "learning_rate": 7.154844366505918e-06, | |
| "loss": 12.3049, | |
| "step": 33450 | |
| }, | |
| { | |
| "epoch": 14.558978538440641, | |
| "grad_norm": 22.51435661315918, | |
| "learning_rate": 7.150460324419115e-06, | |
| "loss": 12.3043, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 14.580711763107852, | |
| "grad_norm": 20.429990768432617, | |
| "learning_rate": 7.146076282332311e-06, | |
| "loss": 12.3029, | |
| "step": 33550 | |
| }, | |
| { | |
| "epoch": 14.602444987775062, | |
| "grad_norm": 27.559160232543945, | |
| "learning_rate": 7.141692240245507e-06, | |
| "loss": 12.3017, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 14.62417821244227, | |
| "grad_norm": 26.29608917236328, | |
| "learning_rate": 7.137308198158703e-06, | |
| "loss": 12.347, | |
| "step": 33650 | |
| }, | |
| { | |
| "epoch": 14.645911437109481, | |
| "grad_norm": 12.279489517211914, | |
| "learning_rate": 7.132924156071899e-06, | |
| "loss": 12.2923, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 14.667644661776691, | |
| "grad_norm": 19.162981033325195, | |
| "learning_rate": 7.128540113985095e-06, | |
| "loss": 12.3289, | |
| "step": 33750 | |
| }, | |
| { | |
| "epoch": 14.689377886443902, | |
| "grad_norm": 19.87074089050293, | |
| "learning_rate": 7.12415607189829e-06, | |
| "loss": 12.2862, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 14.71111111111111, | |
| "grad_norm": 22.431442260742188, | |
| "learning_rate": 7.119772029811487e-06, | |
| "loss": 12.3116, | |
| "step": 33850 | |
| }, | |
| { | |
| "epoch": 14.73284433577832, | |
| "grad_norm": 16.823158264160156, | |
| "learning_rate": 7.115387987724683e-06, | |
| "loss": 12.3284, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 14.754577560445531, | |
| "grad_norm": 26.60719108581543, | |
| "learning_rate": 7.111003945637879e-06, | |
| "loss": 12.2962, | |
| "step": 33950 | |
| }, | |
| { | |
| "epoch": 14.776310785112742, | |
| "grad_norm": 20.54785919189453, | |
| "learning_rate": 7.106619903551074e-06, | |
| "loss": 12.2979, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 14.79804400977995, | |
| "grad_norm": 26.004840850830078, | |
| "learning_rate": 7.1022358614642705e-06, | |
| "loss": 12.3252, | |
| "step": 34050 | |
| }, | |
| { | |
| "epoch": 14.81977723444716, | |
| "grad_norm": 21.51180648803711, | |
| "learning_rate": 7.097851819377467e-06, | |
| "loss": 12.2844, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 14.841510459114371, | |
| "grad_norm": 21.89017105102539, | |
| "learning_rate": 7.093467777290662e-06, | |
| "loss": 12.3084, | |
| "step": 34150 | |
| }, | |
| { | |
| "epoch": 14.863243683781581, | |
| "grad_norm": 23.298505783081055, | |
| "learning_rate": 7.089083735203859e-06, | |
| "loss": 12.3825, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 14.884976908448792, | |
| "grad_norm": 21.83428382873535, | |
| "learning_rate": 7.084699693117054e-06, | |
| "loss": 12.2981, | |
| "step": 34250 | |
| }, | |
| { | |
| "epoch": 14.906710133116, | |
| "grad_norm": 26.309865951538086, | |
| "learning_rate": 7.080315651030251e-06, | |
| "loss": 12.2713, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 14.92844335778321, | |
| "grad_norm": 28.134078979492188, | |
| "learning_rate": 7.075931608943446e-06, | |
| "loss": 12.311, | |
| "step": 34350 | |
| }, | |
| { | |
| "epoch": 14.950176582450421, | |
| "grad_norm": 25.938369750976562, | |
| "learning_rate": 7.0715475668566425e-06, | |
| "loss": 12.2782, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 14.971909807117632, | |
| "grad_norm": 25.179311752319336, | |
| "learning_rate": 7.067163524769839e-06, | |
| "loss": 12.3384, | |
| "step": 34450 | |
| }, | |
| { | |
| "epoch": 14.993643031784842, | |
| "grad_norm": 18.602447509765625, | |
| "learning_rate": 7.062779482683034e-06, | |
| "loss": 12.3125, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 15.015213257267048, | |
| "grad_norm": 23.754281997680664, | |
| "learning_rate": 7.05839544059623e-06, | |
| "loss": 12.1766, | |
| "step": 34550 | |
| }, | |
| { | |
| "epoch": 15.036946481934256, | |
| "grad_norm": 22.74106788635254, | |
| "learning_rate": 7.054011398509426e-06, | |
| "loss": 12.228, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 15.058679706601467, | |
| "grad_norm": 35.1696662902832, | |
| "learning_rate": 7.049627356422623e-06, | |
| "loss": 12.2026, | |
| "step": 34650 | |
| }, | |
| { | |
| "epoch": 15.080412931268677, | |
| "grad_norm": 20.032005310058594, | |
| "learning_rate": 7.045243314335818e-06, | |
| "loss": 12.2443, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 15.102146155935888, | |
| "grad_norm": 30.315168380737305, | |
| "learning_rate": 7.040859272249014e-06, | |
| "loss": 12.221, | |
| "step": 34750 | |
| }, | |
| { | |
| "epoch": 15.123879380603096, | |
| "grad_norm": 15.685395240783691, | |
| "learning_rate": 7.03647523016221e-06, | |
| "loss": 12.2394, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 15.145612605270307, | |
| "grad_norm": 24.66408348083496, | |
| "learning_rate": 7.032091188075406e-06, | |
| "loss": 12.1932, | |
| "step": 34850 | |
| }, | |
| { | |
| "epoch": 15.167345829937517, | |
| "grad_norm": 20.8659725189209, | |
| "learning_rate": 7.027707145988602e-06, | |
| "loss": 12.2888, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 15.189079054604727, | |
| "grad_norm": 25.82394027709961, | |
| "learning_rate": 7.023323103901798e-06, | |
| "loss": 12.1971, | |
| "step": 34950 | |
| }, | |
| { | |
| "epoch": 15.210812279271938, | |
| "grad_norm": 17.442481994628906, | |
| "learning_rate": 7.018939061814995e-06, | |
| "loss": 12.2246, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 15.232545503939146, | |
| "grad_norm": 16.10444450378418, | |
| "learning_rate": 7.01455501972819e-06, | |
| "loss": 12.2378, | |
| "step": 35050 | |
| }, | |
| { | |
| "epoch": 15.254278728606357, | |
| "grad_norm": 20.300018310546875, | |
| "learning_rate": 7.010170977641386e-06, | |
| "loss": 12.2806, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 15.276011953273567, | |
| "grad_norm": 30.641281127929688, | |
| "learning_rate": 7.005786935554582e-06, | |
| "loss": 12.2608, | |
| "step": 35150 | |
| }, | |
| { | |
| "epoch": 15.297745177940778, | |
| "grad_norm": 36.21476745605469, | |
| "learning_rate": 7.001402893467778e-06, | |
| "loss": 12.2502, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 15.319478402607986, | |
| "grad_norm": 31.640207290649414, | |
| "learning_rate": 6.997018851380973e-06, | |
| "loss": 12.2664, | |
| "step": 35250 | |
| }, | |
| { | |
| "epoch": 15.341211627275197, | |
| "grad_norm": 14.65031623840332, | |
| "learning_rate": 6.99263480929417e-06, | |
| "loss": 12.2727, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 15.362944851942407, | |
| "grad_norm": 15.896147727966309, | |
| "learning_rate": 6.988250767207367e-06, | |
| "loss": 12.2335, | |
| "step": 35350 | |
| }, | |
| { | |
| "epoch": 15.384678076609617, | |
| "grad_norm": 27.60741424560547, | |
| "learning_rate": 6.983866725120561e-06, | |
| "loss": 12.241, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 15.406411301276828, | |
| "grad_norm": 27.842981338500977, | |
| "learning_rate": 6.979482683033758e-06, | |
| "loss": 12.299, | |
| "step": 35450 | |
| }, | |
| { | |
| "epoch": 15.428144525944036, | |
| "grad_norm": 14.332504272460938, | |
| "learning_rate": 6.975098640946954e-06, | |
| "loss": 12.2547, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 15.449877750611247, | |
| "grad_norm": 13.6268310546875, | |
| "learning_rate": 6.97071459886015e-06, | |
| "loss": 12.2764, | |
| "step": 35550 | |
| }, | |
| { | |
| "epoch": 15.471610975278457, | |
| "grad_norm": 27.122060775756836, | |
| "learning_rate": 6.966330556773345e-06, | |
| "loss": 12.2568, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 15.493344199945668, | |
| "grad_norm": NaN, | |
| "learning_rate": 6.9619465146865415e-06, | |
| "loss": 12.2716, | |
| "step": 35650 | |
| }, | |
| { | |
| "epoch": 15.515077424612876, | |
| "grad_norm": 16.30205726623535, | |
| "learning_rate": 6.957562472599737e-06, | |
| "loss": 12.1952, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 15.536810649280087, | |
| "grad_norm": 17.126123428344727, | |
| "learning_rate": 6.953178430512933e-06, | |
| "loss": 12.2365, | |
| "step": 35750 | |
| }, | |
| { | |
| "epoch": 15.558543873947297, | |
| "grad_norm": 33.20661163330078, | |
| "learning_rate": 6.94879438842613e-06, | |
| "loss": 12.2776, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 15.580277098614507, | |
| "grad_norm": 22.688047409057617, | |
| "learning_rate": 6.944410346339326e-06, | |
| "loss": 12.3202, | |
| "step": 35850 | |
| }, | |
| { | |
| "epoch": 15.602010323281718, | |
| "grad_norm": 19.268665313720703, | |
| "learning_rate": 6.940026304252522e-06, | |
| "loss": 12.2744, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 15.623743547948926, | |
| "grad_norm": 44.28622817993164, | |
| "learning_rate": 6.935642262165717e-06, | |
| "loss": 12.2619, | |
| "step": 35950 | |
| }, | |
| { | |
| "epoch": 15.645476772616137, | |
| "grad_norm": 11.47972297668457, | |
| "learning_rate": 6.9312582200789135e-06, | |
| "loss": 12.2281, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 15.667209997283347, | |
| "grad_norm": 26.456462860107422, | |
| "learning_rate": 6.926874177992109e-06, | |
| "loss": 12.2591, | |
| "step": 36050 | |
| }, | |
| { | |
| "epoch": 15.688943221950558, | |
| "grad_norm": 18.363269805908203, | |
| "learning_rate": 6.922490135905305e-06, | |
| "loss": 12.2958, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 15.710676446617766, | |
| "grad_norm": 21.405649185180664, | |
| "learning_rate": 6.9181060938185005e-06, | |
| "loss": 12.2485, | |
| "step": 36150 | |
| }, | |
| { | |
| "epoch": 15.732409671284977, | |
| "grad_norm": 27.277904510498047, | |
| "learning_rate": 6.913722051731698e-06, | |
| "loss": 12.2324, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 15.754142895952187, | |
| "grad_norm": 20.303300857543945, | |
| "learning_rate": 6.909338009644894e-06, | |
| "loss": 12.2939, | |
| "step": 36250 | |
| }, | |
| { | |
| "epoch": 15.775876120619397, | |
| "grad_norm": 14.886679649353027, | |
| "learning_rate": 6.904953967558089e-06, | |
| "loss": 12.2036, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 15.797609345286606, | |
| "grad_norm": 17.747087478637695, | |
| "learning_rate": 6.9005699254712854e-06, | |
| "loss": 12.2459, | |
| "step": 36350 | |
| }, | |
| { | |
| "epoch": 15.819342569953816, | |
| "grad_norm": 34.592708587646484, | |
| "learning_rate": 6.896185883384481e-06, | |
| "loss": 12.2688, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 15.841075794621027, | |
| "grad_norm": 20.060144424438477, | |
| "learning_rate": 6.891801841297677e-06, | |
| "loss": 12.2528, | |
| "step": 36450 | |
| }, | |
| { | |
| "epoch": 15.862809019288237, | |
| "grad_norm": 13.47815227508545, | |
| "learning_rate": 6.8874177992108724e-06, | |
| "loss": 12.2797, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 15.884542243955448, | |
| "grad_norm": 20.81302833557129, | |
| "learning_rate": 6.883033757124069e-06, | |
| "loss": 12.269, | |
| "step": 36550 | |
| }, | |
| { | |
| "epoch": 15.906275468622656, | |
| "grad_norm": 29.326114654541016, | |
| "learning_rate": 6.878649715037264e-06, | |
| "loss": 12.296, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 15.928008693289867, | |
| "grad_norm": 21.322439193725586, | |
| "learning_rate": 6.874265672950461e-06, | |
| "loss": 12.3283, | |
| "step": 36650 | |
| }, | |
| { | |
| "epoch": 15.949741917957077, | |
| "grad_norm": 25.019590377807617, | |
| "learning_rate": 6.869881630863657e-06, | |
| "loss": 12.2913, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 15.971475142624287, | |
| "grad_norm": 16.494823455810547, | |
| "learning_rate": 6.865497588776853e-06, | |
| "loss": 12.2574, | |
| "step": 36750 | |
| }, | |
| { | |
| "epoch": 15.993208367291498, | |
| "grad_norm": 22.250595092773438, | |
| "learning_rate": 6.861113546690049e-06, | |
| "loss": 12.26, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 16.014778592773702, | |
| "grad_norm": 15.23131275177002, | |
| "learning_rate": 6.856729504603244e-06, | |
| "loss": 12.1167, | |
| "step": 36850 | |
| }, | |
| { | |
| "epoch": 16.036511817440914, | |
| "grad_norm": 18.172534942626953, | |
| "learning_rate": 6.852345462516441e-06, | |
| "loss": 12.1946, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 16.058245042108123, | |
| "grad_norm": 27.63299560546875, | |
| "learning_rate": 6.847961420429636e-06, | |
| "loss": 12.1801, | |
| "step": 36950 | |
| }, | |
| { | |
| "epoch": 16.07997826677533, | |
| "grad_norm": 22.287805557250977, | |
| "learning_rate": 6.843577378342833e-06, | |
| "loss": 12.2187, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 16.101711491442543, | |
| "grad_norm": 15.652294158935547, | |
| "learning_rate": 6.8391933362560285e-06, | |
| "loss": 12.1837, | |
| "step": 37050 | |
| }, | |
| { | |
| "epoch": 16.123444716109752, | |
| "grad_norm": 23.91109848022461, | |
| "learning_rate": 6.834809294169225e-06, | |
| "loss": 12.2074, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 16.145177940776964, | |
| "grad_norm": 24.845624923706055, | |
| "learning_rate": 6.830425252082421e-06, | |
| "loss": 12.1387, | |
| "step": 37150 | |
| }, | |
| { | |
| "epoch": 16.166911165444173, | |
| "grad_norm": 19.137048721313477, | |
| "learning_rate": 6.826041209995616e-06, | |
| "loss": 12.2304, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 16.18864439011138, | |
| "grad_norm": 17.24692153930664, | |
| "learning_rate": 6.8216571679088126e-06, | |
| "loss": 12.208, | |
| "step": 37250 | |
| }, | |
| { | |
| "epoch": 16.210377614778594, | |
| "grad_norm": 20.503686904907227, | |
| "learning_rate": 6.817273125822008e-06, | |
| "loss": 12.2103, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 16.232110839445802, | |
| "grad_norm": 27.404552459716797, | |
| "learning_rate": 6.812889083735205e-06, | |
| "loss": 12.2422, | |
| "step": 37350 | |
| }, | |
| { | |
| "epoch": 16.253844064113014, | |
| "grad_norm": 25.16230010986328, | |
| "learning_rate": 6.8085050416483996e-06, | |
| "loss": 12.2006, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 16.275577288780223, | |
| "grad_norm": 18.156126022338867, | |
| "learning_rate": 6.804120999561597e-06, | |
| "loss": 12.2023, | |
| "step": 37450 | |
| }, | |
| { | |
| "epoch": 16.29731051344743, | |
| "grad_norm": 19.68562889099121, | |
| "learning_rate": 6.799736957474792e-06, | |
| "loss": 12.1877, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 16.319043738114644, | |
| "grad_norm": 17.91988182067871, | |
| "learning_rate": 6.795352915387988e-06, | |
| "loss": 12.215, | |
| "step": 37550 | |
| }, | |
| { | |
| "epoch": 16.340776962781852, | |
| "grad_norm": 15.31675910949707, | |
| "learning_rate": 6.7909688733011845e-06, | |
| "loss": 12.1548, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 16.36251018744906, | |
| "grad_norm": 8.975651741027832, | |
| "learning_rate": 6.78658483121438e-06, | |
| "loss": 12.169, | |
| "step": 37650 | |
| }, | |
| { | |
| "epoch": 16.384243412116273, | |
| "grad_norm": 16.77298927307129, | |
| "learning_rate": 6.782200789127576e-06, | |
| "loss": 12.2139, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 16.405976636783482, | |
| "grad_norm": 23.03885269165039, | |
| "learning_rate": 6.7778167470407715e-06, | |
| "loss": 12.2093, | |
| "step": 37750 | |
| }, | |
| { | |
| "epoch": 16.427709861450694, | |
| "grad_norm": 18.47231101989746, | |
| "learning_rate": 6.773432704953969e-06, | |
| "loss": 12.1992, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 16.449443086117903, | |
| "grad_norm": 28.977338790893555, | |
| "learning_rate": 6.769048662867164e-06, | |
| "loss": 12.1989, | |
| "step": 37850 | |
| }, | |
| { | |
| "epoch": 16.47117631078511, | |
| "grad_norm": 16.37677574157715, | |
| "learning_rate": 6.76466462078036e-06, | |
| "loss": 12.2296, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 16.492909535452323, | |
| "grad_norm": 13.731319427490234, | |
| "learning_rate": 6.760280578693556e-06, | |
| "loss": 12.186, | |
| "step": 37950 | |
| }, | |
| { | |
| "epoch": 16.514642760119532, | |
| "grad_norm": 20.206491470336914, | |
| "learning_rate": 6.755896536606752e-06, | |
| "loss": 12.1552, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 16.536375984786744, | |
| "grad_norm": 19.88826560974121, | |
| "learning_rate": 6.751512494519948e-06, | |
| "loss": 12.2298, | |
| "step": 38050 | |
| }, | |
| { | |
| "epoch": 16.558109209453953, | |
| "grad_norm": 31.184532165527344, | |
| "learning_rate": 6.7471284524331435e-06, | |
| "loss": 12.2041, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 16.57984243412116, | |
| "grad_norm": 37.404266357421875, | |
| "learning_rate": 6.7427444103463405e-06, | |
| "loss": 12.208, | |
| "step": 38150 | |
| }, | |
| { | |
| "epoch": 16.601575658788374, | |
| "grad_norm": 12.503349304199219, | |
| "learning_rate": 6.738360368259536e-06, | |
| "loss": 12.2289, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 16.623308883455582, | |
| "grad_norm": 14.80574893951416, | |
| "learning_rate": 6.733976326172732e-06, | |
| "loss": 12.2027, | |
| "step": 38250 | |
| }, | |
| { | |
| "epoch": 16.645042108122794, | |
| "grad_norm": 18.339298248291016, | |
| "learning_rate": 6.7295922840859276e-06, | |
| "loss": 12.2122, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 16.666775332790003, | |
| "grad_norm": 9.988556861877441, | |
| "learning_rate": 6.725208241999124e-06, | |
| "loss": 12.2169, | |
| "step": 38350 | |
| }, | |
| { | |
| "epoch": 16.68850855745721, | |
| "grad_norm": 16.23221778869629, | |
| "learning_rate": 6.720824199912319e-06, | |
| "loss": 12.2432, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 16.710241782124424, | |
| "grad_norm": 17.93288803100586, | |
| "learning_rate": 6.716440157825515e-06, | |
| "loss": 12.1946, | |
| "step": 38450 | |
| }, | |
| { | |
| "epoch": 16.731975006791632, | |
| "grad_norm": 23.863719940185547, | |
| "learning_rate": 6.7120561157387125e-06, | |
| "loss": 12.2601, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 16.75370823145884, | |
| "grad_norm": 32.24260330200195, | |
| "learning_rate": 6.707672073651907e-06, | |
| "loss": 12.2242, | |
| "step": 38550 | |
| }, | |
| { | |
| "epoch": 16.775441456126053, | |
| "grad_norm": 31.188295364379883, | |
| "learning_rate": 6.703288031565104e-06, | |
| "loss": 12.1977, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 16.797174680793262, | |
| "grad_norm": 21.935489654541016, | |
| "learning_rate": 6.6989039894782995e-06, | |
| "loss": 12.2153, | |
| "step": 38650 | |
| }, | |
| { | |
| "epoch": 16.818907905460474, | |
| "grad_norm": 16.820199966430664, | |
| "learning_rate": 6.694519947391496e-06, | |
| "loss": 12.1841, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 16.840641130127683, | |
| "grad_norm": 27.350257873535156, | |
| "learning_rate": 6.690135905304691e-06, | |
| "loss": 12.2308, | |
| "step": 38750 | |
| }, | |
| { | |
| "epoch": 16.86237435479489, | |
| "grad_norm": 20.717317581176758, | |
| "learning_rate": 6.685751863217887e-06, | |
| "loss": 12.2139, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 16.884107579462103, | |
| "grad_norm": 20.515241622924805, | |
| "learning_rate": 6.681367821131084e-06, | |
| "loss": 12.209, | |
| "step": 38850 | |
| }, | |
| { | |
| "epoch": 16.905840804129312, | |
| "grad_norm": 16.544082641601562, | |
| "learning_rate": 6.676983779044279e-06, | |
| "loss": 12.2183, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 16.927574028796524, | |
| "grad_norm": 17.54091453552246, | |
| "learning_rate": 6.672599736957476e-06, | |
| "loss": 12.2609, | |
| "step": 38950 | |
| }, | |
| { | |
| "epoch": 16.949307253463733, | |
| "grad_norm": 21.071598052978516, | |
| "learning_rate": 6.6682156948706714e-06, | |
| "loss": 12.1727, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 16.97104047813094, | |
| "grad_norm": 17.628015518188477, | |
| "learning_rate": 6.663831652783868e-06, | |
| "loss": 12.1862, | |
| "step": 39050 | |
| }, | |
| { | |
| "epoch": 16.992773702798154, | |
| "grad_norm": 13.298240661621094, | |
| "learning_rate": 6.659447610697063e-06, | |
| "loss": 12.2279, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 17.014343928280358, | |
| "grad_norm": 12.616509437561035, | |
| "learning_rate": 6.655063568610259e-06, | |
| "loss": 12.0914, | |
| "step": 39150 | |
| }, | |
| { | |
| "epoch": 17.03607715294757, | |
| "grad_norm": 21.71387481689453, | |
| "learning_rate": 6.650679526523455e-06, | |
| "loss": 12.1576, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 17.05781037761478, | |
| "grad_norm": 26.497800827026367, | |
| "learning_rate": 6.646295484436651e-06, | |
| "loss": 12.1522, | |
| "step": 39250 | |
| }, | |
| { | |
| "epoch": 17.079543602281987, | |
| "grad_norm": 20.276397705078125, | |
| "learning_rate": 6.641911442349848e-06, | |
| "loss": 12.1579, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 17.1012768269492, | |
| "grad_norm": 18.534727096557617, | |
| "learning_rate": 6.637527400263043e-06, | |
| "loss": 12.178, | |
| "step": 39350 | |
| }, | |
| { | |
| "epoch": 17.123010051616408, | |
| "grad_norm": 29.980501174926758, | |
| "learning_rate": 6.63314335817624e-06, | |
| "loss": 12.1507, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 17.14474327628362, | |
| "grad_norm": 25.486083984375, | |
| "learning_rate": 6.628759316089435e-06, | |
| "loss": 12.148, | |
| "step": 39450 | |
| }, | |
| { | |
| "epoch": 17.16647650095083, | |
| "grad_norm": 24.499359130859375, | |
| "learning_rate": 6.624375274002631e-06, | |
| "loss": 12.1513, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 17.188209725618037, | |
| "grad_norm": 22.07660484313965, | |
| "learning_rate": 6.619991231915827e-06, | |
| "loss": 12.1477, | |
| "step": 39550 | |
| }, | |
| { | |
| "epoch": 17.20994295028525, | |
| "grad_norm": 28.42877960205078, | |
| "learning_rate": 6.615607189829023e-06, | |
| "loss": 12.1747, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 17.231676174952458, | |
| "grad_norm": 22.489025115966797, | |
| "learning_rate": 6.611223147742218e-06, | |
| "loss": 12.1474, | |
| "step": 39650 | |
| }, | |
| { | |
| "epoch": 17.25340939961967, | |
| "grad_norm": 24.58718490600586, | |
| "learning_rate": 6.6068391056554145e-06, | |
| "loss": 12.1613, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 17.27514262428688, | |
| "grad_norm": 92.33475494384766, | |
| "learning_rate": 6.6024550635686116e-06, | |
| "loss": 12.1637, | |
| "step": 39750 | |
| }, | |
| { | |
| "epoch": 17.296875848954087, | |
| "grad_norm": 19.350147247314453, | |
| "learning_rate": 6.598071021481807e-06, | |
| "loss": 12.184, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 17.3186090736213, | |
| "grad_norm": 14.627690315246582, | |
| "learning_rate": 6.593686979395003e-06, | |
| "loss": 12.1552, | |
| "step": 39850 | |
| }, | |
| { | |
| "epoch": 17.34034229828851, | |
| "grad_norm": 11.52912425994873, | |
| "learning_rate": 6.5893029373081986e-06, | |
| "loss": 12.1784, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 17.362075522955717, | |
| "grad_norm": 18.19782829284668, | |
| "learning_rate": 6.584918895221395e-06, | |
| "loss": 12.127, | |
| "step": 39950 | |
| }, | |
| { | |
| "epoch": 17.38380874762293, | |
| "grad_norm": 24.676179885864258, | |
| "learning_rate": 6.58053485313459e-06, | |
| "loss": 12.2129, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 17.38380874762293, | |
| "eval_cer": 0.0766327626430326, | |
| "eval_loss": 2.3462953567504883, | |
| "eval_runtime": 399.3051, | |
| "eval_samples_per_second": 13.539, | |
| "eval_steps_per_second": 3.386, | |
| "eval_wer": 0.22991050400376825, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 17.405541972290138, | |
| "grad_norm": 10.956995010375977, | |
| "learning_rate": 6.5761508110477864e-06, | |
| "loss": 12.1388, | |
| "step": 40050 | |
| }, | |
| { | |
| "epoch": 17.42727519695735, | |
| "grad_norm": 23.64618682861328, | |
| "learning_rate": 6.571766768960982e-06, | |
| "loss": 12.1628, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 17.44900842162456, | |
| "grad_norm": 22.68800926208496, | |
| "learning_rate": 6.567382726874179e-06, | |
| "loss": 12.1489, | |
| "step": 40150 | |
| }, | |
| { | |
| "epoch": 17.470741646291767, | |
| "grad_norm": 17.155860900878906, | |
| "learning_rate": 6.562998684787375e-06, | |
| "loss": 12.1374, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 17.49247487095898, | |
| "grad_norm": 19.839338302612305, | |
| "learning_rate": 6.5586146427005705e-06, | |
| "loss": 12.1891, | |
| "step": 40250 | |
| }, | |
| { | |
| "epoch": 17.514208095626188, | |
| "grad_norm": 24.002262115478516, | |
| "learning_rate": 6.554230600613767e-06, | |
| "loss": 12.1819, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 17.5359413202934, | |
| "grad_norm": 14.681846618652344, | |
| "learning_rate": 6.549846558526962e-06, | |
| "loss": 12.1583, | |
| "step": 40350 | |
| }, | |
| { | |
| "epoch": 17.55767454496061, | |
| "grad_norm": 28.004215240478516, | |
| "learning_rate": 6.545462516440158e-06, | |
| "loss": 12.1953, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 17.579407769627817, | |
| "grad_norm": 18.857913970947266, | |
| "learning_rate": 6.541078474353354e-06, | |
| "loss": 12.1922, | |
| "step": 40450 | |
| }, | |
| { | |
| "epoch": 17.60114099429503, | |
| "grad_norm": 27.09821319580078, | |
| "learning_rate": 6.536694432266551e-06, | |
| "loss": 12.1901, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 17.622874218962238, | |
| "grad_norm": 15.759273529052734, | |
| "learning_rate": 6.532310390179745e-06, | |
| "loss": 12.1536, | |
| "step": 40550 | |
| }, | |
| { | |
| "epoch": 17.64460744362945, | |
| "grad_norm": 13.474365234375, | |
| "learning_rate": 6.5279263480929425e-06, | |
| "loss": 12.1806, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 17.66634066829666, | |
| "grad_norm": 14.334114074707031, | |
| "learning_rate": 6.523542306006139e-06, | |
| "loss": 12.1881, | |
| "step": 40650 | |
| }, | |
| { | |
| "epoch": 17.688073892963867, | |
| "grad_norm": 28.76114845275879, | |
| "learning_rate": 6.519158263919334e-06, | |
| "loss": 12.2126, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 17.70980711763108, | |
| "grad_norm": 22.386192321777344, | |
| "learning_rate": 6.51477422183253e-06, | |
| "loss": 12.153, | |
| "step": 40750 | |
| }, | |
| { | |
| "epoch": 17.73154034229829, | |
| "grad_norm": 12.762558937072754, | |
| "learning_rate": 6.510390179745726e-06, | |
| "loss": 12.1787, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 17.753273566965497, | |
| "grad_norm": 15.222195625305176, | |
| "learning_rate": 6.506006137658922e-06, | |
| "loss": 12.1646, | |
| "step": 40850 | |
| }, | |
| { | |
| "epoch": 17.77500679163271, | |
| "grad_norm": 35.954437255859375, | |
| "learning_rate": 6.501622095572117e-06, | |
| "loss": 12.1707, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 17.796740016299918, | |
| "grad_norm": 11.987882614135742, | |
| "learning_rate": 6.497238053485314e-06, | |
| "loss": 12.1824, | |
| "step": 40950 | |
| }, | |
| { | |
| "epoch": 17.81847324096713, | |
| "grad_norm": 31.296215057373047, | |
| "learning_rate": 6.49285401139851e-06, | |
| "loss": 12.213, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 17.84020646563434, | |
| "grad_norm": 16.63829231262207, | |
| "learning_rate": 6.488469969311706e-06, | |
| "loss": 12.1762, | |
| "step": 41050 | |
| }, | |
| { | |
| "epoch": 17.861939690301547, | |
| "grad_norm": 13.500885963439941, | |
| "learning_rate": 6.484085927224902e-06, | |
| "loss": 12.1677, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 17.88367291496876, | |
| "grad_norm": 29.857112884521484, | |
| "learning_rate": 6.479701885138098e-06, | |
| "loss": 12.1812, | |
| "step": 41150 | |
| }, | |
| { | |
| "epoch": 17.905406139635968, | |
| "grad_norm": 14.494293212890625, | |
| "learning_rate": 6.475317843051294e-06, | |
| "loss": 12.1565, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 17.92713936430318, | |
| "grad_norm": 11.10816478729248, | |
| "learning_rate": 6.470933800964489e-06, | |
| "loss": 12.1799, | |
| "step": 41250 | |
| }, | |
| { | |
| "epoch": 17.94887258897039, | |
| "grad_norm": 19.924617767333984, | |
| "learning_rate": 6.466549758877686e-06, | |
| "loss": 12.1382, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 17.970605813637597, | |
| "grad_norm": 21.809062957763672, | |
| "learning_rate": 6.462165716790882e-06, | |
| "loss": 12.1727, | |
| "step": 41350 | |
| }, | |
| { | |
| "epoch": 17.99233903830481, | |
| "grad_norm": 12.314726829528809, | |
| "learning_rate": 6.457781674704078e-06, | |
| "loss": 12.1355, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 18.013909263787014, | |
| "grad_norm": 22.337913513183594, | |
| "learning_rate": 6.453397632617273e-06, | |
| "loss": 12.0397, | |
| "step": 41450 | |
| }, | |
| { | |
| "epoch": 18.035642488454226, | |
| "grad_norm": 14.111494064331055, | |
| "learning_rate": 6.44901359053047e-06, | |
| "loss": 12.0939, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 18.057375713121434, | |
| "grad_norm": 16.706897735595703, | |
| "learning_rate": 6.444629548443666e-06, | |
| "loss": 12.1425, | |
| "step": 41550 | |
| }, | |
| { | |
| "epoch": 18.079108937788643, | |
| "grad_norm": 26.20379066467285, | |
| "learning_rate": 6.440245506356861e-06, | |
| "loss": 12.1265, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 18.100842162455855, | |
| "grad_norm": 14.789849281311035, | |
| "learning_rate": 6.4358614642700574e-06, | |
| "loss": 12.1377, | |
| "step": 41650 | |
| }, | |
| { | |
| "epoch": 18.122575387123064, | |
| "grad_norm": 34.11836242675781, | |
| "learning_rate": 6.431477422183253e-06, | |
| "loss": 12.1273, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 18.144308611790276, | |
| "grad_norm": 32.26976013183594, | |
| "learning_rate": 6.42709338009645e-06, | |
| "loss": 12.0853, | |
| "step": 41750 | |
| }, | |
| { | |
| "epoch": 18.166041836457484, | |
| "grad_norm": 19.59932518005371, | |
| "learning_rate": 6.422709338009645e-06, | |
| "loss": 12.0887, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 18.187775061124693, | |
| "grad_norm": 16.68062400817871, | |
| "learning_rate": 6.4183252959228415e-06, | |
| "loss": 12.148, | |
| "step": 41850 | |
| }, | |
| { | |
| "epoch": 18.209508285791905, | |
| "grad_norm": 18.44430923461914, | |
| "learning_rate": 6.413941253836037e-06, | |
| "loss": 12.132, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 18.231241510459114, | |
| "grad_norm": 23.202688217163086, | |
| "learning_rate": 6.409557211749233e-06, | |
| "loss": 12.1606, | |
| "step": 41950 | |
| }, | |
| { | |
| "epoch": 18.252974735126326, | |
| "grad_norm": 11.007984161376953, | |
| "learning_rate": 6.405173169662429e-06, | |
| "loss": 12.1526, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 18.274707959793535, | |
| "grad_norm": 43.34115219116211, | |
| "learning_rate": 6.400789127575625e-06, | |
| "loss": 12.1461, | |
| "step": 42050 | |
| }, | |
| { | |
| "epoch": 18.296441184460743, | |
| "grad_norm": 21.273698806762695, | |
| "learning_rate": 6.396405085488822e-06, | |
| "loss": 12.116, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 18.318174409127955, | |
| "grad_norm": 21.992979049682617, | |
| "learning_rate": 6.392021043402017e-06, | |
| "loss": 12.0956, | |
| "step": 42150 | |
| }, | |
| { | |
| "epoch": 18.339907633795164, | |
| "grad_norm": 10.890033721923828, | |
| "learning_rate": 6.3876370013152135e-06, | |
| "loss": 12.0999, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 18.361640858462373, | |
| "grad_norm": 10.554021835327148, | |
| "learning_rate": 6.383252959228409e-06, | |
| "loss": 12.1158, | |
| "step": 42250 | |
| }, | |
| { | |
| "epoch": 18.383374083129585, | |
| "grad_norm": 11.385374069213867, | |
| "learning_rate": 6.378868917141605e-06, | |
| "loss": 12.1541, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 18.405107307796793, | |
| "grad_norm": 11.36735725402832, | |
| "learning_rate": 6.3744848750548005e-06, | |
| "loss": 12.119, | |
| "step": 42350 | |
| }, | |
| { | |
| "epoch": 18.426840532464006, | |
| "grad_norm": 19.5784969329834, | |
| "learning_rate": 6.370100832967997e-06, | |
| "loss": 12.1513, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 18.448573757131214, | |
| "grad_norm": 10.584908485412598, | |
| "learning_rate": 6.365716790881194e-06, | |
| "loss": 12.1572, | |
| "step": 42450 | |
| }, | |
| { | |
| "epoch": 18.470306981798423, | |
| "grad_norm": 23.416278839111328, | |
| "learning_rate": 6.361332748794389e-06, | |
| "loss": 12.1384, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 18.492040206465635, | |
| "grad_norm": 22.098583221435547, | |
| "learning_rate": 6.3569487067075854e-06, | |
| "loss": 12.1005, | |
| "step": 42550 | |
| }, | |
| { | |
| "epoch": 18.513773431132844, | |
| "grad_norm": 27.949371337890625, | |
| "learning_rate": 6.352564664620781e-06, | |
| "loss": 12.1248, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 18.535506655800056, | |
| "grad_norm": 13.4013090133667, | |
| "learning_rate": 6.348180622533977e-06, | |
| "loss": 12.1335, | |
| "step": 42650 | |
| }, | |
| { | |
| "epoch": 18.557239880467264, | |
| "grad_norm": 19.233583450317383, | |
| "learning_rate": 6.3437965804471724e-06, | |
| "loss": 12.1416, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 18.578973105134473, | |
| "grad_norm": 17.514616012573242, | |
| "learning_rate": 6.339412538360369e-06, | |
| "loss": 12.1561, | |
| "step": 42750 | |
| }, | |
| { | |
| "epoch": 18.600706329801685, | |
| "grad_norm": 17.83085823059082, | |
| "learning_rate": 6.335028496273564e-06, | |
| "loss": 12.1655, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 18.622439554468894, | |
| "grad_norm": 32.00389099121094, | |
| "learning_rate": 6.33064445418676e-06, | |
| "loss": 12.1489, | |
| "step": 42850 | |
| }, | |
| { | |
| "epoch": 18.644172779136106, | |
| "grad_norm": 36.5909309387207, | |
| "learning_rate": 6.326260412099957e-06, | |
| "loss": 12.1353, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 18.665906003803315, | |
| "grad_norm": 19.841901779174805, | |
| "learning_rate": 6.321876370013153e-06, | |
| "loss": 12.1237, | |
| "step": 42950 | |
| }, | |
| { | |
| "epoch": 18.687639228470523, | |
| "grad_norm": 12.05302619934082, | |
| "learning_rate": 6.317492327926349e-06, | |
| "loss": 12.0931, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 18.709372453137735, | |
| "grad_norm": 51.42092514038086, | |
| "learning_rate": 6.313108285839544e-06, | |
| "loss": 12.0907, | |
| "step": 43050 | |
| }, | |
| { | |
| "epoch": 18.731105677804944, | |
| "grad_norm": 21.547746658325195, | |
| "learning_rate": 6.308724243752741e-06, | |
| "loss": 12.1051, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 18.752838902472153, | |
| "grad_norm": 17.779346466064453, | |
| "learning_rate": 6.304340201665936e-06, | |
| "loss": 12.1208, | |
| "step": 43150 | |
| }, | |
| { | |
| "epoch": 18.774572127139365, | |
| "grad_norm": 12.786531448364258, | |
| "learning_rate": 6.299956159579132e-06, | |
| "loss": 12.1527, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 18.796305351806573, | |
| "grad_norm": 15.865018844604492, | |
| "learning_rate": 6.295572117492329e-06, | |
| "loss": 12.1003, | |
| "step": 43250 | |
| }, | |
| { | |
| "epoch": 18.818038576473786, | |
| "grad_norm": 12.622864723205566, | |
| "learning_rate": 6.291188075405525e-06, | |
| "loss": 12.1439, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 18.839771801140994, | |
| "grad_norm": 12.189949035644531, | |
| "learning_rate": 6.286804033318721e-06, | |
| "loss": 12.132, | |
| "step": 43350 | |
| }, | |
| { | |
| "epoch": 18.861505025808203, | |
| "grad_norm": 18.03951072692871, | |
| "learning_rate": 6.282419991231916e-06, | |
| "loss": 12.1327, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 18.883238250475415, | |
| "grad_norm": 25.907819747924805, | |
| "learning_rate": 6.2780359491451126e-06, | |
| "loss": 12.1319, | |
| "step": 43450 | |
| }, | |
| { | |
| "epoch": 18.904971475142624, | |
| "grad_norm": 39.924564361572266, | |
| "learning_rate": 6.273651907058308e-06, | |
| "loss": 12.1779, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 18.926704699809836, | |
| "grad_norm": 10.564095497131348, | |
| "learning_rate": 6.269267864971504e-06, | |
| "loss": 12.1198, | |
| "step": 43550 | |
| }, | |
| { | |
| "epoch": 18.948437924477044, | |
| "grad_norm": 16.400606155395508, | |
| "learning_rate": 6.2648838228846996e-06, | |
| "loss": 12.1314, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 18.970171149144253, | |
| "grad_norm": 16.357927322387695, | |
| "learning_rate": 6.260499780797896e-06, | |
| "loss": 12.1305, | |
| "step": 43650 | |
| }, | |
| { | |
| "epoch": 18.991904373811465, | |
| "grad_norm": 18.073299407958984, | |
| "learning_rate": 6.256115738711093e-06, | |
| "loss": 12.1585, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 19.01347459929367, | |
| "grad_norm": 14.831045150756836, | |
| "learning_rate": 6.251731696624288e-06, | |
| "loss": 12.023, | |
| "step": 43750 | |
| }, | |
| { | |
| "epoch": 19.03520782396088, | |
| "grad_norm": 20.606718063354492, | |
| "learning_rate": 6.2473476545374845e-06, | |
| "loss": 12.0678, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 19.05694104862809, | |
| "grad_norm": 20.03177261352539, | |
| "learning_rate": 6.24296361245068e-06, | |
| "loss": 12.1054, | |
| "step": 43850 | |
| }, | |
| { | |
| "epoch": 19.0786742732953, | |
| "grad_norm": 16.764787673950195, | |
| "learning_rate": 6.238579570363876e-06, | |
| "loss": 12.076, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 19.10040749796251, | |
| "grad_norm": 20.074857711791992, | |
| "learning_rate": 6.2341955282770715e-06, | |
| "loss": 12.0784, | |
| "step": 43950 | |
| }, | |
| { | |
| "epoch": 19.12214072262972, | |
| "grad_norm": 14.84661865234375, | |
| "learning_rate": 6.229811486190268e-06, | |
| "loss": 12.0933, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 19.14387394729693, | |
| "grad_norm": 17.447168350219727, | |
| "learning_rate": 6.225427444103463e-06, | |
| "loss": 12.0883, | |
| "step": 44050 | |
| }, | |
| { | |
| "epoch": 19.16560717196414, | |
| "grad_norm": 21.10520362854004, | |
| "learning_rate": 6.22104340201666e-06, | |
| "loss": 12.0839, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 19.18734039663135, | |
| "grad_norm": 17.273950576782227, | |
| "learning_rate": 6.2166593599298564e-06, | |
| "loss": 12.0517, | |
| "step": 44150 | |
| }, | |
| { | |
| "epoch": 19.20907362129856, | |
| "grad_norm": 11.963603019714355, | |
| "learning_rate": 6.212275317843052e-06, | |
| "loss": 12.1211, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 19.23080684596577, | |
| "grad_norm": 28.02683448791504, | |
| "learning_rate": 6.207891275756248e-06, | |
| "loss": 12.1012, | |
| "step": 44250 | |
| }, | |
| { | |
| "epoch": 19.252540070632982, | |
| "grad_norm": 18.750391006469727, | |
| "learning_rate": 6.2035072336694435e-06, | |
| "loss": 12.1257, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 19.27427329530019, | |
| "grad_norm": 13.95964241027832, | |
| "learning_rate": 6.19912319158264e-06, | |
| "loss": 12.1096, | |
| "step": 44350 | |
| }, | |
| { | |
| "epoch": 19.2960065199674, | |
| "grad_norm": 13.954286575317383, | |
| "learning_rate": 6.194739149495835e-06, | |
| "loss": 12.096, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 19.31773974463461, | |
| "grad_norm": 19.977340698242188, | |
| "learning_rate": 6.190355107409032e-06, | |
| "loss": 12.1119, | |
| "step": 44450 | |
| }, | |
| { | |
| "epoch": 19.33947296930182, | |
| "grad_norm": 20.84231948852539, | |
| "learning_rate": 6.1859710653222275e-06, | |
| "loss": 12.117, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 19.36120619396903, | |
| "grad_norm": 15.089071273803711, | |
| "learning_rate": 6.181587023235424e-06, | |
| "loss": 12.0531, | |
| "step": 44550 | |
| }, | |
| { | |
| "epoch": 19.38293941863624, | |
| "grad_norm": 19.530078887939453, | |
| "learning_rate": 6.17720298114862e-06, | |
| "loss": 12.0606, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 19.40467264330345, | |
| "grad_norm": 35.273353576660156, | |
| "learning_rate": 6.172818939061815e-06, | |
| "loss": 12.0703, | |
| "step": 44650 | |
| }, | |
| { | |
| "epoch": 19.42640586797066, | |
| "grad_norm": 27.611345291137695, | |
| "learning_rate": 6.168434896975012e-06, | |
| "loss": 12.0979, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 19.44813909263787, | |
| "grad_norm": 12.463072776794434, | |
| "learning_rate": 6.164050854888207e-06, | |
| "loss": 12.1186, | |
| "step": 44750 | |
| }, | |
| { | |
| "epoch": 19.46987231730508, | |
| "grad_norm": 13.169920921325684, | |
| "learning_rate": 6.159666812801403e-06, | |
| "loss": 12.1214, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 19.49160554197229, | |
| "grad_norm": 19.480173110961914, | |
| "learning_rate": 6.155282770714599e-06, | |
| "loss": 12.0956, | |
| "step": 44850 | |
| }, | |
| { | |
| "epoch": 19.5133387666395, | |
| "grad_norm": 12.746538162231445, | |
| "learning_rate": 6.150898728627796e-06, | |
| "loss": 12.0653, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 19.53507199130671, | |
| "grad_norm": 20.619016647338867, | |
| "learning_rate": 6.146514686540991e-06, | |
| "loss": 12.1175, | |
| "step": 44950 | |
| }, | |
| { | |
| "epoch": 19.55680521597392, | |
| "grad_norm": 19.82939910888672, | |
| "learning_rate": 6.142130644454187e-06, | |
| "loss": 12.126, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 19.57853844064113, | |
| "grad_norm": 14.061666488647461, | |
| "learning_rate": 6.1377466023673836e-06, | |
| "loss": 12.0754, | |
| "step": 45050 | |
| }, | |
| { | |
| "epoch": 19.60027166530834, | |
| "grad_norm": 13.00661849975586, | |
| "learning_rate": 6.133362560280579e-06, | |
| "loss": 12.0868, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 19.62200488997555, | |
| "grad_norm": 14.957731246948242, | |
| "learning_rate": 6.128978518193775e-06, | |
| "loss": 12.0564, | |
| "step": 45150 | |
| }, | |
| { | |
| "epoch": 19.64373811464276, | |
| "grad_norm": 14.701393127441406, | |
| "learning_rate": 6.124594476106971e-06, | |
| "loss": 12.1071, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 19.66547133930997, | |
| "grad_norm": 17.358051300048828, | |
| "learning_rate": 6.120210434020168e-06, | |
| "loss": 12.118, | |
| "step": 45250 | |
| }, | |
| { | |
| "epoch": 19.68720456397718, | |
| "grad_norm": 14.36281967163086, | |
| "learning_rate": 6.115826391933363e-06, | |
| "loss": 12.1246, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 19.70893778864439, | |
| "grad_norm": 30.517263412475586, | |
| "learning_rate": 6.111442349846559e-06, | |
| "loss": 12.1116, | |
| "step": 45350 | |
| }, | |
| { | |
| "epoch": 19.7306710133116, | |
| "grad_norm": 16.39494514465332, | |
| "learning_rate": 6.107058307759755e-06, | |
| "loss": 12.1275, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 19.75240423797881, | |
| "grad_norm": 15.935347557067871, | |
| "learning_rate": 6.102674265672951e-06, | |
| "loss": 12.0961, | |
| "step": 45450 | |
| }, | |
| { | |
| "epoch": 19.77413746264602, | |
| "grad_norm": 17.69158172607422, | |
| "learning_rate": 6.098290223586147e-06, | |
| "loss": 12.1242, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 19.79587068731323, | |
| "grad_norm": 17.6668758392334, | |
| "learning_rate": 6.0939061814993425e-06, | |
| "loss": 12.0872, | |
| "step": 45550 | |
| }, | |
| { | |
| "epoch": 19.81760391198044, | |
| "grad_norm": 16.675373077392578, | |
| "learning_rate": 6.08952213941254e-06, | |
| "loss": 12.0705, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 19.83933713664765, | |
| "grad_norm": 18.560033798217773, | |
| "learning_rate": 6.085138097325735e-06, | |
| "loss": 12.098, | |
| "step": 45650 | |
| }, | |
| { | |
| "epoch": 19.86107036131486, | |
| "grad_norm": 18.61153793334961, | |
| "learning_rate": 6.080754055238931e-06, | |
| "loss": 12.1017, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 19.88280358598207, | |
| "grad_norm": 23.753692626953125, | |
| "learning_rate": 6.076370013152127e-06, | |
| "loss": 12.101, | |
| "step": 45750 | |
| }, | |
| { | |
| "epoch": 19.90453681064928, | |
| "grad_norm": 12.80927848815918, | |
| "learning_rate": 6.071985971065323e-06, | |
| "loss": 12.116, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 19.92627003531649, | |
| "grad_norm": 22.449129104614258, | |
| "learning_rate": 6.067601928978518e-06, | |
| "loss": 12.1071, | |
| "step": 45850 | |
| }, | |
| { | |
| "epoch": 19.9480032599837, | |
| "grad_norm": 53.62459945678711, | |
| "learning_rate": 6.0632178868917145e-06, | |
| "loss": 12.1369, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 19.96973648465091, | |
| "grad_norm": 18.846603393554688, | |
| "learning_rate": 6.058833844804911e-06, | |
| "loss": 12.099, | |
| "step": 45950 | |
| }, | |
| { | |
| "epoch": 19.99146970931812, | |
| "grad_norm": 28.6248836517334, | |
| "learning_rate": 6.054449802718106e-06, | |
| "loss": 12.1369, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 20.013039934800325, | |
| "grad_norm": 17.0070858001709, | |
| "learning_rate": 6.050065760631303e-06, | |
| "loss": 11.9934, | |
| "step": 46050 | |
| }, | |
| { | |
| "epoch": 20.034773159467537, | |
| "grad_norm": 30.195463180541992, | |
| "learning_rate": 6.0456817185444986e-06, | |
| "loss": 12.0606, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 20.056506384134746, | |
| "grad_norm": 15.557343482971191, | |
| "learning_rate": 6.041297676457695e-06, | |
| "loss": 12.0555, | |
| "step": 46150 | |
| }, | |
| { | |
| "epoch": 20.078239608801955, | |
| "grad_norm": 20.677410125732422, | |
| "learning_rate": 6.03691363437089e-06, | |
| "loss": 12.0169, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 20.099972833469167, | |
| "grad_norm": 8.35476016998291, | |
| "learning_rate": 6.032529592284086e-06, | |
| "loss": 12.1085, | |
| "step": 46250 | |
| }, | |
| { | |
| "epoch": 20.121706058136375, | |
| "grad_norm": 21.85611915588379, | |
| "learning_rate": 6.028145550197282e-06, | |
| "loss": 12.0724, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 20.143439282803588, | |
| "grad_norm": 15.336892127990723, | |
| "learning_rate": 6.023761508110478e-06, | |
| "loss": 12.0564, | |
| "step": 46350 | |
| }, | |
| { | |
| "epoch": 20.165172507470796, | |
| "grad_norm": 14.198942184448242, | |
| "learning_rate": 6.019377466023675e-06, | |
| "loss": 12.0629, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 20.186905732138005, | |
| "grad_norm": 19.750280380249023, | |
| "learning_rate": 6.0149934239368705e-06, | |
| "loss": 12.0606, | |
| "step": 46450 | |
| }, | |
| { | |
| "epoch": 20.208638956805217, | |
| "grad_norm": 23.643993377685547, | |
| "learning_rate": 6.010609381850067e-06, | |
| "loss": 12.0624, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 20.230372181472426, | |
| "grad_norm": 15.32921028137207, | |
| "learning_rate": 6.006225339763262e-06, | |
| "loss": 12.0515, | |
| "step": 46550 | |
| }, | |
| { | |
| "epoch": 20.252105406139634, | |
| "grad_norm": 18.966848373413086, | |
| "learning_rate": 6.001841297676458e-06, | |
| "loss": 12.0743, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 20.273838630806846, | |
| "grad_norm": 15.885478973388672, | |
| "learning_rate": 5.997457255589654e-06, | |
| "loss": 12.068, | |
| "step": 46650 | |
| }, | |
| { | |
| "epoch": 20.295571855474055, | |
| "grad_norm": 25.81429672241211, | |
| "learning_rate": 5.99307321350285e-06, | |
| "loss": 12.1066, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 20.317305080141267, | |
| "grad_norm": 14.397024154663086, | |
| "learning_rate": 5.988689171416045e-06, | |
| "loss": 12.1048, | |
| "step": 46750 | |
| }, | |
| { | |
| "epoch": 20.339038304808476, | |
| "grad_norm": 38.001121520996094, | |
| "learning_rate": 5.984305129329242e-06, | |
| "loss": 12.0548, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 20.360771529475684, | |
| "grad_norm": 19.49797248840332, | |
| "learning_rate": 5.979921087242439e-06, | |
| "loss": 12.0747, | |
| "step": 46850 | |
| }, | |
| { | |
| "epoch": 20.382504754142897, | |
| "grad_norm": 13.953147888183594, | |
| "learning_rate": 5.975537045155634e-06, | |
| "loss": 12.0704, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 20.404237978810105, | |
| "grad_norm": 33.00684356689453, | |
| "learning_rate": 5.97115300306883e-06, | |
| "loss": 12.0737, | |
| "step": 46950 | |
| }, | |
| { | |
| "epoch": 20.425971203477317, | |
| "grad_norm": 14.40523910522461, | |
| "learning_rate": 5.966768960982026e-06, | |
| "loss": 12.0644, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 20.447704428144526, | |
| "grad_norm": 17.341297149658203, | |
| "learning_rate": 5.962384918895222e-06, | |
| "loss": 12.0375, | |
| "step": 47050 | |
| }, | |
| { | |
| "epoch": 20.469437652811735, | |
| "grad_norm": 11.500914573669434, | |
| "learning_rate": 5.958000876808417e-06, | |
| "loss": 12.0957, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 20.491170877478947, | |
| "grad_norm": 14.926876068115234, | |
| "learning_rate": 5.9536168347216135e-06, | |
| "loss": 12.0661, | |
| "step": 47150 | |
| }, | |
| { | |
| "epoch": 20.512904102146155, | |
| "grad_norm": 33.41230392456055, | |
| "learning_rate": 5.949232792634809e-06, | |
| "loss": 12.0683, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 20.534637326813368, | |
| "grad_norm": 11.592459678649902, | |
| "learning_rate": 5.944848750548006e-06, | |
| "loss": 12.0852, | |
| "step": 47250 | |
| }, | |
| { | |
| "epoch": 20.556370551480576, | |
| "grad_norm": 11.893900871276855, | |
| "learning_rate": 5.940464708461202e-06, | |
| "loss": 12.0927, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 20.578103776147785, | |
| "grad_norm": 19.416147232055664, | |
| "learning_rate": 5.936080666374398e-06, | |
| "loss": 12.0571, | |
| "step": 47350 | |
| }, | |
| { | |
| "epoch": 20.599837000814997, | |
| "grad_norm": 114.77404022216797, | |
| "learning_rate": 5.931696624287594e-06, | |
| "loss": 12.0694, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 20.621570225482206, | |
| "grad_norm": 22.660274505615234, | |
| "learning_rate": 5.927312582200789e-06, | |
| "loss": 12.0863, | |
| "step": 47450 | |
| }, | |
| { | |
| "epoch": 20.643303450149418, | |
| "grad_norm": 27.254777908325195, | |
| "learning_rate": 5.9229285401139855e-06, | |
| "loss": 12.0506, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 20.665036674816626, | |
| "grad_norm": 18.767820358276367, | |
| "learning_rate": 5.918544498027181e-06, | |
| "loss": 12.0552, | |
| "step": 47550 | |
| }, | |
| { | |
| "epoch": 20.686769899483835, | |
| "grad_norm": 12.995434761047363, | |
| "learning_rate": 5.914160455940378e-06, | |
| "loss": 12.0879, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 20.708503124151047, | |
| "grad_norm": 14.814035415649414, | |
| "learning_rate": 5.909776413853573e-06, | |
| "loss": 12.0959, | |
| "step": 47650 | |
| }, | |
| { | |
| "epoch": 20.730236348818256, | |
| "grad_norm": 25.315176010131836, | |
| "learning_rate": 5.90539237176677e-06, | |
| "loss": 12.0976, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 20.751969573485464, | |
| "grad_norm": 25.416751861572266, | |
| "learning_rate": 5.901008329679966e-06, | |
| "loss": 12.0528, | |
| "step": 47750 | |
| }, | |
| { | |
| "epoch": 20.773702798152677, | |
| "grad_norm": 16.93905258178711, | |
| "learning_rate": 5.896624287593161e-06, | |
| "loss": 12.0705, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 20.795436022819885, | |
| "grad_norm": 30.060588836669922, | |
| "learning_rate": 5.8922402455063574e-06, | |
| "loss": 12.0466, | |
| "step": 47850 | |
| }, | |
| { | |
| "epoch": 20.817169247487097, | |
| "grad_norm": 13.423187255859375, | |
| "learning_rate": 5.887856203419553e-06, | |
| "loss": 12.0681, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 20.838902472154306, | |
| "grad_norm": 13.607131004333496, | |
| "learning_rate": 5.883472161332749e-06, | |
| "loss": 12.0687, | |
| "step": 47950 | |
| }, | |
| { | |
| "epoch": 20.860635696821515, | |
| "grad_norm": 22.271543502807617, | |
| "learning_rate": 5.8790881192459444e-06, | |
| "loss": 12.1039, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 20.882368921488727, | |
| "grad_norm": 25.268817901611328, | |
| "learning_rate": 5.8747040771591415e-06, | |
| "loss": 12.09, | |
| "step": 48050 | |
| }, | |
| { | |
| "epoch": 20.904102146155935, | |
| "grad_norm": 15.665398597717285, | |
| "learning_rate": 5.870320035072338e-06, | |
| "loss": 12.0956, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 20.925835370823147, | |
| "grad_norm": 21.067293167114258, | |
| "learning_rate": 5.865935992985533e-06, | |
| "loss": 12.0499, | |
| "step": 48150 | |
| }, | |
| { | |
| "epoch": 20.947568595490356, | |
| "grad_norm": 22.776708602905273, | |
| "learning_rate": 5.861551950898729e-06, | |
| "loss": 12.0762, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 20.969301820157565, | |
| "grad_norm": 8.629790306091309, | |
| "learning_rate": 5.857167908811925e-06, | |
| "loss": 12.0614, | |
| "step": 48250 | |
| }, | |
| { | |
| "epoch": 20.991035044824777, | |
| "grad_norm": 15.550890922546387, | |
| "learning_rate": 5.852783866725121e-06, | |
| "loss": 12.0792, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 21.01260527030698, | |
| "grad_norm": 12.225948333740234, | |
| "learning_rate": 5.848399824638316e-06, | |
| "loss": 11.9374, | |
| "step": 48350 | |
| }, | |
| { | |
| "epoch": 21.034338494974193, | |
| "grad_norm": 14.14416790008545, | |
| "learning_rate": 5.8440157825515135e-06, | |
| "loss": 12.0157, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 21.056071719641402, | |
| "grad_norm": 17.12042236328125, | |
| "learning_rate": 5.839631740464709e-06, | |
| "loss": 12.0288, | |
| "step": 48450 | |
| }, | |
| { | |
| "epoch": 21.07780494430861, | |
| "grad_norm": 13.070446968078613, | |
| "learning_rate": 5.835247698377905e-06, | |
| "loss": 12.0528, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 21.099538168975823, | |
| "grad_norm": 22.833274841308594, | |
| "learning_rate": 5.830863656291101e-06, | |
| "loss": 12.0479, | |
| "step": 48550 | |
| }, | |
| { | |
| "epoch": 21.12127139364303, | |
| "grad_norm": 19.790773391723633, | |
| "learning_rate": 5.826479614204297e-06, | |
| "loss": 12.044, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 21.143004618310243, | |
| "grad_norm": 16.40357208251953, | |
| "learning_rate": 5.822095572117493e-06, | |
| "loss": 12.0299, | |
| "step": 48650 | |
| }, | |
| { | |
| "epoch": 21.164737842977452, | |
| "grad_norm": 13.88508129119873, | |
| "learning_rate": 5.817711530030688e-06, | |
| "loss": 12.0255, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 21.18647106764466, | |
| "grad_norm": 18.211301803588867, | |
| "learning_rate": 5.813327487943885e-06, | |
| "loss": 12.0283, | |
| "step": 48750 | |
| }, | |
| { | |
| "epoch": 21.208204292311873, | |
| "grad_norm": 13.291574478149414, | |
| "learning_rate": 5.80894344585708e-06, | |
| "loss": 12.0283, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 21.22993751697908, | |
| "grad_norm": 15.905344009399414, | |
| "learning_rate": 5.804559403770277e-06, | |
| "loss": 12.0526, | |
| "step": 48850 | |
| }, | |
| { | |
| "epoch": 21.251670741646294, | |
| "grad_norm": 11.572737693786621, | |
| "learning_rate": 5.800175361683472e-06, | |
| "loss": 12.0322, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 21.273403966313502, | |
| "grad_norm": 17.022083282470703, | |
| "learning_rate": 5.795791319596669e-06, | |
| "loss": 12.0426, | |
| "step": 48950 | |
| }, | |
| { | |
| "epoch": 21.29513719098071, | |
| "grad_norm": 23.31209945678711, | |
| "learning_rate": 5.791407277509865e-06, | |
| "loss": 12.0233, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 21.316870415647923, | |
| "grad_norm": 23.8966007232666, | |
| "learning_rate": 5.78702323542306e-06, | |
| "loss": 12.0526, | |
| "step": 49050 | |
| }, | |
| { | |
| "epoch": 21.33860364031513, | |
| "grad_norm": 17.35943031311035, | |
| "learning_rate": 5.7826391933362565e-06, | |
| "loss": 12.0379, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 21.36033686498234, | |
| "grad_norm": 33.082645416259766, | |
| "learning_rate": 5.778255151249452e-06, | |
| "loss": 12.0407, | |
| "step": 49150 | |
| }, | |
| { | |
| "epoch": 21.382070089649552, | |
| "grad_norm": 13.810714721679688, | |
| "learning_rate": 5.773871109162649e-06, | |
| "loss": 12.0193, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 21.40380331431676, | |
| "grad_norm": 15.985318183898926, | |
| "learning_rate": 5.769487067075844e-06, | |
| "loss": 12.0437, | |
| "step": 49250 | |
| }, | |
| { | |
| "epoch": 21.425536538983973, | |
| "grad_norm": 11.185006141662598, | |
| "learning_rate": 5.765103024989041e-06, | |
| "loss": 12.0347, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 21.44726976365118, | |
| "grad_norm": 13.088438034057617, | |
| "learning_rate": 5.760718982902236e-06, | |
| "loss": 12.036, | |
| "step": 49350 | |
| }, | |
| { | |
| "epoch": 21.46900298831839, | |
| "grad_norm": 35.933502197265625, | |
| "learning_rate": 5.756334940815432e-06, | |
| "loss": 12.0709, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 21.490736212985603, | |
| "grad_norm": 13.896368026733398, | |
| "learning_rate": 5.7519508987286285e-06, | |
| "loss": 12.0181, | |
| "step": 49450 | |
| }, | |
| { | |
| "epoch": 21.51246943765281, | |
| "grad_norm": 15.991681098937988, | |
| "learning_rate": 5.747566856641824e-06, | |
| "loss": 12.0274, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 21.534202662320023, | |
| "grad_norm": 21.10006332397461, | |
| "learning_rate": 5.743182814555021e-06, | |
| "loss": 12.0587, | |
| "step": 49550 | |
| }, | |
| { | |
| "epoch": 21.555935886987232, | |
| "grad_norm": 18.29193115234375, | |
| "learning_rate": 5.738798772468216e-06, | |
| "loss": 12.028, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 21.57766911165444, | |
| "grad_norm": 27.753482818603516, | |
| "learning_rate": 5.7344147303814125e-06, | |
| "loss": 11.9988, | |
| "step": 49650 | |
| }, | |
| { | |
| "epoch": 21.599402336321653, | |
| "grad_norm": 24.744070053100586, | |
| "learning_rate": 5.730030688294608e-06, | |
| "loss": 12.0743, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 21.62113556098886, | |
| "grad_norm": 21.145042419433594, | |
| "learning_rate": 5.725646646207804e-06, | |
| "loss": 12.0425, | |
| "step": 49750 | |
| }, | |
| { | |
| "epoch": 21.64286878565607, | |
| "grad_norm": 13.751763343811035, | |
| "learning_rate": 5.7212626041209996e-06, | |
| "loss": 12.077, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 21.664602010323282, | |
| "grad_norm": 31.52511978149414, | |
| "learning_rate": 5.716878562034196e-06, | |
| "loss": 12.0228, | |
| "step": 49850 | |
| }, | |
| { | |
| "epoch": 21.68633523499049, | |
| "grad_norm": 51.40691375732422, | |
| "learning_rate": 5.712494519947393e-06, | |
| "loss": 12.0487, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 21.708068459657703, | |
| "grad_norm": 12.909490585327148, | |
| "learning_rate": 5.708110477860587e-06, | |
| "loss": 12.0468, | |
| "step": 49950 | |
| }, | |
| { | |
| "epoch": 21.72980168432491, | |
| "grad_norm": 14.6589937210083, | |
| "learning_rate": 5.7037264357737845e-06, | |
| "loss": 12.0168, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 21.72980168432491, | |
| "eval_cer": 0.07568846975176824, | |
| "eval_loss": 2.362048864364624, | |
| "eval_runtime": 397.6775, | |
| "eval_samples_per_second": 13.594, | |
| "eval_steps_per_second": 3.4, | |
| "eval_wer": 0.22898414193750982, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 21.75153490899212, | |
| "grad_norm": 20.892807006835938, | |
| "learning_rate": 5.69934239368698e-06, | |
| "loss": 12.021, | |
| "step": 50050 | |
| }, | |
| { | |
| "epoch": 21.773268133659332, | |
| "grad_norm": 14.854979515075684, | |
| "learning_rate": 5.694958351600176e-06, | |
| "loss": 12.0355, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 21.79500135832654, | |
| "grad_norm": 18.140365600585938, | |
| "learning_rate": 5.6905743095133715e-06, | |
| "loss": 12.0173, | |
| "step": 50150 | |
| }, | |
| { | |
| "epoch": 21.816734582993753, | |
| "grad_norm": 17.70104217529297, | |
| "learning_rate": 5.686190267426568e-06, | |
| "loss": 12.0801, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 21.83846780766096, | |
| "grad_norm": 18.51262092590332, | |
| "learning_rate": 5.681806225339763e-06, | |
| "loss": 12.0334, | |
| "step": 50250 | |
| }, | |
| { | |
| "epoch": 21.86020103232817, | |
| "grad_norm": 15.687026023864746, | |
| "learning_rate": 5.677422183252959e-06, | |
| "loss": 12.0553, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 21.881934256995383, | |
| "grad_norm": 19.184951782226562, | |
| "learning_rate": 5.6730381411661564e-06, | |
| "loss": 12.0409, | |
| "step": 50350 | |
| }, | |
| { | |
| "epoch": 21.90366748166259, | |
| "grad_norm": 18.097457885742188, | |
| "learning_rate": 5.668654099079352e-06, | |
| "loss": 12.058, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 21.925400706329803, | |
| "grad_norm": 26.270936965942383, | |
| "learning_rate": 5.664270056992548e-06, | |
| "loss": 12.064, | |
| "step": 50450 | |
| }, | |
| { | |
| "epoch": 21.947133930997012, | |
| "grad_norm": 26.288280487060547, | |
| "learning_rate": 5.6598860149057434e-06, | |
| "loss": 12.034, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 21.96886715566422, | |
| "grad_norm": 10.051491737365723, | |
| "learning_rate": 5.65550197281894e-06, | |
| "loss": 12.0676, | |
| "step": 50550 | |
| }, | |
| { | |
| "epoch": 21.990600380331433, | |
| "grad_norm": 15.91609001159668, | |
| "learning_rate": 5.651117930732135e-06, | |
| "loss": 12.0488, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 22.012170605813637, | |
| "grad_norm": 16.341890335083008, | |
| "learning_rate": 5.646733888645331e-06, | |
| "loss": 11.9307, | |
| "step": 50650 | |
| }, | |
| { | |
| "epoch": 22.03390383048085, | |
| "grad_norm": 17.389766693115234, | |
| "learning_rate": 5.642349846558527e-06, | |
| "loss": 11.9959, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 22.055637055148058, | |
| "grad_norm": 15.45628547668457, | |
| "learning_rate": 5.637965804471724e-06, | |
| "loss": 12.0322, | |
| "step": 50750 | |
| }, | |
| { | |
| "epoch": 22.077370279815266, | |
| "grad_norm": 14.2662935256958, | |
| "learning_rate": 5.63358176238492e-06, | |
| "loss": 12.0679, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 22.09910350448248, | |
| "grad_norm": 18.397008895874023, | |
| "learning_rate": 5.629197720298115e-06, | |
| "loss": 12.0007, | |
| "step": 50850 | |
| }, | |
| { | |
| "epoch": 22.120836729149687, | |
| "grad_norm": 14.498343467712402, | |
| "learning_rate": 5.624813678211312e-06, | |
| "loss": 11.9903, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 22.1425699538169, | |
| "grad_norm": 26.300201416015625, | |
| "learning_rate": 5.620429636124507e-06, | |
| "loss": 12.0488, | |
| "step": 50950 | |
| }, | |
| { | |
| "epoch": 22.164303178484108, | |
| "grad_norm": 17.42373275756836, | |
| "learning_rate": 5.616045594037703e-06, | |
| "loss": 12.0156, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 22.186036403151316, | |
| "grad_norm": 13.430180549621582, | |
| "learning_rate": 5.611661551950899e-06, | |
| "loss": 12.0147, | |
| "step": 51050 | |
| }, | |
| { | |
| "epoch": 22.20776962781853, | |
| "grad_norm": 8.827760696411133, | |
| "learning_rate": 5.607277509864095e-06, | |
| "loss": 12.0464, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 22.229502852485737, | |
| "grad_norm": 13.834342002868652, | |
| "learning_rate": 5.60289346777729e-06, | |
| "loss": 11.9739, | |
| "step": 51150 | |
| }, | |
| { | |
| "epoch": 22.251236077152946, | |
| "grad_norm": 15.042898178100586, | |
| "learning_rate": 5.598509425690487e-06, | |
| "loss": 12.0098, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 22.272969301820158, | |
| "grad_norm": 19.06934356689453, | |
| "learning_rate": 5.5941253836036836e-06, | |
| "loss": 11.9855, | |
| "step": 51250 | |
| }, | |
| { | |
| "epoch": 22.294702526487367, | |
| "grad_norm": 11.361977577209473, | |
| "learning_rate": 5.589741341516879e-06, | |
| "loss": 12.0193, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 22.31643575115458, | |
| "grad_norm": 19.977092742919922, | |
| "learning_rate": 5.585357299430075e-06, | |
| "loss": 12.0072, | |
| "step": 51350 | |
| }, | |
| { | |
| "epoch": 22.338168975821787, | |
| "grad_norm": 18.312875747680664, | |
| "learning_rate": 5.5809732573432706e-06, | |
| "loss": 12.0161, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 22.359902200488996, | |
| "grad_norm": 10.536518096923828, | |
| "learning_rate": 5.576589215256467e-06, | |
| "loss": 12.0285, | |
| "step": 51450 | |
| }, | |
| { | |
| "epoch": 22.381635425156208, | |
| "grad_norm": 15.011421203613281, | |
| "learning_rate": 5.572205173169662e-06, | |
| "loss": 11.9876, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 22.403368649823417, | |
| "grad_norm": 17.05405616760254, | |
| "learning_rate": 5.567821131082859e-06, | |
| "loss": 12.0425, | |
| "step": 51550 | |
| }, | |
| { | |
| "epoch": 22.42510187449063, | |
| "grad_norm": 16.87340545654297, | |
| "learning_rate": 5.563437088996055e-06, | |
| "loss": 12.0218, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 22.446835099157838, | |
| "grad_norm": 19.586755752563477, | |
| "learning_rate": 5.559053046909251e-06, | |
| "loss": 12.032, | |
| "step": 51650 | |
| }, | |
| { | |
| "epoch": 22.468568323825046, | |
| "grad_norm": 27.009822845458984, | |
| "learning_rate": 5.554669004822447e-06, | |
| "loss": 12.0083, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 22.49030154849226, | |
| "grad_norm": 11.635884284973145, | |
| "learning_rate": 5.5502849627356425e-06, | |
| "loss": 12.025, | |
| "step": 51750 | |
| }, | |
| { | |
| "epoch": 22.512034773159467, | |
| "grad_norm": 17.531131744384766, | |
| "learning_rate": 5.545900920648839e-06, | |
| "loss": 12.0123, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 22.53376799782668, | |
| "grad_norm": 10.203145980834961, | |
| "learning_rate": 5.541516878562034e-06, | |
| "loss": 12.0013, | |
| "step": 51850 | |
| }, | |
| { | |
| "epoch": 22.555501222493888, | |
| "grad_norm": 19.1767635345459, | |
| "learning_rate": 5.537132836475231e-06, | |
| "loss": 12.0279, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 22.577234447161096, | |
| "grad_norm": 31.68284034729004, | |
| "learning_rate": 5.532748794388426e-06, | |
| "loss": 12.0053, | |
| "step": 51950 | |
| }, | |
| { | |
| "epoch": 22.59896767182831, | |
| "grad_norm": 10.772562980651855, | |
| "learning_rate": 5.528364752301623e-06, | |
| "loss": 12.0153, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 22.620700896495517, | |
| "grad_norm": 99.19184875488281, | |
| "learning_rate": 5.523980710214818e-06, | |
| "loss": 12.0059, | |
| "step": 52050 | |
| }, | |
| { | |
| "epoch": 22.642434121162726, | |
| "grad_norm": 20.737354278564453, | |
| "learning_rate": 5.5195966681280145e-06, | |
| "loss": 12.0263, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 22.664167345829938, | |
| "grad_norm": 15.494745254516602, | |
| "learning_rate": 5.515212626041211e-06, | |
| "loss": 12.0129, | |
| "step": 52150 | |
| }, | |
| { | |
| "epoch": 22.685900570497147, | |
| "grad_norm": 34.782100677490234, | |
| "learning_rate": 5.510828583954406e-06, | |
| "loss": 12.0497, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 22.70763379516436, | |
| "grad_norm": 18.235090255737305, | |
| "learning_rate": 5.506444541867602e-06, | |
| "loss": 11.9992, | |
| "step": 52250 | |
| }, | |
| { | |
| "epoch": 22.729367019831567, | |
| "grad_norm": 27.689912796020508, | |
| "learning_rate": 5.502060499780798e-06, | |
| "loss": 12.0023, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 22.751100244498776, | |
| "grad_norm": 18.36990737915039, | |
| "learning_rate": 5.497676457693995e-06, | |
| "loss": 12.0056, | |
| "step": 52350 | |
| }, | |
| { | |
| "epoch": 22.772833469165988, | |
| "grad_norm": 18.038314819335938, | |
| "learning_rate": 5.49329241560719e-06, | |
| "loss": 12.0212, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 22.794566693833197, | |
| "grad_norm": 8.06163501739502, | |
| "learning_rate": 5.488908373520386e-06, | |
| "loss": 12.0274, | |
| "step": 52450 | |
| }, | |
| { | |
| "epoch": 22.81629991850041, | |
| "grad_norm": 15.676831245422363, | |
| "learning_rate": 5.484524331433583e-06, | |
| "loss": 12.0148, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 22.838033143167618, | |
| "grad_norm": 24.74848747253418, | |
| "learning_rate": 5.480140289346778e-06, | |
| "loss": 12.0186, | |
| "step": 52550 | |
| }, | |
| { | |
| "epoch": 22.859766367834826, | |
| "grad_norm": 10.006168365478516, | |
| "learning_rate": 5.475756247259974e-06, | |
| "loss": 12.0071, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 22.88149959250204, | |
| "grad_norm": 10.135807991027832, | |
| "learning_rate": 5.47137220517317e-06, | |
| "loss": 12.0224, | |
| "step": 52650 | |
| }, | |
| { | |
| "epoch": 22.903232817169247, | |
| "grad_norm": 16.03304100036621, | |
| "learning_rate": 5.466988163086367e-06, | |
| "loss": 12.0253, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 22.92496604183646, | |
| "grad_norm": 15.307913780212402, | |
| "learning_rate": 5.462604120999562e-06, | |
| "loss": 12.0234, | |
| "step": 52750 | |
| }, | |
| { | |
| "epoch": 22.946699266503668, | |
| "grad_norm": 27.5895938873291, | |
| "learning_rate": 5.458220078912758e-06, | |
| "loss": 12.0162, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 22.968432491170876, | |
| "grad_norm": 14.608256340026855, | |
| "learning_rate": 5.453836036825954e-06, | |
| "loss": 12.0005, | |
| "step": 52850 | |
| }, | |
| { | |
| "epoch": 22.99016571583809, | |
| "grad_norm": 41.10546112060547, | |
| "learning_rate": 5.44945199473915e-06, | |
| "loss": 12.0735, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 23.011735941320293, | |
| "grad_norm": 12.675127983093262, | |
| "learning_rate": 5.445067952652346e-06, | |
| "loss": 11.9152, | |
| "step": 52950 | |
| }, | |
| { | |
| "epoch": 23.033469165987505, | |
| "grad_norm": 16.779767990112305, | |
| "learning_rate": 5.440683910565542e-06, | |
| "loss": 11.9743, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 23.055202390654713, | |
| "grad_norm": 29.24107551574707, | |
| "learning_rate": 5.436299868478739e-06, | |
| "loss": 11.9844, | |
| "step": 53050 | |
| }, | |
| { | |
| "epoch": 23.076935615321922, | |
| "grad_norm": 15.517463684082031, | |
| "learning_rate": 5.431915826391933e-06, | |
| "loss": 12.0084, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 23.098668839989134, | |
| "grad_norm": 14.068320274353027, | |
| "learning_rate": 5.42753178430513e-06, | |
| "loss": 11.982, | |
| "step": 53150 | |
| }, | |
| { | |
| "epoch": 23.120402064656343, | |
| "grad_norm": 13.296953201293945, | |
| "learning_rate": 5.423147742218326e-06, | |
| "loss": 12.0076, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 23.142135289323555, | |
| "grad_norm": 11.365141868591309, | |
| "learning_rate": 5.418763700131522e-06, | |
| "loss": 11.9825, | |
| "step": 53250 | |
| }, | |
| { | |
| "epoch": 23.163868513990764, | |
| "grad_norm": 11.649621963500977, | |
| "learning_rate": 5.414379658044717e-06, | |
| "loss": 11.9874, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 23.185601738657972, | |
| "grad_norm": 12.506479263305664, | |
| "learning_rate": 5.4099956159579135e-06, | |
| "loss": 12.0203, | |
| "step": 53350 | |
| }, | |
| { | |
| "epoch": 23.207334963325184, | |
| "grad_norm": 26.387269973754883, | |
| "learning_rate": 5.40561157387111e-06, | |
| "loss": 11.9718, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 23.229068187992393, | |
| "grad_norm": 30.277488708496094, | |
| "learning_rate": 5.401227531784305e-06, | |
| "loss": 11.9922, | |
| "step": 53450 | |
| }, | |
| { | |
| "epoch": 23.2508014126596, | |
| "grad_norm": 16.27001953125, | |
| "learning_rate": 5.396843489697502e-06, | |
| "loss": 12.0103, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 23.272534637326814, | |
| "grad_norm": 10.601898193359375, | |
| "learning_rate": 5.392459447610698e-06, | |
| "loss": 11.982, | |
| "step": 53550 | |
| }, | |
| { | |
| "epoch": 23.294267861994022, | |
| "grad_norm": 16.928091049194336, | |
| "learning_rate": 5.388075405523894e-06, | |
| "loss": 11.9921, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 23.316001086661235, | |
| "grad_norm": 17.180408477783203, | |
| "learning_rate": 5.383691363437089e-06, | |
| "loss": 11.9681, | |
| "step": 53650 | |
| }, | |
| { | |
| "epoch": 23.337734311328443, | |
| "grad_norm": 9.645658493041992, | |
| "learning_rate": 5.3793073213502855e-06, | |
| "loss": 11.9921, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 23.359467535995652, | |
| "grad_norm": 7.888517379760742, | |
| "learning_rate": 5.374923279263481e-06, | |
| "loss": 11.9957, | |
| "step": 53750 | |
| }, | |
| { | |
| "epoch": 23.381200760662864, | |
| "grad_norm": 23.52006721496582, | |
| "learning_rate": 5.370539237176677e-06, | |
| "loss": 11.9913, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 23.402933985330073, | |
| "grad_norm": 17.327842712402344, | |
| "learning_rate": 5.366155195089874e-06, | |
| "loss": 11.985, | |
| "step": 53850 | |
| }, | |
| { | |
| "epoch": 23.424667209997285, | |
| "grad_norm": 15.461244583129883, | |
| "learning_rate": 5.3617711530030696e-06, | |
| "loss": 11.9856, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 23.446400434664493, | |
| "grad_norm": 10.2888822555542, | |
| "learning_rate": 5.357387110916266e-06, | |
| "loss": 12.0014, | |
| "step": 53950 | |
| }, | |
| { | |
| "epoch": 23.468133659331702, | |
| "grad_norm": 16.063997268676758, | |
| "learning_rate": 5.353003068829461e-06, | |
| "loss": 11.997, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 23.489866883998914, | |
| "grad_norm": 28.185026168823242, | |
| "learning_rate": 5.3486190267426574e-06, | |
| "loss": 11.9855, | |
| "step": 54050 | |
| }, | |
| { | |
| "epoch": 23.511600108666123, | |
| "grad_norm": 16.92442512512207, | |
| "learning_rate": 5.344234984655853e-06, | |
| "loss": 12.0206, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 23.533333333333335, | |
| "grad_norm": 6.245467662811279, | |
| "learning_rate": 5.339850942569049e-06, | |
| "loss": 11.9748, | |
| "step": 54150 | |
| }, | |
| { | |
| "epoch": 23.555066558000544, | |
| "grad_norm": 14.348546981811523, | |
| "learning_rate": 5.3354669004822444e-06, | |
| "loss": 11.9609, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 23.576799782667752, | |
| "grad_norm": 10.864014625549316, | |
| "learning_rate": 5.331082858395441e-06, | |
| "loss": 11.9947, | |
| "step": 54250 | |
| }, | |
| { | |
| "epoch": 23.598533007334964, | |
| "grad_norm": 8.79773998260498, | |
| "learning_rate": 5.326698816308638e-06, | |
| "loss": 12.0031, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 23.620266232002173, | |
| "grad_norm": 19.14083480834961, | |
| "learning_rate": 5.322314774221833e-06, | |
| "loss": 11.9738, | |
| "step": 54350 | |
| }, | |
| { | |
| "epoch": 23.64199945666938, | |
| "grad_norm": 10.049248695373535, | |
| "learning_rate": 5.317930732135029e-06, | |
| "loss": 11.9514, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 23.663732681336594, | |
| "grad_norm": 11.119285583496094, | |
| "learning_rate": 5.313546690048225e-06, | |
| "loss": 11.9914, | |
| "step": 54450 | |
| }, | |
| { | |
| "epoch": 23.685465906003802, | |
| "grad_norm": 8.268950462341309, | |
| "learning_rate": 5.309162647961421e-06, | |
| "loss": 11.994, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 23.707199130671015, | |
| "grad_norm": 14.429734230041504, | |
| "learning_rate": 5.304778605874616e-06, | |
| "loss": 11.975, | |
| "step": 54550 | |
| }, | |
| { | |
| "epoch": 23.728932355338223, | |
| "grad_norm": 15.248434066772461, | |
| "learning_rate": 5.300394563787813e-06, | |
| "loss": 11.9967, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 23.750665580005432, | |
| "grad_norm": 27.12610626220703, | |
| "learning_rate": 5.296010521701008e-06, | |
| "loss": 12.0066, | |
| "step": 54650 | |
| }, | |
| { | |
| "epoch": 23.772398804672644, | |
| "grad_norm": 11.624201774597168, | |
| "learning_rate": 5.291626479614205e-06, | |
| "loss": 11.9857, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 23.794132029339853, | |
| "grad_norm": 38.6632194519043, | |
| "learning_rate": 5.287242437527401e-06, | |
| "loss": 12.0068, | |
| "step": 54750 | |
| }, | |
| { | |
| "epoch": 23.815865254007065, | |
| "grad_norm": 21.433034896850586, | |
| "learning_rate": 5.282858395440597e-06, | |
| "loss": 11.9545, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 23.837598478674273, | |
| "grad_norm": 12.88279914855957, | |
| "learning_rate": 5.278474353353793e-06, | |
| "loss": 11.9675, | |
| "step": 54850 | |
| }, | |
| { | |
| "epoch": 23.859331703341482, | |
| "grad_norm": 11.213829040527344, | |
| "learning_rate": 5.274090311266988e-06, | |
| "loss": 11.9907, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 23.881064928008694, | |
| "grad_norm": 32.87601852416992, | |
| "learning_rate": 5.2697062691801846e-06, | |
| "loss": 12.0041, | |
| "step": 54950 | |
| }, | |
| { | |
| "epoch": 23.902798152675903, | |
| "grad_norm": 12.214354515075684, | |
| "learning_rate": 5.26532222709338e-06, | |
| "loss": 12.0013, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 23.924531377343115, | |
| "grad_norm": 18.823352813720703, | |
| "learning_rate": 5.260938185006577e-06, | |
| "loss": 12.0205, | |
| "step": 55050 | |
| }, | |
| { | |
| "epoch": 23.946264602010324, | |
| "grad_norm": 11.764278411865234, | |
| "learning_rate": 5.2565541429197716e-06, | |
| "loss": 12.0045, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 23.967997826677532, | |
| "grad_norm": 33.26872253417969, | |
| "learning_rate": 5.252170100832969e-06, | |
| "loss": 11.9852, | |
| "step": 55150 | |
| }, | |
| { | |
| "epoch": 23.989731051344744, | |
| "grad_norm": 20.137388229370117, | |
| "learning_rate": 5.247786058746165e-06, | |
| "loss": 12.0023, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 24.01130127682695, | |
| "grad_norm": 13.359118461608887, | |
| "learning_rate": 5.24340201665936e-06, | |
| "loss": 11.8893, | |
| "step": 55250 | |
| }, | |
| { | |
| "epoch": 24.03303450149416, | |
| "grad_norm": 12.654318809509277, | |
| "learning_rate": 5.2390179745725565e-06, | |
| "loss": 11.9913, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 24.05476772616137, | |
| "grad_norm": 12.723244667053223, | |
| "learning_rate": 5.234633932485752e-06, | |
| "loss": 11.9835, | |
| "step": 55350 | |
| }, | |
| { | |
| "epoch": 24.076500950828578, | |
| "grad_norm": 10.007128715515137, | |
| "learning_rate": 5.230249890398948e-06, | |
| "loss": 11.9639, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 24.09823417549579, | |
| "grad_norm": 24.932937622070312, | |
| "learning_rate": 5.2258658483121435e-06, | |
| "loss": 11.9567, | |
| "step": 55450 | |
| }, | |
| { | |
| "epoch": 24.119967400163, | |
| "grad_norm": 13.288817405700684, | |
| "learning_rate": 5.221481806225341e-06, | |
| "loss": 11.9896, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 24.14170062483021, | |
| "grad_norm": 24.153135299682617, | |
| "learning_rate": 5.217097764138536e-06, | |
| "loss": 11.9458, | |
| "step": 55550 | |
| }, | |
| { | |
| "epoch": 24.16343384949742, | |
| "grad_norm": 21.456832885742188, | |
| "learning_rate": 5.212713722051732e-06, | |
| "loss": 11.9637, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 24.185167074164628, | |
| "grad_norm": 11.885467529296875, | |
| "learning_rate": 5.2083296799649284e-06, | |
| "loss": 11.9763, | |
| "step": 55650 | |
| }, | |
| { | |
| "epoch": 24.20690029883184, | |
| "grad_norm": 18.14926528930664, | |
| "learning_rate": 5.203945637878124e-06, | |
| "loss": 11.9793, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 24.22863352349905, | |
| "grad_norm": 10.626521110534668, | |
| "learning_rate": 5.19956159579132e-06, | |
| "loss": 11.9717, | |
| "step": 55750 | |
| }, | |
| { | |
| "epoch": 24.250366748166257, | |
| "grad_norm": 18.046018600463867, | |
| "learning_rate": 5.1951775537045155e-06, | |
| "loss": 11.9679, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 24.27209997283347, | |
| "grad_norm": 19.871051788330078, | |
| "learning_rate": 5.1907935116177125e-06, | |
| "loss": 11.9655, | |
| "step": 55850 | |
| }, | |
| { | |
| "epoch": 24.29383319750068, | |
| "grad_norm": 26.990354537963867, | |
| "learning_rate": 5.186409469530908e-06, | |
| "loss": 11.9776, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 24.31556642216789, | |
| "grad_norm": 13.593362808227539, | |
| "learning_rate": 5.182025427444104e-06, | |
| "loss": 11.9765, | |
| "step": 55950 | |
| }, | |
| { | |
| "epoch": 24.3372996468351, | |
| "grad_norm": 21.99699592590332, | |
| "learning_rate": 5.1776413853572995e-06, | |
| "loss": 11.9698, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 24.359032871502308, | |
| "grad_norm": 17.28653335571289, | |
| "learning_rate": 5.173257343270496e-06, | |
| "loss": 11.9668, | |
| "step": 56050 | |
| }, | |
| { | |
| "epoch": 24.38076609616952, | |
| "grad_norm": 46.031005859375, | |
| "learning_rate": 5.168873301183692e-06, | |
| "loss": 11.9729, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 24.40249932083673, | |
| "grad_norm": 32.24114227294922, | |
| "learning_rate": 5.164489259096887e-06, | |
| "loss": 11.9543, | |
| "step": 56150 | |
| }, | |
| { | |
| "epoch": 24.42423254550394, | |
| "grad_norm": 32.9847297668457, | |
| "learning_rate": 5.160105217010084e-06, | |
| "loss": 11.9631, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 24.44596577017115, | |
| "grad_norm": 28.538616180419922, | |
| "learning_rate": 5.155721174923279e-06, | |
| "loss": 11.9914, | |
| "step": 56250 | |
| }, | |
| { | |
| "epoch": 24.467698994838358, | |
| "grad_norm": 10.636951446533203, | |
| "learning_rate": 5.151337132836476e-06, | |
| "loss": 11.9533, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 24.48943221950557, | |
| "grad_norm": 18.541378021240234, | |
| "learning_rate": 5.1469530907496715e-06, | |
| "loss": 11.9635, | |
| "step": 56350 | |
| }, | |
| { | |
| "epoch": 24.51116544417278, | |
| "grad_norm": 15.477215766906738, | |
| "learning_rate": 5.142569048662868e-06, | |
| "loss": 11.973, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 24.53289866883999, | |
| "grad_norm": 8.257668495178223, | |
| "learning_rate": 5.138185006576063e-06, | |
| "loss": 11.9541, | |
| "step": 56450 | |
| }, | |
| { | |
| "epoch": 24.5546318935072, | |
| "grad_norm": 12.362825393676758, | |
| "learning_rate": 5.133800964489259e-06, | |
| "loss": 11.9543, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 24.576365118174408, | |
| "grad_norm": 18.897563934326172, | |
| "learning_rate": 5.1294169224024556e-06, | |
| "loss": 11.9828, | |
| "step": 56550 | |
| }, | |
| { | |
| "epoch": 24.59809834284162, | |
| "grad_norm": 22.83639907836914, | |
| "learning_rate": 5.125032880315651e-06, | |
| "loss": 11.9907, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 24.61983156750883, | |
| "grad_norm": 26.016014099121094, | |
| "learning_rate": 5.120648838228848e-06, | |
| "loss": 11.9798, | |
| "step": 56650 | |
| }, | |
| { | |
| "epoch": 24.641564792176037, | |
| "grad_norm": 7.745444297790527, | |
| "learning_rate": 5.1162647961420434e-06, | |
| "loss": 12.0051, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 24.66329801684325, | |
| "grad_norm": 14.89815616607666, | |
| "learning_rate": 5.11188075405524e-06, | |
| "loss": 11.9648, | |
| "step": 56750 | |
| }, | |
| { | |
| "epoch": 24.68503124151046, | |
| "grad_norm": 13.663446426391602, | |
| "learning_rate": 5.107496711968435e-06, | |
| "loss": 11.9961, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 24.70676446617767, | |
| "grad_norm": 15.474350929260254, | |
| "learning_rate": 5.103112669881631e-06, | |
| "loss": 11.9687, | |
| "step": 56850 | |
| }, | |
| { | |
| "epoch": 24.72849769084488, | |
| "grad_norm": 32.1036376953125, | |
| "learning_rate": 5.0987286277948275e-06, | |
| "loss": 12.0102, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 24.750230915512088, | |
| "grad_norm": 21.14737892150879, | |
| "learning_rate": 5.094344585708023e-06, | |
| "loss": 11.9577, | |
| "step": 56950 | |
| }, | |
| { | |
| "epoch": 24.7719641401793, | |
| "grad_norm": 26.35091781616211, | |
| "learning_rate": 5.08996054362122e-06, | |
| "loss": 11.9979, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 24.79369736484651, | |
| "grad_norm": 40.08930587768555, | |
| "learning_rate": 5.085576501534415e-06, | |
| "loss": 11.956, | |
| "step": 57050 | |
| }, | |
| { | |
| "epoch": 24.81543058951372, | |
| "grad_norm": 21.480506896972656, | |
| "learning_rate": 5.081192459447612e-06, | |
| "loss": 11.9701, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 24.83716381418093, | |
| "grad_norm": 12.940244674682617, | |
| "learning_rate": 5.076808417360807e-06, | |
| "loss": 11.9642, | |
| "step": 57150 | |
| }, | |
| { | |
| "epoch": 24.858897038848138, | |
| "grad_norm": 14.284876823425293, | |
| "learning_rate": 5.072424375274003e-06, | |
| "loss": 11.9604, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 24.88063026351535, | |
| "grad_norm": 9.244315147399902, | |
| "learning_rate": 5.068040333187199e-06, | |
| "loss": 11.9762, | |
| "step": 57250 | |
| }, | |
| { | |
| "epoch": 24.90236348818256, | |
| "grad_norm": 21.19985580444336, | |
| "learning_rate": 5.063656291100395e-06, | |
| "loss": 11.9824, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 24.92409671284977, | |
| "grad_norm": 20.60128402709961, | |
| "learning_rate": 5.059272249013591e-06, | |
| "loss": 11.9888, | |
| "step": 57350 | |
| }, | |
| { | |
| "epoch": 24.94582993751698, | |
| "grad_norm": 22.071367263793945, | |
| "learning_rate": 5.0548882069267865e-06, | |
| "loss": 11.9722, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 24.967563162184188, | |
| "grad_norm": 19.631771087646484, | |
| "learning_rate": 5.0505041648399836e-06, | |
| "loss": 11.9691, | |
| "step": 57450 | |
| }, | |
| { | |
| "epoch": 24.9892963868514, | |
| "grad_norm": 11.300741195678711, | |
| "learning_rate": 5.046120122753179e-06, | |
| "loss": 11.9764, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 25.010866612333604, | |
| "grad_norm": 28.297489166259766, | |
| "learning_rate": 5.041736080666375e-06, | |
| "loss": 11.8502, | |
| "step": 57550 | |
| }, | |
| { | |
| "epoch": 25.032599837000816, | |
| "grad_norm": 19.681974411010742, | |
| "learning_rate": 5.0373520385795706e-06, | |
| "loss": 11.9632, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 25.054333061668025, | |
| "grad_norm": 9.978123664855957, | |
| "learning_rate": 5.032967996492767e-06, | |
| "loss": 11.9736, | |
| "step": 57650 | |
| }, | |
| { | |
| "epoch": 25.076066286335234, | |
| "grad_norm": 22.59627342224121, | |
| "learning_rate": 5.028583954405962e-06, | |
| "loss": 11.9202, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 25.097799511002446, | |
| "grad_norm": 15.177567481994629, | |
| "learning_rate": 5.024199912319158e-06, | |
| "loss": 11.9432, | |
| "step": 57750 | |
| }, | |
| { | |
| "epoch": 25.119532735669654, | |
| "grad_norm": 11.103377342224121, | |
| "learning_rate": 5.0198158702323555e-06, | |
| "loss": 11.933, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 25.141265960336867, | |
| "grad_norm": 15.902565956115723, | |
| "learning_rate": 5.015431828145551e-06, | |
| "loss": 11.9292, | |
| "step": 57850 | |
| }, | |
| { | |
| "epoch": 25.162999185004075, | |
| "grad_norm": 21.157047271728516, | |
| "learning_rate": 5.011047786058747e-06, | |
| "loss": 11.9355, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 25.184732409671284, | |
| "grad_norm": 15.4396333694458, | |
| "learning_rate": 5.0066637439719425e-06, | |
| "loss": 11.9417, | |
| "step": 57950 | |
| }, | |
| { | |
| "epoch": 25.206465634338496, | |
| "grad_norm": 17.689163208007812, | |
| "learning_rate": 5.002279701885139e-06, | |
| "loss": 11.9411, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 25.228198859005705, | |
| "grad_norm": 20.323307037353516, | |
| "learning_rate": 4.997895659798335e-06, | |
| "loss": 11.9537, | |
| "step": 58050 | |
| }, | |
| { | |
| "epoch": 25.249932083672913, | |
| "grad_norm": 11.056938171386719, | |
| "learning_rate": 4.99351161771153e-06, | |
| "loss": 11.9289, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 25.271665308340125, | |
| "grad_norm": 13.280766487121582, | |
| "learning_rate": 4.989127575624727e-06, | |
| "loss": 11.9525, | |
| "step": 58150 | |
| }, | |
| { | |
| "epoch": 25.293398533007334, | |
| "grad_norm": 19.21057891845703, | |
| "learning_rate": 4.984743533537922e-06, | |
| "loss": 11.9329, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 25.315131757674546, | |
| "grad_norm": 11.26260757446289, | |
| "learning_rate": 4.980359491451118e-06, | |
| "loss": 11.9324, | |
| "step": 58250 | |
| }, | |
| { | |
| "epoch": 25.336864982341755, | |
| "grad_norm": 23.691085815429688, | |
| "learning_rate": 4.9759754493643145e-06, | |
| "loss": 11.9377, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 25.358598207008963, | |
| "grad_norm": 14.544368743896484, | |
| "learning_rate": 4.97159140727751e-06, | |
| "loss": 11.9555, | |
| "step": 58350 | |
| }, | |
| { | |
| "epoch": 25.380331431676176, | |
| "grad_norm": 30.192901611328125, | |
| "learning_rate": 4.967207365190706e-06, | |
| "loss": 11.9389, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 25.402064656343384, | |
| "grad_norm": 13.255487442016602, | |
| "learning_rate": 4.962823323103902e-06, | |
| "loss": 11.9655, | |
| "step": 58450 | |
| }, | |
| { | |
| "epoch": 25.423797881010596, | |
| "grad_norm": 21.28059959411621, | |
| "learning_rate": 4.9584392810170985e-06, | |
| "loss": 11.9339, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 25.445531105677805, | |
| "grad_norm": 19.402381896972656, | |
| "learning_rate": 4.954055238930294e-06, | |
| "loss": 11.9438, | |
| "step": 58550 | |
| }, | |
| { | |
| "epoch": 25.467264330345014, | |
| "grad_norm": 23.586254119873047, | |
| "learning_rate": 4.94967119684349e-06, | |
| "loss": 11.9565, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 25.488997555012226, | |
| "grad_norm": 12.589113235473633, | |
| "learning_rate": 4.945287154756686e-06, | |
| "loss": 11.9255, | |
| "step": 58650 | |
| }, | |
| { | |
| "epoch": 25.510730779679434, | |
| "grad_norm": 13.459474563598633, | |
| "learning_rate": 4.940903112669882e-06, | |
| "loss": 11.9577, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 25.532464004346647, | |
| "grad_norm": 44.4463005065918, | |
| "learning_rate": 4.936519070583078e-06, | |
| "loss": 11.9748, | |
| "step": 58750 | |
| }, | |
| { | |
| "epoch": 25.554197229013855, | |
| "grad_norm": 17.335121154785156, | |
| "learning_rate": 4.932135028496273e-06, | |
| "loss": 11.9485, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 25.575930453681064, | |
| "grad_norm": 13.910146713256836, | |
| "learning_rate": 4.92775098640947e-06, | |
| "loss": 11.9251, | |
| "step": 58850 | |
| }, | |
| { | |
| "epoch": 25.597663678348276, | |
| "grad_norm": 12.966668128967285, | |
| "learning_rate": 4.923366944322666e-06, | |
| "loss": 11.9461, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 25.619396903015485, | |
| "grad_norm": 11.38027572631836, | |
| "learning_rate": 4.918982902235862e-06, | |
| "loss": 11.9485, | |
| "step": 58950 | |
| }, | |
| { | |
| "epoch": 25.641130127682693, | |
| "grad_norm": 19.2831974029541, | |
| "learning_rate": 4.914598860149058e-06, | |
| "loss": 11.9539, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 25.662863352349905, | |
| "grad_norm": 14.93049144744873, | |
| "learning_rate": 4.910214818062254e-06, | |
| "loss": 11.9358, | |
| "step": 59050 | |
| }, | |
| { | |
| "epoch": 25.684596577017114, | |
| "grad_norm": 20.345487594604492, | |
| "learning_rate": 4.90583077597545e-06, | |
| "loss": 11.963, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 25.706329801684326, | |
| "grad_norm": 22.333740234375, | |
| "learning_rate": 4.901446733888645e-06, | |
| "loss": 11.9593, | |
| "step": 59150 | |
| }, | |
| { | |
| "epoch": 25.728063026351535, | |
| "grad_norm": 15.723165512084961, | |
| "learning_rate": 4.897062691801842e-06, | |
| "loss": 11.9462, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 25.749796251018743, | |
| "grad_norm": 23.927995681762695, | |
| "learning_rate": 4.892678649715038e-06, | |
| "loss": 11.9395, | |
| "step": 59250 | |
| }, | |
| { | |
| "epoch": 25.771529475685956, | |
| "grad_norm": 9.985795974731445, | |
| "learning_rate": 4.888294607628233e-06, | |
| "loss": 11.9668, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 25.793262700353164, | |
| "grad_norm": 13.037304878234863, | |
| "learning_rate": 4.8839105655414294e-06, | |
| "loss": 11.9362, | |
| "step": 59350 | |
| }, | |
| { | |
| "epoch": 25.814995925020376, | |
| "grad_norm": 14.396384239196777, | |
| "learning_rate": 4.879526523454626e-06, | |
| "loss": 11.9613, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 25.836729149687585, | |
| "grad_norm": 12.580947875976562, | |
| "learning_rate": 4.875142481367822e-06, | |
| "loss": 11.947, | |
| "step": 59450 | |
| }, | |
| { | |
| "epoch": 25.858462374354794, | |
| "grad_norm": 9.566840171813965, | |
| "learning_rate": 4.870758439281017e-06, | |
| "loss": 11.9588, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 25.880195599022006, | |
| "grad_norm": 14.287603378295898, | |
| "learning_rate": 4.8663743971942135e-06, | |
| "loss": 11.9367, | |
| "step": 59550 | |
| }, | |
| { | |
| "epoch": 25.901928823689214, | |
| "grad_norm": 22.067798614501953, | |
| "learning_rate": 4.86199035510741e-06, | |
| "loss": 11.9608, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 25.923662048356427, | |
| "grad_norm": 18.1433162689209, | |
| "learning_rate": 4.857606313020605e-06, | |
| "loss": 11.9365, | |
| "step": 59650 | |
| }, | |
| { | |
| "epoch": 25.945395273023635, | |
| "grad_norm": 19.52138900756836, | |
| "learning_rate": 4.853222270933801e-06, | |
| "loss": 11.9533, | |
| "step": 59700 | |
| }, | |
| { | |
| "epoch": 25.967128497690844, | |
| "grad_norm": 8.619915008544922, | |
| "learning_rate": 4.848838228846997e-06, | |
| "loss": 11.9343, | |
| "step": 59750 | |
| }, | |
| { | |
| "epoch": 25.988861722358056, | |
| "grad_norm": 23.551292419433594, | |
| "learning_rate": 4.844454186760194e-06, | |
| "loss": 11.9369, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 26.01043194784026, | |
| "grad_norm": 11.714635848999023, | |
| "learning_rate": 4.840070144673389e-06, | |
| "loss": 11.8362, | |
| "step": 59850 | |
| }, | |
| { | |
| "epoch": 26.032165172507472, | |
| "grad_norm": 12.336874961853027, | |
| "learning_rate": 4.8356861025865855e-06, | |
| "loss": 11.8948, | |
| "step": 59900 | |
| }, | |
| { | |
| "epoch": 26.05389839717468, | |
| "grad_norm": 20.45733642578125, | |
| "learning_rate": 4.831302060499781e-06, | |
| "loss": 11.9095, | |
| "step": 59950 | |
| }, | |
| { | |
| "epoch": 26.07563162184189, | |
| "grad_norm": 19.363704681396484, | |
| "learning_rate": 4.826918018412977e-06, | |
| "loss": 11.9093, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 26.07563162184189, | |
| "eval_cer": 0.07668522335921395, | |
| "eval_loss": 2.3816096782684326, | |
| "eval_runtime": 394.5974, | |
| "eval_samples_per_second": 13.7, | |
| "eval_steps_per_second": 3.426, | |
| "eval_wer": 0.2290783482493327, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 26.0973648465091, | |
| "grad_norm": 10.920241355895996, | |
| "learning_rate": 4.822533976326173e-06, | |
| "loss": 11.8988, | |
| "step": 60050 | |
| }, | |
| { | |
| "epoch": 26.11909807117631, | |
| "grad_norm": 8.169657707214355, | |
| "learning_rate": 4.818149934239369e-06, | |
| "loss": 11.9302, | |
| "step": 60100 | |
| }, | |
| { | |
| "epoch": 26.140831295843522, | |
| "grad_norm": 21.631534576416016, | |
| "learning_rate": 4.813765892152565e-06, | |
| "loss": 11.9295, | |
| "step": 60150 | |
| }, | |
| { | |
| "epoch": 26.16256452051073, | |
| "grad_norm": 15.736180305480957, | |
| "learning_rate": 4.80938185006576e-06, | |
| "loss": 11.9402, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 26.18429774517794, | |
| "grad_norm": 8.994476318359375, | |
| "learning_rate": 4.804997807978957e-06, | |
| "loss": 11.9254, | |
| "step": 60250 | |
| }, | |
| { | |
| "epoch": 26.206030969845152, | |
| "grad_norm": 15.551674842834473, | |
| "learning_rate": 4.800613765892153e-06, | |
| "loss": 11.9274, | |
| "step": 60300 | |
| }, | |
| { | |
| "epoch": 26.22776419451236, | |
| "grad_norm": 8.010394096374512, | |
| "learning_rate": 4.796229723805349e-06, | |
| "loss": 11.9204, | |
| "step": 60350 | |
| }, | |
| { | |
| "epoch": 26.24949741917957, | |
| "grad_norm": 8.433065414428711, | |
| "learning_rate": 4.791845681718545e-06, | |
| "loss": 11.9123, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 26.27123064384678, | |
| "grad_norm": 11.69290542602539, | |
| "learning_rate": 4.787461639631741e-06, | |
| "loss": 11.9423, | |
| "step": 60450 | |
| }, | |
| { | |
| "epoch": 26.29296386851399, | |
| "grad_norm": 11.806631088256836, | |
| "learning_rate": 4.783077597544937e-06, | |
| "loss": 11.9229, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 26.314697093181202, | |
| "grad_norm": 9.421358108520508, | |
| "learning_rate": 4.778693555458132e-06, | |
| "loss": 11.9254, | |
| "step": 60550 | |
| }, | |
| { | |
| "epoch": 26.33643031784841, | |
| "grad_norm": 15.151471138000488, | |
| "learning_rate": 4.7743095133713285e-06, | |
| "loss": 11.9324, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 26.35816354251562, | |
| "grad_norm": 7.523982524871826, | |
| "learning_rate": 4.769925471284525e-06, | |
| "loss": 11.9094, | |
| "step": 60650 | |
| }, | |
| { | |
| "epoch": 26.37989676718283, | |
| "grad_norm": 7.315085411071777, | |
| "learning_rate": 4.765541429197721e-06, | |
| "loss": 11.9156, | |
| "step": 60700 | |
| }, | |
| { | |
| "epoch": 26.40162999185004, | |
| "grad_norm": 37.69257354736328, | |
| "learning_rate": 4.761157387110917e-06, | |
| "loss": 11.9491, | |
| "step": 60750 | |
| }, | |
| { | |
| "epoch": 26.423363216517252, | |
| "grad_norm": 12.536825180053711, | |
| "learning_rate": 4.756773345024113e-06, | |
| "loss": 11.937, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 26.44509644118446, | |
| "grad_norm": 19.952590942382812, | |
| "learning_rate": 4.752389302937309e-06, | |
| "loss": 11.9031, | |
| "step": 60850 | |
| }, | |
| { | |
| "epoch": 26.46682966585167, | |
| "grad_norm": 9.468097686767578, | |
| "learning_rate": 4.748005260850504e-06, | |
| "loss": 11.93, | |
| "step": 60900 | |
| }, | |
| { | |
| "epoch": 26.48856289051888, | |
| "grad_norm": 9.063526153564453, | |
| "learning_rate": 4.7436212187637005e-06, | |
| "loss": 11.9282, | |
| "step": 60950 | |
| }, | |
| { | |
| "epoch": 26.51029611518609, | |
| "grad_norm": 23.76058006286621, | |
| "learning_rate": 4.739237176676897e-06, | |
| "loss": 11.9431, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 26.532029339853302, | |
| "grad_norm": 16.783021926879883, | |
| "learning_rate": 4.734853134590092e-06, | |
| "loss": 11.9221, | |
| "step": 61050 | |
| }, | |
| { | |
| "epoch": 26.55376256452051, | |
| "grad_norm": 20.15511131286621, | |
| "learning_rate": 4.730469092503288e-06, | |
| "loss": 11.9392, | |
| "step": 61100 | |
| }, | |
| { | |
| "epoch": 26.57549578918772, | |
| "grad_norm": 14.874903678894043, | |
| "learning_rate": 4.7260850504164845e-06, | |
| "loss": 11.953, | |
| "step": 61150 | |
| }, | |
| { | |
| "epoch": 26.597229013854932, | |
| "grad_norm": 7.126718044281006, | |
| "learning_rate": 4.721701008329681e-06, | |
| "loss": 11.9446, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 26.61896223852214, | |
| "grad_norm": 9.697017669677734, | |
| "learning_rate": 4.717316966242876e-06, | |
| "loss": 11.9047, | |
| "step": 61250 | |
| }, | |
| { | |
| "epoch": 26.64069546318935, | |
| "grad_norm": 16.13836097717285, | |
| "learning_rate": 4.712932924156072e-06, | |
| "loss": 11.9156, | |
| "step": 61300 | |
| }, | |
| { | |
| "epoch": 26.66242868785656, | |
| "grad_norm": 10.770340919494629, | |
| "learning_rate": 4.708548882069268e-06, | |
| "loss": 11.9602, | |
| "step": 61350 | |
| }, | |
| { | |
| "epoch": 26.68416191252377, | |
| "grad_norm": 20.800886154174805, | |
| "learning_rate": 4.704164839982464e-06, | |
| "loss": 11.9496, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 26.705895137190982, | |
| "grad_norm": 14.415149688720703, | |
| "learning_rate": 4.69978079789566e-06, | |
| "loss": 11.8953, | |
| "step": 61450 | |
| }, | |
| { | |
| "epoch": 26.72762836185819, | |
| "grad_norm": 16.533891677856445, | |
| "learning_rate": 4.695396755808856e-06, | |
| "loss": 11.9318, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 26.7493615865254, | |
| "grad_norm": 12.036311149597168, | |
| "learning_rate": 4.691012713722053e-06, | |
| "loss": 11.9219, | |
| "step": 61550 | |
| }, | |
| { | |
| "epoch": 26.77109481119261, | |
| "grad_norm": 9.894879341125488, | |
| "learning_rate": 4.686628671635248e-06, | |
| "loss": 11.925, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 26.79282803585982, | |
| "grad_norm": 13.89318561553955, | |
| "learning_rate": 4.682244629548444e-06, | |
| "loss": 11.9642, | |
| "step": 61650 | |
| }, | |
| { | |
| "epoch": 26.814561260527032, | |
| "grad_norm": 7.87830114364624, | |
| "learning_rate": 4.67786058746164e-06, | |
| "loss": 11.9357, | |
| "step": 61700 | |
| }, | |
| { | |
| "epoch": 26.83629448519424, | |
| "grad_norm": 12.9856595993042, | |
| "learning_rate": 4.673476545374836e-06, | |
| "loss": 11.9305, | |
| "step": 61750 | |
| }, | |
| { | |
| "epoch": 26.85802770986145, | |
| "grad_norm": 9.654988288879395, | |
| "learning_rate": 4.669092503288032e-06, | |
| "loss": 11.9184, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 26.87976093452866, | |
| "grad_norm": 58.50657653808594, | |
| "learning_rate": 4.664708461201228e-06, | |
| "loss": 11.9274, | |
| "step": 61850 | |
| }, | |
| { | |
| "epoch": 26.90149415919587, | |
| "grad_norm": 9.662385940551758, | |
| "learning_rate": 4.660324419114424e-06, | |
| "loss": 11.9326, | |
| "step": 61900 | |
| }, | |
| { | |
| "epoch": 26.92322738386308, | |
| "grad_norm": 11.249975204467773, | |
| "learning_rate": 4.655940377027619e-06, | |
| "loss": 11.9301, | |
| "step": 61950 | |
| }, | |
| { | |
| "epoch": 26.94496060853029, | |
| "grad_norm": 14.355755805969238, | |
| "learning_rate": 4.651556334940816e-06, | |
| "loss": 11.925, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 26.9666938331975, | |
| "grad_norm": 55.27675247192383, | |
| "learning_rate": 4.647172292854012e-06, | |
| "loss": 11.9072, | |
| "step": 62050 | |
| }, | |
| { | |
| "epoch": 26.988427057864712, | |
| "grad_norm": 9.871424674987793, | |
| "learning_rate": 4.642788250767208e-06, | |
| "loss": 11.9371, | |
| "step": 62100 | |
| }, | |
| { | |
| "epoch": 27.009997283346916, | |
| "grad_norm": 11.582411766052246, | |
| "learning_rate": 4.638404208680404e-06, | |
| "loss": 11.8118, | |
| "step": 62150 | |
| }, | |
| { | |
| "epoch": 27.031730508014128, | |
| "grad_norm": 7.072801113128662, | |
| "learning_rate": 4.6340201665935995e-06, | |
| "loss": 11.8716, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 27.053463732681337, | |
| "grad_norm": 12.715493202209473, | |
| "learning_rate": 4.629636124506796e-06, | |
| "loss": 11.9066, | |
| "step": 62250 | |
| }, | |
| { | |
| "epoch": 27.075196957348545, | |
| "grad_norm": 13.285543441772461, | |
| "learning_rate": 4.625252082419991e-06, | |
| "loss": 11.9065, | |
| "step": 62300 | |
| }, | |
| { | |
| "epoch": 27.096930182015758, | |
| "grad_norm": 14.948770523071289, | |
| "learning_rate": 4.620868040333187e-06, | |
| "loss": 11.8932, | |
| "step": 62350 | |
| }, | |
| { | |
| "epoch": 27.118663406682966, | |
| "grad_norm": 7.0187296867370605, | |
| "learning_rate": 4.616483998246384e-06, | |
| "loss": 11.9063, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 27.14039663135018, | |
| "grad_norm": 11.898140907287598, | |
| "learning_rate": 4.61209995615958e-06, | |
| "loss": 11.8861, | |
| "step": 62450 | |
| }, | |
| { | |
| "epoch": 27.162129856017387, | |
| "grad_norm": 7.729825496673584, | |
| "learning_rate": 4.607715914072775e-06, | |
| "loss": 11.9102, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 27.183863080684596, | |
| "grad_norm": 9.05493450164795, | |
| "learning_rate": 4.6033318719859715e-06, | |
| "loss": 11.901, | |
| "step": 62550 | |
| }, | |
| { | |
| "epoch": 27.205596305351808, | |
| "grad_norm": 48.41245651245117, | |
| "learning_rate": 4.598947829899168e-06, | |
| "loss": 11.9217, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 27.227329530019016, | |
| "grad_norm": 8.19921875, | |
| "learning_rate": 4.594563787812363e-06, | |
| "loss": 11.9103, | |
| "step": 62650 | |
| }, | |
| { | |
| "epoch": 27.249062754686225, | |
| "grad_norm": 7.067399024963379, | |
| "learning_rate": 4.590179745725559e-06, | |
| "loss": 11.9102, | |
| "step": 62700 | |
| }, | |
| { | |
| "epoch": 27.270795979353437, | |
| "grad_norm": 10.219547271728516, | |
| "learning_rate": 4.5857957036387556e-06, | |
| "loss": 11.9086, | |
| "step": 62750 | |
| }, | |
| { | |
| "epoch": 27.292529204020646, | |
| "grad_norm": 11.2730073928833, | |
| "learning_rate": 4.581411661551951e-06, | |
| "loss": 11.8907, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 27.314262428687858, | |
| "grad_norm": 23.644775390625, | |
| "learning_rate": 4.577027619465147e-06, | |
| "loss": 11.9194, | |
| "step": 62850 | |
| }, | |
| { | |
| "epoch": 27.335995653355067, | |
| "grad_norm": 13.088956832885742, | |
| "learning_rate": 4.572643577378343e-06, | |
| "loss": 11.9178, | |
| "step": 62900 | |
| }, | |
| { | |
| "epoch": 27.357728878022275, | |
| "grad_norm": 12.945446968078613, | |
| "learning_rate": 4.56825953529154e-06, | |
| "loss": 11.9127, | |
| "step": 62950 | |
| }, | |
| { | |
| "epoch": 27.379462102689487, | |
| "grad_norm": 7.951735019683838, | |
| "learning_rate": 4.563875493204735e-06, | |
| "loss": 11.9237, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 27.401195327356696, | |
| "grad_norm": 13.66278076171875, | |
| "learning_rate": 4.559491451117931e-06, | |
| "loss": 11.8985, | |
| "step": 63050 | |
| }, | |
| { | |
| "epoch": 27.422928552023908, | |
| "grad_norm": 6.567673683166504, | |
| "learning_rate": 4.555107409031127e-06, | |
| "loss": 11.9311, | |
| "step": 63100 | |
| }, | |
| { | |
| "epoch": 27.444661776691117, | |
| "grad_norm": 11.139328956604004, | |
| "learning_rate": 4.550723366944323e-06, | |
| "loss": 11.9207, | |
| "step": 63150 | |
| }, | |
| { | |
| "epoch": 27.466395001358325, | |
| "grad_norm": 18.506877899169922, | |
| "learning_rate": 4.546339324857519e-06, | |
| "loss": 11.9053, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 27.488128226025538, | |
| "grad_norm": 16.45941925048828, | |
| "learning_rate": 4.5419552827707145e-06, | |
| "loss": 11.9132, | |
| "step": 63250 | |
| }, | |
| { | |
| "epoch": 27.509861450692746, | |
| "grad_norm": 13.74703311920166, | |
| "learning_rate": 4.537571240683912e-06, | |
| "loss": 11.9032, | |
| "step": 63300 | |
| }, | |
| { | |
| "epoch": 27.531594675359955, | |
| "grad_norm": 5.686723232269287, | |
| "learning_rate": 4.533187198597107e-06, | |
| "loss": 11.9135, | |
| "step": 63350 | |
| }, | |
| { | |
| "epoch": 27.553327900027167, | |
| "grad_norm": 47.760013580322266, | |
| "learning_rate": 4.528803156510303e-06, | |
| "loss": 11.9263, | |
| "step": 63400 | |
| }, | |
| { | |
| "epoch": 27.575061124694376, | |
| "grad_norm": 13.832674026489258, | |
| "learning_rate": 4.524419114423499e-06, | |
| "loss": 11.9114, | |
| "step": 63450 | |
| }, | |
| { | |
| "epoch": 27.596794349361588, | |
| "grad_norm": 22.621736526489258, | |
| "learning_rate": 4.520035072336695e-06, | |
| "loss": 11.9134, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 27.618527574028796, | |
| "grad_norm": 13.379792213439941, | |
| "learning_rate": 4.515651030249891e-06, | |
| "loss": 11.9058, | |
| "step": 63550 | |
| }, | |
| { | |
| "epoch": 27.640260798696005, | |
| "grad_norm": 12.987919807434082, | |
| "learning_rate": 4.5112669881630865e-06, | |
| "loss": 11.9083, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 27.661994023363217, | |
| "grad_norm": 16.87094497680664, | |
| "learning_rate": 4.506882946076283e-06, | |
| "loss": 11.908, | |
| "step": 63650 | |
| }, | |
| { | |
| "epoch": 27.683727248030426, | |
| "grad_norm": 9.978212356567383, | |
| "learning_rate": 4.502498903989478e-06, | |
| "loss": 11.8919, | |
| "step": 63700 | |
| }, | |
| { | |
| "epoch": 27.705460472697638, | |
| "grad_norm": 13.08248519897461, | |
| "learning_rate": 4.498114861902675e-06, | |
| "loss": 11.9, | |
| "step": 63750 | |
| }, | |
| { | |
| "epoch": 27.727193697364847, | |
| "grad_norm": 14.08407974243164, | |
| "learning_rate": 4.4937308198158706e-06, | |
| "loss": 11.9366, | |
| "step": 63800 | |
| }, | |
| { | |
| "epoch": 27.748926922032055, | |
| "grad_norm": 11.779139518737793, | |
| "learning_rate": 4.489346777729067e-06, | |
| "loss": 11.9216, | |
| "step": 63850 | |
| }, | |
| { | |
| "epoch": 27.770660146699267, | |
| "grad_norm": 7.019837856292725, | |
| "learning_rate": 4.484962735642262e-06, | |
| "loss": 11.9144, | |
| "step": 63900 | |
| }, | |
| { | |
| "epoch": 27.792393371366476, | |
| "grad_norm": 8.715902328491211, | |
| "learning_rate": 4.480578693555458e-06, | |
| "loss": 11.9335, | |
| "step": 63950 | |
| }, | |
| { | |
| "epoch": 27.814126596033688, | |
| "grad_norm": 17.31736183166504, | |
| "learning_rate": 4.476194651468655e-06, | |
| "loss": 11.9164, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 27.835859820700897, | |
| "grad_norm": 7.397292613983154, | |
| "learning_rate": 4.47181060938185e-06, | |
| "loss": 11.8935, | |
| "step": 64050 | |
| }, | |
| { | |
| "epoch": 27.857593045368105, | |
| "grad_norm": 15.1404447555542, | |
| "learning_rate": 4.467426567295046e-06, | |
| "loss": 11.9156, | |
| "step": 64100 | |
| }, | |
| { | |
| "epoch": 27.879326270035317, | |
| "grad_norm": 16.563631057739258, | |
| "learning_rate": 4.4630425252082425e-06, | |
| "loss": 11.8858, | |
| "step": 64150 | |
| }, | |
| { | |
| "epoch": 27.901059494702526, | |
| "grad_norm": 10.400628089904785, | |
| "learning_rate": 4.458658483121439e-06, | |
| "loss": 11.9083, | |
| "step": 64200 | |
| }, | |
| { | |
| "epoch": 27.92279271936974, | |
| "grad_norm": 8.129082679748535, | |
| "learning_rate": 4.454274441034634e-06, | |
| "loss": 11.9361, | |
| "step": 64250 | |
| }, | |
| { | |
| "epoch": 27.944525944036947, | |
| "grad_norm": 22.946596145629883, | |
| "learning_rate": 4.44989039894783e-06, | |
| "loss": 11.8997, | |
| "step": 64300 | |
| }, | |
| { | |
| "epoch": 27.966259168704156, | |
| "grad_norm": 17.139440536499023, | |
| "learning_rate": 4.445506356861027e-06, | |
| "loss": 11.9067, | |
| "step": 64350 | |
| }, | |
| { | |
| "epoch": 27.987992393371368, | |
| "grad_norm": 8.700691223144531, | |
| "learning_rate": 4.441122314774222e-06, | |
| "loss": 11.8941, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 28.009562618853572, | |
| "grad_norm": 9.634552001953125, | |
| "learning_rate": 4.436738272687418e-06, | |
| "loss": 11.7983, | |
| "step": 64450 | |
| }, | |
| { | |
| "epoch": 28.031295843520784, | |
| "grad_norm": 12.564841270446777, | |
| "learning_rate": 4.432354230600614e-06, | |
| "loss": 11.8731, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 28.053029068187993, | |
| "grad_norm": 10.420557022094727, | |
| "learning_rate": 4.42797018851381e-06, | |
| "loss": 11.896, | |
| "step": 64550 | |
| }, | |
| { | |
| "epoch": 28.0747622928552, | |
| "grad_norm": 13.071510314941406, | |
| "learning_rate": 4.423586146427006e-06, | |
| "loss": 11.8855, | |
| "step": 64600 | |
| }, | |
| { | |
| "epoch": 28.096495517522413, | |
| "grad_norm": 11.409537315368652, | |
| "learning_rate": 4.419202104340202e-06, | |
| "loss": 11.8987, | |
| "step": 64650 | |
| }, | |
| { | |
| "epoch": 28.118228742189622, | |
| "grad_norm": 17.64859390258789, | |
| "learning_rate": 4.4148180622533985e-06, | |
| "loss": 11.8742, | |
| "step": 64700 | |
| }, | |
| { | |
| "epoch": 28.13996196685683, | |
| "grad_norm": 8.101343154907227, | |
| "learning_rate": 4.410434020166594e-06, | |
| "loss": 11.8781, | |
| "step": 64750 | |
| }, | |
| { | |
| "epoch": 28.161695191524043, | |
| "grad_norm": 11.35251522064209, | |
| "learning_rate": 4.40604997807979e-06, | |
| "loss": 11.8891, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 28.18342841619125, | |
| "grad_norm": 19.521108627319336, | |
| "learning_rate": 4.4016659359929855e-06, | |
| "loss": 11.882, | |
| "step": 64850 | |
| }, | |
| { | |
| "epoch": 28.205161640858464, | |
| "grad_norm": 14.904671669006348, | |
| "learning_rate": 4.397281893906182e-06, | |
| "loss": 11.8987, | |
| "step": 64900 | |
| }, | |
| { | |
| "epoch": 28.226894865525672, | |
| "grad_norm": 11.82111644744873, | |
| "learning_rate": 4.392897851819378e-06, | |
| "loss": 11.8718, | |
| "step": 64950 | |
| }, | |
| { | |
| "epoch": 28.24862809019288, | |
| "grad_norm": 7.986074924468994, | |
| "learning_rate": 4.388513809732573e-06, | |
| "loss": 11.8931, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 28.270361314860093, | |
| "grad_norm": 10.135086059570312, | |
| "learning_rate": 4.38412976764577e-06, | |
| "loss": 11.8845, | |
| "step": 65050 | |
| }, | |
| { | |
| "epoch": 28.2920945395273, | |
| "grad_norm": 9.275798797607422, | |
| "learning_rate": 4.379745725558966e-06, | |
| "loss": 11.8647, | |
| "step": 65100 | |
| }, | |
| { | |
| "epoch": 28.313827764194514, | |
| "grad_norm": 7.864231586456299, | |
| "learning_rate": 4.375361683472162e-06, | |
| "loss": 11.9019, | |
| "step": 65150 | |
| }, | |
| { | |
| "epoch": 28.335560988861722, | |
| "grad_norm": 37.51991653442383, | |
| "learning_rate": 4.3709776413853575e-06, | |
| "loss": 11.8779, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 28.35729421352893, | |
| "grad_norm": 7.752624034881592, | |
| "learning_rate": 4.366593599298554e-06, | |
| "loss": 11.9023, | |
| "step": 65250 | |
| }, | |
| { | |
| "epoch": 28.379027438196143, | |
| "grad_norm": 12.627674102783203, | |
| "learning_rate": 4.36220955721175e-06, | |
| "loss": 11.8921, | |
| "step": 65300 | |
| }, | |
| { | |
| "epoch": 28.40076066286335, | |
| "grad_norm": 26.206846237182617, | |
| "learning_rate": 4.357825515124945e-06, | |
| "loss": 11.891, | |
| "step": 65350 | |
| }, | |
| { | |
| "epoch": 28.422493887530564, | |
| "grad_norm": 18.58912467956543, | |
| "learning_rate": 4.3534414730381416e-06, | |
| "loss": 11.908, | |
| "step": 65400 | |
| }, | |
| { | |
| "epoch": 28.444227112197773, | |
| "grad_norm": 16.89732551574707, | |
| "learning_rate": 4.349057430951337e-06, | |
| "loss": 11.8899, | |
| "step": 65450 | |
| }, | |
| { | |
| "epoch": 28.46596033686498, | |
| "grad_norm": 7.8719964027404785, | |
| "learning_rate": 4.344673388864534e-06, | |
| "loss": 11.8946, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 28.487693561532193, | |
| "grad_norm": 11.639144897460938, | |
| "learning_rate": 4.3402893467777294e-06, | |
| "loss": 11.9214, | |
| "step": 65550 | |
| }, | |
| { | |
| "epoch": 28.509426786199402, | |
| "grad_norm": 29.2702579498291, | |
| "learning_rate": 4.335905304690926e-06, | |
| "loss": 11.9217, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 28.531160010866614, | |
| "grad_norm": 48.321807861328125, | |
| "learning_rate": 4.331521262604121e-06, | |
| "loss": 11.889, | |
| "step": 65650 | |
| }, | |
| { | |
| "epoch": 28.552893235533823, | |
| "grad_norm": 12.334220886230469, | |
| "learning_rate": 4.327137220517317e-06, | |
| "loss": 11.8814, | |
| "step": 65700 | |
| }, | |
| { | |
| "epoch": 28.57462646020103, | |
| "grad_norm": 13.60355281829834, | |
| "learning_rate": 4.3227531784305135e-06, | |
| "loss": 11.9137, | |
| "step": 65750 | |
| }, | |
| { | |
| "epoch": 28.596359684868244, | |
| "grad_norm": 12.374007225036621, | |
| "learning_rate": 4.318369136343709e-06, | |
| "loss": 11.9145, | |
| "step": 65800 | |
| }, | |
| { | |
| "epoch": 28.618092909535452, | |
| "grad_norm": 15.23318862915039, | |
| "learning_rate": 4.313985094256905e-06, | |
| "loss": 11.8971, | |
| "step": 65850 | |
| }, | |
| { | |
| "epoch": 28.63982613420266, | |
| "grad_norm": 8.697155952453613, | |
| "learning_rate": 4.3096010521701005e-06, | |
| "loss": 11.8899, | |
| "step": 65900 | |
| }, | |
| { | |
| "epoch": 28.661559358869873, | |
| "grad_norm": 6.101230621337891, | |
| "learning_rate": 4.305217010083298e-06, | |
| "loss": 11.8835, | |
| "step": 65950 | |
| }, | |
| { | |
| "epoch": 28.68329258353708, | |
| "grad_norm": 30.645008087158203, | |
| "learning_rate": 4.300832967996493e-06, | |
| "loss": 11.9118, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 28.705025808204294, | |
| "grad_norm": 10.432790756225586, | |
| "learning_rate": 4.296448925909689e-06, | |
| "loss": 11.898, | |
| "step": 66050 | |
| }, | |
| { | |
| "epoch": 28.726759032871502, | |
| "grad_norm": 22.726320266723633, | |
| "learning_rate": 4.2920648838228855e-06, | |
| "loss": 11.8829, | |
| "step": 66100 | |
| }, | |
| { | |
| "epoch": 28.74849225753871, | |
| "grad_norm": 15.002222061157227, | |
| "learning_rate": 4.287680841736081e-06, | |
| "loss": 11.8922, | |
| "step": 66150 | |
| }, | |
| { | |
| "epoch": 28.770225482205923, | |
| "grad_norm": 16.7822208404541, | |
| "learning_rate": 4.283296799649277e-06, | |
| "loss": 11.9015, | |
| "step": 66200 | |
| }, | |
| { | |
| "epoch": 28.79195870687313, | |
| "grad_norm": 10.86782455444336, | |
| "learning_rate": 4.2789127575624725e-06, | |
| "loss": 11.9095, | |
| "step": 66250 | |
| }, | |
| { | |
| "epoch": 28.813691931540344, | |
| "grad_norm": 14.24905776977539, | |
| "learning_rate": 4.274528715475669e-06, | |
| "loss": 11.8791, | |
| "step": 66300 | |
| }, | |
| { | |
| "epoch": 28.835425156207553, | |
| "grad_norm": 8.511114120483398, | |
| "learning_rate": 4.270144673388865e-06, | |
| "loss": 11.8846, | |
| "step": 66350 | |
| }, | |
| { | |
| "epoch": 28.85715838087476, | |
| "grad_norm": 10.261749267578125, | |
| "learning_rate": 4.265760631302061e-06, | |
| "loss": 11.9029, | |
| "step": 66400 | |
| }, | |
| { | |
| "epoch": 28.878891605541973, | |
| "grad_norm": 48.72242736816406, | |
| "learning_rate": 4.261376589215257e-06, | |
| "loss": 11.9049, | |
| "step": 66450 | |
| }, | |
| { | |
| "epoch": 28.900624830209182, | |
| "grad_norm": 10.668495178222656, | |
| "learning_rate": 4.256992547128453e-06, | |
| "loss": 11.8856, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 28.92235805487639, | |
| "grad_norm": 7.709607124328613, | |
| "learning_rate": 4.252608505041649e-06, | |
| "loss": 11.8888, | |
| "step": 66550 | |
| }, | |
| { | |
| "epoch": 28.944091279543603, | |
| "grad_norm": 30.70176124572754, | |
| "learning_rate": 4.248224462954844e-06, | |
| "loss": 11.9187, | |
| "step": 66600 | |
| }, | |
| { | |
| "epoch": 28.96582450421081, | |
| "grad_norm": 13.879278182983398, | |
| "learning_rate": 4.243840420868041e-06, | |
| "loss": 11.8825, | |
| "step": 66650 | |
| }, | |
| { | |
| "epoch": 28.987557728878024, | |
| "grad_norm": 7.8939714431762695, | |
| "learning_rate": 4.239456378781237e-06, | |
| "loss": 11.9042, | |
| "step": 66700 | |
| }, | |
| { | |
| "epoch": 29.009127954360228, | |
| "grad_norm": 16.196550369262695, | |
| "learning_rate": 4.235072336694432e-06, | |
| "loss": 11.8091, | |
| "step": 66750 | |
| }, | |
| { | |
| "epoch": 29.03086117902744, | |
| "grad_norm": 10.502305030822754, | |
| "learning_rate": 4.2306882946076285e-06, | |
| "loss": 11.8647, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 29.05259440369465, | |
| "grad_norm": 28.054792404174805, | |
| "learning_rate": 4.226304252520825e-06, | |
| "loss": 11.8775, | |
| "step": 66850 | |
| }, | |
| { | |
| "epoch": 29.074327628361857, | |
| "grad_norm": 5.852464199066162, | |
| "learning_rate": 4.221920210434021e-06, | |
| "loss": 11.8717, | |
| "step": 66900 | |
| }, | |
| { | |
| "epoch": 29.09606085302907, | |
| "grad_norm": 10.438371658325195, | |
| "learning_rate": 4.217536168347216e-06, | |
| "loss": 11.8783, | |
| "step": 66950 | |
| }, | |
| { | |
| "epoch": 29.117794077696278, | |
| "grad_norm": 5.391887664794922, | |
| "learning_rate": 4.213152126260413e-06, | |
| "loss": 11.8597, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 29.139527302363486, | |
| "grad_norm": 15.71295166015625, | |
| "learning_rate": 4.208768084173608e-06, | |
| "loss": 11.8726, | |
| "step": 67050 | |
| }, | |
| { | |
| "epoch": 29.1612605270307, | |
| "grad_norm": 15.637112617492676, | |
| "learning_rate": 4.204384042086804e-06, | |
| "loss": 11.8549, | |
| "step": 67100 | |
| }, | |
| { | |
| "epoch": 29.182993751697907, | |
| "grad_norm": 16.201160430908203, | |
| "learning_rate": 4.2000000000000004e-06, | |
| "loss": 11.8751, | |
| "step": 67150 | |
| }, | |
| { | |
| "epoch": 29.20472697636512, | |
| "grad_norm": 18.363697052001953, | |
| "learning_rate": 4.195615957913196e-06, | |
| "loss": 11.9021, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 29.226460201032328, | |
| "grad_norm": 15.013435363769531, | |
| "learning_rate": 4.191231915826393e-06, | |
| "loss": 11.8747, | |
| "step": 67250 | |
| }, | |
| { | |
| "epoch": 29.248193425699537, | |
| "grad_norm": 14.785465240478516, | |
| "learning_rate": 4.186847873739588e-06, | |
| "loss": 11.8775, | |
| "step": 67300 | |
| }, | |
| { | |
| "epoch": 29.26992665036675, | |
| "grad_norm": 13.100189208984375, | |
| "learning_rate": 4.1824638316527845e-06, | |
| "loss": 11.8587, | |
| "step": 67350 | |
| }, | |
| { | |
| "epoch": 29.291659875033957, | |
| "grad_norm": 9.864031791687012, | |
| "learning_rate": 4.17807978956598e-06, | |
| "loss": 11.9118, | |
| "step": 67400 | |
| }, | |
| { | |
| "epoch": 29.31339309970117, | |
| "grad_norm": 19.341495513916016, | |
| "learning_rate": 4.173695747479176e-06, | |
| "loss": 11.8819, | |
| "step": 67450 | |
| }, | |
| { | |
| "epoch": 29.335126324368378, | |
| "grad_norm": 7.35308837890625, | |
| "learning_rate": 4.169311705392372e-06, | |
| "loss": 11.8731, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 29.356859549035587, | |
| "grad_norm": 8.811240196228027, | |
| "learning_rate": 4.164927663305568e-06, | |
| "loss": 11.8819, | |
| "step": 67550 | |
| }, | |
| { | |
| "epoch": 29.3785927737028, | |
| "grad_norm": 9.851766586303711, | |
| "learning_rate": 4.160543621218764e-06, | |
| "loss": 11.8942, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 29.400325998370008, | |
| "grad_norm": 14.708338737487793, | |
| "learning_rate": 4.156159579131959e-06, | |
| "loss": 11.8899, | |
| "step": 67650 | |
| }, | |
| { | |
| "epoch": 29.42205922303722, | |
| "grad_norm": 11.063777923583984, | |
| "learning_rate": 4.1517755370451565e-06, | |
| "loss": 11.8602, | |
| "step": 67700 | |
| }, | |
| { | |
| "epoch": 29.44379244770443, | |
| "grad_norm": 11.282812118530273, | |
| "learning_rate": 4.147391494958352e-06, | |
| "loss": 11.8651, | |
| "step": 67750 | |
| }, | |
| { | |
| "epoch": 29.465525672371637, | |
| "grad_norm": 258.5189514160156, | |
| "learning_rate": 4.143007452871548e-06, | |
| "loss": 11.8813, | |
| "step": 67800 | |
| }, | |
| { | |
| "epoch": 29.48725889703885, | |
| "grad_norm": 17.533771514892578, | |
| "learning_rate": 4.138623410784744e-06, | |
| "loss": 11.8777, | |
| "step": 67850 | |
| }, | |
| { | |
| "epoch": 29.508992121706058, | |
| "grad_norm": 9.061328887939453, | |
| "learning_rate": 4.13423936869794e-06, | |
| "loss": 11.863, | |
| "step": 67900 | |
| }, | |
| { | |
| "epoch": 29.530725346373266, | |
| "grad_norm": 14.129364013671875, | |
| "learning_rate": 4.129855326611136e-06, | |
| "loss": 11.8837, | |
| "step": 67950 | |
| }, | |
| { | |
| "epoch": 29.55245857104048, | |
| "grad_norm": 21.77886390686035, | |
| "learning_rate": 4.125471284524331e-06, | |
| "loss": 11.8897, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 29.574191795707687, | |
| "grad_norm": 8.441765785217285, | |
| "learning_rate": 4.1210872424375276e-06, | |
| "loss": 11.9048, | |
| "step": 68050 | |
| }, | |
| { | |
| "epoch": 29.5959250203749, | |
| "grad_norm": 11.595650672912598, | |
| "learning_rate": 4.116703200350724e-06, | |
| "loss": 11.8899, | |
| "step": 68100 | |
| }, | |
| { | |
| "epoch": 29.617658245042108, | |
| "grad_norm": 16.048147201538086, | |
| "learning_rate": 4.11231915826392e-06, | |
| "loss": 11.8787, | |
| "step": 68150 | |
| }, | |
| { | |
| "epoch": 29.639391469709317, | |
| "grad_norm": 9.9227294921875, | |
| "learning_rate": 4.1079351161771154e-06, | |
| "loss": 11.8748, | |
| "step": 68200 | |
| }, | |
| { | |
| "epoch": 29.66112469437653, | |
| "grad_norm": 9.97187614440918, | |
| "learning_rate": 4.103551074090312e-06, | |
| "loss": 11.8759, | |
| "step": 68250 | |
| }, | |
| { | |
| "epoch": 29.682857919043737, | |
| "grad_norm": 18.43181610107422, | |
| "learning_rate": 4.099167032003508e-06, | |
| "loss": 11.8683, | |
| "step": 68300 | |
| }, | |
| { | |
| "epoch": 29.70459114371095, | |
| "grad_norm": 18.20121192932129, | |
| "learning_rate": 4.094782989916703e-06, | |
| "loss": 11.8865, | |
| "step": 68350 | |
| }, | |
| { | |
| "epoch": 29.726324368378158, | |
| "grad_norm": 6.934305667877197, | |
| "learning_rate": 4.0903989478298995e-06, | |
| "loss": 11.8502, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 29.748057593045367, | |
| "grad_norm": 12.715697288513184, | |
| "learning_rate": 4.086014905743096e-06, | |
| "loss": 11.8733, | |
| "step": 68450 | |
| }, | |
| { | |
| "epoch": 29.76979081771258, | |
| "grad_norm": 9.016664505004883, | |
| "learning_rate": 4.081630863656291e-06, | |
| "loss": 11.8775, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 29.791524042379788, | |
| "grad_norm": 7.763296127319336, | |
| "learning_rate": 4.077246821569487e-06, | |
| "loss": 11.8733, | |
| "step": 68550 | |
| }, | |
| { | |
| "epoch": 29.813257267047, | |
| "grad_norm": 10.350701332092285, | |
| "learning_rate": 4.072862779482684e-06, | |
| "loss": 11.8632, | |
| "step": 68600 | |
| }, | |
| { | |
| "epoch": 29.83499049171421, | |
| "grad_norm": 6.480827331542969, | |
| "learning_rate": 4.06847873739588e-06, | |
| "loss": 11.8788, | |
| "step": 68650 | |
| }, | |
| { | |
| "epoch": 29.856723716381417, | |
| "grad_norm": 20.947677612304688, | |
| "learning_rate": 4.064094695309075e-06, | |
| "loss": 11.8773, | |
| "step": 68700 | |
| }, | |
| { | |
| "epoch": 29.87845694104863, | |
| "grad_norm": 10.931136131286621, | |
| "learning_rate": 4.0597106532222715e-06, | |
| "loss": 11.8612, | |
| "step": 68750 | |
| }, | |
| { | |
| "epoch": 29.900190165715838, | |
| "grad_norm": 10.79286003112793, | |
| "learning_rate": 4.055326611135467e-06, | |
| "loss": 11.8702, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 29.921923390383046, | |
| "grad_norm": 45.66188049316406, | |
| "learning_rate": 4.050942569048663e-06, | |
| "loss": 11.8613, | |
| "step": 68850 | |
| }, | |
| { | |
| "epoch": 29.94365661505026, | |
| "grad_norm": 6.688445091247559, | |
| "learning_rate": 4.046558526961859e-06, | |
| "loss": 11.8739, | |
| "step": 68900 | |
| }, | |
| { | |
| "epoch": 29.965389839717467, | |
| "grad_norm": 14.173410415649414, | |
| "learning_rate": 4.042174484875055e-06, | |
| "loss": 11.8595, | |
| "step": 68950 | |
| }, | |
| { | |
| "epoch": 29.98712306438468, | |
| "grad_norm": 13.653775215148926, | |
| "learning_rate": 4.037790442788251e-06, | |
| "loss": 11.8927, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 30.008693289866883, | |
| "grad_norm": 9.138008117675781, | |
| "learning_rate": 4.033406400701447e-06, | |
| "loss": 11.7683, | |
| "step": 69050 | |
| }, | |
| { | |
| "epoch": 30.030426514534096, | |
| "grad_norm": 16.62093162536621, | |
| "learning_rate": 4.029022358614643e-06, | |
| "loss": 11.8934, | |
| "step": 69100 | |
| }, | |
| { | |
| "epoch": 30.052159739201304, | |
| "grad_norm": 7.672760486602783, | |
| "learning_rate": 4.024638316527839e-06, | |
| "loss": 11.8631, | |
| "step": 69150 | |
| }, | |
| { | |
| "epoch": 30.073892963868513, | |
| "grad_norm": 8.038310050964355, | |
| "learning_rate": 4.020254274441035e-06, | |
| "loss": 11.8599, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 30.095626188535725, | |
| "grad_norm": 10.817283630371094, | |
| "learning_rate": 4.015870232354231e-06, | |
| "loss": 11.8627, | |
| "step": 69250 | |
| }, | |
| { | |
| "epoch": 30.117359413202934, | |
| "grad_norm": 6.556225299835205, | |
| "learning_rate": 4.011486190267427e-06, | |
| "loss": 11.8671, | |
| "step": 69300 | |
| }, | |
| { | |
| "epoch": 30.139092637870142, | |
| "grad_norm": 16.242650985717773, | |
| "learning_rate": 4.007102148180623e-06, | |
| "loss": 11.875, | |
| "step": 69350 | |
| }, | |
| { | |
| "epoch": 30.160825862537354, | |
| "grad_norm": 5.174230575561523, | |
| "learning_rate": 4.002718106093818e-06, | |
| "loss": 11.8433, | |
| "step": 69400 | |
| }, | |
| { | |
| "epoch": 30.182559087204563, | |
| "grad_norm": 7.197856426239014, | |
| "learning_rate": 3.998334064007015e-06, | |
| "loss": 11.8622, | |
| "step": 69450 | |
| }, | |
| { | |
| "epoch": 30.204292311871775, | |
| "grad_norm": 21.63473892211914, | |
| "learning_rate": 3.993950021920211e-06, | |
| "loss": 11.8656, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 30.226025536538984, | |
| "grad_norm": 17.78504753112793, | |
| "learning_rate": 3.989565979833407e-06, | |
| "loss": 11.8521, | |
| "step": 69550 | |
| }, | |
| { | |
| "epoch": 30.247758761206192, | |
| "grad_norm": 18.68705940246582, | |
| "learning_rate": 3.985181937746602e-06, | |
| "loss": 11.8453, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 30.269491985873405, | |
| "grad_norm": 7.354127407073975, | |
| "learning_rate": 3.980797895659799e-06, | |
| "loss": 11.8665, | |
| "step": 69650 | |
| }, | |
| { | |
| "epoch": 30.291225210540613, | |
| "grad_norm": 7.651024341583252, | |
| "learning_rate": 3.976413853572995e-06, | |
| "loss": 11.853, | |
| "step": 69700 | |
| }, | |
| { | |
| "epoch": 30.312958435207825, | |
| "grad_norm": 8.84490966796875, | |
| "learning_rate": 3.97202981148619e-06, | |
| "loss": 11.8501, | |
| "step": 69750 | |
| }, | |
| { | |
| "epoch": 30.334691659875034, | |
| "grad_norm": 8.941247940063477, | |
| "learning_rate": 3.9676457693993865e-06, | |
| "loss": 11.8615, | |
| "step": 69800 | |
| }, | |
| { | |
| "epoch": 30.356424884542243, | |
| "grad_norm": 13.154361724853516, | |
| "learning_rate": 3.963261727312583e-06, | |
| "loss": 11.8584, | |
| "step": 69850 | |
| }, | |
| { | |
| "epoch": 30.378158109209455, | |
| "grad_norm": 13.795583724975586, | |
| "learning_rate": 3.958877685225779e-06, | |
| "loss": 11.8604, | |
| "step": 69900 | |
| }, | |
| { | |
| "epoch": 30.399891333876663, | |
| "grad_norm": 8.268631935119629, | |
| "learning_rate": 3.954493643138974e-06, | |
| "loss": 11.859, | |
| "step": 69950 | |
| }, | |
| { | |
| "epoch": 30.421624558543876, | |
| "grad_norm": 28.959548950195312, | |
| "learning_rate": 3.9501096010521705e-06, | |
| "loss": 11.8702, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 30.421624558543876, | |
| "eval_cer": 0.0757471023169121, | |
| "eval_loss": 2.39117431640625, | |
| "eval_runtime": 396.909, | |
| "eval_samples_per_second": 13.62, | |
| "eval_steps_per_second": 3.406, | |
| "eval_wer": 0.22849740932642487, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 30.443357783211084, | |
| "grad_norm": 15.879110336303711, | |
| "learning_rate": 3.945725558965367e-06, | |
| "loss": 11.8687, | |
| "step": 70050 | |
| }, | |
| { | |
| "epoch": 30.465091007878293, | |
| "grad_norm": 14.088164329528809, | |
| "learning_rate": 3.941341516878562e-06, | |
| "loss": 11.85, | |
| "step": 70100 | |
| }, | |
| { | |
| "epoch": 30.486824232545505, | |
| "grad_norm": 5.0238752365112305, | |
| "learning_rate": 3.936957474791758e-06, | |
| "loss": 11.8533, | |
| "step": 70150 | |
| }, | |
| { | |
| "epoch": 30.508557457212714, | |
| "grad_norm": 11.336899757385254, | |
| "learning_rate": 3.932573432704954e-06, | |
| "loss": 11.8699, | |
| "step": 70200 | |
| }, | |
| { | |
| "epoch": 30.530290681879922, | |
| "grad_norm": 17.313730239868164, | |
| "learning_rate": 3.92818939061815e-06, | |
| "loss": 11.8562, | |
| "step": 70250 | |
| }, | |
| { | |
| "epoch": 30.552023906547134, | |
| "grad_norm": 28.565584182739258, | |
| "learning_rate": 3.923805348531346e-06, | |
| "loss": 11.8547, | |
| "step": 70300 | |
| }, | |
| { | |
| "epoch": 30.573757131214343, | |
| "grad_norm": 6.773772239685059, | |
| "learning_rate": 3.9194213064445425e-06, | |
| "loss": 11.8538, | |
| "step": 70350 | |
| }, | |
| { | |
| "epoch": 30.595490355881555, | |
| "grad_norm": 15.116411209106445, | |
| "learning_rate": 3.915037264357739e-06, | |
| "loss": 11.8638, | |
| "step": 70400 | |
| }, | |
| { | |
| "epoch": 30.617223580548764, | |
| "grad_norm": 9.379572868347168, | |
| "learning_rate": 3.910653222270934e-06, | |
| "loss": 11.8757, | |
| "step": 70450 | |
| }, | |
| { | |
| "epoch": 30.638956805215972, | |
| "grad_norm": 12.259918212890625, | |
| "learning_rate": 3.90626918018413e-06, | |
| "loss": 11.876, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 30.660690029883185, | |
| "grad_norm": 12.57608699798584, | |
| "learning_rate": 3.901885138097326e-06, | |
| "loss": 11.8507, | |
| "step": 70550 | |
| }, | |
| { | |
| "epoch": 30.682423254550393, | |
| "grad_norm": 8.661283493041992, | |
| "learning_rate": 3.897501096010522e-06, | |
| "loss": 11.8491, | |
| "step": 70600 | |
| }, | |
| { | |
| "epoch": 30.704156479217605, | |
| "grad_norm": 9.84383773803711, | |
| "learning_rate": 3.893117053923718e-06, | |
| "loss": 11.8489, | |
| "step": 70650 | |
| }, | |
| { | |
| "epoch": 30.725889703884814, | |
| "grad_norm": 9.917572975158691, | |
| "learning_rate": 3.888733011836914e-06, | |
| "loss": 11.8785, | |
| "step": 70700 | |
| }, | |
| { | |
| "epoch": 30.747622928552023, | |
| "grad_norm": 7.059745788574219, | |
| "learning_rate": 3.88434896975011e-06, | |
| "loss": 11.8737, | |
| "step": 70750 | |
| }, | |
| { | |
| "epoch": 30.769356153219235, | |
| "grad_norm": 20.44463348388672, | |
| "learning_rate": 3.879964927663306e-06, | |
| "loss": 11.8699, | |
| "step": 70800 | |
| }, | |
| { | |
| "epoch": 30.791089377886443, | |
| "grad_norm": 6.311903476715088, | |
| "learning_rate": 3.875580885576502e-06, | |
| "loss": 11.8542, | |
| "step": 70850 | |
| }, | |
| { | |
| "epoch": 30.812822602553656, | |
| "grad_norm": 6.262167930603027, | |
| "learning_rate": 3.871196843489698e-06, | |
| "loss": 11.8626, | |
| "step": 70900 | |
| }, | |
| { | |
| "epoch": 30.834555827220864, | |
| "grad_norm": 8.859283447265625, | |
| "learning_rate": 3.866812801402894e-06, | |
| "loss": 11.8909, | |
| "step": 70950 | |
| }, | |
| { | |
| "epoch": 30.856289051888073, | |
| "grad_norm": 6.593499660491943, | |
| "learning_rate": 3.86242875931609e-06, | |
| "loss": 11.8474, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 30.878022276555285, | |
| "grad_norm": 16.074264526367188, | |
| "learning_rate": 3.8580447172292855e-06, | |
| "loss": 11.8634, | |
| "step": 71050 | |
| }, | |
| { | |
| "epoch": 30.899755501222494, | |
| "grad_norm": 16.934633255004883, | |
| "learning_rate": 3.853660675142482e-06, | |
| "loss": 11.8481, | |
| "step": 71100 | |
| }, | |
| { | |
| "epoch": 30.921488725889702, | |
| "grad_norm": 11.176169395446777, | |
| "learning_rate": 3.849276633055677e-06, | |
| "loss": 11.8678, | |
| "step": 71150 | |
| }, | |
| { | |
| "epoch": 30.943221950556914, | |
| "grad_norm": 13.823466300964355, | |
| "learning_rate": 3.844892590968873e-06, | |
| "loss": 11.8525, | |
| "step": 71200 | |
| }, | |
| { | |
| "epoch": 30.964955175224123, | |
| "grad_norm": 12.757974624633789, | |
| "learning_rate": 3.84050854888207e-06, | |
| "loss": 11.8596, | |
| "step": 71250 | |
| }, | |
| { | |
| "epoch": 30.986688399891335, | |
| "grad_norm": 6.2555108070373535, | |
| "learning_rate": 3.836124506795266e-06, | |
| "loss": 11.8729, | |
| "step": 71300 | |
| }, | |
| { | |
| "epoch": 31.00825862537354, | |
| "grad_norm": 7.998335361480713, | |
| "learning_rate": 3.831740464708461e-06, | |
| "loss": 11.7557, | |
| "step": 71350 | |
| }, | |
| { | |
| "epoch": 31.02999185004075, | |
| "grad_norm": 7.063460826873779, | |
| "learning_rate": 3.8273564226216575e-06, | |
| "loss": 11.8673, | |
| "step": 71400 | |
| }, | |
| { | |
| "epoch": 31.05172507470796, | |
| "grad_norm": 7.559152126312256, | |
| "learning_rate": 3.822972380534854e-06, | |
| "loss": 11.8614, | |
| "step": 71450 | |
| }, | |
| { | |
| "epoch": 31.07345829937517, | |
| "grad_norm": 9.765264511108398, | |
| "learning_rate": 3.818588338448049e-06, | |
| "loss": 11.8243, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 31.09519152404238, | |
| "grad_norm": 8.741211891174316, | |
| "learning_rate": 3.8142042963612453e-06, | |
| "loss": 11.8631, | |
| "step": 71550 | |
| }, | |
| { | |
| "epoch": 31.11692474870959, | |
| "grad_norm": 10.110342025756836, | |
| "learning_rate": 3.809820254274441e-06, | |
| "loss": 11.8624, | |
| "step": 71600 | |
| }, | |
| { | |
| "epoch": 31.138657973376798, | |
| "grad_norm": 7.525726318359375, | |
| "learning_rate": 3.805436212187637e-06, | |
| "loss": 11.8511, | |
| "step": 71650 | |
| }, | |
| { | |
| "epoch": 31.16039119804401, | |
| "grad_norm": 6.264368057250977, | |
| "learning_rate": 3.8010521701008336e-06, | |
| "loss": 11.8447, | |
| "step": 71700 | |
| }, | |
| { | |
| "epoch": 31.18212442271122, | |
| "grad_norm": 6.522670745849609, | |
| "learning_rate": 3.7966681280140294e-06, | |
| "loss": 11.8619, | |
| "step": 71750 | |
| }, | |
| { | |
| "epoch": 31.20385764737843, | |
| "grad_norm": 17.985116958618164, | |
| "learning_rate": 3.7922840859272252e-06, | |
| "loss": 11.8723, | |
| "step": 71800 | |
| }, | |
| { | |
| "epoch": 31.22559087204564, | |
| "grad_norm": 10.094488143920898, | |
| "learning_rate": 3.787900043840421e-06, | |
| "loss": 11.8429, | |
| "step": 71850 | |
| }, | |
| { | |
| "epoch": 31.24732409671285, | |
| "grad_norm": 12.937264442443848, | |
| "learning_rate": 3.7835160017536173e-06, | |
| "loss": 11.8569, | |
| "step": 71900 | |
| }, | |
| { | |
| "epoch": 31.26905732138006, | |
| "grad_norm": 20.594358444213867, | |
| "learning_rate": 3.779131959666813e-06, | |
| "loss": 11.8438, | |
| "step": 71950 | |
| }, | |
| { | |
| "epoch": 31.29079054604727, | |
| "grad_norm": 10.052034378051758, | |
| "learning_rate": 3.774747917580009e-06, | |
| "loss": 11.8419, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 31.31252377071448, | |
| "grad_norm": 8.929048538208008, | |
| "learning_rate": 3.7703638754932047e-06, | |
| "loss": 11.8299, | |
| "step": 72050 | |
| }, | |
| { | |
| "epoch": 31.33425699538169, | |
| "grad_norm": 9.807400703430176, | |
| "learning_rate": 3.7659798334064014e-06, | |
| "loss": 11.8243, | |
| "step": 72100 | |
| }, | |
| { | |
| "epoch": 31.3559902200489, | |
| "grad_norm": 17.955623626708984, | |
| "learning_rate": 3.761595791319597e-06, | |
| "loss": 11.8255, | |
| "step": 72150 | |
| }, | |
| { | |
| "epoch": 31.37772344471611, | |
| "grad_norm": 19.642745971679688, | |
| "learning_rate": 3.757211749232793e-06, | |
| "loss": 11.8498, | |
| "step": 72200 | |
| }, | |
| { | |
| "epoch": 31.39945666938332, | |
| "grad_norm": 8.74807357788086, | |
| "learning_rate": 3.752827707145989e-06, | |
| "loss": 11.8492, | |
| "step": 72250 | |
| }, | |
| { | |
| "epoch": 31.42118989405053, | |
| "grad_norm": 8.516878128051758, | |
| "learning_rate": 3.748443665059185e-06, | |
| "loss": 11.8481, | |
| "step": 72300 | |
| }, | |
| { | |
| "epoch": 31.44292311871774, | |
| "grad_norm": 26.898788452148438, | |
| "learning_rate": 3.744059622972381e-06, | |
| "loss": 11.8371, | |
| "step": 72350 | |
| }, | |
| { | |
| "epoch": 31.46465634338495, | |
| "grad_norm": 6.748674392700195, | |
| "learning_rate": 3.7396755808855766e-06, | |
| "loss": 11.8486, | |
| "step": 72400 | |
| }, | |
| { | |
| "epoch": 31.48638956805216, | |
| "grad_norm": 10.551872253417969, | |
| "learning_rate": 3.7352915387987725e-06, | |
| "loss": 11.8533, | |
| "step": 72450 | |
| }, | |
| { | |
| "epoch": 31.50812279271937, | |
| "grad_norm": 14.1845703125, | |
| "learning_rate": 3.7309074967119687e-06, | |
| "loss": 11.8414, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 31.529856017386578, | |
| "grad_norm": 16.51775360107422, | |
| "learning_rate": 3.726523454625165e-06, | |
| "loss": 11.8535, | |
| "step": 72550 | |
| }, | |
| { | |
| "epoch": 31.55158924205379, | |
| "grad_norm": 24.120222091674805, | |
| "learning_rate": 3.7221394125383607e-06, | |
| "loss": 11.8557, | |
| "step": 72600 | |
| }, | |
| { | |
| "epoch": 31.573322466721, | |
| "grad_norm": 6.063103199005127, | |
| "learning_rate": 3.717755370451557e-06, | |
| "loss": 11.8375, | |
| "step": 72650 | |
| }, | |
| { | |
| "epoch": 31.59505569138821, | |
| "grad_norm": 11.34897232055664, | |
| "learning_rate": 3.7133713283647528e-06, | |
| "loss": 11.8438, | |
| "step": 72700 | |
| }, | |
| { | |
| "epoch": 31.61678891605542, | |
| "grad_norm": 9.746992111206055, | |
| "learning_rate": 3.7089872862779486e-06, | |
| "loss": 11.8471, | |
| "step": 72750 | |
| }, | |
| { | |
| "epoch": 31.63852214072263, | |
| "grad_norm": 8.114310264587402, | |
| "learning_rate": 3.7046032441911444e-06, | |
| "loss": 11.8412, | |
| "step": 72800 | |
| }, | |
| { | |
| "epoch": 31.66025536538984, | |
| "grad_norm": 8.393730163574219, | |
| "learning_rate": 3.70021920210434e-06, | |
| "loss": 11.837, | |
| "step": 72850 | |
| }, | |
| { | |
| "epoch": 31.68198859005705, | |
| "grad_norm": 8.245162963867188, | |
| "learning_rate": 3.6958351600175364e-06, | |
| "loss": 11.8553, | |
| "step": 72900 | |
| }, | |
| { | |
| "epoch": 31.70372181472426, | |
| "grad_norm": 7.575582981109619, | |
| "learning_rate": 3.6914511179307323e-06, | |
| "loss": 11.845, | |
| "step": 72950 | |
| }, | |
| { | |
| "epoch": 31.72545503939147, | |
| "grad_norm": 7.178465366363525, | |
| "learning_rate": 3.6870670758439285e-06, | |
| "loss": 11.8327, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 31.74718826405868, | |
| "grad_norm": 8.260749816894531, | |
| "learning_rate": 3.6826830337571247e-06, | |
| "loss": 11.8478, | |
| "step": 73050 | |
| }, | |
| { | |
| "epoch": 31.76892148872589, | |
| "grad_norm": 45.3736457824707, | |
| "learning_rate": 3.6782989916703205e-06, | |
| "loss": 11.8445, | |
| "step": 73100 | |
| }, | |
| { | |
| "epoch": 31.7906547133931, | |
| "grad_norm": 15.68336296081543, | |
| "learning_rate": 3.6739149495835163e-06, | |
| "loss": 11.8551, | |
| "step": 73150 | |
| }, | |
| { | |
| "epoch": 31.81238793806031, | |
| "grad_norm": 5.821103572845459, | |
| "learning_rate": 3.669530907496712e-06, | |
| "loss": 11.8517, | |
| "step": 73200 | |
| }, | |
| { | |
| "epoch": 31.83412116272752, | |
| "grad_norm": 12.418885231018066, | |
| "learning_rate": 3.665146865409908e-06, | |
| "loss": 11.8517, | |
| "step": 73250 | |
| }, | |
| { | |
| "epoch": 31.85585438739473, | |
| "grad_norm": 8.705698013305664, | |
| "learning_rate": 3.660762823323104e-06, | |
| "loss": 11.8424, | |
| "step": 73300 | |
| }, | |
| { | |
| "epoch": 31.87758761206194, | |
| "grad_norm": 9.667759895324707, | |
| "learning_rate": 3.6563787812363e-06, | |
| "loss": 11.8561, | |
| "step": 73350 | |
| }, | |
| { | |
| "epoch": 31.89932083672915, | |
| "grad_norm": 14.76951789855957, | |
| "learning_rate": 3.651994739149496e-06, | |
| "loss": 11.8605, | |
| "step": 73400 | |
| }, | |
| { | |
| "epoch": 31.921054061396358, | |
| "grad_norm": 14.691853523254395, | |
| "learning_rate": 3.6476106970626925e-06, | |
| "loss": 11.8595, | |
| "step": 73450 | |
| }, | |
| { | |
| "epoch": 31.94278728606357, | |
| "grad_norm": 7.9246721267700195, | |
| "learning_rate": 3.6432266549758883e-06, | |
| "loss": 11.8473, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 31.96452051073078, | |
| "grad_norm": 5.882972240447998, | |
| "learning_rate": 3.638842612889084e-06, | |
| "loss": 11.8418, | |
| "step": 73550 | |
| }, | |
| { | |
| "epoch": 31.98625373539799, | |
| "grad_norm": 6.664644718170166, | |
| "learning_rate": 3.63445857080228e-06, | |
| "loss": 11.8388, | |
| "step": 73600 | |
| }, | |
| { | |
| "epoch": 32.007823960880195, | |
| "grad_norm": 7.938138961791992, | |
| "learning_rate": 3.630074528715476e-06, | |
| "loss": 11.7538, | |
| "step": 73650 | |
| }, | |
| { | |
| "epoch": 32.029557185547404, | |
| "grad_norm": 8.011933326721191, | |
| "learning_rate": 3.625690486628672e-06, | |
| "loss": 11.8182, | |
| "step": 73700 | |
| }, | |
| { | |
| "epoch": 32.05129041021461, | |
| "grad_norm": 11.604764938354492, | |
| "learning_rate": 3.6213064445418678e-06, | |
| "loss": 11.8178, | |
| "step": 73750 | |
| }, | |
| { | |
| "epoch": 32.07302363488183, | |
| "grad_norm": 13.241369247436523, | |
| "learning_rate": 3.6169224024550636e-06, | |
| "loss": 11.8383, | |
| "step": 73800 | |
| }, | |
| { | |
| "epoch": 32.09475685954904, | |
| "grad_norm": 5.11595344543457, | |
| "learning_rate": 3.6125383603682594e-06, | |
| "loss": 11.8519, | |
| "step": 73850 | |
| }, | |
| { | |
| "epoch": 32.116490084216245, | |
| "grad_norm": 17.570140838623047, | |
| "learning_rate": 3.608154318281456e-06, | |
| "loss": 11.8329, | |
| "step": 73900 | |
| }, | |
| { | |
| "epoch": 32.138223308883454, | |
| "grad_norm": 10.764384269714355, | |
| "learning_rate": 3.603770276194652e-06, | |
| "loss": 11.8312, | |
| "step": 73950 | |
| }, | |
| { | |
| "epoch": 32.15995653355066, | |
| "grad_norm": 21.758943557739258, | |
| "learning_rate": 3.5993862341078477e-06, | |
| "loss": 11.8513, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 32.18168975821788, | |
| "grad_norm": 6.227720260620117, | |
| "learning_rate": 3.595002192021044e-06, | |
| "loss": 11.8468, | |
| "step": 74050 | |
| }, | |
| { | |
| "epoch": 32.20342298288509, | |
| "grad_norm": 6.502994537353516, | |
| "learning_rate": 3.5906181499342397e-06, | |
| "loss": 11.8287, | |
| "step": 74100 | |
| }, | |
| { | |
| "epoch": 32.225156207552295, | |
| "grad_norm": 8.124176025390625, | |
| "learning_rate": 3.5862341078474355e-06, | |
| "loss": 11.8244, | |
| "step": 74150 | |
| }, | |
| { | |
| "epoch": 32.246889432219504, | |
| "grad_norm": 17.224422454833984, | |
| "learning_rate": 3.5818500657606313e-06, | |
| "loss": 11.8529, | |
| "step": 74200 | |
| }, | |
| { | |
| "epoch": 32.26862265688671, | |
| "grad_norm": 16.075273513793945, | |
| "learning_rate": 3.577466023673827e-06, | |
| "loss": 11.8337, | |
| "step": 74250 | |
| }, | |
| { | |
| "epoch": 32.29035588155393, | |
| "grad_norm": 10.724888801574707, | |
| "learning_rate": 3.573081981587024e-06, | |
| "loss": 11.8234, | |
| "step": 74300 | |
| }, | |
| { | |
| "epoch": 32.31208910622114, | |
| "grad_norm": 13.913077354431152, | |
| "learning_rate": 3.5686979395002196e-06, | |
| "loss": 11.824, | |
| "step": 74350 | |
| }, | |
| { | |
| "epoch": 32.333822330888346, | |
| "grad_norm": 5.98539924621582, | |
| "learning_rate": 3.5643138974134154e-06, | |
| "loss": 11.8469, | |
| "step": 74400 | |
| }, | |
| { | |
| "epoch": 32.355555555555554, | |
| "grad_norm": 16.95889663696289, | |
| "learning_rate": 3.5599298553266117e-06, | |
| "loss": 11.8407, | |
| "step": 74450 | |
| }, | |
| { | |
| "epoch": 32.37728878022276, | |
| "grad_norm": 11.7858304977417, | |
| "learning_rate": 3.5555458132398075e-06, | |
| "loss": 11.8458, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 32.39902200488998, | |
| "grad_norm": 12.35476303100586, | |
| "learning_rate": 3.5511617711530033e-06, | |
| "loss": 11.8322, | |
| "step": 74550 | |
| }, | |
| { | |
| "epoch": 32.42075522955719, | |
| "grad_norm": 8.592928886413574, | |
| "learning_rate": 3.546777729066199e-06, | |
| "loss": 11.8464, | |
| "step": 74600 | |
| }, | |
| { | |
| "epoch": 32.442488454224396, | |
| "grad_norm": 15.99875259399414, | |
| "learning_rate": 3.5423936869793953e-06, | |
| "loss": 11.8412, | |
| "step": 74650 | |
| }, | |
| { | |
| "epoch": 32.464221678891604, | |
| "grad_norm": 6.029876232147217, | |
| "learning_rate": 3.538009644892591e-06, | |
| "loss": 11.8529, | |
| "step": 74700 | |
| }, | |
| { | |
| "epoch": 32.48595490355881, | |
| "grad_norm": 25.144210815429688, | |
| "learning_rate": 3.5336256028057874e-06, | |
| "loss": 11.8538, | |
| "step": 74750 | |
| }, | |
| { | |
| "epoch": 32.50768812822603, | |
| "grad_norm": 4.607775688171387, | |
| "learning_rate": 3.529241560718983e-06, | |
| "loss": 11.8652, | |
| "step": 74800 | |
| }, | |
| { | |
| "epoch": 32.52942135289324, | |
| "grad_norm": 9.385605812072754, | |
| "learning_rate": 3.5248575186321794e-06, | |
| "loss": 11.8256, | |
| "step": 74850 | |
| }, | |
| { | |
| "epoch": 32.551154577560446, | |
| "grad_norm": 8.230783462524414, | |
| "learning_rate": 3.5204734765453752e-06, | |
| "loss": 11.8065, | |
| "step": 74900 | |
| }, | |
| { | |
| "epoch": 32.572887802227655, | |
| "grad_norm": 38.624691009521484, | |
| "learning_rate": 3.516089434458571e-06, | |
| "loss": 11.8167, | |
| "step": 74950 | |
| }, | |
| { | |
| "epoch": 32.59462102689486, | |
| "grad_norm": 17.61267852783203, | |
| "learning_rate": 3.511705392371767e-06, | |
| "loss": 11.8357, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 32.61635425156208, | |
| "grad_norm": 6.209005355834961, | |
| "learning_rate": 3.507321350284963e-06, | |
| "loss": 11.8375, | |
| "step": 75050 | |
| }, | |
| { | |
| "epoch": 32.63808747622929, | |
| "grad_norm": 14.121482849121094, | |
| "learning_rate": 3.502937308198159e-06, | |
| "loss": 11.8476, | |
| "step": 75100 | |
| }, | |
| { | |
| "epoch": 32.659820700896496, | |
| "grad_norm": 28.74132537841797, | |
| "learning_rate": 3.4985532661113547e-06, | |
| "loss": 11.8317, | |
| "step": 75150 | |
| }, | |
| { | |
| "epoch": 32.681553925563705, | |
| "grad_norm": 6.806987762451172, | |
| "learning_rate": 3.4941692240245514e-06, | |
| "loss": 11.8527, | |
| "step": 75200 | |
| }, | |
| { | |
| "epoch": 32.70328715023091, | |
| "grad_norm": 7.174561500549316, | |
| "learning_rate": 3.489785181937747e-06, | |
| "loss": 11.8188, | |
| "step": 75250 | |
| }, | |
| { | |
| "epoch": 32.72502037489812, | |
| "grad_norm": 13.119464874267578, | |
| "learning_rate": 3.485401139850943e-06, | |
| "loss": 11.8392, | |
| "step": 75300 | |
| }, | |
| { | |
| "epoch": 32.74675359956534, | |
| "grad_norm": 8.41006851196289, | |
| "learning_rate": 3.4810170977641388e-06, | |
| "loss": 11.8283, | |
| "step": 75350 | |
| }, | |
| { | |
| "epoch": 32.768486824232546, | |
| "grad_norm": 10.47354507446289, | |
| "learning_rate": 3.4766330556773346e-06, | |
| "loss": 11.8295, | |
| "step": 75400 | |
| }, | |
| { | |
| "epoch": 32.790220048899755, | |
| "grad_norm": 7.730106353759766, | |
| "learning_rate": 3.472249013590531e-06, | |
| "loss": 11.8672, | |
| "step": 75450 | |
| }, | |
| { | |
| "epoch": 32.811953273566964, | |
| "grad_norm": 6.337311744689941, | |
| "learning_rate": 3.4678649715037266e-06, | |
| "loss": 11.8289, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 32.83368649823417, | |
| "grad_norm": 9.5441255569458, | |
| "learning_rate": 3.4634809294169225e-06, | |
| "loss": 11.8215, | |
| "step": 75550 | |
| }, | |
| { | |
| "epoch": 32.85541972290139, | |
| "grad_norm": 8.01675796508789, | |
| "learning_rate": 3.4590968873301183e-06, | |
| "loss": 11.8303, | |
| "step": 75600 | |
| }, | |
| { | |
| "epoch": 32.8771529475686, | |
| "grad_norm": 10.308701515197754, | |
| "learning_rate": 3.454712845243315e-06, | |
| "loss": 11.8337, | |
| "step": 75650 | |
| }, | |
| { | |
| "epoch": 32.898886172235805, | |
| "grad_norm": 8.78437614440918, | |
| "learning_rate": 3.4503288031565107e-06, | |
| "loss": 11.8283, | |
| "step": 75700 | |
| }, | |
| { | |
| "epoch": 32.920619396903014, | |
| "grad_norm": 12.1674222946167, | |
| "learning_rate": 3.4459447610697065e-06, | |
| "loss": 11.8378, | |
| "step": 75750 | |
| }, | |
| { | |
| "epoch": 32.94235262157022, | |
| "grad_norm": 11.723808288574219, | |
| "learning_rate": 3.4415607189829024e-06, | |
| "loss": 11.8242, | |
| "step": 75800 | |
| }, | |
| { | |
| "epoch": 32.96408584623744, | |
| "grad_norm": 18.23768424987793, | |
| "learning_rate": 3.4371766768960986e-06, | |
| "loss": 11.8577, | |
| "step": 75850 | |
| }, | |
| { | |
| "epoch": 32.98581907090465, | |
| "grad_norm": 23.5877742767334, | |
| "learning_rate": 3.4327926348092944e-06, | |
| "loss": 11.8222, | |
| "step": 75900 | |
| }, | |
| { | |
| "epoch": 33.007389296386854, | |
| "grad_norm": 6.948608875274658, | |
| "learning_rate": 3.42840859272249e-06, | |
| "loss": 11.7388, | |
| "step": 75950 | |
| }, | |
| { | |
| "epoch": 33.02912252105406, | |
| "grad_norm": 4.4768476486206055, | |
| "learning_rate": 3.424024550635686e-06, | |
| "loss": 11.8317, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 33.05085574572127, | |
| "grad_norm": 7.8470282554626465, | |
| "learning_rate": 3.4196405085488823e-06, | |
| "loss": 11.8416, | |
| "step": 76050 | |
| }, | |
| { | |
| "epoch": 33.07258897038848, | |
| "grad_norm": 7.3259053230285645, | |
| "learning_rate": 3.4152564664620785e-06, | |
| "loss": 11.8322, | |
| "step": 76100 | |
| }, | |
| { | |
| "epoch": 33.09432219505569, | |
| "grad_norm": 16.797231674194336, | |
| "learning_rate": 3.4108724243752743e-06, | |
| "loss": 11.8436, | |
| "step": 76150 | |
| }, | |
| { | |
| "epoch": 33.116055419722905, | |
| "grad_norm": 4.982487201690674, | |
| "learning_rate": 3.4064883822884705e-06, | |
| "loss": 11.799, | |
| "step": 76200 | |
| }, | |
| { | |
| "epoch": 33.13778864439011, | |
| "grad_norm": 8.252666473388672, | |
| "learning_rate": 3.4021043402016663e-06, | |
| "loss": 11.8154, | |
| "step": 76250 | |
| }, | |
| { | |
| "epoch": 33.15952186905732, | |
| "grad_norm": 9.021413803100586, | |
| "learning_rate": 3.397720298114862e-06, | |
| "loss": 11.8364, | |
| "step": 76300 | |
| }, | |
| { | |
| "epoch": 33.18125509372453, | |
| "grad_norm": 4.675612926483154, | |
| "learning_rate": 3.393336256028058e-06, | |
| "loss": 11.8131, | |
| "step": 76350 | |
| }, | |
| { | |
| "epoch": 33.20298831839174, | |
| "grad_norm": 8.468708992004395, | |
| "learning_rate": 3.3889522139412538e-06, | |
| "loss": 11.8201, | |
| "step": 76400 | |
| }, | |
| { | |
| "epoch": 33.224721543058955, | |
| "grad_norm": 21.99992561340332, | |
| "learning_rate": 3.38456817185445e-06, | |
| "loss": 11.8318, | |
| "step": 76450 | |
| }, | |
| { | |
| "epoch": 33.24645476772616, | |
| "grad_norm": 5.2964301109313965, | |
| "learning_rate": 3.3801841297676462e-06, | |
| "loss": 11.8196, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 33.26818799239337, | |
| "grad_norm": 12.34626579284668, | |
| "learning_rate": 3.375800087680842e-06, | |
| "loss": 11.8333, | |
| "step": 76550 | |
| }, | |
| { | |
| "epoch": 33.28992121706058, | |
| "grad_norm": 12.113372802734375, | |
| "learning_rate": 3.3714160455940383e-06, | |
| "loss": 11.8187, | |
| "step": 76600 | |
| }, | |
| { | |
| "epoch": 33.31165444172779, | |
| "grad_norm": 15.364481925964355, | |
| "learning_rate": 3.367032003507234e-06, | |
| "loss": 11.8173, | |
| "step": 76650 | |
| }, | |
| { | |
| "epoch": 33.333387666395, | |
| "grad_norm": 4.8235063552856445, | |
| "learning_rate": 3.36264796142043e-06, | |
| "loss": 11.8306, | |
| "step": 76700 | |
| }, | |
| { | |
| "epoch": 33.355120891062214, | |
| "grad_norm": 23.78803253173828, | |
| "learning_rate": 3.3582639193336257e-06, | |
| "loss": 11.8027, | |
| "step": 76750 | |
| }, | |
| { | |
| "epoch": 33.37685411572942, | |
| "grad_norm": 9.344151496887207, | |
| "learning_rate": 3.3538798772468215e-06, | |
| "loss": 11.8113, | |
| "step": 76800 | |
| }, | |
| { | |
| "epoch": 33.39858734039663, | |
| "grad_norm": 8.895915985107422, | |
| "learning_rate": 3.3494958351600178e-06, | |
| "loss": 11.8214, | |
| "step": 76850 | |
| }, | |
| { | |
| "epoch": 33.42032056506384, | |
| "grad_norm": 33.99968719482422, | |
| "learning_rate": 3.3451117930732136e-06, | |
| "loss": 11.8217, | |
| "step": 76900 | |
| }, | |
| { | |
| "epoch": 33.44205378973105, | |
| "grad_norm": 9.92707633972168, | |
| "learning_rate": 3.34072775098641e-06, | |
| "loss": 11.8012, | |
| "step": 76950 | |
| }, | |
| { | |
| "epoch": 33.463787014398264, | |
| "grad_norm": 12.355010986328125, | |
| "learning_rate": 3.336343708899606e-06, | |
| "loss": 11.8093, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 33.48552023906547, | |
| "grad_norm": 12.512097358703613, | |
| "learning_rate": 3.331959666812802e-06, | |
| "loss": 11.8357, | |
| "step": 77050 | |
| }, | |
| { | |
| "epoch": 33.50725346373268, | |
| "grad_norm": 4.472128391265869, | |
| "learning_rate": 3.3275756247259977e-06, | |
| "loss": 11.8175, | |
| "step": 77100 | |
| }, | |
| { | |
| "epoch": 33.52898668839989, | |
| "grad_norm": 12.460317611694336, | |
| "learning_rate": 3.3231915826391935e-06, | |
| "loss": 11.8219, | |
| "step": 77150 | |
| }, | |
| { | |
| "epoch": 33.5507199130671, | |
| "grad_norm": 10.255359649658203, | |
| "learning_rate": 3.3188075405523897e-06, | |
| "loss": 11.8135, | |
| "step": 77200 | |
| }, | |
| { | |
| "epoch": 33.572453137734314, | |
| "grad_norm": 9.60875415802002, | |
| "learning_rate": 3.3144234984655855e-06, | |
| "loss": 11.8244, | |
| "step": 77250 | |
| }, | |
| { | |
| "epoch": 33.59418636240152, | |
| "grad_norm": 7.315709590911865, | |
| "learning_rate": 3.3100394563787813e-06, | |
| "loss": 11.8137, | |
| "step": 77300 | |
| }, | |
| { | |
| "epoch": 33.61591958706873, | |
| "grad_norm": 16.642723083496094, | |
| "learning_rate": 3.305655414291977e-06, | |
| "loss": 11.8368, | |
| "step": 77350 | |
| }, | |
| { | |
| "epoch": 33.63765281173594, | |
| "grad_norm": 4.400660991668701, | |
| "learning_rate": 3.301271372205174e-06, | |
| "loss": 11.8195, | |
| "step": 77400 | |
| }, | |
| { | |
| "epoch": 33.65938603640315, | |
| "grad_norm": 8.862713813781738, | |
| "learning_rate": 3.2968873301183696e-06, | |
| "loss": 11.8168, | |
| "step": 77450 | |
| }, | |
| { | |
| "epoch": 33.681119261070364, | |
| "grad_norm": 10.427742004394531, | |
| "learning_rate": 3.2925032880315654e-06, | |
| "loss": 11.8033, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 33.70285248573757, | |
| "grad_norm": 6.926135540008545, | |
| "learning_rate": 3.2881192459447612e-06, | |
| "loss": 11.828, | |
| "step": 77550 | |
| }, | |
| { | |
| "epoch": 33.72458571040478, | |
| "grad_norm": 5.068178176879883, | |
| "learning_rate": 3.2837352038579575e-06, | |
| "loss": 11.8273, | |
| "step": 77600 | |
| }, | |
| { | |
| "epoch": 33.74631893507199, | |
| "grad_norm": 6.944793224334717, | |
| "learning_rate": 3.2793511617711533e-06, | |
| "loss": 11.813, | |
| "step": 77650 | |
| }, | |
| { | |
| "epoch": 33.7680521597392, | |
| "grad_norm": 36.322383880615234, | |
| "learning_rate": 3.274967119684349e-06, | |
| "loss": 11.8141, | |
| "step": 77700 | |
| }, | |
| { | |
| "epoch": 33.789785384406414, | |
| "grad_norm": 6.488020420074463, | |
| "learning_rate": 3.270583077597545e-06, | |
| "loss": 11.8322, | |
| "step": 77750 | |
| }, | |
| { | |
| "epoch": 33.81151860907362, | |
| "grad_norm": 9.435515403747559, | |
| "learning_rate": 3.2661990355107407e-06, | |
| "loss": 11.8242, | |
| "step": 77800 | |
| }, | |
| { | |
| "epoch": 33.83325183374083, | |
| "grad_norm": 4.060996055603027, | |
| "learning_rate": 3.2618149934239374e-06, | |
| "loss": 11.816, | |
| "step": 77850 | |
| }, | |
| { | |
| "epoch": 33.85498505840804, | |
| "grad_norm": 13.589747428894043, | |
| "learning_rate": 3.257430951337133e-06, | |
| "loss": 11.8091, | |
| "step": 77900 | |
| }, | |
| { | |
| "epoch": 33.87671828307525, | |
| "grad_norm": 11.052616119384766, | |
| "learning_rate": 3.253046909250329e-06, | |
| "loss": 11.8391, | |
| "step": 77950 | |
| }, | |
| { | |
| "epoch": 33.898451507742465, | |
| "grad_norm": 10.746622085571289, | |
| "learning_rate": 3.2486628671635252e-06, | |
| "loss": 11.8432, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 33.92018473240967, | |
| "grad_norm": 10.50125503540039, | |
| "learning_rate": 3.244278825076721e-06, | |
| "loss": 11.8408, | |
| "step": 78050 | |
| }, | |
| { | |
| "epoch": 33.94191795707688, | |
| "grad_norm": 6.73277473449707, | |
| "learning_rate": 3.239894782989917e-06, | |
| "loss": 11.8296, | |
| "step": 78100 | |
| }, | |
| { | |
| "epoch": 33.96365118174409, | |
| "grad_norm": 10.480985641479492, | |
| "learning_rate": 3.2355107409031126e-06, | |
| "loss": 11.8362, | |
| "step": 78150 | |
| }, | |
| { | |
| "epoch": 33.9853844064113, | |
| "grad_norm": 7.480873107910156, | |
| "learning_rate": 3.231126698816309e-06, | |
| "loss": 11.8262, | |
| "step": 78200 | |
| }, | |
| { | |
| "epoch": 34.00695463189351, | |
| "grad_norm": 9.988080024719238, | |
| "learning_rate": 3.2267426567295047e-06, | |
| "loss": 11.735, | |
| "step": 78250 | |
| }, | |
| { | |
| "epoch": 34.028687856560715, | |
| "grad_norm": 10.169675827026367, | |
| "learning_rate": 3.222358614642701e-06, | |
| "loss": 11.8251, | |
| "step": 78300 | |
| }, | |
| { | |
| "epoch": 34.050421081227924, | |
| "grad_norm": 13.815673828125, | |
| "learning_rate": 3.217974572555897e-06, | |
| "loss": 11.8091, | |
| "step": 78350 | |
| }, | |
| { | |
| "epoch": 34.07215430589514, | |
| "grad_norm": 10.704404830932617, | |
| "learning_rate": 3.213590530469093e-06, | |
| "loss": 11.8009, | |
| "step": 78400 | |
| }, | |
| { | |
| "epoch": 34.09388753056235, | |
| "grad_norm": 9.71978759765625, | |
| "learning_rate": 3.2092064883822888e-06, | |
| "loss": 11.8105, | |
| "step": 78450 | |
| }, | |
| { | |
| "epoch": 34.11562075522956, | |
| "grad_norm": 18.075393676757812, | |
| "learning_rate": 3.2048224462954846e-06, | |
| "loss": 11.8083, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 34.137353979896766, | |
| "grad_norm": 10.046432495117188, | |
| "learning_rate": 3.2004384042086804e-06, | |
| "loss": 11.7901, | |
| "step": 78550 | |
| }, | |
| { | |
| "epoch": 34.159087204563974, | |
| "grad_norm": 11.01378345489502, | |
| "learning_rate": 3.1960543621218766e-06, | |
| "loss": 11.7937, | |
| "step": 78600 | |
| }, | |
| { | |
| "epoch": 34.18082042923119, | |
| "grad_norm": 20.022729873657227, | |
| "learning_rate": 3.1916703200350724e-06, | |
| "loss": 11.8135, | |
| "step": 78650 | |
| }, | |
| { | |
| "epoch": 34.2025536538984, | |
| "grad_norm": 6.636748790740967, | |
| "learning_rate": 3.1872862779482687e-06, | |
| "loss": 11.8014, | |
| "step": 78700 | |
| }, | |
| { | |
| "epoch": 34.22428687856561, | |
| "grad_norm": 13.776731491088867, | |
| "learning_rate": 3.182902235861465e-06, | |
| "loss": 11.8353, | |
| "step": 78750 | |
| }, | |
| { | |
| "epoch": 34.246020103232816, | |
| "grad_norm": 4.75822114944458, | |
| "learning_rate": 3.1785181937746607e-06, | |
| "loss": 11.7999, | |
| "step": 78800 | |
| }, | |
| { | |
| "epoch": 34.267753327900024, | |
| "grad_norm": 11.153389930725098, | |
| "learning_rate": 3.1741341516878565e-06, | |
| "loss": 11.8159, | |
| "step": 78850 | |
| }, | |
| { | |
| "epoch": 34.28948655256724, | |
| "grad_norm": 13.353851318359375, | |
| "learning_rate": 3.1697501096010523e-06, | |
| "loss": 11.807, | |
| "step": 78900 | |
| }, | |
| { | |
| "epoch": 34.31121977723445, | |
| "grad_norm": 5.565258026123047, | |
| "learning_rate": 3.165366067514248e-06, | |
| "loss": 11.8092, | |
| "step": 78950 | |
| }, | |
| { | |
| "epoch": 34.33295300190166, | |
| "grad_norm": 16.32341194152832, | |
| "learning_rate": 3.1609820254274444e-06, | |
| "loss": 11.8032, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 34.354686226568866, | |
| "grad_norm": 8.501863479614258, | |
| "learning_rate": 3.15659798334064e-06, | |
| "loss": 11.8121, | |
| "step": 79050 | |
| }, | |
| { | |
| "epoch": 34.376419451236075, | |
| "grad_norm": 5.864038467407227, | |
| "learning_rate": 3.152213941253836e-06, | |
| "loss": 11.8086, | |
| "step": 79100 | |
| }, | |
| { | |
| "epoch": 34.39815267590329, | |
| "grad_norm": 12.040675163269043, | |
| "learning_rate": 3.1478298991670327e-06, | |
| "loss": 11.8134, | |
| "step": 79150 | |
| }, | |
| { | |
| "epoch": 34.4198859005705, | |
| "grad_norm": 24.84530258178711, | |
| "learning_rate": 3.1434458570802285e-06, | |
| "loss": 11.8149, | |
| "step": 79200 | |
| }, | |
| { | |
| "epoch": 34.44161912523771, | |
| "grad_norm": 11.11407470703125, | |
| "learning_rate": 3.1390618149934243e-06, | |
| "loss": 11.8105, | |
| "step": 79250 | |
| }, | |
| { | |
| "epoch": 34.463352349904916, | |
| "grad_norm": 15.913960456848145, | |
| "learning_rate": 3.13467777290662e-06, | |
| "loss": 11.8004, | |
| "step": 79300 | |
| }, | |
| { | |
| "epoch": 34.485085574572125, | |
| "grad_norm": 7.755058288574219, | |
| "learning_rate": 3.1302937308198163e-06, | |
| "loss": 11.8044, | |
| "step": 79350 | |
| }, | |
| { | |
| "epoch": 34.50681879923934, | |
| "grad_norm": 5.433537006378174, | |
| "learning_rate": 3.125909688733012e-06, | |
| "loss": 11.8161, | |
| "step": 79400 | |
| }, | |
| { | |
| "epoch": 34.52855202390655, | |
| "grad_norm": 6.0616912841796875, | |
| "learning_rate": 3.121525646646208e-06, | |
| "loss": 11.806, | |
| "step": 79450 | |
| }, | |
| { | |
| "epoch": 34.55028524857376, | |
| "grad_norm": 8.498095512390137, | |
| "learning_rate": 3.1171416045594038e-06, | |
| "loss": 11.8407, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 34.572018473240966, | |
| "grad_norm": 24.549198150634766, | |
| "learning_rate": 3.1127575624725996e-06, | |
| "loss": 11.8168, | |
| "step": 79550 | |
| }, | |
| { | |
| "epoch": 34.593751697908175, | |
| "grad_norm": 11.136092185974121, | |
| "learning_rate": 3.1083735203857962e-06, | |
| "loss": 11.8184, | |
| "step": 79600 | |
| }, | |
| { | |
| "epoch": 34.61548492257539, | |
| "grad_norm": 8.212324142456055, | |
| "learning_rate": 3.103989478298992e-06, | |
| "loss": 11.802, | |
| "step": 79650 | |
| }, | |
| { | |
| "epoch": 34.6372181472426, | |
| "grad_norm": 4.912588596343994, | |
| "learning_rate": 3.099605436212188e-06, | |
| "loss": 11.8164, | |
| "step": 79700 | |
| }, | |
| { | |
| "epoch": 34.65895137190981, | |
| "grad_norm": 5.911812782287598, | |
| "learning_rate": 3.095221394125384e-06, | |
| "loss": 11.8035, | |
| "step": 79750 | |
| }, | |
| { | |
| "epoch": 34.68068459657702, | |
| "grad_norm": 13.612801551818848, | |
| "learning_rate": 3.09083735203858e-06, | |
| "loss": 11.8221, | |
| "step": 79800 | |
| }, | |
| { | |
| "epoch": 34.702417821244225, | |
| "grad_norm": 7.984292030334473, | |
| "learning_rate": 3.0864533099517757e-06, | |
| "loss": 11.8073, | |
| "step": 79850 | |
| }, | |
| { | |
| "epoch": 34.724151045911434, | |
| "grad_norm": 12.358894348144531, | |
| "learning_rate": 3.0820692678649715e-06, | |
| "loss": 11.793, | |
| "step": 79900 | |
| }, | |
| { | |
| "epoch": 34.74588427057865, | |
| "grad_norm": 9.695011138916016, | |
| "learning_rate": 3.0776852257781673e-06, | |
| "loss": 11.8138, | |
| "step": 79950 | |
| }, | |
| { | |
| "epoch": 34.76761749524586, | |
| "grad_norm": 13.982564926147461, | |
| "learning_rate": 3.0733011836913636e-06, | |
| "loss": 11.8171, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 34.76761749524586, | |
| "eval_cer": 0.0757471023169121, | |
| "eval_loss": 2.4033260345458984, | |
| "eval_runtime": 399.2668, | |
| "eval_samples_per_second": 13.54, | |
| "eval_steps_per_second": 3.386, | |
| "eval_wer": 0.22807348092322186, | |
| "step": 80000 | |
| }, | |
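A minimal sketch of how one might consume entries like the ones above, assuming the surrounding log is a Hugging Face `trainer_state.json` whose `log_history` array mixes training entries (`step`, `loss`, `grad_norm`, `learning_rate`) with periodic evaluation entries (`eval_cer`, `eval_wer`, `eval_loss`). The schema is inferred from the fields visible in this log, and the file path is a placeholder:

```python
# Sketch: summarize a trainer_state.json-style log.
# Schema is inferred from the fields visible above; the path is hypothetical.
import json

with open("trainer_state.json", encoding="utf-8") as f:
    # Python's json module also accepts the non-standard Infinity/NaN
    # constants that strict JSON parsers reject.
    history = json.load(f)["log_history"]

# Training entries carry "loss"; evaluation entries carry "eval_cer" etc.
train_points = [(e["step"], e["loss"]) for e in history if "loss" in e]
eval_points = [e for e in history if "eval_cer" in e]

for e in eval_points:
    print(f"step {e['step']:>6}: CER={e['eval_cer']:.4f} "
          f"WER={e['eval_wer']:.4f} eval_loss={e['eval_loss']:.4f}")
```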
| { | |
| "epoch": 34.78935071991307, | |
| "grad_norm": 12.179760932922363, | |
| "learning_rate": 3.06891714160456e-06, | |
| "loss": 11.8053, | |
| "step": 80050 | |
| }, | |
| { | |
| "epoch": 34.811083944580275, | |
| "grad_norm": 11.413451194763184, | |
| "learning_rate": 3.0645330995177556e-06, | |
| "loss": 11.8123, | |
| "step": 80100 | |
| }, | |
| { | |
| "epoch": 34.832817169247484, | |
| "grad_norm": 4.437108993530273, | |
| "learning_rate": 3.060149057430952e-06, | |
| "loss": 11.8026, | |
| "step": 80150 | |
| }, | |
| { | |
| "epoch": 34.8545503939147, | |
| "grad_norm": 73.1333236694336, | |
| "learning_rate": 3.0557650153441477e-06, | |
| "loss": 11.8005, | |
| "step": 80200 | |
| }, | |
| { | |
| "epoch": 34.87628361858191, | |
| "grad_norm": 8.468038558959961, | |
| "learning_rate": 3.0513809732573435e-06, | |
| "loss": 11.8035, | |
| "step": 80250 | |
| }, | |
| { | |
| "epoch": 34.89801684324912, | |
| "grad_norm": 6.311350345611572, | |
| "learning_rate": 3.0469969311705393e-06, | |
| "loss": 11.8177, | |
| "step": 80300 | |
| }, | |
| { | |
| "epoch": 34.919750067916326, | |
| "grad_norm": 6.435243606567383, | |
| "learning_rate": 3.0426128890837355e-06, | |
| "loss": 11.8085, | |
| "step": 80350 | |
| }, | |
| { | |
| "epoch": 34.941483292583534, | |
| "grad_norm": 23.506589889526367, | |
| "learning_rate": 3.0382288469969313e-06, | |
| "loss": 11.8198, | |
| "step": 80400 | |
| }, | |
| { | |
| "epoch": 34.96321651725075, | |
| "grad_norm": 14.792353630065918, | |
| "learning_rate": 3.033844804910127e-06, | |
| "loss": 11.8236, | |
| "step": 80450 | |
| }, | |
| { | |
| "epoch": 34.98494974191796, | |
| "grad_norm": 9.948033332824707, | |
| "learning_rate": 3.0294607628233234e-06, | |
| "loss": 11.7999, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 35.006519967400166, | |
| "grad_norm": 10.65179443359375, | |
| "learning_rate": 3.0250767207365196e-06, | |
| "loss": 11.7357, | |
| "step": 80550 | |
| }, | |
| { | |
| "epoch": 35.028253192067375, | |
| "grad_norm": 17.818878173828125, | |
| "learning_rate": 3.0206926786497154e-06, | |
| "loss": 11.8027, | |
| "step": 80600 | |
| }, | |
| { | |
| "epoch": 35.04998641673458, | |
| "grad_norm": 12.1105318069458, | |
| "learning_rate": 3.0163086365629112e-06, | |
| "loss": 11.8061, | |
| "step": 80650 | |
| }, | |
| { | |
| "epoch": 35.07171964140179, | |
| "grad_norm": 9.265717506408691, | |
| "learning_rate": 3.011924594476107e-06, | |
| "loss": 11.8093, | |
| "step": 80700 | |
| }, | |
| { | |
| "epoch": 35.093452866069, | |
| "grad_norm": 7.630993366241455, | |
| "learning_rate": 3.0075405523893033e-06, | |
| "loss": 11.8209, | |
| "step": 80750 | |
| }, | |
| { | |
| "epoch": 35.115186090736216, | |
| "grad_norm": 9.022866249084473, | |
| "learning_rate": 3.003156510302499e-06, | |
| "loss": 11.8025, | |
| "step": 80800 | |
| }, | |
| { | |
| "epoch": 35.136919315403425, | |
| "grad_norm": 7.761841297149658, | |
| "learning_rate": 2.998772468215695e-06, | |
| "loss": 11.8045, | |
| "step": 80850 | |
| }, | |
| { | |
| "epoch": 35.158652540070634, | |
| "grad_norm": 5.690446853637695, | |
| "learning_rate": 2.9943884261288915e-06, | |
| "loss": 11.7887, | |
| "step": 80900 | |
| }, | |
| { | |
| "epoch": 35.18038576473784, | |
| "grad_norm": 8.062559127807617, | |
| "learning_rate": 2.9900043840420874e-06, | |
| "loss": 11.8076, | |
| "step": 80950 | |
| }, | |
| { | |
| "epoch": 35.20211898940505, | |
| "grad_norm": 11.13079833984375, | |
| "learning_rate": 2.985620341955283e-06, | |
| "loss": 11.791, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 35.22385221407227, | |
| "grad_norm": 9.493050575256348, | |
| "learning_rate": 2.981236299868479e-06, | |
| "loss": 11.7975, | |
| "step": 81050 | |
| }, | |
| { | |
| "epoch": 35.245585438739475, | |
| "grad_norm": 5.601952075958252, | |
| "learning_rate": 2.9768522577816748e-06, | |
| "loss": 11.8132, | |
| "step": 81100 | |
| }, | |
| { | |
| "epoch": 35.267318663406684, | |
| "grad_norm": 16.74399757385254, | |
| "learning_rate": 2.972468215694871e-06, | |
| "loss": 11.8072, | |
| "step": 81150 | |
| }, | |
| { | |
| "epoch": 35.28905188807389, | |
| "grad_norm": 14.785292625427246, | |
| "learning_rate": 2.968084173608067e-06, | |
| "loss": 11.7861, | |
| "step": 81200 | |
| }, | |
| { | |
| "epoch": 35.3107851127411, | |
| "grad_norm": 4.938207149505615, | |
| "learning_rate": 2.9637001315212626e-06, | |
| "loss": 11.7976, | |
| "step": 81250 | |
| }, | |
| { | |
| "epoch": 35.33251833740831, | |
| "grad_norm": 7.194094181060791, | |
| "learning_rate": 2.9593160894344585e-06, | |
| "loss": 11.8092, | |
| "step": 81300 | |
| }, | |
| { | |
| "epoch": 35.354251562075525, | |
| "grad_norm": 8.084842681884766, | |
| "learning_rate": 2.954932047347655e-06, | |
| "loss": 11.8066, | |
| "step": 81350 | |
| }, | |
| { | |
| "epoch": 35.375984786742734, | |
| "grad_norm": 13.50389289855957, | |
| "learning_rate": 2.950548005260851e-06, | |
| "loss": 11.8166, | |
| "step": 81400 | |
| }, | |
| { | |
| "epoch": 35.39771801140994, | |
| "grad_norm": 34.29204559326172, | |
| "learning_rate": 2.9461639631740467e-06, | |
| "loss": 11.7957, | |
| "step": 81450 | |
| }, | |
| { | |
| "epoch": 35.41945123607715, | |
| "grad_norm": 115.18916320800781, | |
| "learning_rate": 2.9417799210872425e-06, | |
| "loss": 11.7966, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 35.44118446074436, | |
| "grad_norm": 6.231071949005127, | |
| "learning_rate": 2.9373958790004388e-06, | |
| "loss": 11.8096, | |
| "step": 81550 | |
| }, | |
| { | |
| "epoch": 35.462917685411576, | |
| "grad_norm": 12.070874214172363, | |
| "learning_rate": 2.9330118369136346e-06, | |
| "loss": 11.7912, | |
| "step": 81600 | |
| }, | |
| { | |
| "epoch": 35.484650910078784, | |
| "grad_norm": 11.014456748962402, | |
| "learning_rate": 2.9286277948268304e-06, | |
| "loss": 11.7999, | |
| "step": 81650 | |
| }, | |
| { | |
| "epoch": 35.50638413474599, | |
| "grad_norm": 7.878298759460449, | |
| "learning_rate": 2.924243752740026e-06, | |
| "loss": 11.7924, | |
| "step": 81700 | |
| }, | |
| { | |
| "epoch": 35.5281173594132, | |
| "grad_norm": 9.946538925170898, | |
| "learning_rate": 2.9198597106532224e-06, | |
| "loss": 11.7876, | |
| "step": 81750 | |
| }, | |
| { | |
| "epoch": 35.54985058408041, | |
| "grad_norm": 9.082895278930664, | |
| "learning_rate": 2.9154756685664187e-06, | |
| "loss": 11.7947, | |
| "step": 81800 | |
| }, | |
| { | |
| "epoch": 35.571583808747626, | |
| "grad_norm": 8.261942863464355, | |
| "learning_rate": 2.9110916264796145e-06, | |
| "loss": 11.8, | |
| "step": 81850 | |
| }, | |
| { | |
| "epoch": 35.593317033414834, | |
| "grad_norm": 8.274785041809082, | |
| "learning_rate": 2.9067075843928107e-06, | |
| "loss": 11.7947, | |
| "step": 81900 | |
| }, | |
| { | |
| "epoch": 35.61505025808204, | |
| "grad_norm": 12.555307388305664, | |
| "learning_rate": 2.9023235423060065e-06, | |
| "loss": 11.8046, | |
| "step": 81950 | |
| }, | |
| { | |
| "epoch": 35.63678348274925, | |
| "grad_norm": 16.864561080932617, | |
| "learning_rate": 2.8979395002192023e-06, | |
| "loss": 11.8078, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 35.65851670741646, | |
| "grad_norm": 7.3884782791137695, | |
| "learning_rate": 2.893555458132398e-06, | |
| "loss": 11.7997, | |
| "step": 82050 | |
| }, | |
| { | |
| "epoch": 35.680249932083676, | |
| "grad_norm": 5.330382823944092, | |
| "learning_rate": 2.889171416045594e-06, | |
| "loss": 11.7931, | |
| "step": 82100 | |
| }, | |
| { | |
| "epoch": 35.701983156750885, | |
| "grad_norm": 135.67330932617188, | |
| "learning_rate": 2.88478737395879e-06, | |
| "loss": 11.8205, | |
| "step": 82150 | |
| }, | |
| { | |
| "epoch": 35.72371638141809, | |
| "grad_norm": 11.841288566589355, | |
| "learning_rate": 2.880403331871986e-06, | |
| "loss": 11.8093, | |
| "step": 82200 | |
| }, | |
| { | |
| "epoch": 35.7454496060853, | |
| "grad_norm": 7.48586368560791, | |
| "learning_rate": 2.8760192897851822e-06, | |
| "loss": 11.7827, | |
| "step": 82250 | |
| }, | |
| { | |
| "epoch": 35.76718283075251, | |
| "grad_norm": 8.122782707214355, | |
| "learning_rate": 2.8716352476983785e-06, | |
| "loss": 11.8002, | |
| "step": 82300 | |
| }, | |
| { | |
| "epoch": 35.788916055419726, | |
| "grad_norm": 11.976639747619629, | |
| "learning_rate": 2.8672512056115743e-06, | |
| "loss": 11.7877, | |
| "step": 82350 | |
| }, | |
| { | |
| "epoch": 35.810649280086935, | |
| "grad_norm": 4.260416507720947, | |
| "learning_rate": 2.86286716352477e-06, | |
| "loss": 11.8016, | |
| "step": 82400 | |
| }, | |
| { | |
| "epoch": 35.83238250475414, | |
| "grad_norm": 6.422642230987549, | |
| "learning_rate": 2.858483121437966e-06, | |
| "loss": 11.7963, | |
| "step": 82450 | |
| }, | |
| { | |
| "epoch": 35.85411572942135, | |
| "grad_norm": 17.52088165283203, | |
| "learning_rate": 2.8540990793511617e-06, | |
| "loss": 11.8099, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 35.87584895408856, | |
| "grad_norm": 18.16527557373047, | |
| "learning_rate": 2.849715037264358e-06, | |
| "loss": 11.8045, | |
| "step": 82550 | |
| }, | |
| { | |
| "epoch": 35.897582178755776, | |
| "grad_norm": 21.54142189025879, | |
| "learning_rate": 2.8453309951775538e-06, | |
| "loss": 11.8147, | |
| "step": 82600 | |
| }, | |
| { | |
| "epoch": 35.919315403422985, | |
| "grad_norm": 10.738289833068848, | |
| "learning_rate": 2.8409469530907496e-06, | |
| "loss": 11.7977, | |
| "step": 82650 | |
| }, | |
| { | |
| "epoch": 35.94104862809019, | |
| "grad_norm": 7.5517144203186035, | |
| "learning_rate": 2.8365629110039462e-06, | |
| "loss": 11.8223, | |
| "step": 82700 | |
| }, | |
| { | |
| "epoch": 35.9627818527574, | |
| "grad_norm": 17.005064010620117, | |
| "learning_rate": 2.832178868917142e-06, | |
| "loss": 11.7941, | |
| "step": 82750 | |
| }, | |
| { | |
| "epoch": 35.98451507742461, | |
| "grad_norm": 20.802410125732422, | |
| "learning_rate": 2.827794826830338e-06, | |
| "loss": 11.8144, | |
| "step": 82800 | |
| }, | |
| { | |
| "epoch": 36.00608530290682, | |
| "grad_norm": 4.643016815185547, | |
| "learning_rate": 2.8234107847435337e-06, | |
| "loss": 11.7041, | |
| "step": 82850 | |
| }, | |
| { | |
| "epoch": 36.02781852757403, | |
| "grad_norm": 5.0188398361206055, | |
| "learning_rate": 2.81902674265673e-06, | |
| "loss": 11.7857, | |
| "step": 82900 | |
| }, | |
| { | |
| "epoch": 36.049551752241236, | |
| "grad_norm": 43.052833557128906, | |
| "learning_rate": 2.8146427005699257e-06, | |
| "loss": 11.7965, | |
| "step": 82950 | |
| }, | |
| { | |
| "epoch": 36.07128497690845, | |
| "grad_norm": 5.6486382484436035, | |
| "learning_rate": 2.8102586584831215e-06, | |
| "loss": 11.7801, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 36.09301820157566, | |
| "grad_norm": 9.257708549499512, | |
| "learning_rate": 2.8058746163963173e-06, | |
| "loss": 11.7884, | |
| "step": 83050 | |
| }, | |
| { | |
| "epoch": 36.11475142624287, | |
| "grad_norm": 9.969672203063965, | |
| "learning_rate": 2.801490574309514e-06, | |
| "loss": 11.7893, | |
| "step": 83100 | |
| }, | |
| { | |
| "epoch": 36.13648465091008, | |
| "grad_norm": 4.864919185638428, | |
| "learning_rate": 2.79710653222271e-06, | |
| "loss": 11.7891, | |
| "step": 83150 | |
| }, | |
| { | |
| "epoch": 36.158217875577286, | |
| "grad_norm": 15.945795059204102, | |
| "learning_rate": 2.7927224901359056e-06, | |
| "loss": 11.8076, | |
| "step": 83200 | |
| }, | |
| { | |
| "epoch": 36.1799511002445, | |
| "grad_norm": 8.471965789794922, | |
| "learning_rate": 2.7883384480491014e-06, | |
| "loss": 11.8079, | |
| "step": 83250 | |
| }, | |
| { | |
| "epoch": 36.20168432491171, | |
| "grad_norm": 3.7765846252441406, | |
| "learning_rate": 2.7839544059622976e-06, | |
| "loss": 11.7893, | |
| "step": 83300 | |
| }, | |
| { | |
| "epoch": 36.22341754957892, | |
| "grad_norm": 32.80738067626953, | |
| "learning_rate": 2.7795703638754935e-06, | |
| "loss": 11.8117, | |
| "step": 83350 | |
| }, | |
| { | |
| "epoch": 36.24515077424613, | |
| "grad_norm": 11.759632110595703, | |
| "learning_rate": 2.7751863217886893e-06, | |
| "loss": 11.7887, | |
| "step": 83400 | |
| }, | |
| { | |
| "epoch": 36.266883998913336, | |
| "grad_norm": 9.582806587219238, | |
| "learning_rate": 2.770802279701885e-06, | |
| "loss": 11.7988, | |
| "step": 83450 | |
| }, | |
| { | |
| "epoch": 36.28861722358055, | |
| "grad_norm": 13.065892219543457, | |
| "learning_rate": 2.7664182376150813e-06, | |
| "loss": 11.7974, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 36.31035044824776, | |
| "grad_norm": 25.009721755981445, | |
| "learning_rate": 2.7620341955282775e-06, | |
| "loss": 11.7938, | |
| "step": 83550 | |
| }, | |
| { | |
| "epoch": 36.33208367291497, | |
| "grad_norm": 7.72334098815918, | |
| "learning_rate": 2.7576501534414734e-06, | |
| "loss": 11.7859, | |
| "step": 83600 | |
| }, | |
| { | |
| "epoch": 36.35381689758218, | |
| "grad_norm": 8.665655136108398, | |
| "learning_rate": 2.753266111354669e-06, | |
| "loss": 11.7859, | |
| "step": 83650 | |
| }, | |
| { | |
| "epoch": 36.375550122249386, | |
| "grad_norm": 19.630573272705078, | |
| "learning_rate": 2.7488820692678654e-06, | |
| "loss": 11.7842, | |
| "step": 83700 | |
| }, | |
| { | |
| "epoch": 36.3972833469166, | |
| "grad_norm": 13.641834259033203, | |
| "learning_rate": 2.7444980271810612e-06, | |
| "loss": 11.8031, | |
| "step": 83750 | |
| }, | |
| { | |
| "epoch": 36.41901657158381, | |
| "grad_norm": 5.9598917961120605, | |
| "learning_rate": 2.740113985094257e-06, | |
| "loss": 11.7813, | |
| "step": 83800 | |
| }, | |
| { | |
| "epoch": 36.44074979625102, | |
| "grad_norm": 8.549332618713379, | |
| "learning_rate": 2.735729943007453e-06, | |
| "loss": 11.7959, | |
| "step": 83850 | |
| }, | |
| { | |
| "epoch": 36.46248302091823, | |
| "grad_norm": 4.3795857429504395, | |
| "learning_rate": 2.731345900920649e-06, | |
| "loss": 11.7963, | |
| "step": 83900 | |
| }, | |
| { | |
| "epoch": 36.484216245585436, | |
| "grad_norm": 7.300856113433838, | |
| "learning_rate": 2.726961858833845e-06, | |
| "loss": 11.7902, | |
| "step": 83950 | |
| }, | |
| { | |
| "epoch": 36.50594947025265, | |
| "grad_norm": 7.026275157928467, | |
| "learning_rate": 2.722577816747041e-06, | |
| "loss": 11.7988, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 36.52768269491986, | |
| "grad_norm": 12.537973403930664, | |
| "learning_rate": 2.7181937746602373e-06, | |
| "loss": 11.7891, | |
| "step": 84050 | |
| }, | |
| { | |
| "epoch": 36.54941591958707, | |
| "grad_norm": 6.903670787811279, | |
| "learning_rate": 2.713809732573433e-06, | |
| "loss": 11.7984, | |
| "step": 84100 | |
| }, | |
| { | |
| "epoch": 36.57114914425428, | |
| "grad_norm": 11.342251777648926, | |
| "learning_rate": 2.709425690486629e-06, | |
| "loss": 11.7915, | |
| "step": 84150 | |
| }, | |
| { | |
| "epoch": 36.59288236892149, | |
| "grad_norm": 10.707886695861816, | |
| "learning_rate": 2.7050416483998248e-06, | |
| "loss": 11.7955, | |
| "step": 84200 | |
| }, | |
| { | |
| "epoch": 36.614615593588695, | |
| "grad_norm": 7.921166896820068, | |
| "learning_rate": 2.7006576063130206e-06, | |
| "loss": 11.7868, | |
| "step": 84250 | |
| }, | |
| { | |
| "epoch": 36.63634881825591, | |
| "grad_norm": 9.0649995803833, | |
| "learning_rate": 2.696273564226217e-06, | |
| "loss": 11.7949, | |
| "step": 84300 | |
| }, | |
| { | |
| "epoch": 36.65808204292312, | |
| "grad_norm": 13.355379104614258, | |
| "learning_rate": 2.6918895221394126e-06, | |
| "loss": 11.7763, | |
| "step": 84350 | |
| }, | |
| { | |
| "epoch": 36.67981526759033, | |
| "grad_norm": 12.289958953857422, | |
| "learning_rate": 2.6875054800526084e-06, | |
| "loss": 11.7861, | |
| "step": 84400 | |
| }, | |
| { | |
| "epoch": 36.70154849225754, | |
| "grad_norm": 4.684927940368652, | |
| "learning_rate": 2.683121437965805e-06, | |
| "loss": 11.7803, | |
| "step": 84450 | |
| }, | |
| { | |
| "epoch": 36.723281716924745, | |
| "grad_norm": 7.917582035064697, | |
| "learning_rate": 2.678737395879001e-06, | |
| "loss": 11.799, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 36.74501494159196, | |
| "grad_norm": 5.413401126861572, | |
| "learning_rate": 2.6743533537921967e-06, | |
| "loss": 11.7873, | |
| "step": 84550 | |
| }, | |
| { | |
| "epoch": 36.76674816625917, | |
| "grad_norm": 14.283327102661133, | |
| "learning_rate": 2.6699693117053925e-06, | |
| "loss": 11.7867, | |
| "step": 84600 | |
| }, | |
| { | |
| "epoch": 36.78848139092638, | |
| "grad_norm": 8.163966178894043, | |
| "learning_rate": 2.6655852696185883e-06, | |
| "loss": 11.8112, | |
| "step": 84650 | |
| }, | |
| { | |
| "epoch": 36.81021461559359, | |
| "grad_norm": 7.235820770263672, | |
| "learning_rate": 2.6612012275317846e-06, | |
| "loss": 11.781, | |
| "step": 84700 | |
| }, | |
| { | |
| "epoch": 36.831947840260796, | |
| "grad_norm": 4.746747016906738, | |
| "learning_rate": 2.6568171854449804e-06, | |
| "loss": 11.7941, | |
| "step": 84750 | |
| }, | |
| { | |
| "epoch": 36.85368106492801, | |
| "grad_norm": 4.514492511749268, | |
| "learning_rate": 2.652433143358176e-06, | |
| "loss": 11.7947, | |
| "step": 84800 | |
| }, | |
| { | |
| "epoch": 36.87541428959522, | |
| "grad_norm": 10.46290111541748, | |
| "learning_rate": 2.648049101271372e-06, | |
| "loss": 11.7826, | |
| "step": 84850 | |
| }, | |
| { | |
| "epoch": 36.89714751426243, | |
| "grad_norm": 8.848064422607422, | |
| "learning_rate": 2.6436650591845687e-06, | |
| "loss": 11.7802, | |
| "step": 84900 | |
| }, | |
| { | |
| "epoch": 36.91888073892964, | |
| "grad_norm": 6.194151401519775, | |
| "learning_rate": 2.6392810170977645e-06, | |
| "loss": 11.7857, | |
| "step": 84950 | |
| }, | |
| { | |
| "epoch": 36.940613963596846, | |
| "grad_norm": 8.114167213439941, | |
| "learning_rate": 2.6348969750109603e-06, | |
| "loss": 11.7934, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 36.96234718826406, | |
| "grad_norm": 11.507193565368652, | |
| "learning_rate": 2.6305129329241565e-06, | |
| "loss": 11.7928, | |
| "step": 85050 | |
| }, | |
| { | |
| "epoch": 36.98408041293127, | |
| "grad_norm": 12.524467468261719, | |
| "learning_rate": 2.6261288908373523e-06, | |
| "loss": 11.7915, | |
| "step": 85100 | |
| }, | |
| { | |
| "epoch": 37.00565063841348, | |
| "grad_norm": 12.674412727355957, | |
| "learning_rate": 2.621744848750548e-06, | |
| "loss": 11.6996, | |
| "step": 85150 | |
| }, | |
| { | |
| "epoch": 37.027383863080686, | |
| "grad_norm": 4.092529773712158, | |
| "learning_rate": 2.617360806663744e-06, | |
| "loss": 11.784, | |
| "step": 85200 | |
| }, | |
| { | |
| "epoch": 37.049117087747895, | |
| "grad_norm": 4.772137641906738, | |
| "learning_rate": 2.6129767645769398e-06, | |
| "loss": 11.7883, | |
| "step": 85250 | |
| }, | |
| { | |
| "epoch": 37.070850312415104, | |
| "grad_norm": 9.263222694396973, | |
| "learning_rate": 2.608592722490136e-06, | |
| "loss": 11.7655, | |
| "step": 85300 | |
| }, | |
| { | |
| "epoch": 37.09258353708231, | |
| "grad_norm": 11.615614891052246, | |
| "learning_rate": 2.6042086804033322e-06, | |
| "loss": 11.7987, | |
| "step": 85350 | |
| }, | |
| { | |
| "epoch": 37.11431676174953, | |
| "grad_norm": 3.8077142238616943, | |
| "learning_rate": 2.599824638316528e-06, | |
| "loss": 11.7786, | |
| "step": 85400 | |
| }, | |
| { | |
| "epoch": 37.13604998641674, | |
| "grad_norm": 5.6897993087768555, | |
| "learning_rate": 2.5954405962297243e-06, | |
| "loss": 11.797, | |
| "step": 85450 | |
| }, | |
| { | |
| "epoch": 37.157783211083945, | |
| "grad_norm": 5.3308305740356445, | |
| "learning_rate": 2.59105655414292e-06, | |
| "loss": 11.7697, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 37.179516435751154, | |
| "grad_norm": 8.419775009155273, | |
| "learning_rate": 2.586672512056116e-06, | |
| "loss": 11.7771, | |
| "step": 85550 | |
| }, | |
| { | |
| "epoch": 37.20124966041836, | |
| "grad_norm": 4.629072189331055, | |
| "learning_rate": 2.5822884699693117e-06, | |
| "loss": 11.7899, | |
| "step": 85600 | |
| }, | |
| { | |
| "epoch": 37.22298288508557, | |
| "grad_norm": 16.38741683959961, | |
| "learning_rate": 2.5779044278825075e-06, | |
| "loss": 11.7799, | |
| "step": 85650 | |
| }, | |
| { | |
| "epoch": 37.24471610975279, | |
| "grad_norm": 39.32244110107422, | |
| "learning_rate": 2.5735203857957038e-06, | |
| "loss": 11.791, | |
| "step": 85700 | |
| }, | |
| { | |
| "epoch": 37.266449334419995, | |
| "grad_norm": 14.12863826751709, | |
| "learning_rate": 2.5691363437089e-06, | |
| "loss": 11.7906, | |
| "step": 85750 | |
| }, | |
| { | |
| "epoch": 37.288182559087204, | |
| "grad_norm": 7.304044723510742, | |
| "learning_rate": 2.564752301622096e-06, | |
| "loss": 11.7865, | |
| "step": 85800 | |
| }, | |
| { | |
| "epoch": 37.30991578375441, | |
| "grad_norm": 11.347620964050293, | |
| "learning_rate": 2.560368259535292e-06, | |
| "loss": 11.8021, | |
| "step": 85850 | |
| }, | |
| { | |
| "epoch": 37.33164900842162, | |
| "grad_norm": 9.358373641967773, | |
| "learning_rate": 2.555984217448488e-06, | |
| "loss": 11.7979, | |
| "step": 85900 | |
| }, | |
| { | |
| "epoch": 37.35338223308884, | |
| "grad_norm": 3.5885915756225586, | |
| "learning_rate": 2.5516001753616837e-06, | |
| "loss": 11.7782, | |
| "step": 85950 | |
| }, | |
| { | |
| "epoch": 37.375115457756046, | |
| "grad_norm": 9.129725456237793, | |
| "learning_rate": 2.5472161332748795e-06, | |
| "loss": 11.783, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 37.396848682423254, | |
| "grad_norm": 8.22261905670166, | |
| "learning_rate": 2.5428320911880757e-06, | |
| "loss": 11.7652, | |
| "step": 86050 | |
| }, | |
| { | |
| "epoch": 37.41858190709046, | |
| "grad_norm": 6.796608924865723, | |
| "learning_rate": 2.5384480491012715e-06, | |
| "loss": 11.7897, | |
| "step": 86100 | |
| }, | |
| { | |
| "epoch": 37.44031513175767, | |
| "grad_norm": 3.8288304805755615, | |
| "learning_rate": 2.5340640070144673e-06, | |
| "loss": 11.7803, | |
| "step": 86150 | |
| }, | |
| { | |
| "epoch": 37.46204835642489, | |
| "grad_norm": 4.8984599113464355, | |
| "learning_rate": 2.529679964927664e-06, | |
| "loss": 11.7998, | |
| "step": 86200 | |
| }, | |
| { | |
| "epoch": 37.483781581092096, | |
| "grad_norm": 11.828535079956055, | |
| "learning_rate": 2.52529592284086e-06, | |
| "loss": 11.7756, | |
| "step": 86250 | |
| }, | |
| { | |
| "epoch": 37.505514805759304, | |
| "grad_norm": 7.632526397705078, | |
| "learning_rate": 2.5209118807540556e-06, | |
| "loss": 11.7816, | |
| "step": 86300 | |
| }, | |
| { | |
| "epoch": 37.52724803042651, | |
| "grad_norm": 13.461671829223633, | |
| "learning_rate": 2.5165278386672514e-06, | |
| "loss": 11.7924, | |
| "step": 86350 | |
| }, | |
| { | |
| "epoch": 37.54898125509372, | |
| "grad_norm": 5.831872940063477, | |
| "learning_rate": 2.5121437965804472e-06, | |
| "loss": 11.767, | |
| "step": 86400 | |
| }, | |
| { | |
| "epoch": 37.57071447976094, | |
| "grad_norm": 15.29990005493164, | |
| "learning_rate": 2.5077597544936435e-06, | |
| "loss": 11.7853, | |
| "step": 86450 | |
| }, | |
| { | |
| "epoch": 37.592447704428146, | |
| "grad_norm": 18.313222885131836, | |
| "learning_rate": 2.5033757124068393e-06, | |
| "loss": 11.7759, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 37.614180929095355, | |
| "grad_norm": 7.100087642669678, | |
| "learning_rate": 2.4989916703200355e-06, | |
| "loss": 11.7861, | |
| "step": 86550 | |
| }, | |
| { | |
| "epoch": 37.63591415376256, | |
| "grad_norm": 10.970837593078613, | |
| "learning_rate": 2.4946076282332313e-06, | |
| "loss": 11.7859, | |
| "step": 86600 | |
| }, | |
| { | |
| "epoch": 37.65764737842977, | |
| "grad_norm": 17.918962478637695, | |
| "learning_rate": 2.490223586146427e-06, | |
| "loss": 11.781, | |
| "step": 86650 | |
| }, | |
| { | |
| "epoch": 37.67938060309699, | |
| "grad_norm": 23.407426834106445, | |
| "learning_rate": 2.485839544059623e-06, | |
| "loss": 11.7763, | |
| "step": 86700 | |
| }, | |
| { | |
| "epoch": 37.701113827764196, | |
| "grad_norm": 6.149535655975342, | |
| "learning_rate": 2.481455501972819e-06, | |
| "loss": 11.7819, | |
| "step": 86750 | |
| }, | |
| { | |
| "epoch": 37.722847052431405, | |
| "grad_norm": 5.372469425201416, | |
| "learning_rate": 2.477071459886015e-06, | |
| "loss": 11.7762, | |
| "step": 86800 | |
| }, | |
| { | |
| "epoch": 37.74458027709861, | |
| "grad_norm": 21.797292709350586, | |
| "learning_rate": 2.472687417799211e-06, | |
| "loss": 11.7822, | |
| "step": 86850 | |
| }, | |
| { | |
| "epoch": 37.76631350176582, | |
| "grad_norm": 17.68259048461914, | |
| "learning_rate": 2.468303375712407e-06, | |
| "loss": 11.7753, | |
| "step": 86900 | |
| }, | |
| { | |
| "epoch": 37.78804672643304, | |
| "grad_norm": 10.163092613220215, | |
| "learning_rate": 2.463919333625603e-06, | |
| "loss": 11.7662, | |
| "step": 86950 | |
| }, | |
| { | |
| "epoch": 37.809779951100246, | |
| "grad_norm": 6.926383972167969, | |
| "learning_rate": 2.459535291538799e-06, | |
| "loss": 11.7897, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 37.831513175767455, | |
| "grad_norm": 8.474647521972656, | |
| "learning_rate": 2.455151249451995e-06, | |
| "loss": 11.78, | |
| "step": 87050 | |
| }, | |
| { | |
| "epoch": 37.853246400434664, | |
| "grad_norm": 16.252666473388672, | |
| "learning_rate": 2.4507672073651907e-06, | |
| "loss": 11.7683, | |
| "step": 87100 | |
| }, | |
| { | |
| "epoch": 37.87497962510187, | |
| "grad_norm": 9.422881126403809, | |
| "learning_rate": 2.446383165278387e-06, | |
| "loss": 11.78, | |
| "step": 87150 | |
| }, | |
| { | |
| "epoch": 37.89671284976909, | |
| "grad_norm": 13.145493507385254, | |
| "learning_rate": 2.441999123191583e-06, | |
| "loss": 11.7806, | |
| "step": 87200 | |
| }, | |
| { | |
| "epoch": 37.9184460744363, | |
| "grad_norm": 9.204483985900879, | |
| "learning_rate": 2.437615081104779e-06, | |
| "loss": 11.7722, | |
| "step": 87250 | |
| }, | |
| { | |
| "epoch": 37.940179299103505, | |
| "grad_norm": 9.611700057983398, | |
| "learning_rate": 2.4332310390179748e-06, | |
| "loss": 11.7957, | |
| "step": 87300 | |
| }, | |
| { | |
| "epoch": 37.961912523770714, | |
| "grad_norm": 11.884017944335938, | |
| "learning_rate": 2.4288469969311706e-06, | |
| "loss": 11.7755, | |
| "step": 87350 | |
| }, | |
| { | |
| "epoch": 37.98364574843792, | |
| "grad_norm": 5.692808151245117, | |
| "learning_rate": 2.424462954844367e-06, | |
| "loss": 11.7766, | |
| "step": 87400 | |
| }, | |
| { | |
| "epoch": 38.00521597392013, | |
| "grad_norm": 66.74461364746094, | |
| "learning_rate": 2.4200789127575626e-06, | |
| "loss": 11.7051, | |
| "step": 87450 | |
| }, | |
| { | |
| "epoch": 38.02694919858734, | |
| "grad_norm": 5.268041610717773, | |
| "learning_rate": 2.4156948706707584e-06, | |
| "loss": 11.7609, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 38.04868242325455, | |
| "grad_norm": 8.159131050109863, | |
| "learning_rate": 2.4113108285839547e-06, | |
| "loss": 11.773, | |
| "step": 87550 | |
| }, | |
| { | |
| "epoch": 38.07041564792176, | |
| "grad_norm": 8.749338150024414, | |
| "learning_rate": 2.4069267864971505e-06, | |
| "loss": 11.7782, | |
| "step": 87600 | |
| }, | |
| { | |
| "epoch": 38.09214887258897, | |
| "grad_norm": 9.121374130249023, | |
| "learning_rate": 2.4025427444103467e-06, | |
| "loss": 11.7703, | |
| "step": 87650 | |
| }, | |
| { | |
| "epoch": 38.11388209725618, | |
| "grad_norm": 10.743656158447266, | |
| "learning_rate": 2.3981587023235425e-06, | |
| "loss": 11.7798, | |
| "step": 87700 | |
| }, | |
| { | |
| "epoch": 38.13561532192339, | |
| "grad_norm": 5.2683000564575195, | |
| "learning_rate": 2.3937746602367383e-06, | |
| "loss": 11.7791, | |
| "step": 87750 | |
| }, | |
| { | |
| "epoch": 38.1573485465906, | |
| "grad_norm": 9.600702285766602, | |
| "learning_rate": 2.389390618149934e-06, | |
| "loss": 11.7658, | |
| "step": 87800 | |
| }, | |
| { | |
| "epoch": 38.17908177125781, | |
| "grad_norm": 10.094902992248535, | |
| "learning_rate": 2.3850065760631304e-06, | |
| "loss": 11.7891, | |
| "step": 87850 | |
| }, | |
| { | |
| "epoch": 38.20081499592502, | |
| "grad_norm": 8.227887153625488, | |
| "learning_rate": 2.3806225339763266e-06, | |
| "loss": 11.7706, | |
| "step": 87900 | |
| }, | |
| { | |
| "epoch": 38.22254822059223, | |
| "grad_norm": 7.997677803039551, | |
| "learning_rate": 2.3762384918895224e-06, | |
| "loss": 11.772, | |
| "step": 87950 | |
| }, | |
| { | |
| "epoch": 38.24428144525944, | |
| "grad_norm": 6.51764440536499, | |
| "learning_rate": 2.3718544498027182e-06, | |
| "loss": 11.7739, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 38.26601466992665, | |
| "grad_norm": 4.949069499969482, | |
| "learning_rate": 2.367470407715914e-06, | |
| "loss": 11.778, | |
| "step": 88050 | |
| }, | |
| { | |
| "epoch": 38.28774789459386, | |
| "grad_norm": 4.438246250152588, | |
| "learning_rate": 2.3630863656291103e-06, | |
| "loss": 11.7742, | |
| "step": 88100 | |
| }, | |
| { | |
| "epoch": 38.30948111926107, | |
| "grad_norm": 11.066926956176758, | |
| "learning_rate": 2.358702323542306e-06, | |
| "loss": 11.7817, | |
| "step": 88150 | |
| }, | |
| { | |
| "epoch": 38.33121434392828, | |
| "grad_norm": 6.765926837921143, | |
| "learning_rate": 2.3543182814555023e-06, | |
| "loss": 11.7767, | |
| "step": 88200 | |
| }, | |
| { | |
| "epoch": 38.35294756859549, | |
| "grad_norm": 6.704973220825195, | |
| "learning_rate": 2.349934239368698e-06, | |
| "loss": 11.7671, | |
| "step": 88250 | |
| }, | |
| { | |
| "epoch": 38.3746807932627, | |
| "grad_norm": 10.575370788574219, | |
| "learning_rate": 2.3455501972818944e-06, | |
| "loss": 11.771, | |
| "step": 88300 | |
| }, | |
| { | |
| "epoch": 38.396414017929914, | |
| "grad_norm": 3.860527992248535, | |
| "learning_rate": 2.34116615519509e-06, | |
| "loss": 11.7685, | |
| "step": 88350 | |
| }, | |
| { | |
| "epoch": 38.41814724259712, | |
| "grad_norm": 10.35341739654541, | |
| "learning_rate": 2.336782113108286e-06, | |
| "loss": 11.7715, | |
| "step": 88400 | |
| }, | |
| { | |
| "epoch": 38.43988046726433, | |
| "grad_norm": 7.268162727355957, | |
| "learning_rate": 2.332398071021482e-06, | |
| "loss": 11.7878, | |
| "step": 88450 | |
| }, | |
| { | |
| "epoch": 38.46161369193154, | |
| "grad_norm": 6.3647871017456055, | |
| "learning_rate": 2.328014028934678e-06, | |
| "loss": 11.7675, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 38.48334691659875, | |
| "grad_norm": 27.453014373779297, | |
| "learning_rate": 2.323629986847874e-06, | |
| "loss": 11.7635, | |
| "step": 88550 | |
| }, | |
| { | |
| "epoch": 38.505080141265964, | |
| "grad_norm": 12.105439186096191, | |
| "learning_rate": 2.31924594476107e-06, | |
| "loss": 11.7825, | |
| "step": 88600 | |
| }, | |
| { | |
| "epoch": 38.52681336593317, | |
| "grad_norm": 12.992817878723145, | |
| "learning_rate": 2.314861902674266e-06, | |
| "loss": 11.76, | |
| "step": 88650 | |
| }, | |
| { | |
| "epoch": 38.54854659060038, | |
| "grad_norm": 8.603561401367188, | |
| "learning_rate": 2.3104778605874617e-06, | |
| "loss": 11.7655, | |
| "step": 88700 | |
| }, | |
| { | |
| "epoch": 38.57027981526759, | |
| "grad_norm": 4.036582946777344, | |
| "learning_rate": 2.306093818500658e-06, | |
| "loss": 11.7882, | |
| "step": 88750 | |
| }, | |
| { | |
| "epoch": 38.5920130399348, | |
| "grad_norm": 5.422863483428955, | |
| "learning_rate": 2.3017097764138538e-06, | |
| "loss": 11.7683, | |
| "step": 88800 | |
| }, | |
| { | |
| "epoch": 38.61374626460201, | |
| "grad_norm": 5.842752933502197, | |
| "learning_rate": 2.2973257343270496e-06, | |
| "loss": 11.7695, | |
| "step": 88850 | |
| }, | |
| { | |
| "epoch": 38.63547948926922, | |
| "grad_norm": 9.653190612792969, | |
| "learning_rate": 2.292941692240246e-06, | |
| "loss": 11.7873, | |
| "step": 88900 | |
| }, | |
| { | |
| "epoch": 38.65721271393643, | |
| "grad_norm": 5.730354309082031, | |
| "learning_rate": 2.2885576501534416e-06, | |
| "loss": 11.768, | |
| "step": 88950 | |
| }, | |
| { | |
| "epoch": 38.67894593860364, | |
| "grad_norm": 17.826345443725586, | |
| "learning_rate": 2.284173608066638e-06, | |
| "loss": 11.776, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 38.70067916327085, | |
| "grad_norm": 9.027566909790039, | |
| "learning_rate": 2.2797895659798336e-06, | |
| "loss": 11.755, | |
| "step": 89050 | |
| }, | |
| { | |
| "epoch": 38.72241238793806, | |
| "grad_norm": 4.53999662399292, | |
| "learning_rate": 2.2754055238930295e-06, | |
| "loss": 11.765, | |
| "step": 89100 | |
| }, | |
| { | |
| "epoch": 38.74414561260527, | |
| "grad_norm": 8.569631576538086, | |
| "learning_rate": 2.2710214818062253e-06, | |
| "loss": 11.7777, | |
| "step": 89150 | |
| }, | |
| { | |
| "epoch": 38.76587883727248, | |
| "grad_norm": 24.965681076049805, | |
| "learning_rate": 2.2666374397194215e-06, | |
| "loss": 11.7738, | |
| "step": 89200 | |
| }, | |
| { | |
| "epoch": 38.78761206193969, | |
| "grad_norm": 7.554117202758789, | |
| "learning_rate": 2.2622533976326173e-06, | |
| "loss": 11.7801, | |
| "step": 89250 | |
| }, | |
| { | |
| "epoch": 38.8093452866069, | |
| "grad_norm": 16.02465057373047, | |
| "learning_rate": 2.2578693555458135e-06, | |
| "loss": 11.785, | |
| "step": 89300 | |
| }, | |
| { | |
| "epoch": 38.83107851127411, | |
| "grad_norm": 9.892585754394531, | |
| "learning_rate": 2.2534853134590094e-06, | |
| "loss": 11.7812, | |
| "step": 89350 | |
| }, | |
| { | |
| "epoch": 38.85281173594132, | |
| "grad_norm": 20.471792221069336, | |
| "learning_rate": 2.2491012713722056e-06, | |
| "loss": 11.774, | |
| "step": 89400 | |
| }, | |
| { | |
| "epoch": 38.87454496060853, | |
| "grad_norm": 15.924908638000488, | |
| "learning_rate": 2.2447172292854014e-06, | |
| "loss": 11.7735, | |
| "step": 89450 | |
| }, | |
| { | |
| "epoch": 38.89627818527574, | |
| "grad_norm": 9.257697105407715, | |
| "learning_rate": 2.2403331871985972e-06, | |
| "loss": 11.784, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 38.91801140994295, | |
| "grad_norm": 8.59609317779541, | |
| "learning_rate": 2.235949145111793e-06, | |
| "loss": 11.7924, | |
| "step": 89550 | |
| }, | |
| { | |
| "epoch": 38.93974463461016, | |
| "grad_norm": 5.643759727478027, | |
| "learning_rate": 2.2315651030249893e-06, | |
| "loss": 11.7727, | |
| "step": 89600 | |
| }, | |
| { | |
| "epoch": 38.96147785927737, | |
| "grad_norm": 3.887133836746216, | |
| "learning_rate": 2.227181060938185e-06, | |
| "loss": 11.7728, | |
| "step": 89650 | |
| }, | |
| { | |
| "epoch": 38.98321108394458, | |
| "grad_norm": 15.085949897766113, | |
| "learning_rate": 2.2227970188513813e-06, | |
| "loss": 11.7837, | |
| "step": 89700 | |
| }, | |
| { | |
| "epoch": 39.00478130942679, | |
| "grad_norm": 4.734740257263184, | |
| "learning_rate": 2.218412976764577e-06, | |
| "loss": 11.6785, | |
| "step": 89750 | |
| }, | |
| { | |
| "epoch": 39.026514534094, | |
| "grad_norm": 7.277717590332031, | |
| "learning_rate": 2.214028934677773e-06, | |
| "loss": 11.7636, | |
| "step": 89800 | |
| }, | |
| { | |
| "epoch": 39.04824775876121, | |
| "grad_norm": 12.622576713562012, | |
| "learning_rate": 2.209644892590969e-06, | |
| "loss": 11.7632, | |
| "step": 89850 | |
| }, | |
| { | |
| "epoch": 39.069980983428415, | |
| "grad_norm": 8.72470760345459, | |
| "learning_rate": 2.205260850504165e-06, | |
| "loss": 11.7806, | |
| "step": 89900 | |
| }, | |
| { | |
| "epoch": 39.091714208095624, | |
| "grad_norm": 8.548195838928223, | |
| "learning_rate": 2.2008768084173608e-06, | |
| "loss": 11.7642, | |
| "step": 89950 | |
| }, | |
| { | |
| "epoch": 39.11344743276284, | |
| "grad_norm": 5.267911911010742, | |
| "learning_rate": 2.196492766330557e-06, | |
| "loss": 11.7606, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 39.11344743276284, | |
| "eval_cer": 0.07434609260242184, | |
| "eval_loss": 2.4127438068389893, | |
| "eval_runtime": 397.8168, | |
| "eval_samples_per_second": 13.589, | |
| "eval_steps_per_second": 3.399, | |
| "eval_wer": 0.22556131260794474, | |
| "step": 90000 | |
| }, | |
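Between this evaluation and the previous one, CER improves (≈0.0757 at step 80000 → ≈0.0743 at step 90000) and WER improves (≈0.2281 → ≈0.2256) even though `eval_loss` rises slightly (≈2.4033 → ≈2.4127), so checkpoint selection is better keyed to the target metric than to loss. A minimal sketch under the same assumed schema and hypothetical path as above:

```python
# Sketch: pick the evaluation step with the lowest character error rate,
# since eval_loss and CER can move in opposite directions.
import json

with open("trainer_state.json", encoding="utf-8") as f:
    history = json.load(f)["log_history"]

evals = [e for e in history if "eval_cer" in e]
best = min(evals, key=lambda e: e["eval_cer"])
print(f"best step by CER: {best['step']} "
      f"(CER={best['eval_cer']:.4f}, WER={best['eval_wer']:.4f})")
```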
| { | |
| "epoch": 39.13518065743005, | |
| "grad_norm": 13.38624095916748, | |
| "learning_rate": 2.1921087242437532e-06, | |
| "loss": 11.7632, | |
| "step": 90050 | |
| }, | |
| { | |
| "epoch": 39.15691388209726, | |
| "grad_norm": 13.824868202209473, | |
| "learning_rate": 2.187724682156949e-06, | |
| "loss": 11.7722, | |
| "step": 90100 | |
| }, | |
| { | |
| "epoch": 39.178647106764465, | |
| "grad_norm": 10.600975036621094, | |
| "learning_rate": 2.183340640070145e-06, | |
| "loss": 11.7692, | |
| "step": 90150 | |
| }, | |
| { | |
| "epoch": 39.200380331431674, | |
| "grad_norm": 10.806214332580566, | |
| "learning_rate": 2.1789565979833407e-06, | |
| "loss": 11.753, | |
| "step": 90200 | |
| }, | |
| { | |
| "epoch": 39.22211355609888, | |
| "grad_norm": 7.800635814666748, | |
| "learning_rate": 2.1745725558965365e-06, | |
| "loss": 11.7616, | |
| "step": 90250 | |
| }, | |
| { | |
| "epoch": 39.2438467807661, | |
| "grad_norm": 7.492881774902344, | |
| "learning_rate": 2.1701885138097327e-06, | |
| "loss": 11.7439, | |
| "step": 90300 | |
| }, | |
| { | |
| "epoch": 39.26558000543331, | |
| "grad_norm": 12.061040878295898, | |
| "learning_rate": 2.1658044717229285e-06, | |
| "loss": 11.7567, | |
| "step": 90350 | |
| }, | |
| { | |
| "epoch": 39.287313230100516, | |
| "grad_norm": 6.541141510009766, | |
| "learning_rate": 2.1614204296361248e-06, | |
| "loss": 11.775, | |
| "step": 90400 | |
| }, | |
| { | |
| "epoch": 39.309046454767724, | |
| "grad_norm": 5.959283828735352, | |
| "learning_rate": 2.1570363875493206e-06, | |
| "loss": 11.7633, | |
| "step": 90450 | |
| }, | |
| { | |
| "epoch": 39.33077967943493, | |
| "grad_norm": 9.226263046264648, | |
| "learning_rate": 2.152652345462517e-06, | |
| "loss": 11.7746, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 39.35251290410215, | |
| "grad_norm": 15.486861228942871, | |
| "learning_rate": 2.1482683033757126e-06, | |
| "loss": 11.775, | |
| "step": 90550 | |
| }, | |
| { | |
| "epoch": 39.37424612876936, | |
| "grad_norm": 6.420067310333252, | |
| "learning_rate": 2.1438842612889084e-06, | |
| "loss": 11.7812, | |
| "step": 90600 | |
| }, | |
| { | |
| "epoch": 39.395979353436566, | |
| "grad_norm": 7.908544540405273, | |
| "learning_rate": 2.1395002192021042e-06, | |
| "loss": 11.7573, | |
| "step": 90650 | |
| }, | |
| { | |
| "epoch": 39.417712578103774, | |
| "grad_norm": 7.737216472625732, | |
| "learning_rate": 2.1351161771153005e-06, | |
| "loss": 11.7621, | |
| "step": 90700 | |
| }, | |
| { | |
| "epoch": 39.43944580277098, | |
| "grad_norm": 5.630201816558838, | |
| "learning_rate": 2.1307321350284967e-06, | |
| "loss": 11.7629, | |
| "step": 90750 | |
| }, | |
| { | |
| "epoch": 39.4611790274382, | |
| "grad_norm": 5.563817024230957, | |
| "learning_rate": 2.1263480929416925e-06, | |
| "loss": 11.7663, | |
| "step": 90800 | |
| }, | |
| { | |
| "epoch": 39.48291225210541, | |
| "grad_norm": 6.785152912139893, | |
| "learning_rate": 2.1219640508548883e-06, | |
| "loss": 11.7575, | |
| "step": 90850 | |
| }, | |
| { | |
| "epoch": 39.504645476772616, | |
| "grad_norm": 5.261542797088623, | |
| "learning_rate": 2.117580008768084e-06, | |
| "loss": 11.7676, | |
| "step": 90900 | |
| }, | |
| { | |
| "epoch": 39.526378701439825, | |
| "grad_norm": 8.951974868774414, | |
| "learning_rate": 2.1131959666812804e-06, | |
| "loss": 11.7626, | |
| "step": 90950 | |
| }, | |
| { | |
| "epoch": 39.54811192610703, | |
| "grad_norm": 5.925467491149902, | |
| "learning_rate": 2.108811924594476e-06, | |
| "loss": 11.7652, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 39.56984515077425, | |
| "grad_norm": 6.1618194580078125, | |
| "learning_rate": 2.1044278825076724e-06, | |
| "loss": 11.7526, | |
| "step": 91050 | |
| }, | |
| { | |
| "epoch": 39.59157837544146, | |
| "grad_norm": 5.625064373016357, | |
| "learning_rate": 2.1000438404208682e-06, | |
| "loss": 11.7605, | |
| "step": 91100 | |
| }, | |
| { | |
| "epoch": 39.613311600108666, | |
| "grad_norm": 12.690841674804688, | |
| "learning_rate": 2.0956597983340645e-06, | |
| "loss": 11.7649, | |
| "step": 91150 | |
| }, | |
| { | |
| "epoch": 39.635044824775875, | |
| "grad_norm": 12.550384521484375, | |
| "learning_rate": 2.0912757562472603e-06, | |
| "loss": 11.792, | |
| "step": 91200 | |
| }, | |
| { | |
| "epoch": 39.65677804944308, | |
| "grad_norm": 9.650748252868652, | |
| "learning_rate": 2.086891714160456e-06, | |
| "loss": 11.7733, | |
| "step": 91250 | |
| }, | |
| { | |
| "epoch": 39.6785112741103, | |
| "grad_norm": 8.173276901245117, | |
| "learning_rate": 2.082507672073652e-06, | |
| "loss": 11.7835, | |
| "step": 91300 | |
| }, | |
| { | |
| "epoch": 39.70024449877751, | |
| "grad_norm": 8.434061050415039, | |
| "learning_rate": 2.0781236299868477e-06, | |
| "loss": 11.7474, | |
| "step": 91350 | |
| }, | |
| { | |
| "epoch": 39.721977723444716, | |
| "grad_norm": 9.518670082092285, | |
| "learning_rate": 2.073739587900044e-06, | |
| "loss": 11.7687, | |
| "step": 91400 | |
| }, | |
| { | |
| "epoch": 39.743710948111925, | |
| "grad_norm": 6.331693649291992, | |
| "learning_rate": 2.06935554581324e-06, | |
| "loss": 11.7578, | |
| "step": 91450 | |
| }, | |
| { | |
| "epoch": 39.765444172779134, | |
| "grad_norm": 9.190653800964355, | |
| "learning_rate": 2.064971503726436e-06, | |
| "loss": 11.7756, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 39.78717739744635, | |
| "grad_norm": 13.398709297180176, | |
| "learning_rate": 2.060587461639632e-06, | |
| "loss": 11.7704, | |
| "step": 91550 | |
| }, | |
| { | |
| "epoch": 39.80891062211356, | |
| "grad_norm": 11.104494094848633, | |
| "learning_rate": 2.056203419552828e-06, | |
| "loss": 11.7623, | |
| "step": 91600 | |
| }, | |
| { | |
| "epoch": 39.83064384678077, | |
| "grad_norm": 5.477586269378662, | |
| "learning_rate": 2.051819377466024e-06, | |
| "loss": 11.7647, | |
| "step": 91650 | |
| }, | |
| { | |
| "epoch": 39.852377071447975, | |
| "grad_norm": 3.5749564170837402, | |
| "learning_rate": 2.0474353353792197e-06, | |
| "loss": 11.7695, | |
| "step": 91700 | |
| }, | |
| { | |
| "epoch": 39.874110296115184, | |
| "grad_norm": 5.978471755981445, | |
| "learning_rate": 2.043051293292416e-06, | |
| "loss": 11.7648, | |
| "step": 91750 | |
| }, | |
| { | |
| "epoch": 39.89584352078239, | |
| "grad_norm": 25.19099235534668, | |
| "learning_rate": 2.0386672512056117e-06, | |
| "loss": 11.7612, | |
| "step": 91800 | |
| }, | |
| { | |
| "epoch": 39.91757674544961, | |
| "grad_norm": 5.882903099060059, | |
| "learning_rate": 2.034283209118808e-06, | |
| "loss": 11.7799, | |
| "step": 91850 | |
| }, | |
| { | |
| "epoch": 39.93930997011682, | |
| "grad_norm": 27.048709869384766, | |
| "learning_rate": 2.0298991670320037e-06, | |
| "loss": 11.7737, | |
| "step": 91900 | |
| }, | |
| { | |
| "epoch": 39.961043194784025, | |
| "grad_norm": 9.073412895202637, | |
| "learning_rate": 2.0255151249451996e-06, | |
| "loss": 11.7612, | |
| "step": 91950 | |
| }, | |
| { | |
| "epoch": 39.982776419451234, | |
| "grad_norm": 13.002010345458984, | |
| "learning_rate": 2.0211310828583954e-06, | |
| "loss": 11.7782, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 40.00434664493344, | |
| "grad_norm": 9.383039474487305, | |
| "learning_rate": 2.0167470407715916e-06, | |
| "loss": 11.6788, | |
| "step": 92050 | |
| }, | |
| { | |
| "epoch": 40.02607986960065, | |
| "grad_norm": 9.476020812988281, | |
| "learning_rate": 2.0123629986847874e-06, | |
| "loss": 11.7597, | |
| "step": 92100 | |
| }, | |
| { | |
| "epoch": 40.04781309426786, | |
| "grad_norm": 8.409124374389648, | |
| "learning_rate": 2.0079789565979836e-06, | |
| "loss": 11.7492, | |
| "step": 92150 | |
| }, | |
| { | |
| "epoch": 40.069546318935075, | |
| "grad_norm": 6.030084609985352, | |
| "learning_rate": 2.0035949145111795e-06, | |
| "loss": 11.7751, | |
| "step": 92200 | |
| }, | |
| { | |
| "epoch": 40.09127954360228, | |
| "grad_norm": 4.96845006942749, | |
| "learning_rate": 1.9992108724243757e-06, | |
| "loss": 11.7443, | |
| "step": 92250 | |
| }, | |
| { | |
| "epoch": 40.11301276826949, | |
| "grad_norm": 11.096965789794922, | |
| "learning_rate": 1.9948268303375715e-06, | |
| "loss": 11.7718, | |
| "step": 92300 | |
| }, | |
| { | |
| "epoch": 40.1347459929367, | |
| "grad_norm": 6.45682430267334, | |
| "learning_rate": 1.9904427882507673e-06, | |
| "loss": 11.7592, | |
| "step": 92350 | |
| }, | |
| { | |
| "epoch": 40.15647921760391, | |
| "grad_norm": 7.665036678314209, | |
| "learning_rate": 1.986058746163963e-06, | |
| "loss": 11.7715, | |
| "step": 92400 | |
| }, | |
| { | |
| "epoch": 40.178212442271125, | |
| "grad_norm": 4.477396011352539, | |
| "learning_rate": 1.9816747040771594e-06, | |
| "loss": 11.7572, | |
| "step": 92450 | |
| }, | |
| { | |
| "epoch": 40.19994566693833, | |
| "grad_norm": 13.868246078491211, | |
| "learning_rate": 1.977290661990355e-06, | |
| "loss": 11.7656, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 40.22167889160554, | |
| "grad_norm": 7.29514217376709, | |
| "learning_rate": 1.9729066199035514e-06, | |
| "loss": 11.7525, | |
| "step": 92550 | |
| }, | |
| { | |
| "epoch": 40.24341211627275, | |
| "grad_norm": 5.126781463623047, | |
| "learning_rate": 1.968522577816747e-06, | |
| "loss": 11.7623, | |
| "step": 92600 | |
| }, | |
| { | |
| "epoch": 40.26514534093996, | |
| "grad_norm": 13.333416938781738, | |
| "learning_rate": 1.964138535729943e-06, | |
| "loss": 11.7645, | |
| "step": 92650 | |
| }, | |
| { | |
| "epoch": 40.286878565607175, | |
| "grad_norm": 4.38609504699707, | |
| "learning_rate": 1.9597544936431393e-06, | |
| "loss": 11.7471, | |
| "step": 92700 | |
| }, | |
| { | |
| "epoch": 40.308611790274384, | |
| "grad_norm": 18.64853858947754, | |
| "learning_rate": 1.955370451556335e-06, | |
| "loss": 11.7614, | |
| "step": 92750 | |
| }, | |
| { | |
| "epoch": 40.33034501494159, | |
| "grad_norm": 10.51586627960205, | |
| "learning_rate": 1.950986409469531e-06, | |
| "loss": 11.7572, | |
| "step": 92800 | |
| }, | |
| { | |
| "epoch": 40.3520782396088, | |
| "grad_norm": 10.547462463378906, | |
| "learning_rate": 1.946602367382727e-06, | |
| "loss": 11.7609, | |
| "step": 92850 | |
| }, | |
| { | |
| "epoch": 40.37381146427601, | |
| "grad_norm": 5.690642356872559, | |
| "learning_rate": 1.9422183252959233e-06, | |
| "loss": 11.7481, | |
| "step": 92900 | |
| }, | |
| { | |
| "epoch": 40.395544688943225, | |
| "grad_norm": 3.9211175441741943, | |
| "learning_rate": 1.937834283209119e-06, | |
| "loss": 11.7461, | |
| "step": 92950 | |
| }, | |
| { | |
| "epoch": 40.417277913610434, | |
| "grad_norm": 4.40631103515625, | |
| "learning_rate": 1.933450241122315e-06, | |
| "loss": 11.7587, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 40.43901113827764, | |
| "grad_norm": 4.007054328918457, | |
| "learning_rate": 1.9290661990355108e-06, | |
| "loss": 11.7649, | |
| "step": 93050 | |
| }, | |
| { | |
| "epoch": 40.46074436294485, | |
| "grad_norm": 9.994613647460938, | |
| "learning_rate": 1.9246821569487066e-06, | |
| "loss": 11.7632, | |
| "step": 93100 | |
| }, | |
| { | |
| "epoch": 40.48247758761206, | |
| "grad_norm": 7.985722541809082, | |
| "learning_rate": 1.920298114861903e-06, | |
| "loss": 11.7661, | |
| "step": 93150 | |
| }, | |
| { | |
| "epoch": 40.50421081227927, | |
| "grad_norm": 17.07448387145996, | |
| "learning_rate": 1.9159140727750986e-06, | |
| "loss": 11.7407, | |
| "step": 93200 | |
| }, | |
| { | |
| "epoch": 40.525944036946484, | |
| "grad_norm": 5.9471211433410645, | |
| "learning_rate": 1.911530030688295e-06, | |
| "loss": 11.7688, | |
| "step": 93250 | |
| }, | |
| { | |
| "epoch": 40.54767726161369, | |
| "grad_norm": 5.977828502655029, | |
| "learning_rate": 1.9071459886014907e-06, | |
| "loss": 11.7515, | |
| "step": 93300 | |
| }, | |
| { | |
| "epoch": 40.5694104862809, | |
| "grad_norm": 37.1627082824707, | |
| "learning_rate": 1.902761946514687e-06, | |
| "loss": 11.7671, | |
| "step": 93350 | |
| }, | |
| { | |
| "epoch": 40.59114371094811, | |
| "grad_norm": 6.538187503814697, | |
| "learning_rate": 1.8983779044278827e-06, | |
| "loss": 11.7546, | |
| "step": 93400 | |
| }, | |
| { | |
| "epoch": 40.61287693561532, | |
| "grad_norm": 7.786463260650635, | |
| "learning_rate": 1.8939938623410785e-06, | |
| "loss": 11.7539, | |
| "step": 93450 | |
| }, | |
| { | |
| "epoch": 40.634610160282534, | |
| "grad_norm": 9.64076042175293, | |
| "learning_rate": 1.8896098202542746e-06, | |
| "loss": 11.7615, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 40.65634338494974, | |
| "grad_norm": 6.382667064666748, | |
| "learning_rate": 1.8852257781674704e-06, | |
| "loss": 11.749, | |
| "step": 93550 | |
| }, | |
| { | |
| "epoch": 40.67807660961695, | |
| "grad_norm": 9.91306209564209, | |
| "learning_rate": 1.8808417360806666e-06, | |
| "loss": 11.7645, | |
| "step": 93600 | |
| }, | |
| { | |
| "epoch": 40.69980983428416, | |
| "grad_norm": 5.685323715209961, | |
| "learning_rate": 1.8764576939938626e-06, | |
| "loss": 11.7511, | |
| "step": 93650 | |
| }, | |
| { | |
| "epoch": 40.72154305895137, | |
| "grad_norm": 6.233023166656494, | |
| "learning_rate": 1.8720736519070584e-06, | |
| "loss": 11.7425, | |
| "step": 93700 | |
| }, | |
| { | |
| "epoch": 40.743276283618584, | |
| "grad_norm": 7.542703628540039, | |
| "learning_rate": 1.8676896098202542e-06, | |
| "loss": 11.7679, | |
| "step": 93750 | |
| }, | |
| { | |
| "epoch": 40.76500950828579, | |
| "grad_norm": 20.795351028442383, | |
| "learning_rate": 1.8633055677334505e-06, | |
| "loss": 11.7475, | |
| "step": 93800 | |
| }, | |
| { | |
| "epoch": 40.786742732953, | |
| "grad_norm": 8.967228889465332, | |
| "learning_rate": 1.8589215256466465e-06, | |
| "loss": 11.7556, | |
| "step": 93850 | |
| }, | |
| { | |
| "epoch": 40.80847595762021, | |
| "grad_norm": 9.180179595947266, | |
| "learning_rate": 1.8545374835598423e-06, | |
| "loss": 11.7549, | |
| "step": 93900 | |
| }, | |
| { | |
| "epoch": 40.83020918228742, | |
| "grad_norm": 4.092516899108887, | |
| "learning_rate": 1.8501534414730381e-06, | |
| "loss": 11.7597, | |
| "step": 93950 | |
| }, | |
| { | |
| "epoch": 40.851942406954635, | |
| "grad_norm": 4.368731498718262, | |
| "learning_rate": 1.8457693993862344e-06, | |
| "loss": 11.7655, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 40.87367563162184, | |
| "grad_norm": 3.8829784393310547, | |
| "learning_rate": 1.8413853572994304e-06, | |
| "loss": 11.7653, | |
| "step": 94050 | |
| }, | |
| { | |
| "epoch": 40.89540885628905, | |
| "grad_norm": 30.0755615234375, | |
| "learning_rate": 1.8370013152126262e-06, | |
| "loss": 11.7594, | |
| "step": 94100 | |
| }, | |
| { | |
| "epoch": 40.91714208095626, | |
| "grad_norm": 5.881009578704834, | |
| "learning_rate": 1.8326172731258222e-06, | |
| "loss": 11.7536, | |
| "step": 94150 | |
| }, | |
| { | |
| "epoch": 40.93887530562347, | |
| "grad_norm": 11.916665077209473, | |
| "learning_rate": 1.828233231039018e-06, | |
| "loss": 11.746, | |
| "step": 94200 | |
| }, | |
| { | |
| "epoch": 40.960608530290685, | |
| "grad_norm": 4.125995635986328, | |
| "learning_rate": 1.8238491889522143e-06, | |
| "loss": 11.7549, | |
| "step": 94250 | |
| }, | |
| { | |
| "epoch": 40.98234175495789, | |
| "grad_norm": 9.298914909362793, | |
| "learning_rate": 1.81946514686541e-06, | |
| "loss": 11.746, | |
| "step": 94300 | |
| }, | |
| { | |
| "epoch": 41.0039119804401, | |
| "grad_norm": 6.543211460113525, | |
| "learning_rate": 1.815081104778606e-06, | |
| "loss": 11.6774, | |
| "step": 94350 | |
| }, | |
| { | |
| "epoch": 41.02564520510731, | |
| "grad_norm": 10.013541221618652, | |
| "learning_rate": 1.810697062691802e-06, | |
| "loss": 11.747, | |
| "step": 94400 | |
| }, | |
| { | |
| "epoch": 41.04737842977452, | |
| "grad_norm": 26.30128288269043, | |
| "learning_rate": 1.8063130206049981e-06, | |
| "loss": 11.7447, | |
| "step": 94450 | |
| }, | |
| { | |
| "epoch": 41.06911165444173, | |
| "grad_norm": 6.381859302520752, | |
| "learning_rate": 1.801928978518194e-06, | |
| "loss": 11.7397, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 41.090844879108936, | |
| "grad_norm": 6.652487754821777, | |
| "learning_rate": 1.79754493643139e-06, | |
| "loss": 11.7431, | |
| "step": 94550 | |
| }, | |
| { | |
| "epoch": 41.11257810377615, | |
| "grad_norm": 5.449718952178955, | |
| "learning_rate": 1.7931608943445858e-06, | |
| "loss": 11.7549, | |
| "step": 94600 | |
| }, | |
| { | |
| "epoch": 41.13431132844336, | |
| "grad_norm": 3.2079617977142334, | |
| "learning_rate": 1.7887768522577818e-06, | |
| "loss": 11.7596, | |
| "step": 94650 | |
| }, | |
| { | |
| "epoch": 41.15604455311057, | |
| "grad_norm": 9.676004409790039, | |
| "learning_rate": 1.7843928101709778e-06, | |
| "loss": 11.7529, | |
| "step": 94700 | |
| }, | |
| { | |
| "epoch": 41.17777777777778, | |
| "grad_norm": 9.563153266906738, | |
| "learning_rate": 1.7800087680841738e-06, | |
| "loss": 11.7469, | |
| "step": 94750 | |
| }, | |
| { | |
| "epoch": 41.199511002444986, | |
| "grad_norm": 8.786322593688965, | |
| "learning_rate": 1.7756247259973697e-06, | |
| "loss": 11.7364, | |
| "step": 94800 | |
| }, | |
| { | |
| "epoch": 41.221244227112194, | |
| "grad_norm": 8.319864273071289, | |
| "learning_rate": 1.7712406839105657e-06, | |
| "loss": 11.7545, | |
| "step": 94850 | |
| }, | |
| { | |
| "epoch": 41.24297745177941, | |
| "grad_norm": 11.104187965393066, | |
| "learning_rate": 1.7668566418237617e-06, | |
| "loss": 11.7425, | |
| "step": 94900 | |
| }, | |
| { | |
| "epoch": 41.26471067644662, | |
| "grad_norm": 5.2200026512146, | |
| "learning_rate": 1.7624725997369577e-06, | |
| "loss": 11.7511, | |
| "step": 94950 | |
| }, | |
| { | |
| "epoch": 41.28644390111383, | |
| "grad_norm": 5.0673933029174805, | |
| "learning_rate": 1.7580885576501535e-06, | |
| "loss": 11.7597, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 41.308177125781036, | |
| "grad_norm": 6.790633678436279, | |
| "learning_rate": 1.7537045155633495e-06, | |
| "loss": 11.7566, | |
| "step": 95050 | |
| }, | |
| { | |
| "epoch": 41.329910350448245, | |
| "grad_norm": 5.625386714935303, | |
| "learning_rate": 1.7493204734765456e-06, | |
| "loss": 11.7483, | |
| "step": 95100 | |
| }, | |
| { | |
| "epoch": 41.35164357511546, | |
| "grad_norm": 5.591269493103027, | |
| "learning_rate": 1.7449364313897416e-06, | |
| "loss": 11.7497, | |
| "step": 95150 | |
| }, | |
| { | |
| "epoch": 41.37337679978267, | |
| "grad_norm": 5.52367639541626, | |
| "learning_rate": 1.7405523893029374e-06, | |
| "loss": 11.7637, | |
| "step": 95200 | |
| }, | |
| { | |
| "epoch": 41.39511002444988, | |
| "grad_norm": 3.352064371109009, | |
| "learning_rate": 1.7361683472161334e-06, | |
| "loss": 11.7539, | |
| "step": 95250 | |
| }, | |
| { | |
| "epoch": 41.416843249117086, | |
| "grad_norm": 10.966846466064453, | |
| "learning_rate": 1.7317843051293292e-06, | |
| "loss": 11.7587, | |
| "step": 95300 | |
| }, | |
| { | |
| "epoch": 41.438576473784295, | |
| "grad_norm": 4.151219844818115, | |
| "learning_rate": 1.7274002630425255e-06, | |
| "loss": 11.7389, | |
| "step": 95350 | |
| }, | |
| { | |
| "epoch": 41.46030969845151, | |
| "grad_norm": 7.952151775360107, | |
| "learning_rate": 1.7230162209557213e-06, | |
| "loss": 11.7477, | |
| "step": 95400 | |
| }, | |
| { | |
| "epoch": 41.48204292311872, | |
| "grad_norm": 7.96391487121582, | |
| "learning_rate": 1.7186321788689173e-06, | |
| "loss": 11.7551, | |
| "step": 95450 | |
| }, | |
| { | |
| "epoch": 41.50377614778593, | |
| "grad_norm": 5.120693206787109, | |
| "learning_rate": 1.7142481367821131e-06, | |
| "loss": 11.7426, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 41.525509372453136, | |
| "grad_norm": 4.3286027908325195, | |
| "learning_rate": 1.7098640946953093e-06, | |
| "loss": 11.7461, | |
| "step": 95550 | |
| }, | |
| { | |
| "epoch": 41.547242597120345, | |
| "grad_norm": 7.304409027099609, | |
| "learning_rate": 1.7054800526085052e-06, | |
| "loss": 11.7565, | |
| "step": 95600 | |
| }, | |
| { | |
| "epoch": 41.56897582178756, | |
| "grad_norm": 5.609922885894775, | |
| "learning_rate": 1.7010960105217012e-06, | |
| "loss": 11.7654, | |
| "step": 95650 | |
| }, | |
| { | |
| "epoch": 41.59070904645477, | |
| "grad_norm": 7.889837265014648, | |
| "learning_rate": 1.696711968434897e-06, | |
| "loss": 11.7447, | |
| "step": 95700 | |
| }, | |
| { | |
| "epoch": 41.61244227112198, | |
| "grad_norm": 6.620103359222412, | |
| "learning_rate": 1.692327926348093e-06, | |
| "loss": 11.7406, | |
| "step": 95750 | |
| }, | |
| { | |
| "epoch": 41.63417549578919, | |
| "grad_norm": 5.3006463050842285, | |
| "learning_rate": 1.687943884261289e-06, | |
| "loss": 11.7421, | |
| "step": 95800 | |
| }, | |
| { | |
| "epoch": 41.655908720456395, | |
| "grad_norm": 6.590972423553467, | |
| "learning_rate": 1.683559842174485e-06, | |
| "loss": 11.7446, | |
| "step": 95850 | |
| }, | |
| { | |
| "epoch": 41.67764194512361, | |
| "grad_norm": 9.739943504333496, | |
| "learning_rate": 1.6791758000876809e-06, | |
| "loss": 11.7506, | |
| "step": 95900 | |
| }, | |
| { | |
| "epoch": 41.69937516979082, | |
| "grad_norm": 5.318305969238281, | |
| "learning_rate": 1.6747917580008769e-06, | |
| "loss": 11.7503, | |
| "step": 95950 | |
| }, | |
| { | |
| "epoch": 41.72110839445803, | |
| "grad_norm": 4.048628807067871, | |
| "learning_rate": 1.6704077159140731e-06, | |
| "loss": 11.7579, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 41.74284161912524, | |
| "grad_norm": 6.817996025085449, | |
| "learning_rate": 1.666023673827269e-06, | |
| "loss": 11.7427, | |
| "step": 96050 | |
| }, | |
| { | |
| "epoch": 41.764574843792445, | |
| "grad_norm": 10.243191719055176, | |
| "learning_rate": 1.6616396317404647e-06, | |
| "loss": 11.7662, | |
| "step": 96100 | |
| }, | |
| { | |
| "epoch": 41.78630806845966, | |
| "grad_norm": 17.077388763427734, | |
| "learning_rate": 1.6572555896536608e-06, | |
| "loss": 11.7471, | |
| "step": 96150 | |
| }, | |
| { | |
| "epoch": 41.80804129312687, | |
| "grad_norm": 10.154135704040527, | |
| "learning_rate": 1.652871547566857e-06, | |
| "loss": 11.7526, | |
| "step": 96200 | |
| }, | |
| { | |
| "epoch": 41.82977451779408, | |
| "grad_norm": 15.618302345275879, | |
| "learning_rate": 1.6484875054800528e-06, | |
| "loss": 11.7451, | |
| "step": 96250 | |
| }, | |
| { | |
| "epoch": 41.85150774246129, | |
| "grad_norm": 5.547327041625977, | |
| "learning_rate": 1.6441034633932486e-06, | |
| "loss": 11.759, | |
| "step": 96300 | |
| }, | |
| { | |
| "epoch": 41.873240967128496, | |
| "grad_norm": 4.5382304191589355, | |
| "learning_rate": 1.6397194213064446e-06, | |
| "loss": 11.7377, | |
| "step": 96350 | |
| }, | |
| { | |
| "epoch": 41.89497419179571, | |
| "grad_norm": 4.469516277313232, | |
| "learning_rate": 1.6353353792196405e-06, | |
| "loss": 11.7555, | |
| "step": 96400 | |
| }, | |
| { | |
| "epoch": 41.91670741646292, | |
| "grad_norm": 4.71604061126709, | |
| "learning_rate": 1.6309513371328367e-06, | |
| "loss": 11.745, | |
| "step": 96450 | |
| }, | |
| { | |
| "epoch": 41.93844064113013, | |
| "grad_norm": 4.897347450256348, | |
| "learning_rate": 1.6265672950460327e-06, | |
| "loss": 11.7572, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 41.96017386579734, | |
| "grad_norm": 19.85100746154785, | |
| "learning_rate": 1.6221832529592285e-06, | |
| "loss": 11.7499, | |
| "step": 96550 | |
| }, | |
| { | |
| "epoch": 41.981907090464546, | |
| "grad_norm": 5.2028117179870605, | |
| "learning_rate": 1.6177992108724243e-06, | |
| "loss": 11.7622, | |
| "step": 96600 | |
| }, | |
| { | |
| "epoch": 42.00347731594675, | |
| "grad_norm": 12.004639625549316, | |
| "learning_rate": 1.6134151687856206e-06, | |
| "loss": 11.6536, | |
| "step": 96650 | |
| }, | |
| { | |
| "epoch": 42.02521054061396, | |
| "grad_norm": 5.1376848220825195, | |
| "learning_rate": 1.6090311266988166e-06, | |
| "loss": 11.7351, | |
| "step": 96700 | |
| }, | |
| { | |
| "epoch": 42.04694376528117, | |
| "grad_norm": 5.914182662963867, | |
| "learning_rate": 1.6046470846120124e-06, | |
| "loss": 11.7339, | |
| "step": 96750 | |
| }, | |
| { | |
| "epoch": 42.068676989948386, | |
| "grad_norm": 7.787753105163574, | |
| "learning_rate": 1.6002630425252082e-06, | |
| "loss": 11.7402, | |
| "step": 96800 | |
| }, | |
| { | |
| "epoch": 42.090410214615595, | |
| "grad_norm": 6.375885009765625, | |
| "learning_rate": 1.5958790004384042e-06, | |
| "loss": 11.7447, | |
| "step": 96850 | |
| }, | |
| { | |
| "epoch": 42.112143439282804, | |
| "grad_norm": 24.318653106689453, | |
| "learning_rate": 1.5914949583516005e-06, | |
| "loss": 11.7561, | |
| "step": 96900 | |
| }, | |
| { | |
| "epoch": 42.13387666395001, | |
| "grad_norm": 3.9973437786102295, | |
| "learning_rate": 1.5871109162647963e-06, | |
| "loss": 11.7402, | |
| "step": 96950 | |
| }, | |
| { | |
| "epoch": 42.15560988861722, | |
| "grad_norm": 7.005822658538818, | |
| "learning_rate": 1.5827268741779923e-06, | |
| "loss": 11.739, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 42.17734311328444, | |
| "grad_norm": 7.374983787536621, | |
| "learning_rate": 1.5783428320911881e-06, | |
| "loss": 11.7416, | |
| "step": 97050 | |
| }, | |
| { | |
| "epoch": 42.199076337951645, | |
| "grad_norm": 7.227538108825684, | |
| "learning_rate": 1.5739587900043843e-06, | |
| "loss": 11.7344, | |
| "step": 97100 | |
| }, | |
| { | |
| "epoch": 42.220809562618854, | |
| "grad_norm": 16.485198974609375, | |
| "learning_rate": 1.5695747479175802e-06, | |
| "loss": 11.7412, | |
| "step": 97150 | |
| }, | |
| { | |
| "epoch": 42.24254278728606, | |
| "grad_norm": 8.366280555725098, | |
| "learning_rate": 1.5651907058307762e-06, | |
| "loss": 11.7441, | |
| "step": 97200 | |
| }, | |
| { | |
| "epoch": 42.26427601195327, | |
| "grad_norm": 4.64595365524292, | |
| "learning_rate": 1.560806663743972e-06, | |
| "loss": 11.7412, | |
| "step": 97250 | |
| }, | |
| { | |
| "epoch": 42.28600923662049, | |
| "grad_norm": 8.422093391418457, | |
| "learning_rate": 1.5564226216571678e-06, | |
| "loss": 11.7501, | |
| "step": 97300 | |
| }, | |
| { | |
| "epoch": 42.307742461287695, | |
| "grad_norm": 7.655717849731445, | |
| "learning_rate": 1.552038579570364e-06, | |
| "loss": 11.7379, | |
| "step": 97350 | |
| }, | |
| { | |
| "epoch": 42.329475685954904, | |
| "grad_norm": 5.5334272384643555, | |
| "learning_rate": 1.54765453748356e-06, | |
| "loss": 11.7498, | |
| "step": 97400 | |
| }, | |
| { | |
| "epoch": 42.35120891062211, | |
| "grad_norm": 23.848054885864258, | |
| "learning_rate": 1.5432704953967559e-06, | |
| "loss": 11.7518, | |
| "step": 97450 | |
| }, | |
| { | |
| "epoch": 42.37294213528932, | |
| "grad_norm": 12.059696197509766, | |
| "learning_rate": 1.5388864533099519e-06, | |
| "loss": 11.7456, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 42.39467535995654, | |
| "grad_norm": 5.756223678588867, | |
| "learning_rate": 1.534502411223148e-06, | |
| "loss": 11.7399, | |
| "step": 97550 | |
| }, | |
| { | |
| "epoch": 42.416408584623746, | |
| "grad_norm": 3.3854639530181885, | |
| "learning_rate": 1.530118369136344e-06, | |
| "loss": 11.7399, | |
| "step": 97600 | |
| }, | |
| { | |
| "epoch": 42.438141809290954, | |
| "grad_norm": 33.99950408935547, | |
| "learning_rate": 1.5257343270495397e-06, | |
| "loss": 11.7448, | |
| "step": 97650 | |
| }, | |
| { | |
| "epoch": 42.45987503395816, | |
| "grad_norm": 12.245558738708496, | |
| "learning_rate": 1.5213502849627358e-06, | |
| "loss": 11.7508, | |
| "step": 97700 | |
| }, | |
| { | |
| "epoch": 42.48160825862537, | |
| "grad_norm": 13.875435829162598, | |
| "learning_rate": 1.5169662428759318e-06, | |
| "loss": 11.7354, | |
| "step": 97750 | |
| }, | |
| { | |
| "epoch": 42.50334148329259, | |
| "grad_norm": 4.234302997589111, | |
| "learning_rate": 1.5125822007891278e-06, | |
| "loss": 11.7334, | |
| "step": 97800 | |
| }, | |
| { | |
| "epoch": 42.525074707959796, | |
| "grad_norm": 6.025676727294922, | |
| "learning_rate": 1.5081981587023236e-06, | |
| "loss": 11.7598, | |
| "step": 97850 | |
| }, | |
| { | |
| "epoch": 42.546807932627004, | |
| "grad_norm": 6.693691253662109, | |
| "learning_rate": 1.5038141166155196e-06, | |
| "loss": 11.7353, | |
| "step": 97900 | |
| }, | |
| { | |
| "epoch": 42.56854115729421, | |
| "grad_norm": 15.162924766540527, | |
| "learning_rate": 1.4994300745287155e-06, | |
| "loss": 11.7358, | |
| "step": 97950 | |
| }, | |
| { | |
| "epoch": 42.59027438196142, | |
| "grad_norm": 14.031683921813965, | |
| "learning_rate": 1.4950460324419117e-06, | |
| "loss": 11.7534, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 42.61200760662863, | |
| "grad_norm": 5.764448642730713, | |
| "learning_rate": 1.4906619903551075e-06, | |
| "loss": 11.7335, | |
| "step": 98050 | |
| }, | |
| { | |
| "epoch": 42.633740831295846, | |
| "grad_norm": 4.945387840270996, | |
| "learning_rate": 1.4862779482683035e-06, | |
| "loss": 11.7459, | |
| "step": 98100 | |
| }, | |
| { | |
| "epoch": 42.655474055963055, | |
| "grad_norm": 5.07288932800293, | |
| "learning_rate": 1.4818939061814993e-06, | |
| "loss": 11.7434, | |
| "step": 98150 | |
| }, | |
| { | |
| "epoch": 42.67720728063026, | |
| "grad_norm": 5.957432270050049, | |
| "learning_rate": 1.4775098640946956e-06, | |
| "loss": 11.7353, | |
| "step": 98200 | |
| }, | |
| { | |
| "epoch": 42.69894050529747, | |
| "grad_norm": 11.405596733093262, | |
| "learning_rate": 1.4731258220078914e-06, | |
| "loss": 11.7419, | |
| "step": 98250 | |
| }, | |
| { | |
| "epoch": 42.72067372996468, | |
| "grad_norm": 5.61083984375, | |
| "learning_rate": 1.4687417799210874e-06, | |
| "loss": 11.7435, | |
| "step": 98300 | |
| }, | |
| { | |
| "epoch": 42.742406954631896, | |
| "grad_norm": 5.795932292938232, | |
| "learning_rate": 1.4643577378342832e-06, | |
| "loss": 11.7536, | |
| "step": 98350 | |
| }, | |
| { | |
| "epoch": 42.764140179299105, | |
| "grad_norm": 14.968274116516113, | |
| "learning_rate": 1.4599736957474792e-06, | |
| "loss": 11.7532, | |
| "step": 98400 | |
| }, | |
| { | |
| "epoch": 42.78587340396631, | |
| "grad_norm": 4.567286491394043, | |
| "learning_rate": 1.4555896536606753e-06, | |
| "loss": 11.7412, | |
| "step": 98450 | |
| }, | |
| { | |
| "epoch": 42.80760662863352, | |
| "grad_norm": 3.640240430831909, | |
| "learning_rate": 1.4512056115738713e-06, | |
| "loss": 11.7526, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 42.82933985330073, | |
| "grad_norm": 7.847878456115723, | |
| "learning_rate": 1.446821569487067e-06, | |
| "loss": 11.7427, | |
| "step": 98550 | |
| }, | |
| { | |
| "epoch": 42.851073077967946, | |
| "grad_norm": 9.818808555603027, | |
| "learning_rate": 1.4424375274002631e-06, | |
| "loss": 11.7491, | |
| "step": 98600 | |
| }, | |
| { | |
| "epoch": 42.872806302635155, | |
| "grad_norm": 6.466832637786865, | |
| "learning_rate": 1.4380534853134591e-06, | |
| "loss": 11.7487, | |
| "step": 98650 | |
| }, | |
| { | |
| "epoch": 42.89453952730236, | |
| "grad_norm": 12.53541374206543, | |
| "learning_rate": 1.4336694432266552e-06, | |
| "loss": 11.7451, | |
| "step": 98700 | |
| }, | |
| { | |
| "epoch": 42.91627275196957, | |
| "grad_norm": 9.651721000671387, | |
| "learning_rate": 1.429285401139851e-06, | |
| "loss": 11.7455, | |
| "step": 98750 | |
| }, | |
| { | |
| "epoch": 42.93800597663678, | |
| "grad_norm": 7.363193511962891, | |
| "learning_rate": 1.424901359053047e-06, | |
| "loss": 11.7442, | |
| "step": 98800 | |
| }, | |
| { | |
| "epoch": 42.959739201304, | |
| "grad_norm": 6.456315040588379, | |
| "learning_rate": 1.4205173169662432e-06, | |
| "loss": 11.7378, | |
| "step": 98850 | |
| }, | |
| { | |
| "epoch": 42.981472425971205, | |
| "grad_norm": 4.924785137176514, | |
| "learning_rate": 1.416133274879439e-06, | |
| "loss": 11.7338, | |
| "step": 98900 | |
| }, | |
| { | |
| "epoch": 43.00304265145341, | |
| "grad_norm": 12.623988151550293, | |
| "learning_rate": 1.4117492327926348e-06, | |
| "loss": 11.6662, | |
| "step": 98950 | |
| }, | |
| { | |
| "epoch": 43.02477587612062, | |
| "grad_norm": 9.521653175354004, | |
| "learning_rate": 1.4073651907058309e-06, | |
| "loss": 11.7329, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 43.04650910078783, | |
| "grad_norm": 4.056196212768555, | |
| "learning_rate": 1.4029811486190267e-06, | |
| "loss": 11.7467, | |
| "step": 99050 | |
| }, | |
| { | |
| "epoch": 43.06824232545504, | |
| "grad_norm": 3.722097396850586, | |
| "learning_rate": 1.398597106532223e-06, | |
| "loss": 11.735, | |
| "step": 99100 | |
| }, | |
| { | |
| "epoch": 43.08997555012225, | |
| "grad_norm": 3.6029582023620605, | |
| "learning_rate": 1.3942130644454187e-06, | |
| "loss": 11.7395, | |
| "step": 99150 | |
| }, | |
| { | |
| "epoch": 43.111708774789456, | |
| "grad_norm": 4.464113712310791, | |
| "learning_rate": 1.3898290223586147e-06, | |
| "loss": 11.7417, | |
| "step": 99200 | |
| }, | |
| { | |
| "epoch": 43.13344199945667, | |
| "grad_norm": 7.3181986808776855, | |
| "learning_rate": 1.3854449802718106e-06, | |
| "loss": 11.7556, | |
| "step": 99250 | |
| }, | |
| { | |
| "epoch": 43.15517522412388, | |
| "grad_norm": 12.537137031555176, | |
| "learning_rate": 1.3810609381850068e-06, | |
| "loss": 11.7485, | |
| "step": 99300 | |
| }, | |
| { | |
| "epoch": 43.17690844879109, | |
| "grad_norm": 14.284486770629883, | |
| "learning_rate": 1.3766768960982028e-06, | |
| "loss": 11.7374, | |
| "step": 99350 | |
| }, | |
| { | |
| "epoch": 43.1986416734583, | |
| "grad_norm": 3.9522204399108887, | |
| "learning_rate": 1.3722928540113986e-06, | |
| "loss": 11.7394, | |
| "step": 99400 | |
| }, | |
| { | |
| "epoch": 43.220374898125506, | |
| "grad_norm": 3.287987232208252, | |
| "learning_rate": 1.3679088119245944e-06, | |
| "loss": 11.7358, | |
| "step": 99450 | |
| }, | |
| { | |
| "epoch": 43.24210812279272, | |
| "grad_norm": 5.351846694946289, | |
| "learning_rate": 1.3635247698377905e-06, | |
| "loss": 11.7402, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 43.26384134745993, | |
| "grad_norm": 13.18363094329834, | |
| "learning_rate": 1.3591407277509867e-06, | |
| "loss": 11.7343, | |
| "step": 99550 | |
| }, | |
| { | |
| "epoch": 43.28557457212714, | |
| "grad_norm": 2.8352560997009277, | |
| "learning_rate": 1.3547566856641825e-06, | |
| "loss": 11.7408, | |
| "step": 99600 | |
| }, | |
| { | |
| "epoch": 43.30730779679435, | |
| "grad_norm": 4.404722690582275, | |
| "learning_rate": 1.3503726435773783e-06, | |
| "loss": 11.7488, | |
| "step": 99650 | |
| }, | |
| { | |
| "epoch": 43.329041021461556, | |
| "grad_norm": 5.89795446395874, | |
| "learning_rate": 1.3459886014905743e-06, | |
| "loss": 11.7417, | |
| "step": 99700 | |
| }, | |
| { | |
| "epoch": 43.35077424612877, | |
| "grad_norm": 3.792579412460327, | |
| "learning_rate": 1.3416045594037706e-06, | |
| "loss": 11.742, | |
| "step": 99750 | |
| }, | |
| { | |
| "epoch": 43.37250747079598, | |
| "grad_norm": 6.295860290527344, | |
| "learning_rate": 1.3372205173169664e-06, | |
| "loss": 11.7419, | |
| "step": 99800 | |
| }, | |
| { | |
| "epoch": 43.39424069546319, | |
| "grad_norm": 4.617618560791016, | |
| "learning_rate": 1.3328364752301624e-06, | |
| "loss": 11.7388, | |
| "step": 99850 | |
| }, | |
| { | |
| "epoch": 43.4159739201304, | |
| "grad_norm": 3.46701717376709, | |
| "learning_rate": 1.3284524331433582e-06, | |
| "loss": 11.739, | |
| "step": 99900 | |
| }, | |
| { | |
| "epoch": 43.437707144797606, | |
| "grad_norm": 5.266394138336182, | |
| "learning_rate": 1.3240683910565544e-06, | |
| "loss": 11.7336, | |
| "step": 99950 | |
| }, | |
| { | |
| "epoch": 43.45944036946482, | |
| "grad_norm": 4.314075946807861, | |
| "learning_rate": 1.3196843489697503e-06, | |
| "loss": 11.7505, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 43.45944036946482, | |
| "eval_cer": 0.07396960981570859, | |
| "eval_loss": 2.4215219020843506, | |
| "eval_runtime": 398.4535, | |
| "eval_samples_per_second": 13.567, | |
| "eval_steps_per_second": 3.393, | |
| "eval_wer": 0.22507457999685979, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 43.48117359413203, | |
| "grad_norm": 4.137760639190674, | |
| "learning_rate": 1.3153003068829463e-06, | |
| "loss": 11.7369, | |
| "step": 100050 | |
| }, | |
| { | |
| "epoch": 43.50290681879924, | |
| "grad_norm": 16.71613311767578, | |
| "learning_rate": 1.310916264796142e-06, | |
| "loss": 11.7308, | |
| "step": 100100 | |
| }, | |
| { | |
| "epoch": 43.52464004346645, | |
| "grad_norm": 3.2521612644195557, | |
| "learning_rate": 1.306532222709338e-06, | |
| "loss": 11.7266, | |
| "step": 100150 | |
| }, | |
| { | |
| "epoch": 43.54637326813366, | |
| "grad_norm": 5.732780456542969, | |
| "learning_rate": 1.3021481806225341e-06, | |
| "loss": 11.7389, | |
| "step": 100200 | |
| }, | |
| { | |
| "epoch": 43.56810649280087, | |
| "grad_norm": 6.189255714416504, | |
| "learning_rate": 1.2977641385357302e-06, | |
| "loss": 11.7341, | |
| "step": 100250 | |
| }, | |
| { | |
| "epoch": 43.58983971746808, | |
| "grad_norm": 4.808958053588867, | |
| "learning_rate": 1.293380096448926e-06, | |
| "loss": 11.739, | |
| "step": 100300 | |
| }, | |
| { | |
| "epoch": 43.61157294213529, | |
| "grad_norm": 13.771133422851562, | |
| "learning_rate": 1.288996054362122e-06, | |
| "loss": 11.734, | |
| "step": 100350 | |
| }, | |
| { | |
| "epoch": 43.6333061668025, | |
| "grad_norm": 5.9765400886535645, | |
| "learning_rate": 1.284612012275318e-06, | |
| "loss": 11.7398, | |
| "step": 100400 | |
| }, | |
| { | |
| "epoch": 43.65503939146971, | |
| "grad_norm": 4.513848304748535, | |
| "learning_rate": 1.280227970188514e-06, | |
| "loss": 11.7321, | |
| "step": 100450 | |
| }, | |
| { | |
| "epoch": 43.67677261613692, | |
| "grad_norm": 3.8837485313415527, | |
| "learning_rate": 1.2758439281017098e-06, | |
| "loss": 11.7257, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 43.69850584080413, | |
| "grad_norm": 5.340435028076172, | |
| "learning_rate": 1.2714598860149059e-06, | |
| "loss": 11.7429, | |
| "step": 100550 | |
| }, | |
| { | |
| "epoch": 43.72023906547134, | |
| "grad_norm": 5.877197742462158, | |
| "learning_rate": 1.2670758439281017e-06, | |
| "loss": 11.728, | |
| "step": 100600 | |
| }, | |
| { | |
| "epoch": 43.74197229013855, | |
| "grad_norm": 11.274967193603516, | |
| "learning_rate": 1.262691801841298e-06, | |
| "loss": 11.7314, | |
| "step": 100650 | |
| }, | |
| { | |
| "epoch": 43.76370551480576, | |
| "grad_norm": 7.218222141265869, | |
| "learning_rate": 1.2583077597544937e-06, | |
| "loss": 11.726, | |
| "step": 100700 | |
| }, | |
| { | |
| "epoch": 43.78543873947297, | |
| "grad_norm": 3.588624954223633, | |
| "learning_rate": 1.2539237176676897e-06, | |
| "loss": 11.7394, | |
| "step": 100750 | |
| }, | |
| { | |
| "epoch": 43.80717196414018, | |
| "grad_norm": 10.603730201721191, | |
| "learning_rate": 1.2495396755808858e-06, | |
| "loss": 11.7442, | |
| "step": 100800 | |
| }, | |
| { | |
| "epoch": 43.82890518880739, | |
| "grad_norm": 4.9444379806518555, | |
| "learning_rate": 1.2451556334940816e-06, | |
| "loss": 11.7318, | |
| "step": 100850 | |
| }, | |
| { | |
| "epoch": 43.8506384134746, | |
| "grad_norm": 16.76546287536621, | |
| "learning_rate": 1.2407715914072776e-06, | |
| "loss": 11.731, | |
| "step": 100900 | |
| }, | |
| { | |
| "epoch": 43.87237163814181, | |
| "grad_norm": 3.7398102283477783, | |
| "learning_rate": 1.2363875493204736e-06, | |
| "loss": 11.7348, | |
| "step": 100950 | |
| }, | |
| { | |
| "epoch": 43.894104862809016, | |
| "grad_norm": 4.954889297485352, | |
| "learning_rate": 1.2320035072336696e-06, | |
| "loss": 11.7333, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 43.91583808747623, | |
| "grad_norm": 3.635148286819458, | |
| "learning_rate": 1.2276194651468654e-06, | |
| "loss": 11.7388, | |
| "step": 101050 | |
| }, | |
| { | |
| "epoch": 43.93757131214344, | |
| "grad_norm": 9.620790481567383, | |
| "learning_rate": 1.2232354230600615e-06, | |
| "loss": 11.7254, | |
| "step": 101100 | |
| }, | |
| { | |
| "epoch": 43.95930453681065, | |
| "grad_norm": 4.13824462890625, | |
| "learning_rate": 1.2188513809732575e-06, | |
| "loss": 11.7366, | |
| "step": 101150 | |
| }, | |
| { | |
| "epoch": 43.98103776147786, | |
| "grad_norm": 6.983582973480225, | |
| "learning_rate": 1.2144673388864533e-06, | |
| "loss": 11.7287, | |
| "step": 101200 | |
| }, | |
| { | |
| "epoch": 44.002607986960065, | |
| "grad_norm": 4.2465691566467285, | |
| "learning_rate": 1.2100832967996493e-06, | |
| "loss": 11.649, | |
| "step": 101250 | |
| }, | |
| { | |
| "epoch": 44.024341211627274, | |
| "grad_norm": 17.737117767333984, | |
| "learning_rate": 1.2056992547128453e-06, | |
| "loss": 11.7302, | |
| "step": 101300 | |
| }, | |
| { | |
| "epoch": 44.04607443629448, | |
| "grad_norm": 4.739099502563477, | |
| "learning_rate": 1.2013152126260414e-06, | |
| "loss": 11.75, | |
| "step": 101350 | |
| }, | |
| { | |
| "epoch": 44.0678076609617, | |
| "grad_norm": 3.695364236831665, | |
| "learning_rate": 1.1969311705392372e-06, | |
| "loss": 11.7399, | |
| "step": 101400 | |
| }, | |
| { | |
| "epoch": 44.08954088562891, | |
| "grad_norm": 6.371554851531982, | |
| "learning_rate": 1.1925471284524332e-06, | |
| "loss": 11.7314, | |
| "step": 101450 | |
| }, | |
| { | |
| "epoch": 44.111274110296115, | |
| "grad_norm": 24.45339584350586, | |
| "learning_rate": 1.1881630863656292e-06, | |
| "loss": 11.7215, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 44.133007334963324, | |
| "grad_norm": 5.760914325714111, | |
| "learning_rate": 1.1837790442788252e-06, | |
| "loss": 11.7376, | |
| "step": 101550 | |
| }, | |
| { | |
| "epoch": 44.15474055963053, | |
| "grad_norm": 4.051183700561523, | |
| "learning_rate": 1.179395002192021e-06, | |
| "loss": 11.7265, | |
| "step": 101600 | |
| }, | |
| { | |
| "epoch": 44.17647378429775, | |
| "grad_norm": 5.363418102264404, | |
| "learning_rate": 1.175010960105217e-06, | |
| "loss": 11.7367, | |
| "step": 101650 | |
| }, | |
| { | |
| "epoch": 44.19820700896496, | |
| "grad_norm": 3.471618890762329, | |
| "learning_rate": 1.170626918018413e-06, | |
| "loss": 11.7384, | |
| "step": 101700 | |
| }, | |
| { | |
| "epoch": 44.219940233632165, | |
| "grad_norm": 7.958500385284424, | |
| "learning_rate": 1.166242875931609e-06, | |
| "loss": 11.7332, | |
| "step": 101750 | |
| }, | |
| { | |
| "epoch": 44.241673458299374, | |
| "grad_norm": 25.589679718017578, | |
| "learning_rate": 1.161858833844805e-06, | |
| "loss": 11.7298, | |
| "step": 101800 | |
| }, | |
| { | |
| "epoch": 44.26340668296658, | |
| "grad_norm": 11.511533737182617, | |
| "learning_rate": 1.157474791758001e-06, | |
| "loss": 11.7272, | |
| "step": 101850 | |
| }, | |
| { | |
| "epoch": 44.2851399076338, | |
| "grad_norm": 4.467309474945068, | |
| "learning_rate": 1.153090749671197e-06, | |
| "loss": 11.7184, | |
| "step": 101900 | |
| }, | |
| { | |
| "epoch": 44.30687313230101, | |
| "grad_norm": 4.863615989685059, | |
| "learning_rate": 1.1487067075843928e-06, | |
| "loss": 11.7395, | |
| "step": 101950 | |
| }, | |
| { | |
| "epoch": 44.328606356968216, | |
| "grad_norm": 6.230271816253662, | |
| "learning_rate": 1.1443226654975888e-06, | |
| "loss": 11.7373, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 44.350339581635424, | |
| "grad_norm": 4.7530517578125, | |
| "learning_rate": 1.1399386234107848e-06, | |
| "loss": 11.7472, | |
| "step": 102050 | |
| }, | |
| { | |
| "epoch": 44.37207280630263, | |
| "grad_norm": 22.007238388061523, | |
| "learning_rate": 1.1355545813239809e-06, | |
| "loss": 11.7212, | |
| "step": 102100 | |
| }, | |
| { | |
| "epoch": 44.39380603096985, | |
| "grad_norm": 6.660682678222656, | |
| "learning_rate": 1.1311705392371767e-06, | |
| "loss": 11.7316, | |
| "step": 102150 | |
| }, | |
| { | |
| "epoch": 44.41553925563706, | |
| "grad_norm": 5.25778865814209, | |
| "learning_rate": 1.126786497150373e-06, | |
| "loss": 11.7305, | |
| "step": 102200 | |
| }, | |
| { | |
| "epoch": 44.437272480304266, | |
| "grad_norm": 5.097360134124756, | |
| "learning_rate": 1.1224024550635687e-06, | |
| "loss": 11.7314, | |
| "step": 102250 | |
| }, | |
| { | |
| "epoch": 44.459005704971474, | |
| "grad_norm": 4.272281169891357, | |
| "learning_rate": 1.1180184129767645e-06, | |
| "loss": 11.7419, | |
| "step": 102300 | |
| }, | |
| { | |
| "epoch": 44.48073892963868, | |
| "grad_norm": 6.060675144195557, | |
| "learning_rate": 1.1136343708899605e-06, | |
| "loss": 11.7367, | |
| "step": 102350 | |
| }, | |
| { | |
| "epoch": 44.50247215430589, | |
| "grad_norm": 5.883248329162598, | |
| "learning_rate": 1.1092503288031566e-06, | |
| "loss": 11.7368, | |
| "step": 102400 | |
| }, | |
| { | |
| "epoch": 44.52420537897311, | |
| "grad_norm": 6.329914093017578, | |
| "learning_rate": 1.1048662867163526e-06, | |
| "loss": 11.7303, | |
| "step": 102450 | |
| }, | |
| { | |
| "epoch": 44.545938603640316, | |
| "grad_norm": 6.62354850769043, | |
| "learning_rate": 1.1004822446295484e-06, | |
| "loss": 11.7375, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 44.567671828307525, | |
| "grad_norm": 10.634700775146484, | |
| "learning_rate": 1.0960982025427446e-06, | |
| "loss": 11.7295, | |
| "step": 102550 | |
| }, | |
| { | |
| "epoch": 44.58940505297473, | |
| "grad_norm": 2.787297487258911, | |
| "learning_rate": 1.0917141604559404e-06, | |
| "loss": 11.7311, | |
| "step": 102600 | |
| }, | |
| { | |
| "epoch": 44.61113827764194, | |
| "grad_norm": 4.915313720703125, | |
| "learning_rate": 1.0873301183691365e-06, | |
| "loss": 11.7368, | |
| "step": 102650 | |
| }, | |
| { | |
| "epoch": 44.63287150230916, | |
| "grad_norm": 13.359769821166992, | |
| "learning_rate": 1.0829460762823325e-06, | |
| "loss": 11.7417, | |
| "step": 102700 | |
| }, | |
| { | |
| "epoch": 44.654604726976366, | |
| "grad_norm": 3.9991888999938965, | |
| "learning_rate": 1.0785620341955285e-06, | |
| "loss": 11.7392, | |
| "step": 102750 | |
| }, | |
| { | |
| "epoch": 44.676337951643575, | |
| "grad_norm": 3.797086238861084, | |
| "learning_rate": 1.0741779921087243e-06, | |
| "loss": 11.7242, | |
| "step": 102800 | |
| }, | |
| { | |
| "epoch": 44.69807117631078, | |
| "grad_norm": 6.608884811401367, | |
| "learning_rate": 1.0697939500219203e-06, | |
| "loss": 11.7231, | |
| "step": 102850 | |
| }, | |
| { | |
| "epoch": 44.71980440097799, | |
| "grad_norm": 10.230695724487305, | |
| "learning_rate": 1.0654099079351164e-06, | |
| "loss": 11.7325, | |
| "step": 102900 | |
| }, | |
| { | |
| "epoch": 44.74153762564521, | |
| "grad_norm": 4.2929301261901855, | |
| "learning_rate": 1.0610258658483122e-06, | |
| "loss": 11.7303, | |
| "step": 102950 | |
| }, | |
| { | |
| "epoch": 44.763270850312416, | |
| "grad_norm": 5.952197074890137, | |
| "learning_rate": 1.0566418237615082e-06, | |
| "loss": 11.7291, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 44.785004074979625, | |
| "grad_norm": 10.304634094238281, | |
| "learning_rate": 1.0522577816747042e-06, | |
| "loss": 11.7452, | |
| "step": 103050 | |
| }, | |
| { | |
| "epoch": 44.806737299646834, | |
| "grad_norm": 4.195600509643555, | |
| "learning_rate": 1.0478737395879002e-06, | |
| "loss": 11.744, | |
| "step": 103100 | |
| }, | |
| { | |
| "epoch": 44.82847052431404, | |
| "grad_norm": 3.8358092308044434, | |
| "learning_rate": 1.043489697501096e-06, | |
| "loss": 11.7301, | |
| "step": 103150 | |
| }, | |
| { | |
| "epoch": 44.85020374898126, | |
| "grad_norm": 13.731127738952637, | |
| "learning_rate": 1.039105655414292e-06, | |
| "loss": 11.7318, | |
| "step": 103200 | |
| }, | |
| { | |
| "epoch": 44.87193697364847, | |
| "grad_norm": 4.197743892669678, | |
| "learning_rate": 1.034721613327488e-06, | |
| "loss": 11.7318, | |
| "step": 103250 | |
| }, | |
| { | |
| "epoch": 44.893670198315675, | |
| "grad_norm": 8.494996070861816, | |
| "learning_rate": 1.030337571240684e-06, | |
| "loss": 11.7357, | |
| "step": 103300 | |
| }, | |
| { | |
| "epoch": 44.915403422982884, | |
| "grad_norm": 3.6425983905792236, | |
| "learning_rate": 1.02595352915388e-06, | |
| "loss": 11.729, | |
| "step": 103350 | |
| }, | |
| { | |
| "epoch": 44.93713664765009, | |
| "grad_norm": 4.333567142486572, | |
| "learning_rate": 1.021569487067076e-06, | |
| "loss": 11.7339, | |
| "step": 103400 | |
| }, | |
| { | |
| "epoch": 44.95886987231731, | |
| "grad_norm": 4.8357930183410645, | |
| "learning_rate": 1.017185444980272e-06, | |
| "loss": 11.735, | |
| "step": 103450 | |
| }, | |
| { | |
| "epoch": 44.98060309698452, | |
| "grad_norm": 8.409868240356445, | |
| "learning_rate": 1.0128014028934678e-06, | |
| "loss": 11.7336, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 45.002173322466724, | |
| "grad_norm": 3.9856626987457275, | |
| "learning_rate": 1.0084173608066638e-06, | |
| "loss": 11.653, | |
| "step": 103550 | |
| }, | |
| { | |
| "epoch": 45.02390654713393, | |
| "grad_norm": 4.529376029968262, | |
| "learning_rate": 1.0040333187198598e-06, | |
| "loss": 11.7279, | |
| "step": 103600 | |
| }, | |
| { | |
| "epoch": 45.04563977180114, | |
| "grad_norm": 4.276280403137207, | |
| "learning_rate": 9.996492766330559e-07, | |
| "loss": 11.7362, | |
| "step": 103650 | |
| }, | |
| { | |
| "epoch": 45.06737299646835, | |
| "grad_norm": 4.415678024291992, | |
| "learning_rate": 9.952652345462517e-07, | |
| "loss": 11.7194, | |
| "step": 103700 | |
| }, | |
| { | |
| "epoch": 45.08910622113556, | |
| "grad_norm": 4.513889789581299, | |
| "learning_rate": 9.908811924594477e-07, | |
| "loss": 11.7178, | |
| "step": 103750 | |
| }, | |
| { | |
| "epoch": 45.11083944580277, | |
| "grad_norm": 5.300011157989502, | |
| "learning_rate": 9.864971503726437e-07, | |
| "loss": 11.727, | |
| "step": 103800 | |
| }, | |
| { | |
| "epoch": 45.13257267046998, | |
| "grad_norm": 3.8258767127990723, | |
| "learning_rate": 9.821131082858395e-07, | |
| "loss": 11.7328, | |
| "step": 103850 | |
| }, | |
| { | |
| "epoch": 45.15430589513719, | |
| "grad_norm": 7.767271995544434, | |
| "learning_rate": 9.777290661990355e-07, | |
| "loss": 11.7202, | |
| "step": 103900 | |
| }, | |
| { | |
| "epoch": 45.1760391198044, | |
| "grad_norm": 3.230754852294922, | |
| "learning_rate": 9.733450241122316e-07, | |
| "loss": 11.7268, | |
| "step": 103950 | |
| }, | |
| { | |
| "epoch": 45.19777234447161, | |
| "grad_norm": 3.8269119262695312, | |
| "learning_rate": 9.689609820254276e-07, | |
| "loss": 11.7249, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 45.21950556913882, | |
| "grad_norm": 5.030121803283691, | |
| "learning_rate": 9.645769399386234e-07, | |
| "loss": 11.7181, | |
| "step": 104050 | |
| }, | |
| { | |
| "epoch": 45.24123879380603, | |
| "grad_norm": 4.850019931793213, | |
| "learning_rate": 9.601928978518194e-07, | |
| "loss": 11.7272, | |
| "step": 104100 | |
| }, | |
| { | |
| "epoch": 45.26297201847324, | |
| "grad_norm": 6.58116340637207, | |
| "learning_rate": 9.558088557650154e-07, | |
| "loss": 11.7237, | |
| "step": 104150 | |
| }, | |
| { | |
| "epoch": 45.28470524314045, | |
| "grad_norm": 20.67346954345703, | |
| "learning_rate": 9.514248136782115e-07, | |
| "loss": 11.7302, | |
| "step": 104200 | |
| }, | |
| { | |
| "epoch": 45.30643846780766, | |
| "grad_norm": 3.362128973007202, | |
| "learning_rate": 9.470407715914074e-07, | |
| "loss": 11.7209, | |
| "step": 104250 | |
| }, | |
| { | |
| "epoch": 45.32817169247487, | |
| "grad_norm": 7.51302433013916, | |
| "learning_rate": 9.426567295046034e-07, | |
| "loss": 11.7303, | |
| "step": 104300 | |
| }, | |
| { | |
| "epoch": 45.349904917142084, | |
| "grad_norm": 4.610814094543457, | |
| "learning_rate": 9.382726874177993e-07, | |
| "loss": 11.7177, | |
| "step": 104350 | |
| }, | |
| { | |
| "epoch": 45.37163814180929, | |
| "grad_norm": 13.158862113952637, | |
| "learning_rate": 9.338886453309952e-07, | |
| "loss": 11.7227, | |
| "step": 104400 | |
| }, | |
| { | |
| "epoch": 45.3933713664765, | |
| "grad_norm": 4.248621940612793, | |
| "learning_rate": 9.295046032441913e-07, | |
| "loss": 11.7428, | |
| "step": 104450 | |
| }, | |
| { | |
| "epoch": 45.41510459114371, | |
| "grad_norm": 3.553060531616211, | |
| "learning_rate": 9.251205611573872e-07, | |
| "loss": 11.7294, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 45.43683781581092, | |
| "grad_norm": 5.807036399841309, | |
| "learning_rate": 9.207365190705832e-07, | |
| "loss": 11.7284, | |
| "step": 104550 | |
| }, | |
| { | |
| "epoch": 45.458571040478134, | |
| "grad_norm": 13.629132270812988, | |
| "learning_rate": 9.163524769837791e-07, | |
| "loss": 11.7301, | |
| "step": 104600 | |
| }, | |
| { | |
| "epoch": 45.48030426514534, | |
| "grad_norm": 4.1011857986450195, | |
| "learning_rate": 9.119684348969751e-07, | |
| "loss": 11.7319, | |
| "step": 104650 | |
| }, | |
| { | |
| "epoch": 45.50203748981255, | |
| "grad_norm": 10.649341583251953, | |
| "learning_rate": 9.075843928101711e-07, | |
| "loss": 11.7333, | |
| "step": 104700 | |
| }, | |
| { | |
| "epoch": 45.52377071447976, | |
| "grad_norm": 20.217660903930664, | |
| "learning_rate": 9.032003507233671e-07, | |
| "loss": 11.7319, | |
| "step": 104750 | |
| }, | |
| { | |
| "epoch": 45.54550393914697, | |
| "grad_norm": 7.371703624725342, | |
| "learning_rate": 8.98816308636563e-07, | |
| "loss": 11.7223, | |
| "step": 104800 | |
| }, | |
| { | |
| "epoch": 45.567237163814184, | |
| "grad_norm": 6.1061110496521, | |
| "learning_rate": 8.94432266549759e-07, | |
| "loss": 11.7134, | |
| "step": 104850 | |
| }, | |
| { | |
| "epoch": 45.58897038848139, | |
| "grad_norm": 3.3697314262390137, | |
| "learning_rate": 8.900482244629549e-07, | |
| "loss": 11.7206, | |
| "step": 104900 | |
| }, | |
| { | |
| "epoch": 45.6107036131486, | |
| "grad_norm": 5.704832077026367, | |
| "learning_rate": 8.856641823761508e-07, | |
| "loss": 11.7343, | |
| "step": 104950 | |
| }, | |
| { | |
| "epoch": 45.63243683781581, | |
| "grad_norm": 5.612690448760986, | |
| "learning_rate": 8.812801402893469e-07, | |
| "loss": 11.7228, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 45.65417006248302, | |
| "grad_norm": 4.661070823669434, | |
| "learning_rate": 8.768960982025428e-07, | |
| "loss": 11.7344, | |
| "step": 105050 | |
| }, | |
| { | |
| "epoch": 45.675903287150234, | |
| "grad_norm": 4.922998905181885, | |
| "learning_rate": 8.725120561157388e-07, | |
| "loss": 11.7161, | |
| "step": 105100 | |
| }, | |
| { | |
| "epoch": 45.69763651181744, | |
| "grad_norm": 6.320181369781494, | |
| "learning_rate": 8.681280140289347e-07, | |
| "loss": 11.7451, | |
| "step": 105150 | |
| }, | |
| { | |
| "epoch": 45.71936973648465, | |
| "grad_norm": 6.543067455291748, | |
| "learning_rate": 8.637439719421307e-07, | |
| "loss": 11.7187, | |
| "step": 105200 | |
| }, | |
| { | |
| "epoch": 45.74110296115186, | |
| "grad_norm": 7.506560802459717, | |
| "learning_rate": 8.593599298553267e-07, | |
| "loss": 11.7368, | |
| "step": 105250 | |
| }, | |
| { | |
| "epoch": 45.76283618581907, | |
| "grad_norm": 6.871926307678223, | |
| "learning_rate": 8.549758877685227e-07, | |
| "loss": 11.7347, | |
| "step": 105300 | |
| }, | |
| { | |
| "epoch": 45.784569410486284, | |
| "grad_norm": 4.491659641265869, | |
| "learning_rate": 8.505918456817186e-07, | |
| "loss": 11.7379, | |
| "step": 105350 | |
| }, | |
| { | |
| "epoch": 45.80630263515349, | |
| "grad_norm": 21.81031036376953, | |
| "learning_rate": 8.462078035949146e-07, | |
| "loss": 11.7307, | |
| "step": 105400 | |
| }, | |
| { | |
| "epoch": 45.8280358598207, | |
| "grad_norm": 20.492307662963867, | |
| "learning_rate": 8.418237615081105e-07, | |
| "loss": 11.7268, | |
| "step": 105450 | |
| }, | |
| { | |
| "epoch": 45.84976908448791, | |
| "grad_norm": 7.620596408843994, | |
| "learning_rate": 8.374397194213065e-07, | |
| "loss": 11.7384, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 45.87150230915512, | |
| "grad_norm": 4.937099456787109, | |
| "learning_rate": 8.330556773345025e-07, | |
| "loss": 11.7266, | |
| "step": 105550 | |
| }, | |
| { | |
| "epoch": 45.89323553382233, | |
| "grad_norm": 3.815049409866333, | |
| "learning_rate": 8.286716352476984e-07, | |
| "loss": 11.7281, | |
| "step": 105600 | |
| }, | |
| { | |
| "epoch": 45.91496875848954, | |
| "grad_norm": 9.32738208770752, | |
| "learning_rate": 8.242875931608944e-07, | |
| "loss": 11.7424, | |
| "step": 105650 | |
| }, | |
| { | |
| "epoch": 45.93670198315675, | |
| "grad_norm": 12.112308502197266, | |
| "learning_rate": 8.199035510740903e-07, | |
| "loss": 11.729, | |
| "step": 105700 | |
| }, | |
| { | |
| "epoch": 45.95843520782396, | |
| "grad_norm": 4.76987361907959, | |
| "learning_rate": 8.155195089872864e-07, | |
| "loss": 11.7292, | |
| "step": 105750 | |
| }, | |
| { | |
| "epoch": 45.98016843249117, | |
| "grad_norm": 11.38598346710205, | |
| "learning_rate": 8.111354669004823e-07, | |
| "loss": 11.7356, | |
| "step": 105800 | |
| }, | |
| { | |
| "epoch": 46.00173865797338, | |
| "grad_norm": 18.5734806060791, | |
| "learning_rate": 8.067514248136783e-07, | |
| "loss": 11.6526, | |
| "step": 105850 | |
| }, | |
| { | |
| "epoch": 46.023471882640585, | |
| "grad_norm": 3.3094968795776367, | |
| "learning_rate": 8.023673827268742e-07, | |
| "loss": 11.7246, | |
| "step": 105900 | |
| }, | |
| { | |
| "epoch": 46.045205107307794, | |
| "grad_norm": 10.625943183898926, | |
| "learning_rate": 7.979833406400702e-07, | |
| "loss": 11.7197, | |
| "step": 105950 | |
| }, | |
| { | |
| "epoch": 46.06693833197501, | |
| "grad_norm": 11.11587142944336, | |
| "learning_rate": 7.935992985532662e-07, | |
| "loss": 11.712, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 46.08867155664222, | |
| "grad_norm": 5.9816083908081055, | |
| "learning_rate": 7.892152564664621e-07, | |
| "loss": 11.7291, | |
| "step": 106050 | |
| }, | |
| { | |
| "epoch": 46.11040478130943, | |
| "grad_norm": 5.810311317443848, | |
| "learning_rate": 7.848312143796581e-07, | |
| "loss": 11.72, | |
| "step": 106100 | |
| }, | |
| { | |
| "epoch": 46.132138005976635, | |
| "grad_norm": 9.10987377166748, | |
| "learning_rate": 7.80447172292854e-07, | |
| "loss": 11.7341, | |
| "step": 106150 | |
| }, | |
| { | |
| "epoch": 46.153871230643844, | |
| "grad_norm": 3.9713680744171143, | |
| "learning_rate": 7.7606313020605e-07, | |
| "loss": 11.728, | |
| "step": 106200 | |
| }, | |
| { | |
| "epoch": 46.17560445531106, | |
| "grad_norm": 10.883817672729492, | |
| "learning_rate": 7.716790881192459e-07, | |
| "loss": 11.7321, | |
| "step": 106250 | |
| }, | |
| { | |
| "epoch": 46.19733767997827, | |
| "grad_norm": 3.261399745941162, | |
| "learning_rate": 7.67295046032442e-07, | |
| "loss": 11.7197, | |
| "step": 106300 | |
| }, | |
| { | |
| "epoch": 46.21907090464548, | |
| "grad_norm": 3.867229461669922, | |
| "learning_rate": 7.629110039456379e-07, | |
| "loss": 11.7311, | |
| "step": 106350 | |
| }, | |
| { | |
| "epoch": 46.240804129312686, | |
| "grad_norm": 5.125184059143066, | |
| "learning_rate": 7.585269618588339e-07, | |
| "loss": 11.7357, | |
| "step": 106400 | |
| }, | |
| { | |
| "epoch": 46.262537353979894, | |
| "grad_norm": 3.271857500076294, | |
| "learning_rate": 7.541429197720298e-07, | |
| "loss": 11.7182, | |
| "step": 106450 | |
| }, | |
| { | |
| "epoch": 46.28427057864711, | |
| "grad_norm": 2.972466230392456, | |
| "learning_rate": 7.49758877685226e-07, | |
| "loss": 11.7244, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 46.30600380331432, | |
| "grad_norm": 10.320878028869629, | |
| "learning_rate": 7.453748355984218e-07, | |
| "loss": 11.7296, | |
| "step": 106550 | |
| }, | |
| { | |
| "epoch": 46.32773702798153, | |
| "grad_norm": 7.7540483474731445, | |
| "learning_rate": 7.409907935116177e-07, | |
| "loss": 11.7175, | |
| "step": 106600 | |
| }, | |
| { | |
| "epoch": 46.349470252648736, | |
| "grad_norm": 5.142116546630859, | |
| "learning_rate": 7.366067514248137e-07, | |
| "loss": 11.7324, | |
| "step": 106650 | |
| }, | |
| { | |
| "epoch": 46.371203477315944, | |
| "grad_norm": 3.158510446548462, | |
| "learning_rate": 7.322227093380096e-07, | |
| "loss": 11.7292, | |
| "step": 106700 | |
| }, | |
| { | |
| "epoch": 46.39293670198316, | |
| "grad_norm": 3.982985258102417, | |
| "learning_rate": 7.278386672512057e-07, | |
| "loss": 11.7214, | |
| "step": 106750 | |
| }, | |
| { | |
| "epoch": 46.41466992665037, | |
| "grad_norm": 3.4331562519073486, | |
| "learning_rate": 7.234546251644016e-07, | |
| "loss": 11.7263, | |
| "step": 106800 | |
| }, | |
| { | |
| "epoch": 46.43640315131758, | |
| "grad_norm": 5.8017401695251465, | |
| "learning_rate": 7.190705830775977e-07, | |
| "loss": 11.7349, | |
| "step": 106850 | |
| }, | |
| { | |
| "epoch": 46.458136375984786, | |
| "grad_norm": 4.63163948059082, | |
| "learning_rate": 7.146865409907935e-07, | |
| "loss": 11.7241, | |
| "step": 106900 | |
| }, | |
| { | |
| "epoch": 46.479869600651995, | |
| "grad_norm": 4.267096996307373, | |
| "learning_rate": 7.103024989039896e-07, | |
| "loss": 11.7189, | |
| "step": 106950 | |
| }, | |
| { | |
| "epoch": 46.5016028253192, | |
| "grad_norm": 4.0522871017456055, | |
| "learning_rate": 7.059184568171855e-07, | |
| "loss": 11.7209, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 46.52333604998642, | |
| "grad_norm": 5.3363142013549805, | |
| "learning_rate": 7.015344147303816e-07, | |
| "loss": 11.7084, | |
| "step": 107050 | |
| }, | |
| { | |
| "epoch": 46.54506927465363, | |
| "grad_norm": 4.059858322143555, | |
| "learning_rate": 6.971503726435775e-07, | |
| "loss": 11.7125, | |
| "step": 107100 | |
| }, | |
| { | |
| "epoch": 46.566802499320836, | |
| "grad_norm": 3.252812623977661, | |
| "learning_rate": 6.927663305567733e-07, | |
| "loss": 11.7268, | |
| "step": 107150 | |
| }, | |
| { | |
| "epoch": 46.588535723988045, | |
| "grad_norm": 4.82172966003418, | |
| "learning_rate": 6.883822884699694e-07, | |
| "loss": 11.7139, | |
| "step": 107200 | |
| }, | |
| { | |
| "epoch": 46.61026894865525, | |
| "grad_norm": 8.201459884643555, | |
| "learning_rate": 6.839982463831653e-07, | |
| "loss": 11.7298, | |
| "step": 107250 | |
| }, | |
| { | |
| "epoch": 46.63200217332247, | |
| "grad_norm": 3.159785747528076, | |
| "learning_rate": 6.796142042963614e-07, | |
| "loss": 11.7209, | |
| "step": 107300 | |
| }, | |
| { | |
| "epoch": 46.65373539798968, | |
| "grad_norm": 11.2830171585083, | |
| "learning_rate": 6.752301622095573e-07, | |
| "loss": 11.7288, | |
| "step": 107350 | |
| }, | |
| { | |
| "epoch": 46.67546862265689, | |
| "grad_norm": 4.074632167816162, | |
| "learning_rate": 6.708461201227533e-07, | |
| "loss": 11.7174, | |
| "step": 107400 | |
| }, | |
| { | |
| "epoch": 46.697201847324095, | |
| "grad_norm": 12.465502738952637, | |
| "learning_rate": 6.664620780359492e-07, | |
| "loss": 11.7186, | |
| "step": 107450 | |
| }, | |
| { | |
| "epoch": 46.718935071991304, | |
| "grad_norm": 4.864065647125244, | |
| "learning_rate": 6.620780359491452e-07, | |
| "loss": 11.7249, | |
| "step": 107500 | |
| }, | |
| { | |
| "epoch": 46.74066829665852, | |
| "grad_norm": 7.3475341796875, | |
| "learning_rate": 6.576939938623411e-07, | |
| "loss": 11.7212, | |
| "step": 107550 | |
| }, | |
| { | |
| "epoch": 46.76240152132573, | |
| "grad_norm": 6.0634565353393555, | |
| "learning_rate": 6.533099517755372e-07, | |
| "loss": 11.7342, | |
| "step": 107600 | |
| }, | |
| { | |
| "epoch": 46.78413474599294, | |
| "grad_norm": 6.251104831695557, | |
| "learning_rate": 6.489259096887331e-07, | |
| "loss": 11.7225, | |
| "step": 107650 | |
| }, | |
| { | |
| "epoch": 46.805867970660145, | |
| "grad_norm": 5.822422027587891, | |
| "learning_rate": 6.44541867601929e-07, | |
| "loss": 11.7272, | |
| "step": 107700 | |
| }, | |
| { | |
| "epoch": 46.827601195327354, | |
| "grad_norm": 8.700297355651855, | |
| "learning_rate": 6.40157825515125e-07, | |
| "loss": 11.7297, | |
| "step": 107750 | |
| }, | |
| { | |
| "epoch": 46.84933441999457, | |
| "grad_norm": 5.136385917663574, | |
| "learning_rate": 6.357737834283209e-07, | |
| "loss": 11.726, | |
| "step": 107800 | |
| }, | |
| { | |
| "epoch": 46.87106764466178, | |
| "grad_norm": 5.658658981323242, | |
| "learning_rate": 6.31389741341517e-07, | |
| "loss": 11.7237, | |
| "step": 107850 | |
| }, | |
| { | |
| "epoch": 46.89280086932899, | |
| "grad_norm": 6.1630353927612305, | |
| "learning_rate": 6.270056992547129e-07, | |
| "loss": 11.728, | |
| "step": 107900 | |
| }, | |
| { | |
| "epoch": 46.914534093996195, | |
| "grad_norm": 11.851746559143066, | |
| "learning_rate": 6.226216571679088e-07, | |
| "loss": 11.7218, | |
| "step": 107950 | |
| }, | |
| { | |
| "epoch": 46.936267318663404, | |
| "grad_norm": 3.989478588104248, | |
| "learning_rate": 6.182376150811048e-07, | |
| "loss": 11.7246, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 46.95800054333062, | |
| "grad_norm": 10.78637981414795, | |
| "learning_rate": 6.138535729943007e-07, | |
| "loss": 11.7238, | |
| "step": 108050 | |
| }, | |
| { | |
| "epoch": 46.97973376799783, | |
| "grad_norm": 11.566459655761719, | |
| "learning_rate": 6.094695309074968e-07, | |
| "loss": 11.7193, | |
| "step": 108100 | |
| }, | |
| { | |
| "epoch": 47.001303993480036, | |
| "grad_norm": 9.378003120422363, | |
| "learning_rate": 6.050854888206927e-07, | |
| "loss": 11.6363, | |
| "step": 108150 | |
| }, | |
| { | |
| "epoch": 47.023037218147245, | |
| "grad_norm": 5.3512091636657715, | |
| "learning_rate": 6.007014467338887e-07, | |
| "loss": 11.7135, | |
| "step": 108200 | |
| }, | |
| { | |
| "epoch": 47.04477044281445, | |
| "grad_norm": 10.854682922363281, | |
| "learning_rate": 5.963174046470846e-07, | |
| "loss": 11.7218, | |
| "step": 108250 | |
| }, | |
| { | |
| "epoch": 47.06650366748166, | |
| "grad_norm": 3.557173728942871, | |
| "learning_rate": 5.919333625602806e-07, | |
| "loss": 11.7216, | |
| "step": 108300 | |
| }, | |
| { | |
| "epoch": 47.08823689214887, | |
| "grad_norm": 4.374483585357666, | |
| "learning_rate": 5.875493204734767e-07, | |
| "loss": 11.722, | |
| "step": 108350 | |
| }, | |
| { | |
| "epoch": 47.10997011681608, | |
| "grad_norm": 4.371666431427002, | |
| "learning_rate": 5.831652783866726e-07, | |
| "loss": 11.7127, | |
| "step": 108400 | |
| }, | |
| { | |
| "epoch": 47.131703341483295, | |
| "grad_norm": 6.458693504333496, | |
| "learning_rate": 5.787812362998686e-07, | |
| "loss": 11.7278, | |
| "step": 108450 | |
| }, | |
| { | |
| "epoch": 47.1534365661505, | |
| "grad_norm": 20.349096298217773, | |
| "learning_rate": 5.743971942130644e-07, | |
| "loss": 11.7243, | |
| "step": 108500 | |
| }, | |
| { | |
| "epoch": 47.17516979081771, | |
| "grad_norm": 24.32076072692871, | |
| "learning_rate": 5.700131521262604e-07, | |
| "loss": 11.7229, | |
| "step": 108550 | |
| }, | |
| { | |
| "epoch": 47.19690301548492, | |
| "grad_norm": 3.82700252532959, | |
| "learning_rate": 5.656291100394565e-07, | |
| "loss": 11.7084, | |
| "step": 108600 | |
| }, | |
| { | |
| "epoch": 47.21863624015213, | |
| "grad_norm": 3.007939338684082, | |
| "learning_rate": 5.612450679526524e-07, | |
| "loss": 11.7192, | |
| "step": 108650 | |
| }, | |
| { | |
| "epoch": 47.240369464819345, | |
| "grad_norm": 4.3855299949646, | |
| "learning_rate": 5.568610258658484e-07, | |
| "loss": 11.7216, | |
| "step": 108700 | |
| }, | |
| { | |
| "epoch": 47.262102689486554, | |
| "grad_norm": 3.7610890865325928, | |
| "learning_rate": 5.524769837790443e-07, | |
| "loss": 11.7107, | |
| "step": 108750 | |
| }, | |
| { | |
| "epoch": 47.28383591415376, | |
| "grad_norm": 5.21887731552124, | |
| "learning_rate": 5.480929416922403e-07, | |
| "loss": 11.7165, | |
| "step": 108800 | |
| }, | |
| { | |
| "epoch": 47.30556913882097, | |
| "grad_norm": 4.387558460235596, | |
| "learning_rate": 5.437088996054362e-07, | |
| "loss": 11.7145, | |
| "step": 108850 | |
| }, | |
| { | |
| "epoch": 47.32730236348818, | |
| "grad_norm": 2.78105092048645, | |
| "learning_rate": 5.393248575186323e-07, | |
| "loss": 11.7132, | |
| "step": 108900 | |
| }, | |
| { | |
| "epoch": 47.349035588155395, | |
| "grad_norm": 6.782215595245361, | |
| "learning_rate": 5.349408154318282e-07, | |
| "loss": 11.7313, | |
| "step": 108950 | |
| }, | |
| { | |
| "epoch": 47.370768812822604, | |
| "grad_norm": 4.510542392730713, | |
| "learning_rate": 5.305567733450242e-07, | |
| "loss": 11.7191, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 47.39250203748981, | |
| "grad_norm": 5.735984802246094, | |
| "learning_rate": 5.261727312582201e-07, | |
| "loss": 11.7178, | |
| "step": 109050 | |
| }, | |
| { | |
| "epoch": 47.41423526215702, | |
| "grad_norm": 9.908329963684082, | |
| "learning_rate": 5.21788689171416e-07, | |
| "loss": 11.7219, | |
| "step": 109100 | |
| }, | |
| { | |
| "epoch": 47.43596848682423, | |
| "grad_norm": 5.137279987335205, | |
| "learning_rate": 5.174046470846121e-07, | |
| "loss": 11.7231, | |
| "step": 109150 | |
| }, | |
| { | |
| "epoch": 47.457701711491445, | |
| "grad_norm": 15.530988693237305, | |
| "learning_rate": 5.13020604997808e-07, | |
| "loss": 11.7219, | |
| "step": 109200 | |
| }, | |
| { | |
| "epoch": 47.479434936158654, | |
| "grad_norm": 5.565670490264893, | |
| "learning_rate": 5.08636562911004e-07, | |
| "loss": 11.7231, | |
| "step": 109250 | |
| }, | |
| { | |
| "epoch": 47.50116816082586, | |
| "grad_norm": 3.390558958053589, | |
| "learning_rate": 5.042525208241999e-07, | |
| "loss": 11.7143, | |
| "step": 109300 | |
| }, | |
| { | |
| "epoch": 47.52290138549307, | |
| "grad_norm": 3.168869972229004, | |
| "learning_rate": 4.998684787373959e-07, | |
| "loss": 11.7107, | |
| "step": 109350 | |
| }, | |
| { | |
| "epoch": 47.54463461016028, | |
| "grad_norm": 4.391485691070557, | |
| "learning_rate": 4.954844366505919e-07, | |
| "loss": 11.7171, | |
| "step": 109400 | |
| }, | |
| { | |
| "epoch": 47.566367834827496, | |
| "grad_norm": 10.428187370300293, | |
| "learning_rate": 4.911003945637879e-07, | |
| "loss": 11.7214, | |
| "step": 109450 | |
| }, | |
| { | |
| "epoch": 47.588101059494704, | |
| "grad_norm": 8.480759620666504, | |
| "learning_rate": 4.867163524769838e-07, | |
| "loss": 11.7284, | |
| "step": 109500 | |
| }, | |
| { | |
| "epoch": 47.60983428416191, | |
| "grad_norm": 8.282448768615723, | |
| "learning_rate": 4.823323103901798e-07, | |
| "loss": 11.723, | |
| "step": 109550 | |
| }, | |
| { | |
| "epoch": 47.63156750882912, | |
| "grad_norm": 3.495969295501709, | |
| "learning_rate": 4.779482683033757e-07, | |
| "loss": 11.7213, | |
| "step": 109600 | |
| }, | |
| { | |
| "epoch": 47.65330073349633, | |
| "grad_norm": 4.484890937805176, | |
| "learning_rate": 4.735642262165717e-07, | |
| "loss": 11.7149, | |
| "step": 109650 | |
| }, | |
| { | |
| "epoch": 47.675033958163546, | |
| "grad_norm": 11.390790939331055, | |
| "learning_rate": 4.6918018412976767e-07, | |
| "loss": 11.7138, | |
| "step": 109700 | |
| }, | |
| { | |
| "epoch": 47.696767182830754, | |
| "grad_norm": 3.9627888202667236, | |
| "learning_rate": 4.6479614204296364e-07, | |
| "loss": 11.722, | |
| "step": 109750 | |
| }, | |
| { | |
| "epoch": 47.71850040749796, | |
| "grad_norm": 5.796283721923828, | |
| "learning_rate": 4.604120999561596e-07, | |
| "loss": 11.7182, | |
| "step": 109800 | |
| }, | |
| { | |
| "epoch": 47.74023363216517, | |
| "grad_norm": 8.347150802612305, | |
| "learning_rate": 4.560280578693556e-07, | |
| "loss": 11.7257, | |
| "step": 109850 | |
| }, | |
| { | |
| "epoch": 47.76196685683238, | |
| "grad_norm": 18.176475524902344, | |
| "learning_rate": 4.5164401578255155e-07, | |
| "loss": 11.7289, | |
| "step": 109900 | |
| }, | |
| { | |
| "epoch": 47.78370008149959, | |
| "grad_norm": 4.672854900360107, | |
| "learning_rate": 4.472599736957475e-07, | |
| "loss": 11.7134, | |
| "step": 109950 | |
| }, | |
| { | |
| "epoch": 47.805433306166805, | |
| "grad_norm": 4.16023588180542, | |
| "learning_rate": 4.428759316089435e-07, | |
| "loss": 11.7233, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 47.805433306166805, | |
| "eval_cer": 0.07368261883895177, | |
| "eval_loss": 2.426945209503174, | |
| "eval_runtime": 398.6555, | |
| "eval_samples_per_second": 13.561, | |
| "eval_steps_per_second": 3.391, | |
| "eval_wer": 0.224462238970011, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 47.82716653083401, | |
| "grad_norm": 6.881287097930908, | |
| "learning_rate": 4.3849188952213946e-07, | |
| "loss": 11.7274, | |
| "step": 110050 | |
| }, | |
| { | |
| "epoch": 47.84889975550122, | |
| "grad_norm": 22.432987213134766, | |
| "learning_rate": 4.3410784743533543e-07, | |
| "loss": 11.7251, | |
| "step": 110100 | |
| }, | |
| { | |
| "epoch": 47.87063298016843, | |
| "grad_norm": 5.372289180755615, | |
| "learning_rate": 4.2972380534853134e-07, | |
| "loss": 11.7208, | |
| "step": 110150 | |
| }, | |
| { | |
| "epoch": 47.89236620483564, | |
| "grad_norm": 7.969860553741455, | |
| "learning_rate": 4.253397632617273e-07, | |
| "loss": 11.714, | |
| "step": 110200 | |
| }, | |
| { | |
| "epoch": 47.914099429502855, | |
| "grad_norm": 4.329230308532715, | |
| "learning_rate": 4.209557211749233e-07, | |
| "loss": 11.7203, | |
| "step": 110250 | |
| }, | |
| { | |
| "epoch": 47.93583265417006, | |
| "grad_norm": 5.077062606811523, | |
| "learning_rate": 4.1657167908811925e-07, | |
| "loss": 11.7147, | |
| "step": 110300 | |
| }, | |
| { | |
| "epoch": 47.95756587883727, | |
| "grad_norm": 10.139983177185059, | |
| "learning_rate": 4.121876370013152e-07, | |
| "loss": 11.7224, | |
| "step": 110350 | |
| }, | |
| { | |
| "epoch": 47.97929910350448, | |
| "grad_norm": 7.735840320587158, | |
| "learning_rate": 4.078035949145112e-07, | |
| "loss": 11.7244, | |
| "step": 110400 | |
| }, | |
| { | |
| "epoch": 48.00086932898669, | |
| "grad_norm": 3.7973625659942627, | |
| "learning_rate": 4.0341955282770716e-07, | |
| "loss": 11.6379, | |
| "step": 110450 | |
| }, | |
| { | |
| "epoch": 48.0226025536539, | |
| "grad_norm": 30.568002700805664, | |
| "learning_rate": 3.990355107409032e-07, | |
| "loss": 11.7251, | |
| "step": 110500 | |
| }, | |
| { | |
| "epoch": 48.044335778321106, | |
| "grad_norm": 4.356334209442139, | |
| "learning_rate": 3.9465146865409915e-07, | |
| "loss": 11.7165, | |
| "step": 110550 | |
| }, | |
| { | |
| "epoch": 48.06606900298832, | |
| "grad_norm": 2.87776780128479, | |
| "learning_rate": 3.902674265672951e-07, | |
| "loss": 11.7094, | |
| "step": 110600 | |
| }, | |
| { | |
| "epoch": 48.08780222765553, | |
| "grad_norm": 7.523682117462158, | |
| "learning_rate": 3.858833844804911e-07, | |
| "loss": 11.7341, | |
| "step": 110650 | |
| }, | |
| { | |
| "epoch": 48.10953545232274, | |
| "grad_norm": 4.565247535705566, | |
| "learning_rate": 3.8149934239368695e-07, | |
| "loss": 11.7107, | |
| "step": 110700 | |
| }, | |
| { | |
| "epoch": 48.13126867698995, | |
| "grad_norm": 8.14168643951416, | |
| "learning_rate": 3.77115300306883e-07, | |
| "loss": 11.708, | |
| "step": 110750 | |
| }, | |
| { | |
| "epoch": 48.153001901657156, | |
| "grad_norm": 9.361733436584473, | |
| "learning_rate": 3.7273125822007895e-07, | |
| "loss": 11.7357, | |
| "step": 110800 | |
| }, | |
| { | |
| "epoch": 48.17473512632437, | |
| "grad_norm": 3.413947105407715, | |
| "learning_rate": 3.683472161332749e-07, | |
| "loss": 11.7106, | |
| "step": 110850 | |
| }, | |
| { | |
| "epoch": 48.19646835099158, | |
| "grad_norm": 3.5155258178710938, | |
| "learning_rate": 3.639631740464709e-07, | |
| "loss": 11.7122, | |
| "step": 110900 | |
| }, | |
| { | |
| "epoch": 48.21820157565879, | |
| "grad_norm": 3.1602470874786377, | |
| "learning_rate": 3.5957913195966685e-07, | |
| "loss": 11.7176, | |
| "step": 110950 | |
| }, | |
| { | |
| "epoch": 48.239934800326, | |
| "grad_norm": 9.386332511901855, | |
| "learning_rate": 3.551950898728628e-07, | |
| "loss": 11.724, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 48.261668024993206, | |
| "grad_norm": 4.479788780212402, | |
| "learning_rate": 3.508110477860588e-07, | |
| "loss": 11.7121, | |
| "step": 111050 | |
| }, | |
| { | |
| "epoch": 48.28340124966042, | |
| "grad_norm": 5.899557590484619, | |
| "learning_rate": 3.4642700569925476e-07, | |
| "loss": 11.7212, | |
| "step": 111100 | |
| }, | |
| { | |
| "epoch": 48.30513447432763, | |
| "grad_norm": 11.247237205505371, | |
| "learning_rate": 3.4204296361245073e-07, | |
| "loss": 11.7196, | |
| "step": 111150 | |
| }, | |
| { | |
| "epoch": 48.32686769899484, | |
| "grad_norm": 5.190598487854004, | |
| "learning_rate": 3.376589215256467e-07, | |
| "loss": 11.7078, | |
| "step": 111200 | |
| }, | |
| { | |
| "epoch": 48.34860092366205, | |
| "grad_norm": 4.109508037567139, | |
| "learning_rate": 3.332748794388426e-07, | |
| "loss": 11.7155, | |
| "step": 111250 | |
| }, | |
| { | |
| "epoch": 48.370334148329256, | |
| "grad_norm": 3.0162370204925537, | |
| "learning_rate": 3.288908373520386e-07, | |
| "loss": 11.7168, | |
| "step": 111300 | |
| }, | |
| { | |
| "epoch": 48.39206737299647, | |
| "grad_norm": 11.546656608581543, | |
| "learning_rate": 3.2450679526523456e-07, | |
| "loss": 11.7252, | |
| "step": 111350 | |
| }, | |
| { | |
| "epoch": 48.41380059766368, | |
| "grad_norm": 3.388889789581299, | |
| "learning_rate": 3.201227531784305e-07, | |
| "loss": 11.7268, | |
| "step": 111400 | |
| }, | |
| { | |
| "epoch": 48.43553382233089, | |
| "grad_norm": 6.033073902130127, | |
| "learning_rate": 3.157387110916265e-07, | |
| "loss": 11.7127, | |
| "step": 111450 | |
| }, | |
| { | |
| "epoch": 48.4572670469981, | |
| "grad_norm": 4.543982028961182, | |
| "learning_rate": 3.1135466900482246e-07, | |
| "loss": 11.7272, | |
| "step": 111500 | |
| }, | |
| { | |
| "epoch": 48.479000271665306, | |
| "grad_norm": 9.906218528747559, | |
| "learning_rate": 3.0697062691801843e-07, | |
| "loss": 11.7268, | |
| "step": 111550 | |
| }, | |
| { | |
| "epoch": 48.500733496332515, | |
| "grad_norm": 7.095948696136475, | |
| "learning_rate": 3.025865848312144e-07, | |
| "loss": 11.712, | |
| "step": 111600 | |
| }, | |
| { | |
| "epoch": 48.52246672099973, | |
| "grad_norm": 9.97701644897461, | |
| "learning_rate": 2.9820254274441037e-07, | |
| "loss": 11.722, | |
| "step": 111650 | |
| }, | |
| { | |
| "epoch": 48.54419994566694, | |
| "grad_norm": 4.398223876953125, | |
| "learning_rate": 2.9381850065760634e-07, | |
| "loss": 11.7101, | |
| "step": 111700 | |
| }, | |
| { | |
| "epoch": 48.56593317033415, | |
| "grad_norm": 3.77424693107605, | |
| "learning_rate": 2.894344585708023e-07, | |
| "loss": 11.7272, | |
| "step": 111750 | |
| }, | |
| { | |
| "epoch": 48.58766639500136, | |
| "grad_norm": 17.39592933654785, | |
| "learning_rate": 2.850504164839983e-07, | |
| "loss": 11.7154, | |
| "step": 111800 | |
| }, | |
| { | |
| "epoch": 48.609399619668565, | |
| "grad_norm": 3.8219528198242188, | |
| "learning_rate": 2.8066637439719425e-07, | |
| "loss": 11.7156, | |
| "step": 111850 | |
| }, | |
| { | |
| "epoch": 48.63113284433578, | |
| "grad_norm": 9.067111015319824, | |
| "learning_rate": 2.7628233231039017e-07, | |
| "loss": 11.7133, | |
| "step": 111900 | |
| }, | |
| { | |
| "epoch": 48.65286606900299, | |
| "grad_norm": 15.224953651428223, | |
| "learning_rate": 2.7189829022358614e-07, | |
| "loss": 11.7166, | |
| "step": 111950 | |
| }, | |
| { | |
| "epoch": 48.6745992936702, | |
| "grad_norm": 4.944436073303223, | |
| "learning_rate": 2.675142481367821e-07, | |
| "loss": 11.7152, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 48.69633251833741, | |
| "grad_norm": 11.312178611755371, | |
| "learning_rate": 2.6313020604997813e-07, | |
| "loss": 11.7157, | |
| "step": 112050 | |
| }, | |
| { | |
| "epoch": 48.718065743004615, | |
| "grad_norm": 5.6469011306762695, | |
| "learning_rate": 2.587461639631741e-07, | |
| "loss": 11.7188, | |
| "step": 112100 | |
| }, | |
| { | |
| "epoch": 48.73979896767183, | |
| "grad_norm": 3.34533429145813, | |
| "learning_rate": 2.5436212187637007e-07, | |
| "loss": 11.7275, | |
| "step": 112150 | |
| }, | |
| { | |
| "epoch": 48.76153219233904, | |
| "grad_norm": 9.967689514160156, | |
| "learning_rate": 2.49978079789566e-07, | |
| "loss": 11.717, | |
| "step": 112200 | |
| }, | |
| { | |
| "epoch": 48.78326541700625, | |
| "grad_norm": 5.482551574707031, | |
| "learning_rate": 2.4559403770276195e-07, | |
| "loss": 11.7111, | |
| "step": 112250 | |
| }, | |
| { | |
| "epoch": 48.80499864167346, | |
| "grad_norm": 4.191429615020752, | |
| "learning_rate": 2.412099956159579e-07, | |
| "loss": 11.7112, | |
| "step": 112300 | |
| }, | |
| { | |
| "epoch": 48.826731866340666, | |
| "grad_norm": 4.112410068511963, | |
| "learning_rate": 2.3682595352915392e-07, | |
| "loss": 11.7252, | |
| "step": 112350 | |
| }, | |
| { | |
| "epoch": 48.84846509100788, | |
| "grad_norm": 4.255959510803223, | |
| "learning_rate": 2.3244191144234989e-07, | |
| "loss": 11.7149, | |
| "step": 112400 | |
| }, | |
| { | |
| "epoch": 48.87019831567509, | |
| "grad_norm": 8.20151424407959, | |
| "learning_rate": 2.2805786935554583e-07, | |
| "loss": 11.7073, | |
| "step": 112450 | |
| }, | |
| { | |
| "epoch": 48.8919315403423, | |
| "grad_norm": 4.13128137588501, | |
| "learning_rate": 2.236738272687418e-07, | |
| "loss": 11.72, | |
| "step": 112500 | |
| }, | |
| { | |
| "epoch": 48.91366476500951, | |
| "grad_norm": 6.540150165557861, | |
| "learning_rate": 2.1928978518193777e-07, | |
| "loss": 11.707, | |
| "step": 112550 | |
| }, | |
| { | |
| "epoch": 48.935397989676716, | |
| "grad_norm": 10.835039138793945, | |
| "learning_rate": 2.1490574309513374e-07, | |
| "loss": 11.7185, | |
| "step": 112600 | |
| }, | |
| { | |
| "epoch": 48.95713121434393, | |
| "grad_norm": 11.767996788024902, | |
| "learning_rate": 2.105217010083297e-07, | |
| "loss": 11.7273, | |
| "step": 112650 | |
| }, | |
| { | |
| "epoch": 48.97886443901114, | |
| "grad_norm": 7.164200305938721, | |
| "learning_rate": 2.0613765892152568e-07, | |
| "loss": 11.7255, | |
| "step": 112700 | |
| }, | |
| { | |
| "epoch": 49.00043466449335, | |
| "grad_norm": 3.889307737350464, | |
| "learning_rate": 2.0175361683472162e-07, | |
| "loss": 11.6305, | |
| "step": 112750 | |
| }, | |
| { | |
| "epoch": 49.022167889160556, | |
| "grad_norm": 3.3905019760131836, | |
| "learning_rate": 1.973695747479176e-07, | |
| "loss": 11.7146, | |
| "step": 112800 | |
| }, | |
| { | |
| "epoch": 49.043901113827765, | |
| "grad_norm": 10.843219757080078, | |
| "learning_rate": 1.9298553266111356e-07, | |
| "loss": 11.7074, | |
| "step": 112850 | |
| }, | |
| { | |
| "epoch": 49.065634338494974, | |
| "grad_norm": 6.1026082038879395, | |
| "learning_rate": 1.8860149057430953e-07, | |
| "loss": 11.7187, | |
| "step": 112900 | |
| }, | |
| { | |
| "epoch": 49.08736756316218, | |
| "grad_norm": 12.958758354187012, | |
| "learning_rate": 1.842174484875055e-07, | |
| "loss": 11.7198, | |
| "step": 112950 | |
| }, | |
| { | |
| "epoch": 49.10910078782939, | |
| "grad_norm": 7.045960426330566, | |
| "learning_rate": 1.7983340640070144e-07, | |
| "loss": 11.7171, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 49.13083401249661, | |
| "grad_norm": 8.215546607971191, | |
| "learning_rate": 1.754493643138974e-07, | |
| "loss": 11.7161, | |
| "step": 113050 | |
| }, | |
| { | |
| "epoch": 49.152567237163815, | |
| "grad_norm": 3.4392971992492676, | |
| "learning_rate": 1.710653222270934e-07, | |
| "loss": 11.7033, | |
| "step": 113100 | |
| }, | |
| { | |
| "epoch": 49.174300461831024, | |
| "grad_norm": 4.333184242248535, | |
| "learning_rate": 1.6668128014028937e-07, | |
| "loss": 11.7221, | |
| "step": 113150 | |
| }, | |
| { | |
| "epoch": 49.19603368649823, | |
| "grad_norm": 8.148516654968262, | |
| "learning_rate": 1.6229723805348534e-07, | |
| "loss": 11.7099, | |
| "step": 113200 | |
| }, | |
| { | |
| "epoch": 49.21776691116544, | |
| "grad_norm": 10.723722457885742, | |
| "learning_rate": 1.579131959666813e-07, | |
| "loss": 11.7253, | |
| "step": 113250 | |
| }, | |
| { | |
| "epoch": 49.23950013583266, | |
| "grad_norm": 5.778897285461426, | |
| "learning_rate": 1.5352915387987726e-07, | |
| "loss": 11.7058, | |
| "step": 113300 | |
| }, | |
| { | |
| "epoch": 49.261233360499865, | |
| "grad_norm": 5.398443698883057, | |
| "learning_rate": 1.4914511179307322e-07, | |
| "loss": 11.7219, | |
| "step": 113350 | |
| }, | |
| { | |
| "epoch": 49.282966585167074, | |
| "grad_norm": 3.614530324935913, | |
| "learning_rate": 1.447610697062692e-07, | |
| "loss": 11.7105, | |
| "step": 113400 | |
| }, | |
| { | |
| "epoch": 49.30469980983428, | |
| "grad_norm": 4.205718040466309, | |
| "learning_rate": 1.4037702761946516e-07, | |
| "loss": 11.7128, | |
| "step": 113450 | |
| }, | |
| { | |
| "epoch": 49.32643303450149, | |
| "grad_norm": 5.203486442565918, | |
| "learning_rate": 1.3599298553266113e-07, | |
| "loss": 11.7145, | |
| "step": 113500 | |
| }, | |
| { | |
| "epoch": 49.34816625916871, | |
| "grad_norm": 3.4985852241516113, | |
| "learning_rate": 1.316089434458571e-07, | |
| "loss": 11.7212, | |
| "step": 113550 | |
| }, | |
| { | |
| "epoch": 49.369899483835916, | |
| "grad_norm": 9.43883991241455, | |
| "learning_rate": 1.2722490135905305e-07, | |
| "loss": 11.7124, | |
| "step": 113600 | |
| }, | |
| { | |
| "epoch": 49.391632708503124, | |
| "grad_norm": 7.489180088043213, | |
| "learning_rate": 1.2284085927224901e-07, | |
| "loss": 11.7207, | |
| "step": 113650 | |
| }, | |
| { | |
| "epoch": 49.41336593317033, | |
| "grad_norm": 9.499123573303223, | |
| "learning_rate": 1.18456817185445e-07, | |
| "loss": 11.7147, | |
| "step": 113700 | |
| }, | |
| { | |
| "epoch": 49.43509915783754, | |
| "grad_norm": 7.789849758148193, | |
| "learning_rate": 1.1407277509864095e-07, | |
| "loss": 11.7171, | |
| "step": 113750 | |
| }, | |
| { | |
| "epoch": 49.45683238250476, | |
| "grad_norm": 9.22687816619873, | |
| "learning_rate": 1.0968873301183692e-07, | |
| "loss": 11.7071, | |
| "step": 113800 | |
| }, | |
| { | |
| "epoch": 49.478565607171966, | |
| "grad_norm": 5.999274253845215, | |
| "learning_rate": 1.0530469092503289e-07, | |
| "loss": 11.7137, | |
| "step": 113850 | |
| }, | |
| { | |
| "epoch": 49.500298831839174, | |
| "grad_norm": 9.73884391784668, | |
| "learning_rate": 1.0092064883822885e-07, | |
| "loss": 11.7201, | |
| "step": 113900 | |
| }, | |
| { | |
| "epoch": 49.52203205650638, | |
| "grad_norm": 9.630657196044922, | |
| "learning_rate": 9.653660675142482e-08, | |
| "loss": 11.7212, | |
| "step": 113950 | |
| }, | |
| { | |
| "epoch": 49.54376528117359, | |
| "grad_norm": 4.612308979034424, | |
| "learning_rate": 9.21525646646208e-08, | |
| "loss": 11.7247, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 49.56549850584081, | |
| "grad_norm": 2.9876091480255127, | |
| "learning_rate": 8.776852257781676e-08, | |
| "loss": 11.7146, | |
| "step": 114050 | |
| }, | |
| { | |
| "epoch": 49.587231730508016, | |
| "grad_norm": 4.555498123168945, | |
| "learning_rate": 8.338448049101273e-08, | |
| "loss": 11.7073, | |
| "step": 114100 | |
| }, | |
| { | |
| "epoch": 49.608964955175225, | |
| "grad_norm": 5.015764236450195, | |
| "learning_rate": 7.900043840420868e-08, | |
| "loss": 11.7119, | |
| "step": 114150 | |
| }, | |
| { | |
| "epoch": 49.63069817984243, | |
| "grad_norm": 5.208141326904297, | |
| "learning_rate": 7.461639631740465e-08, | |
| "loss": 11.7178, | |
| "step": 114200 | |
| }, | |
| { | |
| "epoch": 49.65243140450964, | |
| "grad_norm": 5.420982837677002, | |
| "learning_rate": 7.023235423060062e-08, | |
| "loss": 11.7187, | |
| "step": 114250 | |
| }, | |
| { | |
| "epoch": 49.67416462917686, | |
| "grad_norm": 6.694780349731445, | |
| "learning_rate": 6.584831214379659e-08, | |
| "loss": 11.7102, | |
| "step": 114300 | |
| }, | |
| { | |
| "epoch": 49.695897853844066, | |
| "grad_norm": 4.203577995300293, | |
| "learning_rate": 6.146427005699255e-08, | |
| "loss": 11.7142, | |
| "step": 114350 | |
| }, | |
| { | |
| "epoch": 49.717631078511275, | |
| "grad_norm": 3.1716277599334717, | |
| "learning_rate": 5.7080227970188515e-08, | |
| "loss": 11.7139, | |
| "step": 114400 | |
| }, | |
| { | |
| "epoch": 49.73936430317848, | |
| "grad_norm": 3.897326946258545, | |
| "learning_rate": 5.269618588338448e-08, | |
| "loss": 11.712, | |
| "step": 114450 | |
| }, | |
| { | |
| "epoch": 49.76109752784569, | |
| "grad_norm": 13.347712516784668, | |
| "learning_rate": 4.8312143796580454e-08, | |
| "loss": 11.7155, | |
| "step": 114500 | |
| }, | |
| { | |
| "epoch": 49.78283075251291, | |
| "grad_norm": 6.420513153076172, | |
| "learning_rate": 4.392810170977642e-08, | |
| "loss": 11.7286, | |
| "step": 114550 | |
| }, | |
| { | |
| "epoch": 49.804563977180116, | |
| "grad_norm": 5.6966447830200195, | |
| "learning_rate": 3.954405962297238e-08, | |
| "loss": 11.7078, | |
| "step": 114600 | |
| }, | |
| { | |
| "epoch": 49.826297201847325, | |
| "grad_norm": 5.481497287750244, | |
| "learning_rate": 3.516001753616835e-08, | |
| "loss": 11.7061, | |
| "step": 114650 | |
| }, | |
| { | |
| "epoch": 49.84803042651453, | |
| "grad_norm": 6.5728840827941895, | |
| "learning_rate": 3.077597544936432e-08, | |
| "loss": 11.7121, | |
| "step": 114700 | |
| }, | |
| { | |
| "epoch": 49.86976365118174, | |
| "grad_norm": 7.013606071472168, | |
| "learning_rate": 2.639193336256028e-08, | |
| "loss": 11.7165, | |
| "step": 114750 | |
| }, | |
| { | |
| "epoch": 49.89149687584895, | |
| "grad_norm": 8.17546272277832, | |
| "learning_rate": 2.200789127575625e-08, | |
| "loss": 11.7226, | |
| "step": 114800 | |
| }, | |
| { | |
| "epoch": 49.91323010051617, | |
| "grad_norm": 11.53906536102295, | |
| "learning_rate": 1.7623849188952217e-08, | |
| "loss": 11.7159, | |
| "step": 114850 | |
| }, | |
| { | |
| "epoch": 49.934963325183375, | |
| "grad_norm": 3.259451389312744, | |
| "learning_rate": 1.3239807102148183e-08, | |
| "loss": 11.7256, | |
| "step": 114900 | |
| }, | |
| { | |
| "epoch": 49.956696549850584, | |
| "grad_norm": 9.84170913696289, | |
| "learning_rate": 8.855765015344147e-09, | |
| "loss": 11.7094, | |
| "step": 114950 | |
| }, | |
| { | |
| "epoch": 49.97842977451779, | |
| "grad_norm": 3.992011070251465, | |
| "learning_rate": 4.471722928540115e-09, | |
| "loss": 11.7083, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "grad_norm": 7.805567264556885, | |
| "learning_rate": 8.768084173608068e-11, | |
| "loss": 11.6432, | |
| "step": 115050 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 115050, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 50, | |
| "save_steps": 10000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
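
The state above follows the standard Hugging Face `Trainer` layout, so it can be inspected with nothing but the Python standard library. Below is a minimal sketch, not part of the original run: it assumes the JSON has been saved locally as `trainer_state.json` (the filename is an assumption) and uses only field names that appear in the file (`log_history`, `loss`, `step`, `eval_cer`, `eval_wer`, `eval_loss`, `num_train_epochs`, `max_steps`).

```python
import json

# Path is an assumption; point this at wherever the trainer state was saved.
# Python's json module tolerates the non-standard Infinity/NaN tokens that
# sometimes appear in grad_norm fields, which strict JSON parsers reject.
with open("trainer_state.json", "r", encoding="utf-8") as f:
    state = json.load(f)

# log_history mixes two kinds of entries: training logs (every
# logging_steps=50 steps, carrying "loss") and evaluation snapshots
# (carrying "eval_cer"/"eval_wer"). Split them apart by key.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_cer" in e]

print(f"schedule: {state['num_train_epochs']} epochs, {state['max_steps']} steps")
print(f"final training loss: {train_logs[-1]['loss']} "
      f"(step {train_logs[-1]['step']})")

# Print every evaluation snapshot recorded in the file.
for e in eval_logs:
    print(f"step {e['step']:>6}: CER={e['eval_cer']:.4f} "
          f"WER={e['eval_wer']:.4f} eval_loss={e['eval_loss']:.4f}")
```

Run against this file, the loop would print, among others, the step-110000 snapshot (CER ≈ 0.0737, WER ≈ 0.2245), the evaluation that produced the checkpoint saved at that step.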