| { |
| "best_metric": 0.09604515880346298, |
| "best_model_checkpoint": "./timit-english-v2/checkpoint-6500", |
| "epoch": 100.0, |
| "global_step": 6500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.54, |
| "learning_rate": 2.9999999999999997e-05, |
| "loss": 2.2189, |
| "step": 100 |
| }, |
| { |
| "epoch": 3.08, |
| "learning_rate": 5.9999999999999995e-05, |
| "loss": 2.1219, |
| "step": 200 |
| }, |
| { |
| "epoch": 4.62, |
| "learning_rate": 8.999999999999999e-05, |
| "loss": 1.4984, |
| "step": 300 |
| }, |
| { |
| "epoch": 6.15, |
| "learning_rate": 0.00011999999999999999, |
| "loss": 0.6169, |
| "step": 400 |
| }, |
| { |
| "epoch": 7.69, |
| "learning_rate": 0.00015, |
| "loss": 0.3111, |
| "step": 500 |
| }, |
| { |
| "epoch": 7.69, |
| "eval_cer": 0.12291518457038969, |
| "eval_loss": 0.2245599329471588, |
| "eval_runtime": 23.772, |
| "eval_samples_per_second": 19.435, |
| "eval_steps_per_second": 1.22, |
| "step": 500 |
| }, |
| { |
| "epoch": 9.23, |
| "learning_rate": 0.00017999999999999998, |
| "loss": 0.2147, |
| "step": 600 |
| }, |
| { |
| "epoch": 10.77, |
| "learning_rate": 0.00020999999999999998, |
| "loss": 0.1641, |
| "step": 700 |
| }, |
| { |
| "epoch": 12.31, |
| "learning_rate": 0.00023999999999999998, |
| "loss": 0.1424, |
| "step": 800 |
| }, |
| { |
| "epoch": 13.85, |
| "learning_rate": 0.00027, |
| "loss": 0.1333, |
| "step": 900 |
| }, |
| { |
| "epoch": 15.38, |
| "learning_rate": 0.0003, |
| "loss": 0.1211, |
| "step": 1000 |
| }, |
| { |
| "epoch": 15.38, |
| "eval_cer": 0.07975654098490238, |
| "eval_loss": 0.14135180413722992, |
| "eval_runtime": 24.275, |
| "eval_samples_per_second": 19.032, |
| "eval_steps_per_second": 1.195, |
| "step": 1000 |
| }, |
| { |
| "epoch": 16.92, |
| "learning_rate": 0.0002945454545454545, |
| "loss": 0.1159, |
| "step": 1100 |
| }, |
| { |
| "epoch": 18.46, |
| "learning_rate": 0.00028909090909090904, |
| "loss": 0.0959, |
| "step": 1200 |
| }, |
| { |
| "epoch": 20.0, |
| "learning_rate": 0.0002836363636363636, |
| "loss": 0.0881, |
| "step": 1300 |
| }, |
| { |
| "epoch": 21.54, |
| "learning_rate": 0.00027818181818181815, |
| "loss": 0.0768, |
| "step": 1400 |
| }, |
| { |
| "epoch": 23.08, |
| "learning_rate": 0.0002727272727272727, |
| "loss": 0.07, |
| "step": 1500 |
| }, |
| { |
| "epoch": 23.08, |
| "eval_cer": 0.0627618370089321, |
| "eval_loss": 0.11634844541549683, |
| "eval_runtime": 24.2853, |
| "eval_samples_per_second": 19.024, |
| "eval_steps_per_second": 1.194, |
| "step": 1500 |
| }, |
| { |
| "epoch": 24.62, |
| "learning_rate": 0.0002672727272727272, |
| "loss": 0.0613, |
| "step": 1600 |
| }, |
| { |
| "epoch": 26.15, |
| "learning_rate": 0.0002618181818181818, |
| "loss": 0.0558, |
| "step": 1700 |
| }, |
| { |
| "epoch": 27.69, |
| "learning_rate": 0.00025636363636363633, |
| "loss": 0.0556, |
| "step": 1800 |
| }, |
| { |
| "epoch": 29.23, |
| "learning_rate": 0.00025090909090909086, |
| "loss": 0.0491, |
| "step": 1900 |
| }, |
| { |
| "epoch": 30.77, |
| "learning_rate": 0.00024545454545454545, |
| "loss": 0.0516, |
| "step": 2000 |
| }, |
| { |
| "epoch": 30.77, |
| "eval_cer": 0.07082444075567149, |
| "eval_loss": 0.12194966524839401, |
| "eval_runtime": 32.2119, |
| "eval_samples_per_second": 14.343, |
| "eval_steps_per_second": 0.9, |
| "step": 2000 |
| }, |
| { |
| "epoch": 32.31, |
| "learning_rate": 0.00023999999999999998, |
| "loss": 0.0449, |
| "step": 2100 |
| }, |
| { |
| "epoch": 33.85, |
| "learning_rate": 0.00023454545454545454, |
| "loss": 0.0365, |
| "step": 2200 |
| }, |
| { |
| "epoch": 35.38, |
| "learning_rate": 0.00022909090909090907, |
| "loss": 0.0362, |
| "step": 2300 |
| }, |
| { |
| "epoch": 36.92, |
| "learning_rate": 0.00022363636363636363, |
| "loss": 0.0374, |
| "step": 2400 |
| }, |
| { |
| "epoch": 38.46, |
| "learning_rate": 0.00021818181818181816, |
| "loss": 0.0338, |
| "step": 2500 |
| }, |
| { |
| "epoch": 38.46, |
| "eval_cer": 0.06363133349142361, |
| "eval_loss": 0.10956428200006485, |
| "eval_runtime": 32.1273, |
| "eval_samples_per_second": 14.38, |
| "eval_steps_per_second": 0.903, |
| "step": 2500 |
| }, |
| { |
| "epoch": 40.0, |
| "learning_rate": 0.00021272727272727272, |
| "loss": 0.0315, |
| "step": 2600 |
| }, |
| { |
| "epoch": 41.54, |
| "learning_rate": 0.00020727272727272725, |
| "loss": 0.0318, |
| "step": 2700 |
| }, |
| { |
| "epoch": 43.08, |
| "learning_rate": 0.0002018181818181818, |
| "loss": 0.0259, |
| "step": 2800 |
| }, |
| { |
| "epoch": 44.62, |
| "learning_rate": 0.00019636363636363634, |
| "loss": 0.0264, |
| "step": 2900 |
| }, |
| { |
| "epoch": 46.15, |
| "learning_rate": 0.0001909090909090909, |
| "loss": 0.0256, |
| "step": 3000 |
| }, |
| { |
| "epoch": 46.15, |
| "eval_cer": 0.05438305272310489, |
| "eval_loss": 0.11163550615310669, |
| "eval_runtime": 32.5342, |
| "eval_samples_per_second": 14.2, |
| "eval_steps_per_second": 0.891, |
| "step": 3000 |
| }, |
| { |
| "epoch": 47.69, |
| "learning_rate": 0.00018545454545454543, |
| "loss": 0.0255, |
| "step": 3100 |
| }, |
| { |
| "epoch": 49.23, |
| "learning_rate": 0.00017999999999999998, |
| "loss": 0.0221, |
| "step": 3200 |
| }, |
| { |
| "epoch": 50.77, |
| "learning_rate": 0.00017454545454545452, |
| "loss": 0.0242, |
| "step": 3300 |
| }, |
| { |
| "epoch": 52.31, |
| "learning_rate": 0.00016909090909090907, |
| "loss": 0.0196, |
| "step": 3400 |
| }, |
| { |
| "epoch": 53.85, |
| "learning_rate": 0.0001636363636363636, |
| "loss": 0.0226, |
| "step": 3500 |
| }, |
| { |
| "epoch": 53.85, |
| "eval_cer": 0.047664216267488735, |
| "eval_loss": 0.10824603587388992, |
| "eval_runtime": 32.9539, |
| "eval_samples_per_second": 14.02, |
| "eval_steps_per_second": 0.88, |
| "step": 3500 |
| }, |
| { |
| "epoch": 55.38, |
| "learning_rate": 0.00015818181818181816, |
| "loss": 0.0185, |
| "step": 3600 |
| }, |
| { |
| "epoch": 56.92, |
| "learning_rate": 0.0001527272727272727, |
| "loss": 0.0173, |
| "step": 3700 |
| }, |
| { |
| "epoch": 58.46, |
| "learning_rate": 0.00014727272727272725, |
| "loss": 0.0141, |
| "step": 3800 |
| }, |
| { |
| "epoch": 60.0, |
| "learning_rate": 0.0001418181818181818, |
| "loss": 0.0168, |
| "step": 3900 |
| }, |
| { |
| "epoch": 61.54, |
| "learning_rate": 0.00013636363636363634, |
| "loss": 0.016, |
| "step": 4000 |
| }, |
| { |
| "epoch": 61.54, |
| "eval_cer": 0.052644059758121885, |
| "eval_loss": 0.1161409541964531, |
| "eval_runtime": 32.1, |
| "eval_samples_per_second": 14.393, |
| "eval_steps_per_second": 0.903, |
| "step": 4000 |
| }, |
| { |
| "epoch": 63.08, |
| "learning_rate": 0.0001309090909090909, |
| "loss": 0.0154, |
| "step": 4100 |
| }, |
| { |
| "epoch": 64.62, |
| "learning_rate": 0.00012545454545454543, |
| "loss": 0.0138, |
| "step": 4200 |
| }, |
| { |
| "epoch": 66.15, |
| "learning_rate": 0.00011999999999999999, |
| "loss": 0.0123, |
| "step": 4300 |
| }, |
| { |
| "epoch": 67.69, |
| "learning_rate": 0.00011454545454545453, |
| "loss": 0.011, |
| "step": 4400 |
| }, |
| { |
| "epoch": 69.23, |
| "learning_rate": 0.00010909090909090908, |
| "loss": 0.0123, |
| "step": 4500 |
| }, |
| { |
| "epoch": 69.23, |
| "eval_cer": 0.05873053513556241, |
| "eval_loss": 0.11413775384426117, |
| "eval_runtime": 37.8046, |
| "eval_samples_per_second": 12.221, |
| "eval_steps_per_second": 0.767, |
| "step": 4500 |
| }, |
| { |
| "epoch": 70.77, |
| "learning_rate": 0.00010363636363636362, |
| "loss": 0.011, |
| "step": 4600 |
| }, |
| { |
| "epoch": 72.31, |
| "learning_rate": 9.818181818181817e-05, |
| "loss": 0.0107, |
| "step": 4700 |
| }, |
| { |
| "epoch": 73.85, |
| "learning_rate": 9.272727272727271e-05, |
| "loss": 0.009, |
| "step": 4800 |
| }, |
| { |
| "epoch": 75.38, |
| "learning_rate": 8.727272727272726e-05, |
| "loss": 0.0092, |
| "step": 4900 |
| }, |
| { |
| "epoch": 76.92, |
| "learning_rate": 8.18181818181818e-05, |
| "loss": 0.008, |
| "step": 5000 |
| }, |
| { |
| "epoch": 76.92, |
| "eval_cer": 0.0479013516718046, |
| "eval_loss": 0.09953264147043228, |
| "eval_runtime": 32.7656, |
| "eval_samples_per_second": 14.1, |
| "eval_steps_per_second": 0.885, |
| "step": 5000 |
| }, |
| { |
| "epoch": 78.46, |
| "learning_rate": 7.636363636363635e-05, |
| "loss": 0.0091, |
| "step": 5100 |
| }, |
| { |
| "epoch": 80.0, |
| "learning_rate": 7.09090909090909e-05, |
| "loss": 0.0068, |
| "step": 5200 |
| }, |
| { |
| "epoch": 81.54, |
| "learning_rate": 6.545454545454545e-05, |
| "loss": 0.007, |
| "step": 5300 |
| }, |
| { |
| "epoch": 83.08, |
| "learning_rate": 5.9999999999999995e-05, |
| "loss": 0.0071, |
| "step": 5400 |
| }, |
| { |
| "epoch": 84.62, |
| "learning_rate": 5.454545454545454e-05, |
| "loss": 0.0065, |
| "step": 5500 |
| }, |
| { |
| "epoch": 84.62, |
| "eval_cer": 0.05130029246699866, |
| "eval_loss": 0.10165167599916458, |
| "eval_runtime": 29.0978, |
| "eval_samples_per_second": 15.878, |
| "eval_steps_per_second": 0.997, |
| "step": 5500 |
| }, |
| { |
| "epoch": 86.15, |
| "learning_rate": 4.9090909090909084e-05, |
| "loss": 0.0056, |
| "step": 5600 |
| }, |
| { |
| "epoch": 87.69, |
| "learning_rate": 4.363636363636363e-05, |
| "loss": 0.0058, |
| "step": 5700 |
| }, |
| { |
| "epoch": 89.23, |
| "learning_rate": 3.8181818181818174e-05, |
| "loss": 0.0047, |
| "step": 5800 |
| }, |
| { |
| "epoch": 90.77, |
| "learning_rate": 3.2727272727272725e-05, |
| "loss": 0.0055, |
| "step": 5900 |
| }, |
| { |
| "epoch": 92.31, |
| "learning_rate": 2.727272727272727e-05, |
| "loss": 0.0041, |
| "step": 6000 |
| }, |
| { |
| "epoch": 92.31, |
| "eval_cer": 0.0445814560113825, |
| "eval_loss": 0.10709430277347565, |
| "eval_runtime": 35.6854, |
| "eval_samples_per_second": 12.946, |
| "eval_steps_per_second": 0.813, |
| "step": 6000 |
| }, |
| { |
| "epoch": 93.85, |
| "learning_rate": 2.1818181818181814e-05, |
| "loss": 0.0041, |
| "step": 6100 |
| }, |
| { |
| "epoch": 95.38, |
| "learning_rate": 1.6363636363636363e-05, |
| "loss": 0.0043, |
| "step": 6200 |
| }, |
| { |
| "epoch": 96.92, |
| "learning_rate": 1.0909090909090907e-05, |
| "loss": 0.0047, |
| "step": 6300 |
| }, |
| { |
| "epoch": 98.46, |
| "learning_rate": 5.454545454545454e-06, |
| "loss": 0.004, |
| "step": 6400 |
| }, |
| { |
| "epoch": 100.0, |
| "learning_rate": 0.0, |
| "loss": 0.0033, |
| "step": 6500 |
| }, |
| { |
| "epoch": 100.0, |
| "eval_cer": 0.04687376491976919, |
| "eval_loss": 0.09604515880346298, |
| "eval_runtime": 33.0546, |
| "eval_samples_per_second": 13.977, |
| "eval_steps_per_second": 0.877, |
| "step": 6500 |
| } |
| ], |
| "max_steps": 6500, |
| "num_train_epochs": 100, |
| "total_flos": 4.026656586456467e+19, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|