diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,30922 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.403080016515084, + "eval_steps": 3000, + "global_step": 216000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 9.330555937849166e-05, + "grad_norm": 93.67707824707031, + "learning_rate": 8.166666666666666e-07, + "loss": 16.1123, + "step": 50 + }, + { + "epoch": 0.00018661111875698333, + "grad_norm": 7.377334117889404, + "learning_rate": 1.65e-06, + "loss": 12.8959, + "step": 100 + }, + { + "epoch": 0.00027991667813547503, + "grad_norm": 8.839871406555176, + "learning_rate": 2.4833333333333334e-06, + "loss": 11.8846, + "step": 150 + }, + { + "epoch": 0.00037322223751396665, + "grad_norm": 16.873918533325195, + "learning_rate": 3.3166666666666665e-06, + "loss": 11.4614, + "step": 200 + }, + { + "epoch": 0.0004665277968924583, + "grad_norm": 60.141456604003906, + "learning_rate": 4.15e-06, + "loss": 10.2624, + "step": 250 + }, + { + "epoch": 0.0005598333562709501, + "grad_norm": 26.98830223083496, + "learning_rate": 4.983333333333334e-06, + "loss": 9.1782, + "step": 300 + }, + { + "epoch": 0.0006531389156494417, + "grad_norm": 5.853992462158203, + "learning_rate": 5.816666666666667e-06, + "loss": 8.381, + "step": 350 + }, + { + "epoch": 0.0007464444750279333, + "grad_norm": 38.342830657958984, + "learning_rate": 6.650000000000001e-06, + "loss": 7.5826, + "step": 400 + }, + { + "epoch": 0.000839750034406425, + "grad_norm": 19.33097267150879, + "learning_rate": 7.483333333333334e-06, + "loss": 7.4673, + "step": 450 + }, + { + "epoch": 0.0009330555937849167, + "grad_norm": 9.538616180419922, + "learning_rate": 8.316666666666668e-06, + "loss": 7.0077, + "step": 500 + }, + { + "epoch": 0.0010263611531634083, + "grad_norm": 10.989666938781738, + "learning_rate": 9.15e-06, + "loss": 7.2275, + "step": 550 + }, + { + "epoch": 0.0011196667125419001, + "grad_norm": 9.881364822387695, + "learning_rate": 9.983333333333333e-06, + "loss": 6.8267, + "step": 600 + }, + { + "epoch": 0.0012129722719203917, + "grad_norm": 12.614446640014648, + "learning_rate": 1.0816666666666666e-05, + "loss": 7.133, + "step": 650 + }, + { + "epoch": 0.0013062778312988835, + "grad_norm": 6.586646556854248, + "learning_rate": 1.1650000000000002e-05, + "loss": 7.0883, + "step": 700 + }, + { + "epoch": 0.001399583390677375, + "grad_norm": 10.388212203979492, + "learning_rate": 1.2483333333333335e-05, + "loss": 6.8631, + "step": 750 + }, + { + "epoch": 0.0014928889500558666, + "grad_norm": 5.741134166717529, + "learning_rate": 1.3316666666666666e-05, + "loss": 6.6987, + "step": 800 + }, + { + "epoch": 0.0015861945094343584, + "grad_norm": 6.758159160614014, + "learning_rate": 1.415e-05, + "loss": 6.8654, + "step": 850 + }, + { + "epoch": 0.00167950006881285, + "grad_norm": 11.188956260681152, + "learning_rate": 1.4983333333333336e-05, + "loss": 6.6844, + "step": 900 + }, + { + "epoch": 0.0017728056281913417, + "grad_norm": 9.388659477233887, + "learning_rate": 1.5816666666666667e-05, + "loss": 6.6985, + "step": 950 + }, + { + "epoch": 0.0018661111875698333, + "grad_norm": 17.585216522216797, + "learning_rate": 1.665e-05, + "loss": 6.5966, + "step": 1000 + }, + { + "epoch": 0.001959416746948325, + "grad_norm": 5.970058917999268, + "learning_rate": 1.7483333333333336e-05, + "loss": 6.7767, + "step": 1050 + }, + { + "epoch": 0.0020527223063268167, + "grad_norm": 13.584476470947266, + "learning_rate": 1.8316666666666667e-05, + "loss": 6.7776, + "step": 1100 + }, + { + "epoch": 0.0021460278657053082, + "grad_norm": 8.928284645080566, + "learning_rate": 1.915e-05, + "loss": 6.5926, + "step": 1150 + }, + { + "epoch": 0.0022393334250838002, + "grad_norm": 9.114434242248535, + "learning_rate": 1.9983333333333336e-05, + "loss": 6.687, + "step": 1200 + }, + { + "epoch": 0.002332638984462292, + "grad_norm": 11.371248245239258, + "learning_rate": 2.0816666666666667e-05, + "loss": 6.4247, + "step": 1250 + }, + { + "epoch": 0.0024259445438407834, + "grad_norm": 9.375133514404297, + "learning_rate": 2.165e-05, + "loss": 6.3891, + "step": 1300 + }, + { + "epoch": 0.002519250103219275, + "grad_norm": 7.963316440582275, + "learning_rate": 2.2483333333333335e-05, + "loss": 6.7139, + "step": 1350 + }, + { + "epoch": 0.002612555662597767, + "grad_norm": 9.820133209228516, + "learning_rate": 2.3316666666666666e-05, + "loss": 6.5634, + "step": 1400 + }, + { + "epoch": 0.0027058612219762585, + "grad_norm": 10.639363288879395, + "learning_rate": 2.415e-05, + "loss": 6.6883, + "step": 1450 + }, + { + "epoch": 0.00279916678135475, + "grad_norm": 6.419040679931641, + "learning_rate": 2.4983333333333335e-05, + "loss": 6.6271, + "step": 1500 + }, + { + "epoch": 0.0028924723407332416, + "grad_norm": 8.272869110107422, + "learning_rate": 2.5816666666666666e-05, + "loss": 6.4248, + "step": 1550 + }, + { + "epoch": 0.002985777900111733, + "grad_norm": 7.704101085662842, + "learning_rate": 2.6650000000000004e-05, + "loss": 6.3276, + "step": 1600 + }, + { + "epoch": 0.003079083459490225, + "grad_norm": 9.784399032592773, + "learning_rate": 2.748333333333333e-05, + "loss": 6.2883, + "step": 1650 + }, + { + "epoch": 0.0031723890188687168, + "grad_norm": 11.014293670654297, + "learning_rate": 2.831666666666667e-05, + "loss": 6.4566, + "step": 1700 + }, + { + "epoch": 0.0032656945782472083, + "grad_norm": 10.516816139221191, + "learning_rate": 2.915e-05, + "loss": 6.2076, + "step": 1750 + }, + { + "epoch": 0.0033590001376257, + "grad_norm": 11.228531837463379, + "learning_rate": 2.9983333333333335e-05, + "loss": 6.3539, + "step": 1800 + }, + { + "epoch": 0.003452305697004192, + "grad_norm": 9.591064453125, + "learning_rate": 3.0816666666666666e-05, + "loss": 6.418, + "step": 1850 + }, + { + "epoch": 0.0035456112563826835, + "grad_norm": 10.729142189025879, + "learning_rate": 3.1650000000000004e-05, + "loss": 6.0638, + "step": 1900 + }, + { + "epoch": 0.003638916815761175, + "grad_norm": 8.057198524475098, + "learning_rate": 3.2483333333333335e-05, + "loss": 6.2542, + "step": 1950 + }, + { + "epoch": 0.0037322223751396666, + "grad_norm": 8.599757194519043, + "learning_rate": 3.3316666666666666e-05, + "loss": 6.2936, + "step": 2000 + }, + { + "epoch": 0.0038255279345181586, + "grad_norm": 10.339815139770508, + "learning_rate": 3.415e-05, + "loss": 6.1887, + "step": 2050 + }, + { + "epoch": 0.00391883349389665, + "grad_norm": 8.34434700012207, + "learning_rate": 3.4983333333333334e-05, + "loss": 6.3519, + "step": 2100 + }, + { + "epoch": 0.004012139053275142, + "grad_norm": 8.931737899780273, + "learning_rate": 3.581666666666667e-05, + "loss": 5.9798, + "step": 2150 + }, + { + "epoch": 0.004105444612653633, + "grad_norm": 7.339552402496338, + "learning_rate": 3.665e-05, + "loss": 5.8089, + "step": 2200 + }, + { + "epoch": 0.004198750172032125, + "grad_norm": 10.72778606414795, + "learning_rate": 3.7483333333333334e-05, + "loss": 6.154, + "step": 2250 + }, + { + "epoch": 0.0042920557314106164, + "grad_norm": 7.218290328979492, + "learning_rate": 3.8316666666666665e-05, + "loss": 6.2445, + "step": 2300 + }, + { + "epoch": 0.004385361290789108, + "grad_norm": 7.798749923706055, + "learning_rate": 3.915e-05, + "loss": 5.9981, + "step": 2350 + }, + { + "epoch": 0.0044786668501676005, + "grad_norm": 7.12747049331665, + "learning_rate": 3.9983333333333334e-05, + "loss": 6.0511, + "step": 2400 + }, + { + "epoch": 0.004571972409546092, + "grad_norm": 6.343996524810791, + "learning_rate": 4.081666666666667e-05, + "loss": 6.0378, + "step": 2450 + }, + { + "epoch": 0.004665277968924584, + "grad_norm": 6.697947025299072, + "learning_rate": 4.165e-05, + "loss": 5.8112, + "step": 2500 + }, + { + "epoch": 0.004758583528303075, + "grad_norm": 8.327080726623535, + "learning_rate": 4.2483333333333334e-05, + "loss": 6.2906, + "step": 2550 + }, + { + "epoch": 0.004851889087681567, + "grad_norm": 5.573576927185059, + "learning_rate": 4.3316666666666665e-05, + "loss": 5.8131, + "step": 2600 + }, + { + "epoch": 0.004945194647060058, + "grad_norm": 6.693691253662109, + "learning_rate": 4.415e-05, + "loss": 5.7015, + "step": 2650 + }, + { + "epoch": 0.00503850020643855, + "grad_norm": 6.914066314697266, + "learning_rate": 4.4983333333333334e-05, + "loss": 5.8004, + "step": 2700 + }, + { + "epoch": 0.005131805765817041, + "grad_norm": 8.469379425048828, + "learning_rate": 4.581666666666667e-05, + "loss": 5.9158, + "step": 2750 + }, + { + "epoch": 0.005225111325195534, + "grad_norm": 7.3673295974731445, + "learning_rate": 4.665e-05, + "loss": 5.969, + "step": 2800 + }, + { + "epoch": 0.005318416884574025, + "grad_norm": 7.18582010269165, + "learning_rate": 4.748333333333333e-05, + "loss": 6.0261, + "step": 2850 + }, + { + "epoch": 0.005411722443952517, + "grad_norm": 5.982076644897461, + "learning_rate": 4.831666666666667e-05, + "loss": 6.023, + "step": 2900 + }, + { + "epoch": 0.0055050280033310086, + "grad_norm": 5.787336826324463, + "learning_rate": 4.915e-05, + "loss": 5.8281, + "step": 2950 + }, + { + "epoch": 0.0055983335627095, + "grad_norm": 4.3454718589782715, + "learning_rate": 4.998333333333334e-05, + "loss": 5.9889, + "step": 3000 + }, + { + "epoch": 0.0055983335627095, + "eval_loss": 6.374537944793701, + "eval_runtime": 232.2431, + "eval_samples_per_second": 11.23, + "eval_steps_per_second": 11.23, + "eval_tts_loss": 6.28711945315583, + "step": 3000 + }, + { + "epoch": 0.005691639122087992, + "grad_norm": 8.272449493408203, + "learning_rate": 5.081666666666667e-05, + "loss": 6.141, + "step": 3050 + }, + { + "epoch": 0.005784944681466483, + "grad_norm": 5.653373718261719, + "learning_rate": 5.1649999999999995e-05, + "loss": 6.1385, + "step": 3100 + }, + { + "epoch": 0.005878250240844975, + "grad_norm": 4.102393627166748, + "learning_rate": 5.248333333333334e-05, + "loss": 5.8571, + "step": 3150 + }, + { + "epoch": 0.005971555800223466, + "grad_norm": 5.258531093597412, + "learning_rate": 5.331666666666667e-05, + "loss": 5.7932, + "step": 3200 + }, + { + "epoch": 0.006064861359601959, + "grad_norm": 4.183366775512695, + "learning_rate": 5.415e-05, + "loss": 5.8642, + "step": 3250 + }, + { + "epoch": 0.00615816691898045, + "grad_norm": 5.612656593322754, + "learning_rate": 5.498333333333333e-05, + "loss": 5.8893, + "step": 3300 + }, + { + "epoch": 0.006251472478358942, + "grad_norm": 5.23427677154541, + "learning_rate": 5.581666666666667e-05, + "loss": 5.7405, + "step": 3350 + }, + { + "epoch": 0.0063447780377374335, + "grad_norm": 5.158674240112305, + "learning_rate": 5.665e-05, + "loss": 5.8673, + "step": 3400 + }, + { + "epoch": 0.006438083597115925, + "grad_norm": 5.116922378540039, + "learning_rate": 5.748333333333333e-05, + "loss": 5.7444, + "step": 3450 + }, + { + "epoch": 0.006531389156494417, + "grad_norm": 5.790384292602539, + "learning_rate": 5.831666666666668e-05, + "loss": 5.9559, + "step": 3500 + }, + { + "epoch": 0.006624694715872908, + "grad_norm": 4.801383972167969, + "learning_rate": 5.915000000000001e-05, + "loss": 6.1995, + "step": 3550 + }, + { + "epoch": 0.0067180002752514, + "grad_norm": 3.692594528198242, + "learning_rate": 5.998333333333334e-05, + "loss": 5.733, + "step": 3600 + }, + { + "epoch": 0.006811305834629892, + "grad_norm": 4.112286567687988, + "learning_rate": 6.081666666666666e-05, + "loss": 6.0937, + "step": 3650 + }, + { + "epoch": 0.006904611394008384, + "grad_norm": 4.726688385009766, + "learning_rate": 6.165000000000001e-05, + "loss": 5.8487, + "step": 3700 + }, + { + "epoch": 0.006997916953386875, + "grad_norm": 3.719205856323242, + "learning_rate": 6.248333333333334e-05, + "loss": 5.7952, + "step": 3750 + }, + { + "epoch": 0.007091222512765367, + "grad_norm": 4.780358791351318, + "learning_rate": 6.331666666666667e-05, + "loss": 5.9006, + "step": 3800 + }, + { + "epoch": 0.0071845280721438585, + "grad_norm": 5.655118942260742, + "learning_rate": 6.415e-05, + "loss": 5.7174, + "step": 3850 + }, + { + "epoch": 0.00727783363152235, + "grad_norm": 4.822659015655518, + "learning_rate": 6.498333333333335e-05, + "loss": 5.9248, + "step": 3900 + }, + { + "epoch": 0.007371139190900842, + "grad_norm": 5.104306697845459, + "learning_rate": 6.581666666666668e-05, + "loss": 5.8789, + "step": 3950 + }, + { + "epoch": 0.007464444750279333, + "grad_norm": 4.1900105476379395, + "learning_rate": 6.665000000000001e-05, + "loss": 5.8902, + "step": 4000 + }, + { + "epoch": 0.007557750309657825, + "grad_norm": 4.686253547668457, + "learning_rate": 6.748333333333334e-05, + "loss": 5.6845, + "step": 4050 + }, + { + "epoch": 0.007651055869036317, + "grad_norm": 4.2270188331604, + "learning_rate": 6.831666666666667e-05, + "loss": 5.9528, + "step": 4100 + }, + { + "epoch": 0.007744361428414809, + "grad_norm": 4.093949794769287, + "learning_rate": 6.915e-05, + "loss": 5.8561, + "step": 4150 + }, + { + "epoch": 0.0078376669877933, + "grad_norm": 4.820592403411865, + "learning_rate": 6.998333333333333e-05, + "loss": 5.6467, + "step": 4200 + }, + { + "epoch": 0.007930972547171792, + "grad_norm": 4.777779579162598, + "learning_rate": 7.081666666666668e-05, + "loss": 5.8914, + "step": 4250 + }, + { + "epoch": 0.008024278106550283, + "grad_norm": 2.8841679096221924, + "learning_rate": 7.165000000000001e-05, + "loss": 5.7689, + "step": 4300 + }, + { + "epoch": 0.008117583665928775, + "grad_norm": 3.7897868156433105, + "learning_rate": 7.248333333333334e-05, + "loss": 5.6131, + "step": 4350 + }, + { + "epoch": 0.008210889225307267, + "grad_norm": 3.63638973236084, + "learning_rate": 7.331666666666667e-05, + "loss": 5.6802, + "step": 4400 + }, + { + "epoch": 0.008304194784685758, + "grad_norm": 4.507631778717041, + "learning_rate": 7.415000000000001e-05, + "loss": 5.9008, + "step": 4450 + }, + { + "epoch": 0.00839750034406425, + "grad_norm": 4.462691783905029, + "learning_rate": 7.498333333333334e-05, + "loss": 5.7764, + "step": 4500 + }, + { + "epoch": 0.008490805903442741, + "grad_norm": 5.587769508361816, + "learning_rate": 7.581666666666668e-05, + "loss": 5.6366, + "step": 4550 + }, + { + "epoch": 0.008584111462821233, + "grad_norm": 3.457714319229126, + "learning_rate": 7.664999999999999e-05, + "loss": 5.7527, + "step": 4600 + }, + { + "epoch": 0.008677417022199724, + "grad_norm": 4.030301094055176, + "learning_rate": 7.748333333333334e-05, + "loss": 5.8454, + "step": 4650 + }, + { + "epoch": 0.008770722581578216, + "grad_norm": 3.3396108150482178, + "learning_rate": 7.831666666666667e-05, + "loss": 5.9134, + "step": 4700 + }, + { + "epoch": 0.00886402814095671, + "grad_norm": 3.4558310508728027, + "learning_rate": 7.915e-05, + "loss": 5.7332, + "step": 4750 + }, + { + "epoch": 0.008957333700335201, + "grad_norm": 4.230519771575928, + "learning_rate": 7.998333333333333e-05, + "loss": 5.86, + "step": 4800 + }, + { + "epoch": 0.009050639259713692, + "grad_norm": 3.126082181930542, + "learning_rate": 8.081666666666667e-05, + "loss": 5.7412, + "step": 4850 + }, + { + "epoch": 0.009143944819092184, + "grad_norm": 4.2145609855651855, + "learning_rate": 8.165e-05, + "loss": 5.9317, + "step": 4900 + }, + { + "epoch": 0.009237250378470676, + "grad_norm": 3.0938162803649902, + "learning_rate": 8.248333333333334e-05, + "loss": 5.6891, + "step": 4950 + }, + { + "epoch": 0.009330555937849167, + "grad_norm": 4.230195045471191, + "learning_rate": 8.331666666666668e-05, + "loss": 5.7216, + "step": 5000 + }, + { + "epoch": 0.009423861497227659, + "grad_norm": 3.7208573818206787, + "learning_rate": 8.415000000000001e-05, + "loss": 5.7874, + "step": 5050 + }, + { + "epoch": 0.00951716705660615, + "grad_norm": 3.4725289344787598, + "learning_rate": 8.498333333333334e-05, + "loss": 5.8627, + "step": 5100 + }, + { + "epoch": 0.009610472615984642, + "grad_norm": 3.3882782459259033, + "learning_rate": 8.581666666666666e-05, + "loss": 5.767, + "step": 5150 + }, + { + "epoch": 0.009703778175363133, + "grad_norm": 3.7585625648498535, + "learning_rate": 8.665e-05, + "loss": 5.6776, + "step": 5200 + }, + { + "epoch": 0.009797083734741625, + "grad_norm": 4.944695949554443, + "learning_rate": 8.748333333333334e-05, + "loss": 5.667, + "step": 5250 + }, + { + "epoch": 0.009890389294120117, + "grad_norm": 3.8249406814575195, + "learning_rate": 8.831666666666667e-05, + "loss": 5.7428, + "step": 5300 + }, + { + "epoch": 0.009983694853498608, + "grad_norm": 4.215453147888184, + "learning_rate": 8.915e-05, + "loss": 5.89, + "step": 5350 + }, + { + "epoch": 0.0100770004128771, + "grad_norm": 3.0438342094421387, + "learning_rate": 8.998333333333334e-05, + "loss": 5.639, + "step": 5400 + }, + { + "epoch": 0.010170305972255591, + "grad_norm": 4.088777542114258, + "learning_rate": 9.081666666666667e-05, + "loss": 5.5986, + "step": 5450 + }, + { + "epoch": 0.010263611531634083, + "grad_norm": 3.3326492309570312, + "learning_rate": 9.165e-05, + "loss": 5.5374, + "step": 5500 + }, + { + "epoch": 0.010356917091012574, + "grad_norm": 2.9593100547790527, + "learning_rate": 9.248333333333334e-05, + "loss": 5.4346, + "step": 5550 + }, + { + "epoch": 0.010450222650391068, + "grad_norm": 3.170647144317627, + "learning_rate": 9.331666666666668e-05, + "loss": 5.8167, + "step": 5600 + }, + { + "epoch": 0.01054352820976956, + "grad_norm": 2.9037222862243652, + "learning_rate": 9.415e-05, + "loss": 5.5978, + "step": 5650 + }, + { + "epoch": 0.01063683376914805, + "grad_norm": 3.341003894805908, + "learning_rate": 9.498333333333333e-05, + "loss": 5.6149, + "step": 5700 + }, + { + "epoch": 0.010730139328526542, + "grad_norm": 2.9810729026794434, + "learning_rate": 9.581666666666667e-05, + "loss": 5.7028, + "step": 5750 + }, + { + "epoch": 0.010823444887905034, + "grad_norm": 3.4222352504730225, + "learning_rate": 9.665e-05, + "loss": 5.5117, + "step": 5800 + }, + { + "epoch": 0.010916750447283526, + "grad_norm": 3.1818320751190186, + "learning_rate": 9.748333333333334e-05, + "loss": 5.7761, + "step": 5850 + }, + { + "epoch": 0.011010056006662017, + "grad_norm": 3.1308321952819824, + "learning_rate": 9.831666666666667e-05, + "loss": 5.6791, + "step": 5900 + }, + { + "epoch": 0.011103361566040509, + "grad_norm": 3.701707363128662, + "learning_rate": 9.915000000000001e-05, + "loss": 5.5769, + "step": 5950 + }, + { + "epoch": 0.011196667125419, + "grad_norm": 2.931779384613037, + "learning_rate": 9.998333333333334e-05, + "loss": 5.4739, + "step": 6000 + }, + { + "epoch": 0.011196667125419, + "eval_loss": 6.0997633934021, + "eval_runtime": 229.8318, + "eval_samples_per_second": 11.347, + "eval_steps_per_second": 11.347, + "eval_tts_loss": 6.378738236305035, + "step": 6000 + }, + { + "epoch": 0.011289972684797492, + "grad_norm": 3.05470871925354, + "learning_rate": 0.00010081666666666667, + "loss": 5.5917, + "step": 6050 + }, + { + "epoch": 0.011383278244175983, + "grad_norm": 3.2815566062927246, + "learning_rate": 0.00010165, + "loss": 5.4096, + "step": 6100 + }, + { + "epoch": 0.011476583803554475, + "grad_norm": 2.6623780727386475, + "learning_rate": 0.00010248333333333334, + "loss": 5.4688, + "step": 6150 + }, + { + "epoch": 0.011569889362932967, + "grad_norm": 3.355912923812866, + "learning_rate": 0.00010331666666666667, + "loss": 5.6187, + "step": 6200 + }, + { + "epoch": 0.011663194922311458, + "grad_norm": 2.9381911754608154, + "learning_rate": 0.00010415000000000001, + "loss": 5.8301, + "step": 6250 + }, + { + "epoch": 0.01175650048168995, + "grad_norm": 2.482517719268799, + "learning_rate": 0.00010498333333333334, + "loss": 5.5721, + "step": 6300 + }, + { + "epoch": 0.011849806041068441, + "grad_norm": 3.363173246383667, + "learning_rate": 0.00010581666666666667, + "loss": 5.8946, + "step": 6350 + }, + { + "epoch": 0.011943111600446933, + "grad_norm": 3.2402546405792236, + "learning_rate": 0.00010665, + "loss": 5.6961, + "step": 6400 + }, + { + "epoch": 0.012036417159825426, + "grad_norm": 3.1428112983703613, + "learning_rate": 0.00010748333333333333, + "loss": 5.8315, + "step": 6450 + }, + { + "epoch": 0.012129722719203918, + "grad_norm": 2.3944103717803955, + "learning_rate": 0.00010831666666666667, + "loss": 5.6921, + "step": 6500 + }, + { + "epoch": 0.01222302827858241, + "grad_norm": 2.7807068824768066, + "learning_rate": 0.00010915, + "loss": 5.563, + "step": 6550 + }, + { + "epoch": 0.0123163338379609, + "grad_norm": 2.692389965057373, + "learning_rate": 0.00010998333333333335, + "loss": 5.4729, + "step": 6600 + }, + { + "epoch": 0.012409639397339392, + "grad_norm": 1.9794119596481323, + "learning_rate": 0.00011081666666666669, + "loss": 5.7415, + "step": 6650 + }, + { + "epoch": 0.012502944956717884, + "grad_norm": 1.812415599822998, + "learning_rate": 0.00011165000000000002, + "loss": 5.4328, + "step": 6700 + }, + { + "epoch": 0.012596250516096376, + "grad_norm": 3.0957512855529785, + "learning_rate": 0.00011248333333333333, + "loss": 5.3841, + "step": 6750 + }, + { + "epoch": 0.012689556075474867, + "grad_norm": 3.101076126098633, + "learning_rate": 0.00011331666666666667, + "loss": 5.8037, + "step": 6800 + }, + { + "epoch": 0.012782861634853359, + "grad_norm": 3.04792857170105, + "learning_rate": 0.00011415, + "loss": 5.7408, + "step": 6850 + }, + { + "epoch": 0.01287616719423185, + "grad_norm": 3.4043712615966797, + "learning_rate": 0.00011498333333333333, + "loss": 5.6041, + "step": 6900 + }, + { + "epoch": 0.012969472753610342, + "grad_norm": 1.666723608970642, + "learning_rate": 0.00011581666666666666, + "loss": 5.6662, + "step": 6950 + }, + { + "epoch": 0.013062778312988833, + "grad_norm": 2.3120384216308594, + "learning_rate": 0.00011665000000000002, + "loss": 5.8835, + "step": 7000 + }, + { + "epoch": 0.013156083872367325, + "grad_norm": 4.411382675170898, + "learning_rate": 0.00011748333333333335, + "loss": 5.7121, + "step": 7050 + }, + { + "epoch": 0.013249389431745816, + "grad_norm": 2.192537546157837, + "learning_rate": 0.00011831666666666668, + "loss": 5.4821, + "step": 7100 + }, + { + "epoch": 0.013342694991124308, + "grad_norm": 2.196122646331787, + "learning_rate": 0.00011915000000000001, + "loss": 5.7267, + "step": 7150 + }, + { + "epoch": 0.0134360005505028, + "grad_norm": 2.2460808753967285, + "learning_rate": 0.00011998333333333334, + "loss": 5.5123, + "step": 7200 + }, + { + "epoch": 0.013529306109881291, + "grad_norm": 2.052151679992676, + "learning_rate": 0.00012081666666666667, + "loss": 5.8528, + "step": 7250 + }, + { + "epoch": 0.013622611669259784, + "grad_norm": 2.369872808456421, + "learning_rate": 0.00012165, + "loss": 5.5378, + "step": 7300 + }, + { + "epoch": 0.013715917228638276, + "grad_norm": 2.383234977722168, + "learning_rate": 0.00012248333333333335, + "loss": 5.7572, + "step": 7350 + }, + { + "epoch": 0.013809222788016768, + "grad_norm": 2.3634586334228516, + "learning_rate": 0.0001233166666666667, + "loss": 5.3572, + "step": 7400 + }, + { + "epoch": 0.01390252834739526, + "grad_norm": 2.631533622741699, + "learning_rate": 0.00012415, + "loss": 5.4715, + "step": 7450 + }, + { + "epoch": 0.01399583390677375, + "grad_norm": 1.962390661239624, + "learning_rate": 0.00012498333333333335, + "loss": 5.7204, + "step": 7500 + }, + { + "epoch": 0.014089139466152242, + "grad_norm": 2.6363272666931152, + "learning_rate": 0.00012581666666666667, + "loss": 5.6266, + "step": 7550 + }, + { + "epoch": 0.014182445025530734, + "grad_norm": 2.3159897327423096, + "learning_rate": 0.00012665, + "loss": 5.6025, + "step": 7600 + }, + { + "epoch": 0.014275750584909225, + "grad_norm": 2.558704137802124, + "learning_rate": 0.00012748333333333333, + "loss": 5.622, + "step": 7650 + }, + { + "epoch": 0.014369056144287717, + "grad_norm": 2.3983957767486572, + "learning_rate": 0.00012831666666666665, + "loss": 5.677, + "step": 7700 + }, + { + "epoch": 0.014462361703666209, + "grad_norm": 2.4474925994873047, + "learning_rate": 0.00012915000000000002, + "loss": 5.735, + "step": 7750 + }, + { + "epoch": 0.0145556672630447, + "grad_norm": 1.7076762914657593, + "learning_rate": 0.00012998333333333334, + "loss": 5.6703, + "step": 7800 + }, + { + "epoch": 0.014648972822423192, + "grad_norm": 2.520663261413574, + "learning_rate": 0.00013081666666666668, + "loss": 5.6445, + "step": 7850 + }, + { + "epoch": 0.014742278381801683, + "grad_norm": 1.7655445337295532, + "learning_rate": 0.00013165, + "loss": 5.5739, + "step": 7900 + }, + { + "epoch": 0.014835583941180175, + "grad_norm": 2.880577564239502, + "learning_rate": 0.00013248333333333335, + "loss": 5.5834, + "step": 7950 + }, + { + "epoch": 0.014928889500558666, + "grad_norm": 2.0654830932617188, + "learning_rate": 0.00013331666666666666, + "loss": 5.5844, + "step": 8000 + }, + { + "epoch": 0.015022195059937158, + "grad_norm": 1.5936386585235596, + "learning_rate": 0.00013415, + "loss": 5.4989, + "step": 8050 + }, + { + "epoch": 0.01511550061931565, + "grad_norm": 2.4710347652435303, + "learning_rate": 0.00013498333333333335, + "loss": 5.5193, + "step": 8100 + }, + { + "epoch": 0.015208806178694143, + "grad_norm": 1.680249571800232, + "learning_rate": 0.00013581666666666667, + "loss": 5.4661, + "step": 8150 + }, + { + "epoch": 0.015302111738072634, + "grad_norm": 1.9727201461791992, + "learning_rate": 0.00013665000000000001, + "loss": 5.7409, + "step": 8200 + }, + { + "epoch": 0.015395417297451126, + "grad_norm": 2.1253304481506348, + "learning_rate": 0.00013748333333333333, + "loss": 5.5681, + "step": 8250 + }, + { + "epoch": 0.015488722856829618, + "grad_norm": 2.3671233654022217, + "learning_rate": 0.00013831666666666668, + "loss": 5.5692, + "step": 8300 + }, + { + "epoch": 0.01558202841620811, + "grad_norm": 2.448765516281128, + "learning_rate": 0.00013915, + "loss": 5.8129, + "step": 8350 + }, + { + "epoch": 0.0156753339755866, + "grad_norm": 2.6392576694488525, + "learning_rate": 0.00013998333333333334, + "loss": 5.6357, + "step": 8400 + }, + { + "epoch": 0.01576863953496509, + "grad_norm": 2.3633005619049072, + "learning_rate": 0.00014081666666666666, + "loss": 5.6419, + "step": 8450 + }, + { + "epoch": 0.015861945094343584, + "grad_norm": 2.5643057823181152, + "learning_rate": 0.00014165000000000003, + "loss": 5.6119, + "step": 8500 + }, + { + "epoch": 0.015955250653722074, + "grad_norm": 2.252556324005127, + "learning_rate": 0.00014248333333333335, + "loss": 5.5832, + "step": 8550 + }, + { + "epoch": 0.016048556213100567, + "grad_norm": 2.5928521156311035, + "learning_rate": 0.0001433166666666667, + "loss": 5.6567, + "step": 8600 + }, + { + "epoch": 0.01614186177247906, + "grad_norm": 2.565758228302002, + "learning_rate": 0.00014415, + "loss": 5.5772, + "step": 8650 + }, + { + "epoch": 0.01623516733185755, + "grad_norm": 1.9352021217346191, + "learning_rate": 0.00014498333333333332, + "loss": 5.6935, + "step": 8700 + }, + { + "epoch": 0.016328472891236043, + "grad_norm": 1.7188228368759155, + "learning_rate": 0.00014581666666666667, + "loss": 5.5983, + "step": 8750 + }, + { + "epoch": 0.016421778450614533, + "grad_norm": 1.5790917873382568, + "learning_rate": 0.00014665, + "loss": 5.5048, + "step": 8800 + }, + { + "epoch": 0.016515084009993027, + "grad_norm": 2.046977996826172, + "learning_rate": 0.00014748333333333336, + "loss": 5.4909, + "step": 8850 + }, + { + "epoch": 0.016608389569371516, + "grad_norm": 2.3554604053497314, + "learning_rate": 0.00014831666666666668, + "loss": 5.5094, + "step": 8900 + }, + { + "epoch": 0.01670169512875001, + "grad_norm": 2.019984245300293, + "learning_rate": 0.00014915000000000002, + "loss": 5.5235, + "step": 8950 + }, + { + "epoch": 0.0167950006881285, + "grad_norm": 2.2459378242492676, + "learning_rate": 0.00014998333333333334, + "loss": 5.7603, + "step": 9000 + }, + { + "epoch": 0.0167950006881285, + "eval_loss": 5.949121475219727, + "eval_runtime": 232.0628, + "eval_samples_per_second": 11.238, + "eval_steps_per_second": 11.238, + "eval_tts_loss": 6.626402078707777, + "step": 9000 + }, + { + "epoch": 0.016888306247506993, + "grad_norm": 1.576752781867981, + "learning_rate": 0.00015081666666666668, + "loss": 5.425, + "step": 9050 + }, + { + "epoch": 0.016981611806885483, + "grad_norm": 1.950769066810608, + "learning_rate": 0.00015165, + "loss": 5.5398, + "step": 9100 + }, + { + "epoch": 0.017074917366263976, + "grad_norm": 1.870368242263794, + "learning_rate": 0.00015248333333333334, + "loss": 5.5754, + "step": 9150 + }, + { + "epoch": 0.017168222925642466, + "grad_norm": 1.75543212890625, + "learning_rate": 0.00015331666666666666, + "loss": 5.5328, + "step": 9200 + }, + { + "epoch": 0.01726152848502096, + "grad_norm": 1.626207947731018, + "learning_rate": 0.00015415, + "loss": 5.6149, + "step": 9250 + }, + { + "epoch": 0.01735483404439945, + "grad_norm": 2.190242052078247, + "learning_rate": 0.00015498333333333335, + "loss": 5.4885, + "step": 9300 + }, + { + "epoch": 0.017448139603777942, + "grad_norm": 1.4450881481170654, + "learning_rate": 0.00015581666666666667, + "loss": 5.4476, + "step": 9350 + }, + { + "epoch": 0.017541445163156432, + "grad_norm": 1.9197875261306763, + "learning_rate": 0.00015665, + "loss": 5.611, + "step": 9400 + }, + { + "epoch": 0.017634750722534925, + "grad_norm": 2.0102715492248535, + "learning_rate": 0.00015748333333333333, + "loss": 5.3812, + "step": 9450 + }, + { + "epoch": 0.01772805628191342, + "grad_norm": 1.8926249742507935, + "learning_rate": 0.00015831666666666667, + "loss": 5.4527, + "step": 9500 + }, + { + "epoch": 0.01782136184129191, + "grad_norm": 2.243831157684326, + "learning_rate": 0.00015915, + "loss": 5.4914, + "step": 9550 + }, + { + "epoch": 0.017914667400670402, + "grad_norm": 1.8967549800872803, + "learning_rate": 0.00015998333333333336, + "loss": 5.5251, + "step": 9600 + }, + { + "epoch": 0.01800797296004889, + "grad_norm": 1.785871982574463, + "learning_rate": 0.00016081666666666668, + "loss": 5.3667, + "step": 9650 + }, + { + "epoch": 0.018101278519427385, + "grad_norm": 1.634459376335144, + "learning_rate": 0.00016165000000000003, + "loss": 5.7341, + "step": 9700 + }, + { + "epoch": 0.018194584078805875, + "grad_norm": 2.1786787509918213, + "learning_rate": 0.00016248333333333334, + "loss": 5.4932, + "step": 9750 + }, + { + "epoch": 0.018287889638184368, + "grad_norm": 2.0094616413116455, + "learning_rate": 0.00016331666666666666, + "loss": 5.5438, + "step": 9800 + }, + { + "epoch": 0.018381195197562858, + "grad_norm": 1.5394549369812012, + "learning_rate": 0.00016415, + "loss": 5.3004, + "step": 9850 + }, + { + "epoch": 0.01847450075694135, + "grad_norm": 2.034067392349243, + "learning_rate": 0.00016498333333333332, + "loss": 5.6939, + "step": 9900 + }, + { + "epoch": 0.01856780631631984, + "grad_norm": 2.557887554168701, + "learning_rate": 0.00016581666666666667, + "loss": 5.4535, + "step": 9950 + }, + { + "epoch": 0.018661111875698334, + "grad_norm": 1.7101596593856812, + "learning_rate": 0.00016665, + "loss": 5.5991, + "step": 10000 + }, + { + "epoch": 0.018754417435076824, + "grad_norm": 1.1628295183181763, + "learning_rate": 0.00016748333333333336, + "loss": 5.5027, + "step": 10050 + }, + { + "epoch": 0.018847722994455317, + "grad_norm": 1.6788240671157837, + "learning_rate": 0.00016831666666666667, + "loss": 5.3345, + "step": 10100 + }, + { + "epoch": 0.018941028553833807, + "grad_norm": 2.166861057281494, + "learning_rate": 0.00016915000000000002, + "loss": 5.6807, + "step": 10150 + }, + { + "epoch": 0.0190343341132123, + "grad_norm": 1.7033262252807617, + "learning_rate": 0.00016998333333333334, + "loss": 5.6921, + "step": 10200 + }, + { + "epoch": 0.01912763967259079, + "grad_norm": 1.5504415035247803, + "learning_rate": 0.00017081666666666668, + "loss": 5.5782, + "step": 10250 + }, + { + "epoch": 0.019220945231969284, + "grad_norm": 2.4242970943450928, + "learning_rate": 0.00017165, + "loss": 5.4636, + "step": 10300 + }, + { + "epoch": 0.019314250791347777, + "grad_norm": 1.863472819328308, + "learning_rate": 0.00017248333333333334, + "loss": 5.4955, + "step": 10350 + }, + { + "epoch": 0.019407556350726267, + "grad_norm": 1.856552004814148, + "learning_rate": 0.0001733166666666667, + "loss": 5.526, + "step": 10400 + }, + { + "epoch": 0.01950086191010476, + "grad_norm": 2.124068260192871, + "learning_rate": 0.00017415, + "loss": 5.5683, + "step": 10450 + }, + { + "epoch": 0.01959416746948325, + "grad_norm": 1.7942883968353271, + "learning_rate": 0.00017498333333333335, + "loss": 5.3387, + "step": 10500 + }, + { + "epoch": 0.019687473028861743, + "grad_norm": 1.7919622659683228, + "learning_rate": 0.00017581666666666667, + "loss": 5.4876, + "step": 10550 + }, + { + "epoch": 0.019780778588240233, + "grad_norm": 2.1417160034179688, + "learning_rate": 0.00017665, + "loss": 5.3508, + "step": 10600 + }, + { + "epoch": 0.019874084147618726, + "grad_norm": 1.6931318044662476, + "learning_rate": 0.00017748333333333333, + "loss": 5.6171, + "step": 10650 + }, + { + "epoch": 0.019967389706997216, + "grad_norm": 1.4930100440979004, + "learning_rate": 0.00017831666666666667, + "loss": 5.5452, + "step": 10700 + }, + { + "epoch": 0.02006069526637571, + "grad_norm": 1.6434742212295532, + "learning_rate": 0.00017915000000000002, + "loss": 5.4924, + "step": 10750 + }, + { + "epoch": 0.0201540008257542, + "grad_norm": 1.96444571018219, + "learning_rate": 0.00017998333333333334, + "loss": 5.3689, + "step": 10800 + }, + { + "epoch": 0.020247306385132693, + "grad_norm": 2.2349231243133545, + "learning_rate": 0.00018081666666666668, + "loss": 5.3768, + "step": 10850 + }, + { + "epoch": 0.020340611944511183, + "grad_norm": 1.5882576704025269, + "learning_rate": 0.00018165, + "loss": 5.747, + "step": 10900 + }, + { + "epoch": 0.020433917503889676, + "grad_norm": 2.4228999614715576, + "learning_rate": 0.00018248333333333334, + "loss": 5.7638, + "step": 10950 + }, + { + "epoch": 0.020527223063268166, + "grad_norm": 2.138207197189331, + "learning_rate": 0.00018331666666666666, + "loss": 5.7262, + "step": 11000 + }, + { + "epoch": 0.02062052862264666, + "grad_norm": 1.9708890914916992, + "learning_rate": 0.00018415, + "loss": 5.5162, + "step": 11050 + }, + { + "epoch": 0.02071383418202515, + "grad_norm": 1.5397372245788574, + "learning_rate": 0.00018498333333333335, + "loss": 5.5007, + "step": 11100 + }, + { + "epoch": 0.020807139741403642, + "grad_norm": 1.579482913017273, + "learning_rate": 0.0001858166666666667, + "loss": 5.4921, + "step": 11150 + }, + { + "epoch": 0.020900445300782135, + "grad_norm": 1.2825592756271362, + "learning_rate": 0.00018665, + "loss": 5.5486, + "step": 11200 + }, + { + "epoch": 0.020993750860160625, + "grad_norm": 2.018866777420044, + "learning_rate": 0.00018748333333333335, + "loss": 5.4298, + "step": 11250 + }, + { + "epoch": 0.02108705641953912, + "grad_norm": 1.8341054916381836, + "learning_rate": 0.00018831666666666667, + "loss": 5.5407, + "step": 11300 + }, + { + "epoch": 0.02118036197891761, + "grad_norm": 1.6199865341186523, + "learning_rate": 0.00018915000000000002, + "loss": 5.3315, + "step": 11350 + }, + { + "epoch": 0.0212736675382961, + "grad_norm": 1.5176050662994385, + "learning_rate": 0.00018998333333333333, + "loss": 5.4755, + "step": 11400 + }, + { + "epoch": 0.02136697309767459, + "grad_norm": 1.8712049722671509, + "learning_rate": 0.00019081666666666668, + "loss": 5.4285, + "step": 11450 + }, + { + "epoch": 0.021460278657053085, + "grad_norm": 1.531267762184143, + "learning_rate": 0.00019165000000000002, + "loss": 5.6335, + "step": 11500 + }, + { + "epoch": 0.021553584216431575, + "grad_norm": 1.6521434783935547, + "learning_rate": 0.00019248333333333334, + "loss": 5.3548, + "step": 11550 + }, + { + "epoch": 0.021646889775810068, + "grad_norm": 1.4107913970947266, + "learning_rate": 0.00019331666666666669, + "loss": 5.446, + "step": 11600 + }, + { + "epoch": 0.021740195335188558, + "grad_norm": 1.8727481365203857, + "learning_rate": 0.00019415, + "loss": 5.5346, + "step": 11650 + }, + { + "epoch": 0.02183350089456705, + "grad_norm": 2.040088653564453, + "learning_rate": 0.00019498333333333335, + "loss": 5.7011, + "step": 11700 + }, + { + "epoch": 0.02192680645394554, + "grad_norm": 1.4047014713287354, + "learning_rate": 0.00019581666666666666, + "loss": 5.3325, + "step": 11750 + }, + { + "epoch": 0.022020112013324034, + "grad_norm": 1.2155386209487915, + "learning_rate": 0.00019665, + "loss": 5.5547, + "step": 11800 + }, + { + "epoch": 0.022113417572702524, + "grad_norm": 1.2685003280639648, + "learning_rate": 0.00019748333333333335, + "loss": 5.6444, + "step": 11850 + }, + { + "epoch": 0.022206723132081017, + "grad_norm": 1.1052175760269165, + "learning_rate": 0.00019831666666666667, + "loss": 5.4218, + "step": 11900 + }, + { + "epoch": 0.022300028691459507, + "grad_norm": 1.6609026193618774, + "learning_rate": 0.00019915000000000002, + "loss": 5.5406, + "step": 11950 + }, + { + "epoch": 0.022393334250838, + "grad_norm": 1.7019258737564087, + "learning_rate": 0.00019998333333333333, + "loss": 5.4288, + "step": 12000 + }, + { + "epoch": 0.022393334250838, + "eval_loss": 5.846170425415039, + "eval_runtime": 230.7006, + "eval_samples_per_second": 11.305, + "eval_steps_per_second": 11.305, + "eval_tts_loss": 6.596176531521512, + "step": 12000 + }, + { + "epoch": 0.022486639810216494, + "grad_norm": 1.391849160194397, + "learning_rate": 0.00019999999991525316, + "loss": 5.438, + "step": 12050 + }, + { + "epoch": 0.022579945369594984, + "grad_norm": 1.9549068212509155, + "learning_rate": 0.0001999999996540592, + "loss": 5.6524, + "step": 12100 + }, + { + "epoch": 0.022673250928973477, + "grad_norm": 1.765764832496643, + "learning_rate": 0.00019999999921638283, + "loss": 5.529, + "step": 12150 + }, + { + "epoch": 0.022766556488351967, + "grad_norm": 1.92975652217865, + "learning_rate": 0.00019999999860222407, + "loss": 5.1634, + "step": 12200 + }, + { + "epoch": 0.02285986204773046, + "grad_norm": 1.627753734588623, + "learning_rate": 0.00019999999781158292, + "loss": 5.6305, + "step": 12250 + }, + { + "epoch": 0.02295316760710895, + "grad_norm": 1.6781692504882812, + "learning_rate": 0.00019999999684445935, + "loss": 5.5936, + "step": 12300 + }, + { + "epoch": 0.023046473166487443, + "grad_norm": 1.200257658958435, + "learning_rate": 0.00019999999570085336, + "loss": 5.4897, + "step": 12350 + }, + { + "epoch": 0.023139778725865933, + "grad_norm": 1.7044615745544434, + "learning_rate": 0.00019999999438076503, + "loss": 5.2909, + "step": 12400 + }, + { + "epoch": 0.023233084285244426, + "grad_norm": 1.4790157079696655, + "learning_rate": 0.00019999999288419424, + "loss": 5.3771, + "step": 12450 + }, + { + "epoch": 0.023326389844622916, + "grad_norm": 1.5862489938735962, + "learning_rate": 0.0001999999912111411, + "loss": 5.2316, + "step": 12500 + }, + { + "epoch": 0.02341969540400141, + "grad_norm": 1.9885330200195312, + "learning_rate": 0.00019999998936160555, + "loss": 5.3982, + "step": 12550 + }, + { + "epoch": 0.0235130009633799, + "grad_norm": 1.4161193370819092, + "learning_rate": 0.00019999998733558766, + "loss": 5.4459, + "step": 12600 + }, + { + "epoch": 0.023606306522758393, + "grad_norm": 1.5754268169403076, + "learning_rate": 0.00019999998513308732, + "loss": 5.3697, + "step": 12650 + }, + { + "epoch": 0.023699612082136882, + "grad_norm": 1.0978269577026367, + "learning_rate": 0.00019999998275410462, + "loss": 5.4439, + "step": 12700 + }, + { + "epoch": 0.023792917641515376, + "grad_norm": 1.4332829713821411, + "learning_rate": 0.0001999999801986396, + "loss": 5.2534, + "step": 12750 + }, + { + "epoch": 0.023886223200893866, + "grad_norm": 1.157253623008728, + "learning_rate": 0.00019999997746669216, + "loss": 5.4755, + "step": 12800 + }, + { + "epoch": 0.02397952876027236, + "grad_norm": 1.577779769897461, + "learning_rate": 0.00019999997455826237, + "loss": 5.4148, + "step": 12850 + }, + { + "epoch": 0.024072834319650852, + "grad_norm": 1.4218207597732544, + "learning_rate": 0.00019999997147335022, + "loss": 5.4531, + "step": 12900 + }, + { + "epoch": 0.024166139879029342, + "grad_norm": 2.003883123397827, + "learning_rate": 0.0001999999682119557, + "loss": 5.6188, + "step": 12950 + }, + { + "epoch": 0.024259445438407835, + "grad_norm": 2.01855731010437, + "learning_rate": 0.00019999996477407884, + "loss": 5.5149, + "step": 13000 + }, + { + "epoch": 0.024352750997786325, + "grad_norm": 1.6317418813705444, + "learning_rate": 0.00019999996115971965, + "loss": 5.3213, + "step": 13050 + }, + { + "epoch": 0.02444605655716482, + "grad_norm": 1.6599922180175781, + "learning_rate": 0.0001999999573688781, + "loss": 5.4164, + "step": 13100 + }, + { + "epoch": 0.02453936211654331, + "grad_norm": 1.4030259847640991, + "learning_rate": 0.00019999995340155423, + "loss": 5.254, + "step": 13150 + }, + { + "epoch": 0.0246326676759218, + "grad_norm": 1.507149338722229, + "learning_rate": 0.00019999994925774806, + "loss": 5.2633, + "step": 13200 + }, + { + "epoch": 0.02472597323530029, + "grad_norm": 1.6153885126113892, + "learning_rate": 0.00019999994493745957, + "loss": 5.4768, + "step": 13250 + }, + { + "epoch": 0.024819278794678785, + "grad_norm": 1.2192950248718262, + "learning_rate": 0.00019999994044068875, + "loss": 5.465, + "step": 13300 + }, + { + "epoch": 0.024912584354057275, + "grad_norm": 1.3811638355255127, + "learning_rate": 0.00019999993576743565, + "loss": 5.4459, + "step": 13350 + }, + { + "epoch": 0.025005889913435768, + "grad_norm": 1.7365858554840088, + "learning_rate": 0.00019999993091770026, + "loss": 5.4616, + "step": 13400 + }, + { + "epoch": 0.025099195472814258, + "grad_norm": 1.4087482690811157, + "learning_rate": 0.00019999992589148256, + "loss": 5.4608, + "step": 13450 + }, + { + "epoch": 0.02519250103219275, + "grad_norm": 1.7531602382659912, + "learning_rate": 0.00019999992068878264, + "loss": 5.3973, + "step": 13500 + }, + { + "epoch": 0.02528580659157124, + "grad_norm": 1.653184175491333, + "learning_rate": 0.0001999999153096004, + "loss": 5.219, + "step": 13550 + }, + { + "epoch": 0.025379112150949734, + "grad_norm": 1.3364681005477905, + "learning_rate": 0.00019999990975393592, + "loss": 5.4375, + "step": 13600 + }, + { + "epoch": 0.025472417710328224, + "grad_norm": 1.3480881452560425, + "learning_rate": 0.0001999999040217892, + "loss": 5.4107, + "step": 13650 + }, + { + "epoch": 0.025565723269706717, + "grad_norm": 1.8013604879379272, + "learning_rate": 0.00019999989811316025, + "loss": 5.4488, + "step": 13700 + }, + { + "epoch": 0.02565902882908521, + "grad_norm": 1.5909210443496704, + "learning_rate": 0.00019999989202804908, + "loss": 5.6496, + "step": 13750 + }, + { + "epoch": 0.0257523343884637, + "grad_norm": 1.897382140159607, + "learning_rate": 0.0001999998857664557, + "loss": 5.3464, + "step": 13800 + }, + { + "epoch": 0.025845639947842194, + "grad_norm": 1.844641089439392, + "learning_rate": 0.0001999998793283801, + "loss": 5.536, + "step": 13850 + }, + { + "epoch": 0.025938945507220684, + "grad_norm": 1.41611909866333, + "learning_rate": 0.00019999987271382232, + "loss": 5.5876, + "step": 13900 + }, + { + "epoch": 0.026032251066599177, + "grad_norm": 1.5440382957458496, + "learning_rate": 0.00019999986592278234, + "loss": 5.2344, + "step": 13950 + }, + { + "epoch": 0.026125556625977667, + "grad_norm": 1.3486498594284058, + "learning_rate": 0.0001999998589552602, + "loss": 5.4819, + "step": 14000 + }, + { + "epoch": 0.02621886218535616, + "grad_norm": 1.6134167909622192, + "learning_rate": 0.0001999998518112559, + "loss": 5.6369, + "step": 14050 + }, + { + "epoch": 0.02631216774473465, + "grad_norm": 1.950914740562439, + "learning_rate": 0.0001999998444907695, + "loss": 5.3324, + "step": 14100 + }, + { + "epoch": 0.026405473304113143, + "grad_norm": 1.515940546989441, + "learning_rate": 0.00019999983699380095, + "loss": 5.5848, + "step": 14150 + }, + { + "epoch": 0.026498778863491633, + "grad_norm": 1.8684641122817993, + "learning_rate": 0.00019999982932035023, + "loss": 5.4585, + "step": 14200 + }, + { + "epoch": 0.026592084422870126, + "grad_norm": 1.9417451620101929, + "learning_rate": 0.00019999982147041748, + "loss": 5.3855, + "step": 14250 + }, + { + "epoch": 0.026685389982248616, + "grad_norm": 1.5790482759475708, + "learning_rate": 0.0001999998134440026, + "loss": 5.4521, + "step": 14300 + }, + { + "epoch": 0.02677869554162711, + "grad_norm": 1.9342408180236816, + "learning_rate": 0.00019999980524110564, + "loss": 5.3272, + "step": 14350 + }, + { + "epoch": 0.0268720011010056, + "grad_norm": 1.617722988128662, + "learning_rate": 0.00019999979686172664, + "loss": 5.2174, + "step": 14400 + }, + { + "epoch": 0.026965306660384093, + "grad_norm": 1.0585445165634155, + "learning_rate": 0.00019999978830586557, + "loss": 5.1622, + "step": 14450 + }, + { + "epoch": 0.027058612219762582, + "grad_norm": 1.661190390586853, + "learning_rate": 0.00019999977957352248, + "loss": 5.1873, + "step": 14500 + }, + { + "epoch": 0.027151917779141076, + "grad_norm": 1.5143160820007324, + "learning_rate": 0.0001999997706646974, + "loss": 5.507, + "step": 14550 + }, + { + "epoch": 0.02724522333851957, + "grad_norm": 1.7400487661361694, + "learning_rate": 0.0001999997615793903, + "loss": 5.0888, + "step": 14600 + }, + { + "epoch": 0.02733852889789806, + "grad_norm": 1.344720482826233, + "learning_rate": 0.0001999997523176012, + "loss": 5.2325, + "step": 14650 + }, + { + "epoch": 0.027431834457276552, + "grad_norm": 1.390224575996399, + "learning_rate": 0.00019999974287933015, + "loss": 5.2955, + "step": 14700 + }, + { + "epoch": 0.027525140016655042, + "grad_norm": 1.685595154762268, + "learning_rate": 0.00019999973326457714, + "loss": 5.3243, + "step": 14750 + }, + { + "epoch": 0.027618445576033535, + "grad_norm": 1.574565052986145, + "learning_rate": 0.0001999997234733422, + "loss": 5.5154, + "step": 14800 + }, + { + "epoch": 0.027711751135412025, + "grad_norm": 1.8520110845565796, + "learning_rate": 0.0001999997135056254, + "loss": 5.4337, + "step": 14850 + }, + { + "epoch": 0.02780505669479052, + "grad_norm": 1.9176346063613892, + "learning_rate": 0.0001999997033614266, + "loss": 5.2195, + "step": 14900 + }, + { + "epoch": 0.027898362254169008, + "grad_norm": 1.4428346157073975, + "learning_rate": 0.000199999693040746, + "loss": 5.1029, + "step": 14950 + }, + { + "epoch": 0.0279916678135475, + "grad_norm": 1.408713459968567, + "learning_rate": 0.0001999996825435835, + "loss": 5.4174, + "step": 15000 + }, + { + "epoch": 0.0279916678135475, + "eval_loss": 5.698703289031982, + "eval_runtime": 230.7647, + "eval_samples_per_second": 11.302, + "eval_steps_per_second": 11.302, + "eval_tts_loss": 6.7004290100964115, + "step": 15000 + }, + { + "epoch": 0.02808497337292599, + "grad_norm": 1.4793498516082764, + "learning_rate": 0.00019999967186993915, + "loss": 5.5412, + "step": 15050 + }, + { + "epoch": 0.028178278932304485, + "grad_norm": 1.2378696203231812, + "learning_rate": 0.000199999661019813, + "loss": 5.3518, + "step": 15100 + }, + { + "epoch": 0.028271584491682974, + "grad_norm": 1.4232487678527832, + "learning_rate": 0.000199999649993205, + "loss": 5.4232, + "step": 15150 + }, + { + "epoch": 0.028364890051061468, + "grad_norm": 1.6765247583389282, + "learning_rate": 0.00019999963879011525, + "loss": 5.5725, + "step": 15200 + }, + { + "epoch": 0.028458195610439958, + "grad_norm": 1.599683403968811, + "learning_rate": 0.00019999962741054373, + "loss": 5.4653, + "step": 15250 + }, + { + "epoch": 0.02855150116981845, + "grad_norm": 1.5458168983459473, + "learning_rate": 0.00019999961585449047, + "loss": 5.0618, + "step": 15300 + }, + { + "epoch": 0.02864480672919694, + "grad_norm": 1.09707510471344, + "learning_rate": 0.0001999996041219555, + "loss": 5.5357, + "step": 15350 + }, + { + "epoch": 0.028738112288575434, + "grad_norm": 2.0242269039154053, + "learning_rate": 0.00019999959221293878, + "loss": 5.3426, + "step": 15400 + }, + { + "epoch": 0.028831417847953927, + "grad_norm": 1.6951383352279663, + "learning_rate": 0.00019999958012744038, + "loss": 5.4521, + "step": 15450 + }, + { + "epoch": 0.028924723407332417, + "grad_norm": 1.8190480470657349, + "learning_rate": 0.00019999956786546035, + "loss": 5.2914, + "step": 15500 + }, + { + "epoch": 0.02901802896671091, + "grad_norm": 1.7063592672348022, + "learning_rate": 0.00019999955542699863, + "loss": 5.2852, + "step": 15550 + }, + { + "epoch": 0.0291113345260894, + "grad_norm": 1.7153364419937134, + "learning_rate": 0.00019999954281205533, + "loss": 5.0807, + "step": 15600 + }, + { + "epoch": 0.029204640085467894, + "grad_norm": 1.677153468132019, + "learning_rate": 0.0001999995300206304, + "loss": 5.1755, + "step": 15650 + }, + { + "epoch": 0.029297945644846383, + "grad_norm": 1.5905139446258545, + "learning_rate": 0.00019999951705272393, + "loss": 5.2782, + "step": 15700 + }, + { + "epoch": 0.029391251204224877, + "grad_norm": 1.6873728036880493, + "learning_rate": 0.0001999995039083359, + "loss": 5.51, + "step": 15750 + }, + { + "epoch": 0.029484556763603367, + "grad_norm": 1.4348547458648682, + "learning_rate": 0.00019999949058746632, + "loss": 5.2227, + "step": 15800 + }, + { + "epoch": 0.02957786232298186, + "grad_norm": 1.599501132965088, + "learning_rate": 0.00019999947709011524, + "loss": 5.4851, + "step": 15850 + }, + { + "epoch": 0.02967116788236035, + "grad_norm": 1.2430881261825562, + "learning_rate": 0.0001999994634162827, + "loss": 5.059, + "step": 15900 + }, + { + "epoch": 0.029764473441738843, + "grad_norm": 1.2983192205429077, + "learning_rate": 0.00019999944956596867, + "loss": 5.3391, + "step": 15950 + }, + { + "epoch": 0.029857779001117333, + "grad_norm": 1.8286632299423218, + "learning_rate": 0.00019999943553917324, + "loss": 5.068, + "step": 16000 + }, + { + "epoch": 0.029951084560495826, + "grad_norm": 1.7763135433197021, + "learning_rate": 0.0001999994213358964, + "loss": 5.2494, + "step": 16050 + }, + { + "epoch": 0.030044390119874316, + "grad_norm": 1.5806595087051392, + "learning_rate": 0.00019999940695613816, + "loss": 5.3536, + "step": 16100 + }, + { + "epoch": 0.03013769567925281, + "grad_norm": 1.542901635169983, + "learning_rate": 0.00019999939239989857, + "loss": 5.2556, + "step": 16150 + }, + { + "epoch": 0.0302310012386313, + "grad_norm": 1.390985369682312, + "learning_rate": 0.00019999937766717764, + "loss": 5.155, + "step": 16200 + }, + { + "epoch": 0.030324306798009792, + "grad_norm": 1.0642412900924683, + "learning_rate": 0.0001999993627579754, + "loss": 5.3142, + "step": 16250 + }, + { + "epoch": 0.030417612357388286, + "grad_norm": 1.5385115146636963, + "learning_rate": 0.00019999934767229188, + "loss": 5.2755, + "step": 16300 + }, + { + "epoch": 0.030510917916766776, + "grad_norm": 1.719664216041565, + "learning_rate": 0.00019999933241012711, + "loss": 5.3522, + "step": 16350 + }, + { + "epoch": 0.03060422347614527, + "grad_norm": 1.6435158252716064, + "learning_rate": 0.00019999931697148112, + "loss": 5.1949, + "step": 16400 + }, + { + "epoch": 0.03069752903552376, + "grad_norm": 1.2330780029296875, + "learning_rate": 0.00019999930135635395, + "loss": 5.0197, + "step": 16450 + }, + { + "epoch": 0.030790834594902252, + "grad_norm": 1.433413028717041, + "learning_rate": 0.0001999992855647456, + "loss": 5.4277, + "step": 16500 + }, + { + "epoch": 0.030884140154280742, + "grad_norm": 1.8009040355682373, + "learning_rate": 0.00019999926959665608, + "loss": 5.0877, + "step": 16550 + }, + { + "epoch": 0.030977445713659235, + "grad_norm": 1.1439430713653564, + "learning_rate": 0.00019999925345208546, + "loss": 5.3289, + "step": 16600 + }, + { + "epoch": 0.031070751273037725, + "grad_norm": 1.3732225894927979, + "learning_rate": 0.00019999923713103374, + "loss": 5.1679, + "step": 16650 + }, + { + "epoch": 0.03116405683241622, + "grad_norm": 1.722701072692871, + "learning_rate": 0.000199999220633501, + "loss": 5.3267, + "step": 16700 + }, + { + "epoch": 0.03125736239179471, + "grad_norm": 1.6895197629928589, + "learning_rate": 0.00019999920395948718, + "loss": 5.1153, + "step": 16750 + }, + { + "epoch": 0.0313506679511732, + "grad_norm": 1.7803030014038086, + "learning_rate": 0.00019999918710899238, + "loss": 5.2153, + "step": 16800 + }, + { + "epoch": 0.03144397351055169, + "grad_norm": 1.187790036201477, + "learning_rate": 0.00019999917008201663, + "loss": 5.2711, + "step": 16850 + }, + { + "epoch": 0.03153727906993018, + "grad_norm": 1.3633534908294678, + "learning_rate": 0.00019999915287855993, + "loss": 5.5233, + "step": 16900 + }, + { + "epoch": 0.03163058462930868, + "grad_norm": 1.4957891702651978, + "learning_rate": 0.00019999913549862233, + "loss": 5.2524, + "step": 16950 + }, + { + "epoch": 0.03172389018868717, + "grad_norm": 1.6286832094192505, + "learning_rate": 0.00019999911794220382, + "loss": 5.1533, + "step": 17000 + }, + { + "epoch": 0.03181719574806566, + "grad_norm": 1.2257238626480103, + "learning_rate": 0.00019999910020930447, + "loss": 5.1066, + "step": 17050 + }, + { + "epoch": 0.03191050130744415, + "grad_norm": 1.537248134613037, + "learning_rate": 0.00019999908229992433, + "loss": 5.1558, + "step": 17100 + }, + { + "epoch": 0.032003806866822644, + "grad_norm": 1.7594759464263916, + "learning_rate": 0.00019999906421406337, + "loss": 5.2488, + "step": 17150 + }, + { + "epoch": 0.032097112426201134, + "grad_norm": 2.0060057640075684, + "learning_rate": 0.00019999904595172167, + "loss": 5.1244, + "step": 17200 + }, + { + "epoch": 0.032190417985579624, + "grad_norm": 1.306092381477356, + "learning_rate": 0.00019999902751289926, + "loss": 5.2285, + "step": 17250 + }, + { + "epoch": 0.03228372354495812, + "grad_norm": 1.605584979057312, + "learning_rate": 0.00019999900889759617, + "loss": 5.148, + "step": 17300 + }, + { + "epoch": 0.03237702910433661, + "grad_norm": 1.3685698509216309, + "learning_rate": 0.0001999989901058124, + "loss": 5.2305, + "step": 17350 + }, + { + "epoch": 0.0324703346637151, + "grad_norm": 1.7700085639953613, + "learning_rate": 0.00019999897113754801, + "loss": 5.2642, + "step": 17400 + }, + { + "epoch": 0.03256364022309359, + "grad_norm": 1.8209742307662964, + "learning_rate": 0.00019999895199280306, + "loss": 5.2634, + "step": 17450 + }, + { + "epoch": 0.03265694578247209, + "grad_norm": 1.7181389331817627, + "learning_rate": 0.00019999893267157753, + "loss": 5.3703, + "step": 17500 + }, + { + "epoch": 0.03275025134185058, + "grad_norm": 1.6699622869491577, + "learning_rate": 0.00019999891317387149, + "loss": 5.1961, + "step": 17550 + }, + { + "epoch": 0.032843556901229067, + "grad_norm": 1.7012041807174683, + "learning_rate": 0.00019999889349968494, + "loss": 5.2565, + "step": 17600 + }, + { + "epoch": 0.032936862460607556, + "grad_norm": 1.4097135066986084, + "learning_rate": 0.00019999887364901796, + "loss": 5.3606, + "step": 17650 + }, + { + "epoch": 0.03303016801998605, + "grad_norm": 1.8546066284179688, + "learning_rate": 0.00019999885362187057, + "loss": 5.1426, + "step": 17700 + }, + { + "epoch": 0.03312347357936454, + "grad_norm": 1.6862845420837402, + "learning_rate": 0.00019999883341824282, + "loss": 5.4663, + "step": 17750 + }, + { + "epoch": 0.03321677913874303, + "grad_norm": 1.4715492725372314, + "learning_rate": 0.00019999881303813468, + "loss": 5.2308, + "step": 17800 + }, + { + "epoch": 0.03331008469812152, + "grad_norm": 1.50525963306427, + "learning_rate": 0.00019999879248154624, + "loss": 5.2341, + "step": 17850 + }, + { + "epoch": 0.03340339025750002, + "grad_norm": 1.6435831785202026, + "learning_rate": 0.00019999877174847752, + "loss": 5.2826, + "step": 17900 + }, + { + "epoch": 0.03349669581687851, + "grad_norm": 1.4661362171173096, + "learning_rate": 0.00019999875083892858, + "loss": 5.2187, + "step": 17950 + }, + { + "epoch": 0.033590001376257, + "grad_norm": 1.255449652671814, + "learning_rate": 0.00019999872975289944, + "loss": 5.1868, + "step": 18000 + }, + { + "epoch": 0.033590001376257, + "eval_loss": 5.565659046173096, + "eval_runtime": 233.225, + "eval_samples_per_second": 11.182, + "eval_steps_per_second": 11.182, + "eval_tts_loss": 6.789093455502569, + "step": 18000 + }, + { + "epoch": 0.033683306935635496, + "grad_norm": 1.6058536767959595, + "learning_rate": 0.00019999870849039014, + "loss": 5.0283, + "step": 18050 + }, + { + "epoch": 0.033776612495013986, + "grad_norm": 1.931518793106079, + "learning_rate": 0.0001999986870514007, + "loss": 5.0369, + "step": 18100 + }, + { + "epoch": 0.033869918054392475, + "grad_norm": 1.3181180953979492, + "learning_rate": 0.00019999866543593116, + "loss": 5.0891, + "step": 18150 + }, + { + "epoch": 0.033963223613770965, + "grad_norm": 1.6189442873001099, + "learning_rate": 0.0001999986436439816, + "loss": 5.2425, + "step": 18200 + }, + { + "epoch": 0.03405652917314946, + "grad_norm": 1.4186607599258423, + "learning_rate": 0.000199998621675552, + "loss": 4.9802, + "step": 18250 + }, + { + "epoch": 0.03414983473252795, + "grad_norm": 1.727707028388977, + "learning_rate": 0.00019999859953064245, + "loss": 5.1903, + "step": 18300 + }, + { + "epoch": 0.03424314029190644, + "grad_norm": 2.1892459392547607, + "learning_rate": 0.00019999857720925297, + "loss": 5.0998, + "step": 18350 + }, + { + "epoch": 0.03433644585128493, + "grad_norm": 1.567331075668335, + "learning_rate": 0.00019999855471138358, + "loss": 5.241, + "step": 18400 + }, + { + "epoch": 0.03442975141066343, + "grad_norm": 1.5945489406585693, + "learning_rate": 0.00019999853203703436, + "loss": 5.1888, + "step": 18450 + }, + { + "epoch": 0.03452305697004192, + "grad_norm": 1.468598484992981, + "learning_rate": 0.0001999985091862053, + "loss": 5.2305, + "step": 18500 + }, + { + "epoch": 0.03461636252942041, + "grad_norm": 1.801831603050232, + "learning_rate": 0.00019999848615889648, + "loss": 5.0736, + "step": 18550 + }, + { + "epoch": 0.0347096680887989, + "grad_norm": 1.983370065689087, + "learning_rate": 0.00019999846295510793, + "loss": 5.1701, + "step": 18600 + }, + { + "epoch": 0.034802973648177395, + "grad_norm": 1.4538954496383667, + "learning_rate": 0.00019999843957483967, + "loss": 5.1838, + "step": 18650 + }, + { + "epoch": 0.034896279207555884, + "grad_norm": 2.007187843322754, + "learning_rate": 0.0001999984160180918, + "loss": 5.1847, + "step": 18700 + }, + { + "epoch": 0.034989584766934374, + "grad_norm": 1.3527206182479858, + "learning_rate": 0.00019999839228486427, + "loss": 5.21, + "step": 18750 + }, + { + "epoch": 0.035082890326312864, + "grad_norm": 0.93910813331604, + "learning_rate": 0.0001999983683751572, + "loss": 5.217, + "step": 18800 + }, + { + "epoch": 0.03517619588569136, + "grad_norm": 1.9449275732040405, + "learning_rate": 0.00019999834428897063, + "loss": 5.2404, + "step": 18850 + }, + { + "epoch": 0.03526950144506985, + "grad_norm": 1.5186761617660522, + "learning_rate": 0.00019999832002630453, + "loss": 5.1827, + "step": 18900 + }, + { + "epoch": 0.03536280700444834, + "grad_norm": 1.8738884925842285, + "learning_rate": 0.000199998295587159, + "loss": 5.2015, + "step": 18950 + }, + { + "epoch": 0.03545611256382684, + "grad_norm": 1.6167317628860474, + "learning_rate": 0.0001999982709715341, + "loss": 5.1333, + "step": 19000 + }, + { + "epoch": 0.03554941812320533, + "grad_norm": 1.5436670780181885, + "learning_rate": 0.00019999824617942985, + "loss": 5.2181, + "step": 19050 + }, + { + "epoch": 0.03564272368258382, + "grad_norm": 1.9840049743652344, + "learning_rate": 0.00019999822121084625, + "loss": 4.9849, + "step": 19100 + }, + { + "epoch": 0.03573602924196231, + "grad_norm": 2.000702381134033, + "learning_rate": 0.00019999819606578343, + "loss": 5.0625, + "step": 19150 + }, + { + "epoch": 0.035829334801340804, + "grad_norm": 1.5434222221374512, + "learning_rate": 0.00019999817074424137, + "loss": 5.0164, + "step": 19200 + }, + { + "epoch": 0.03592264036071929, + "grad_norm": 1.492538332939148, + "learning_rate": 0.0001999981452462201, + "loss": 5.2329, + "step": 19250 + }, + { + "epoch": 0.03601594592009778, + "grad_norm": 1.3936853408813477, + "learning_rate": 0.00019999811957171975, + "loss": 5.2539, + "step": 19300 + }, + { + "epoch": 0.03610925147947627, + "grad_norm": 1.3118730783462524, + "learning_rate": 0.00019999809372074027, + "loss": 5.1528, + "step": 19350 + }, + { + "epoch": 0.03620255703885477, + "grad_norm": 1.0975465774536133, + "learning_rate": 0.00019999806769328178, + "loss": 5.1746, + "step": 19400 + }, + { + "epoch": 0.03629586259823326, + "grad_norm": 1.3111509084701538, + "learning_rate": 0.0001999980414893443, + "loss": 5.1834, + "step": 19450 + }, + { + "epoch": 0.03638916815761175, + "grad_norm": 1.45402193069458, + "learning_rate": 0.00019999801510892784, + "loss": 5.2616, + "step": 19500 + }, + { + "epoch": 0.03648247371699024, + "grad_norm": 1.5854758024215698, + "learning_rate": 0.0001999979885520325, + "loss": 5.1249, + "step": 19550 + }, + { + "epoch": 0.036575779276368736, + "grad_norm": 1.6653951406478882, + "learning_rate": 0.00019999796181865832, + "loss": 5.1441, + "step": 19600 + }, + { + "epoch": 0.036669084835747226, + "grad_norm": 1.6704020500183105, + "learning_rate": 0.0001999979349088053, + "loss": 5.1533, + "step": 19650 + }, + { + "epoch": 0.036762390395125716, + "grad_norm": 1.5533548593521118, + "learning_rate": 0.00019999790782247354, + "loss": 5.3171, + "step": 19700 + }, + { + "epoch": 0.03685569595450421, + "grad_norm": 1.6590992212295532, + "learning_rate": 0.0001999978805596631, + "loss": 5.3232, + "step": 19750 + }, + { + "epoch": 0.0369490015138827, + "grad_norm": 1.5630754232406616, + "learning_rate": 0.00019999785312037392, + "loss": 5.138, + "step": 19800 + }, + { + "epoch": 0.03704230707326119, + "grad_norm": 1.771061897277832, + "learning_rate": 0.00019999782550460618, + "loss": 5.1169, + "step": 19850 + }, + { + "epoch": 0.03713561263263968, + "grad_norm": 1.6629877090454102, + "learning_rate": 0.00019999779771235983, + "loss": 5.207, + "step": 19900 + }, + { + "epoch": 0.03722891819201818, + "grad_norm": 1.2950810194015503, + "learning_rate": 0.00019999776974363503, + "loss": 5.1812, + "step": 19950 + }, + { + "epoch": 0.03732222375139667, + "grad_norm": 0.9732388257980347, + "learning_rate": 0.00019999774159843171, + "loss": 5.2428, + "step": 20000 + }, + { + "epoch": 0.03741552931077516, + "grad_norm": 1.3945764303207397, + "learning_rate": 0.00019999771327675, + "loss": 5.2425, + "step": 20050 + }, + { + "epoch": 0.03750883487015365, + "grad_norm": 1.880929946899414, + "learning_rate": 0.0001999976847785899, + "loss": 5.3158, + "step": 20100 + }, + { + "epoch": 0.037602140429532145, + "grad_norm": 1.3822988271713257, + "learning_rate": 0.00019999765610395146, + "loss": 5.1829, + "step": 20150 + }, + { + "epoch": 0.037695445988910635, + "grad_norm": 1.7985529899597168, + "learning_rate": 0.00019999762725283479, + "loss": 5.0514, + "step": 20200 + }, + { + "epoch": 0.037788751548289125, + "grad_norm": 1.568518877029419, + "learning_rate": 0.0001999975982252399, + "loss": 5.2333, + "step": 20250 + }, + { + "epoch": 0.037882057107667615, + "grad_norm": 1.4257228374481201, + "learning_rate": 0.00019999756902116684, + "loss": 5.1142, + "step": 20300 + }, + { + "epoch": 0.03797536266704611, + "grad_norm": 1.0664187669754028, + "learning_rate": 0.00019999753964061566, + "loss": 5.204, + "step": 20350 + }, + { + "epoch": 0.0380686682264246, + "grad_norm": 1.2831335067749023, + "learning_rate": 0.00019999751008358642, + "loss": 5.2272, + "step": 20400 + }, + { + "epoch": 0.03816197378580309, + "grad_norm": 1.7534427642822266, + "learning_rate": 0.00019999748035007917, + "loss": 5.1009, + "step": 20450 + }, + { + "epoch": 0.03825527934518158, + "grad_norm": 1.8466085195541382, + "learning_rate": 0.00019999745044009396, + "loss": 5.1638, + "step": 20500 + }, + { + "epoch": 0.03834858490456008, + "grad_norm": 1.4553605318069458, + "learning_rate": 0.00019999742035363083, + "loss": 5.0138, + "step": 20550 + }, + { + "epoch": 0.03844189046393857, + "grad_norm": 1.5238099098205566, + "learning_rate": 0.00019999739009068988, + "loss": 5.1723, + "step": 20600 + }, + { + "epoch": 0.03853519602331706, + "grad_norm": 1.434646487236023, + "learning_rate": 0.0001999973596512711, + "loss": 5.1743, + "step": 20650 + }, + { + "epoch": 0.038628501582695554, + "grad_norm": 1.4512965679168701, + "learning_rate": 0.0001999973290353746, + "loss": 5.2082, + "step": 20700 + }, + { + "epoch": 0.038721807142074044, + "grad_norm": 1.4691851139068604, + "learning_rate": 0.0001999972982430004, + "loss": 5.3011, + "step": 20750 + }, + { + "epoch": 0.038815112701452534, + "grad_norm": 1.7574752569198608, + "learning_rate": 0.00019999726727414855, + "loss": 4.8797, + "step": 20800 + }, + { + "epoch": 0.038908418260831024, + "grad_norm": 1.9155274629592896, + "learning_rate": 0.00019999723612881915, + "loss": 5.2745, + "step": 20850 + }, + { + "epoch": 0.03900172382020952, + "grad_norm": 1.9480228424072266, + "learning_rate": 0.0001999972048070122, + "loss": 5.1093, + "step": 20900 + }, + { + "epoch": 0.03909502937958801, + "grad_norm": 1.314173698425293, + "learning_rate": 0.0001999971733087278, + "loss": 4.9488, + "step": 20950 + }, + { + "epoch": 0.0391883349389665, + "grad_norm": 1.5758183002471924, + "learning_rate": 0.00019999714163396597, + "loss": 5.0716, + "step": 21000 + }, + { + "epoch": 0.0391883349389665, + "eval_loss": 5.446030616760254, + "eval_runtime": 233.6236, + "eval_samples_per_second": 11.163, + "eval_steps_per_second": 11.163, + "eval_tts_loss": 6.94306072605591, + "step": 21000 + }, + { + "epoch": 0.03928164049834499, + "grad_norm": 1.767700433731079, + "learning_rate": 0.00019999710978272676, + "loss": 5.152, + "step": 21050 + }, + { + "epoch": 0.03937494605772349, + "grad_norm": 1.171859622001648, + "learning_rate": 0.00019999707775501026, + "loss": 4.9265, + "step": 21100 + }, + { + "epoch": 0.039468251617101976, + "grad_norm": 1.6448893547058105, + "learning_rate": 0.00019999704555081653, + "loss": 5.2328, + "step": 21150 + }, + { + "epoch": 0.039561557176480466, + "grad_norm": 1.2383092641830444, + "learning_rate": 0.00019999701317014557, + "loss": 5.0509, + "step": 21200 + }, + { + "epoch": 0.039654862735858956, + "grad_norm": 1.47544264793396, + "learning_rate": 0.00019999698061299752, + "loss": 5.0847, + "step": 21250 + }, + { + "epoch": 0.03974816829523745, + "grad_norm": 1.4962610006332397, + "learning_rate": 0.00019999694787937238, + "loss": 5.1514, + "step": 21300 + }, + { + "epoch": 0.03984147385461594, + "grad_norm": 1.7069225311279297, + "learning_rate": 0.00019999691496927022, + "loss": 5.0082, + "step": 21350 + }, + { + "epoch": 0.03993477941399443, + "grad_norm": 1.970062017440796, + "learning_rate": 0.00019999688188269111, + "loss": 5.162, + "step": 21400 + }, + { + "epoch": 0.04002808497337293, + "grad_norm": 1.9130045175552368, + "learning_rate": 0.00019999684861963507, + "loss": 5.1627, + "step": 21450 + }, + { + "epoch": 0.04012139053275142, + "grad_norm": 1.7424521446228027, + "learning_rate": 0.00019999681518010221, + "loss": 5.1727, + "step": 21500 + }, + { + "epoch": 0.04021469609212991, + "grad_norm": 1.2897844314575195, + "learning_rate": 0.00019999678156409258, + "loss": 5.0252, + "step": 21550 + }, + { + "epoch": 0.0403080016515084, + "grad_norm": 1.493475079536438, + "learning_rate": 0.0001999967477716062, + "loss": 5.029, + "step": 21600 + }, + { + "epoch": 0.040401307210886896, + "grad_norm": 1.2159661054611206, + "learning_rate": 0.00019999671380264316, + "loss": 5.0264, + "step": 21650 + }, + { + "epoch": 0.040494612770265385, + "grad_norm": 1.4600199460983276, + "learning_rate": 0.0001999966796572035, + "loss": 4.9779, + "step": 21700 + }, + { + "epoch": 0.040587918329643875, + "grad_norm": 1.526752233505249, + "learning_rate": 0.00019999664533528732, + "loss": 5.1797, + "step": 21750 + }, + { + "epoch": 0.040681223889022365, + "grad_norm": 1.6486625671386719, + "learning_rate": 0.00019999661083689466, + "loss": 5.1361, + "step": 21800 + }, + { + "epoch": 0.04077452944840086, + "grad_norm": 1.9790832996368408, + "learning_rate": 0.00019999657616202556, + "loss": 4.9149, + "step": 21850 + }, + { + "epoch": 0.04086783500777935, + "grad_norm": 1.4455928802490234, + "learning_rate": 0.00019999654131068012, + "loss": 5.1656, + "step": 21900 + }, + { + "epoch": 0.04096114056715784, + "grad_norm": 1.556361436843872, + "learning_rate": 0.00019999650628285836, + "loss": 5.1226, + "step": 21950 + }, + { + "epoch": 0.04105444612653633, + "grad_norm": 1.2878562211990356, + "learning_rate": 0.00019999647107856036, + "loss": 5.0742, + "step": 22000 + }, + { + "epoch": 0.04114775168591483, + "grad_norm": 1.4406765699386597, + "learning_rate": 0.0001999964356977862, + "loss": 5.167, + "step": 22050 + }, + { + "epoch": 0.04124105724529332, + "grad_norm": 1.3805707693099976, + "learning_rate": 0.0001999964001405359, + "loss": 4.9802, + "step": 22100 + }, + { + "epoch": 0.04133436280467181, + "grad_norm": 1.7562808990478516, + "learning_rate": 0.00019999636440680955, + "loss": 5.2414, + "step": 22150 + }, + { + "epoch": 0.0414276683640503, + "grad_norm": 1.5996421575546265, + "learning_rate": 0.00019999632849660722, + "loss": 5.2813, + "step": 22200 + }, + { + "epoch": 0.041520973923428794, + "grad_norm": 1.2786633968353271, + "learning_rate": 0.00019999629240992898, + "loss": 5.3503, + "step": 22250 + }, + { + "epoch": 0.041614279482807284, + "grad_norm": 1.7056688070297241, + "learning_rate": 0.00019999625614677488, + "loss": 5.205, + "step": 22300 + }, + { + "epoch": 0.041707585042185774, + "grad_norm": 1.5653877258300781, + "learning_rate": 0.00019999621970714495, + "loss": 5.1535, + "step": 22350 + }, + { + "epoch": 0.04180089060156427, + "grad_norm": 1.6800984144210815, + "learning_rate": 0.00019999618309103935, + "loss": 5.0171, + "step": 22400 + }, + { + "epoch": 0.04189419616094276, + "grad_norm": 1.608587384223938, + "learning_rate": 0.000199996146298458, + "loss": 5.0021, + "step": 22450 + }, + { + "epoch": 0.04198750172032125, + "grad_norm": 1.1938655376434326, + "learning_rate": 0.00019999610932940108, + "loss": 4.7871, + "step": 22500 + }, + { + "epoch": 0.04208080727969974, + "grad_norm": 2.0445282459259033, + "learning_rate": 0.00019999607218386864, + "loss": 5.2227, + "step": 22550 + }, + { + "epoch": 0.04217411283907824, + "grad_norm": 1.440500020980835, + "learning_rate": 0.0001999960348618607, + "loss": 4.9996, + "step": 22600 + }, + { + "epoch": 0.04226741839845673, + "grad_norm": 1.4636365175247192, + "learning_rate": 0.0001999959973633774, + "loss": 5.2787, + "step": 22650 + }, + { + "epoch": 0.04236072395783522, + "grad_norm": 1.6443440914154053, + "learning_rate": 0.0001999959596884187, + "loss": 5.1626, + "step": 22700 + }, + { + "epoch": 0.04245402951721371, + "grad_norm": 1.3920623064041138, + "learning_rate": 0.00019999592183698477, + "loss": 5.1757, + "step": 22750 + }, + { + "epoch": 0.0425473350765922, + "grad_norm": 1.5439649820327759, + "learning_rate": 0.00019999588380907563, + "loss": 5.1367, + "step": 22800 + }, + { + "epoch": 0.04264064063597069, + "grad_norm": 1.575374960899353, + "learning_rate": 0.00019999584560469134, + "loss": 5.0247, + "step": 22850 + }, + { + "epoch": 0.04273394619534918, + "grad_norm": 1.206429123878479, + "learning_rate": 0.00019999580722383198, + "loss": 5.1514, + "step": 22900 + }, + { + "epoch": 0.04282725175472767, + "grad_norm": 1.5674757957458496, + "learning_rate": 0.0001999957686664976, + "loss": 5.208, + "step": 22950 + }, + { + "epoch": 0.04292055731410617, + "grad_norm": 1.2532552480697632, + "learning_rate": 0.00019999572993268828, + "loss": 4.9993, + "step": 23000 + }, + { + "epoch": 0.04301386287348466, + "grad_norm": 1.4708222150802612, + "learning_rate": 0.00019999569102240413, + "loss": 5.121, + "step": 23050 + }, + { + "epoch": 0.04310716843286315, + "grad_norm": 1.6088930368423462, + "learning_rate": 0.00019999565193564513, + "loss": 5.0844, + "step": 23100 + }, + { + "epoch": 0.043200473992241646, + "grad_norm": 1.4088431596755981, + "learning_rate": 0.00019999561267241146, + "loss": 5.0259, + "step": 23150 + }, + { + "epoch": 0.043293779551620136, + "grad_norm": 1.430076003074646, + "learning_rate": 0.00019999557323270307, + "loss": 5.2003, + "step": 23200 + }, + { + "epoch": 0.043387085110998626, + "grad_norm": 1.7545416355133057, + "learning_rate": 0.0001999955336165201, + "loss": 4.9939, + "step": 23250 + }, + { + "epoch": 0.043480390670377116, + "grad_norm": 1.3078407049179077, + "learning_rate": 0.00019999549382386262, + "loss": 5.2717, + "step": 23300 + }, + { + "epoch": 0.04357369622975561, + "grad_norm": 1.6095589399337769, + "learning_rate": 0.0001999954538547307, + "loss": 5.2214, + "step": 23350 + }, + { + "epoch": 0.0436670017891341, + "grad_norm": 1.4139171838760376, + "learning_rate": 0.00019999541370912437, + "loss": 5.1498, + "step": 23400 + }, + { + "epoch": 0.04376030734851259, + "grad_norm": 1.6955764293670654, + "learning_rate": 0.00019999537338704376, + "loss": 5.2248, + "step": 23450 + }, + { + "epoch": 0.04385361290789108, + "grad_norm": 1.632716178894043, + "learning_rate": 0.0001999953328884889, + "loss": 4.9997, + "step": 23500 + }, + { + "epoch": 0.04394691846726958, + "grad_norm": 1.7285034656524658, + "learning_rate": 0.00019999529221345985, + "loss": 5.029, + "step": 23550 + }, + { + "epoch": 0.04404022402664807, + "grad_norm": 1.7824739217758179, + "learning_rate": 0.0001999952513619567, + "loss": 4.9897, + "step": 23600 + }, + { + "epoch": 0.04413352958602656, + "grad_norm": 1.517888069152832, + "learning_rate": 0.00019999521033397957, + "loss": 5.1368, + "step": 23650 + }, + { + "epoch": 0.04422683514540505, + "grad_norm": 1.3025915622711182, + "learning_rate": 0.00019999516912952846, + "loss": 5.1239, + "step": 23700 + }, + { + "epoch": 0.044320140704783545, + "grad_norm": 1.4753954410552979, + "learning_rate": 0.00019999512774860348, + "loss": 5.1521, + "step": 23750 + }, + { + "epoch": 0.044413446264162035, + "grad_norm": 1.6234376430511475, + "learning_rate": 0.00019999508619120468, + "loss": 5.1371, + "step": 23800 + }, + { + "epoch": 0.044506751823540525, + "grad_norm": 1.5803349018096924, + "learning_rate": 0.00019999504445733217, + "loss": 5.2971, + "step": 23850 + }, + { + "epoch": 0.044600057382919014, + "grad_norm": 1.5795341730117798, + "learning_rate": 0.00019999500254698596, + "loss": 4.8945, + "step": 23900 + }, + { + "epoch": 0.04469336294229751, + "grad_norm": 1.5893889665603638, + "learning_rate": 0.0001999949604601662, + "loss": 5.1103, + "step": 23950 + }, + { + "epoch": 0.044786668501676, + "grad_norm": 1.1029318571090698, + "learning_rate": 0.00019999491819687294, + "loss": 5.0341, + "step": 24000 + }, + { + "epoch": 0.044786668501676, + "eval_loss": 5.369950771331787, + "eval_runtime": 228.2176, + "eval_samples_per_second": 11.428, + "eval_steps_per_second": 11.428, + "eval_tts_loss": 7.0878655090813805, + "step": 24000 + }, + { + "epoch": 0.04487997406105449, + "grad_norm": 1.5672223567962646, + "learning_rate": 0.00019999487575710623, + "loss": 5.1548, + "step": 24050 + }, + { + "epoch": 0.04497327962043299, + "grad_norm": 1.3560975790023804, + "learning_rate": 0.0001999948331408662, + "loss": 5.1028, + "step": 24100 + }, + { + "epoch": 0.04506658517981148, + "grad_norm": 1.365408182144165, + "learning_rate": 0.0001999947903481528, + "loss": 5.0995, + "step": 24150 + }, + { + "epoch": 0.04515989073918997, + "grad_norm": 1.3482069969177246, + "learning_rate": 0.00019999474737896625, + "loss": 4.9473, + "step": 24200 + }, + { + "epoch": 0.04525319629856846, + "grad_norm": 1.2890368700027466, + "learning_rate": 0.00019999470423330654, + "loss": 4.9009, + "step": 24250 + }, + { + "epoch": 0.045346501857946954, + "grad_norm": 1.2973231077194214, + "learning_rate": 0.00019999466091117377, + "loss": 5.0809, + "step": 24300 + }, + { + "epoch": 0.045439807417325444, + "grad_norm": 1.5761609077453613, + "learning_rate": 0.000199994617412568, + "loss": 4.9994, + "step": 24350 + }, + { + "epoch": 0.045533112976703934, + "grad_norm": 1.3380223512649536, + "learning_rate": 0.00019999457373748937, + "loss": 4.916, + "step": 24400 + }, + { + "epoch": 0.04562641853608242, + "grad_norm": 1.7922528982162476, + "learning_rate": 0.00019999452988593789, + "loss": 5.0799, + "step": 24450 + }, + { + "epoch": 0.04571972409546092, + "grad_norm": 1.3923864364624023, + "learning_rate": 0.00019999448585791369, + "loss": 5.0239, + "step": 24500 + }, + { + "epoch": 0.04581302965483941, + "grad_norm": 1.2720636129379272, + "learning_rate": 0.0001999944416534168, + "loss": 4.9941, + "step": 24550 + }, + { + "epoch": 0.0459063352142179, + "grad_norm": 1.602018117904663, + "learning_rate": 0.00019999439727244728, + "loss": 5.1485, + "step": 24600 + }, + { + "epoch": 0.04599964077359639, + "grad_norm": 1.5410170555114746, + "learning_rate": 0.00019999435271500526, + "loss": 5.2976, + "step": 24650 + }, + { + "epoch": 0.046092946332974886, + "grad_norm": 1.6076123714447021, + "learning_rate": 0.00019999430798109082, + "loss": 4.9226, + "step": 24700 + }, + { + "epoch": 0.046186251892353376, + "grad_norm": 1.3308855295181274, + "learning_rate": 0.00019999426307070402, + "loss": 5.0585, + "step": 24750 + }, + { + "epoch": 0.046279557451731866, + "grad_norm": 1.0817416906356812, + "learning_rate": 0.00019999421798384492, + "loss": 4.9678, + "step": 24800 + }, + { + "epoch": 0.04637286301111036, + "grad_norm": 1.4719314575195312, + "learning_rate": 0.00019999417272051364, + "loss": 4.734, + "step": 24850 + }, + { + "epoch": 0.04646616857048885, + "grad_norm": 1.911425232887268, + "learning_rate": 0.00019999412728071022, + "loss": 5.023, + "step": 24900 + }, + { + "epoch": 0.04655947412986734, + "grad_norm": 1.2537506818771362, + "learning_rate": 0.0001999940816644348, + "loss": 5.0473, + "step": 24950 + }, + { + "epoch": 0.04665277968924583, + "grad_norm": 1.2723119258880615, + "learning_rate": 0.00019999403587168737, + "loss": 5.0075, + "step": 25000 + }, + { + "epoch": 0.04674608524862433, + "grad_norm": 1.221137523651123, + "learning_rate": 0.00019999398990246808, + "loss": 5.0694, + "step": 25050 + }, + { + "epoch": 0.04683939080800282, + "grad_norm": 1.6972732543945312, + "learning_rate": 0.000199993943756777, + "loss": 5.0661, + "step": 25100 + }, + { + "epoch": 0.04693269636738131, + "grad_norm": 1.5249754190444946, + "learning_rate": 0.00019999389743461418, + "loss": 5.0894, + "step": 25150 + }, + { + "epoch": 0.0470260019267598, + "grad_norm": 1.5337498188018799, + "learning_rate": 0.00019999385093597975, + "loss": 5.2557, + "step": 25200 + }, + { + "epoch": 0.047119307486138295, + "grad_norm": 1.6065733432769775, + "learning_rate": 0.00019999380426087378, + "loss": 5.2814, + "step": 25250 + }, + { + "epoch": 0.047212613045516785, + "grad_norm": 1.6499677896499634, + "learning_rate": 0.0001999937574092963, + "loss": 5.0703, + "step": 25300 + }, + { + "epoch": 0.047305918604895275, + "grad_norm": 0.9138503670692444, + "learning_rate": 0.00019999371038124746, + "loss": 5.1375, + "step": 25350 + }, + { + "epoch": 0.047399224164273765, + "grad_norm": 1.533087968826294, + "learning_rate": 0.00019999366317672733, + "loss": 5.0459, + "step": 25400 + }, + { + "epoch": 0.04749252972365226, + "grad_norm": 1.8451659679412842, + "learning_rate": 0.00019999361579573592, + "loss": 4.9868, + "step": 25450 + }, + { + "epoch": 0.04758583528303075, + "grad_norm": 1.0893833637237549, + "learning_rate": 0.00019999356823827342, + "loss": 4.9291, + "step": 25500 + }, + { + "epoch": 0.04767914084240924, + "grad_norm": 1.099062204360962, + "learning_rate": 0.00019999352050433986, + "loss": 5.0681, + "step": 25550 + }, + { + "epoch": 0.04777244640178773, + "grad_norm": 1.3058807849884033, + "learning_rate": 0.00019999347259393532, + "loss": 4.8777, + "step": 25600 + }, + { + "epoch": 0.04786575196116623, + "grad_norm": 1.6822357177734375, + "learning_rate": 0.0001999934245070599, + "loss": 5.2018, + "step": 25650 + }, + { + "epoch": 0.04795905752054472, + "grad_norm": 1.1984933614730835, + "learning_rate": 0.0001999933762437137, + "loss": 5.0762, + "step": 25700 + }, + { + "epoch": 0.04805236307992321, + "grad_norm": 1.4918663501739502, + "learning_rate": 0.00019999332780389677, + "loss": 5.0357, + "step": 25750 + }, + { + "epoch": 0.048145668639301704, + "grad_norm": 1.7189278602600098, + "learning_rate": 0.0001999932791876092, + "loss": 5.0787, + "step": 25800 + }, + { + "epoch": 0.048238974198680194, + "grad_norm": 1.2158808708190918, + "learning_rate": 0.00019999323039485113, + "loss": 4.9543, + "step": 25850 + }, + { + "epoch": 0.048332279758058684, + "grad_norm": 1.298972725868225, + "learning_rate": 0.00019999318142562253, + "loss": 4.8591, + "step": 25900 + }, + { + "epoch": 0.048425585317437174, + "grad_norm": 1.8158042430877686, + "learning_rate": 0.0001999931322799236, + "loss": 4.8411, + "step": 25950 + }, + { + "epoch": 0.04851889087681567, + "grad_norm": 1.2872735261917114, + "learning_rate": 0.00019999308295775442, + "loss": 4.9664, + "step": 26000 + }, + { + "epoch": 0.04861219643619416, + "grad_norm": 1.3019319772720337, + "learning_rate": 0.00019999303345911496, + "loss": 5.1429, + "step": 26050 + }, + { + "epoch": 0.04870550199557265, + "grad_norm": 1.3941707611083984, + "learning_rate": 0.00019999298378400545, + "loss": 5.0969, + "step": 26100 + }, + { + "epoch": 0.04879880755495114, + "grad_norm": 1.3208707571029663, + "learning_rate": 0.0001999929339324259, + "loss": 5.0603, + "step": 26150 + }, + { + "epoch": 0.04889211311432964, + "grad_norm": 1.7119523286819458, + "learning_rate": 0.00019999288390437644, + "loss": 5.0922, + "step": 26200 + }, + { + "epoch": 0.04898541867370813, + "grad_norm": 1.4633029699325562, + "learning_rate": 0.0001999928336998571, + "loss": 5.0348, + "step": 26250 + }, + { + "epoch": 0.04907872423308662, + "grad_norm": 1.695513367652893, + "learning_rate": 0.00019999278331886804, + "loss": 4.984, + "step": 26300 + }, + { + "epoch": 0.049172029792465106, + "grad_norm": 1.6055431365966797, + "learning_rate": 0.0001999927327614093, + "loss": 5.0255, + "step": 26350 + }, + { + "epoch": 0.0492653353518436, + "grad_norm": 1.5823806524276733, + "learning_rate": 0.00019999268202748092, + "loss": 4.9689, + "step": 26400 + }, + { + "epoch": 0.04935864091122209, + "grad_norm": 1.099482774734497, + "learning_rate": 0.00019999263111708312, + "loss": 5.1056, + "step": 26450 + }, + { + "epoch": 0.04945194647060058, + "grad_norm": 1.2539039850234985, + "learning_rate": 0.0001999925800302159, + "loss": 5.1003, + "step": 26500 + }, + { + "epoch": 0.04954525202997908, + "grad_norm": 1.8513319492340088, + "learning_rate": 0.0001999925287668794, + "loss": 5.0544, + "step": 26550 + }, + { + "epoch": 0.04963855758935757, + "grad_norm": 1.6248927116394043, + "learning_rate": 0.00019999247732707364, + "loss": 5.0591, + "step": 26600 + }, + { + "epoch": 0.04973186314873606, + "grad_norm": 1.4035404920578003, + "learning_rate": 0.00019999242571079874, + "loss": 4.7997, + "step": 26650 + }, + { + "epoch": 0.04982516870811455, + "grad_norm": 1.5909003019332886, + "learning_rate": 0.00019999237391805482, + "loss": 5.0233, + "step": 26700 + }, + { + "epoch": 0.049918474267493046, + "grad_norm": 1.187412977218628, + "learning_rate": 0.00019999232194884198, + "loss": 4.9776, + "step": 26750 + }, + { + "epoch": 0.050011779826871536, + "grad_norm": 0.9005943536758423, + "learning_rate": 0.00019999226980316025, + "loss": 4.9586, + "step": 26800 + }, + { + "epoch": 0.050105085386250026, + "grad_norm": 1.1468946933746338, + "learning_rate": 0.0001999922174810098, + "loss": 5.0651, + "step": 26850 + }, + { + "epoch": 0.050198390945628515, + "grad_norm": 1.2562627792358398, + "learning_rate": 0.00019999216498239064, + "loss": 4.8625, + "step": 26900 + }, + { + "epoch": 0.05029169650500701, + "grad_norm": 1.2052425146102905, + "learning_rate": 0.00019999211230730294, + "loss": 4.9507, + "step": 26950 + }, + { + "epoch": 0.0503850020643855, + "grad_norm": 1.3357577323913574, + "learning_rate": 0.00019999205945574671, + "loss": 4.9069, + "step": 27000 + }, + { + "epoch": 0.0503850020643855, + "eval_loss": 5.300256252288818, + "eval_runtime": 230.7705, + "eval_samples_per_second": 11.301, + "eval_steps_per_second": 11.301, + "eval_tts_loss": 6.999222072712689, + "step": 27000 + }, + { + "epoch": 0.05047830762376399, + "grad_norm": 1.0808535814285278, + "learning_rate": 0.00019999200642772214, + "loss": 4.9514, + "step": 27050 + }, + { + "epoch": 0.05057161318314248, + "grad_norm": 1.6835181713104248, + "learning_rate": 0.00019999195322322922, + "loss": 5.1339, + "step": 27100 + }, + { + "epoch": 0.05066491874252098, + "grad_norm": 1.3736001253128052, + "learning_rate": 0.00019999189984226814, + "loss": 4.8656, + "step": 27150 + }, + { + "epoch": 0.05075822430189947, + "grad_norm": 1.404284954071045, + "learning_rate": 0.00019999184628483892, + "loss": 4.8601, + "step": 27200 + }, + { + "epoch": 0.05085152986127796, + "grad_norm": 1.566685676574707, + "learning_rate": 0.0001999917925509417, + "loss": 4.9994, + "step": 27250 + }, + { + "epoch": 0.05094483542065645, + "grad_norm": 1.4436407089233398, + "learning_rate": 0.00019999173864057658, + "loss": 4.9391, + "step": 27300 + }, + { + "epoch": 0.051038140980034945, + "grad_norm": 1.1985220909118652, + "learning_rate": 0.0001999916845537436, + "loss": 4.9497, + "step": 27350 + }, + { + "epoch": 0.051131446539413435, + "grad_norm": 1.4295037984848022, + "learning_rate": 0.00019999163029044292, + "loss": 5.0714, + "step": 27400 + }, + { + "epoch": 0.051224752098791924, + "grad_norm": 1.8160730600357056, + "learning_rate": 0.00019999157585067458, + "loss": 4.9091, + "step": 27450 + }, + { + "epoch": 0.05131805765817042, + "grad_norm": 1.26568603515625, + "learning_rate": 0.00019999152123443872, + "loss": 5.0946, + "step": 27500 + }, + { + "epoch": 0.05141136321754891, + "grad_norm": 1.7634291648864746, + "learning_rate": 0.00019999146644173542, + "loss": 4.6454, + "step": 27550 + }, + { + "epoch": 0.0515046687769274, + "grad_norm": 1.3475509881973267, + "learning_rate": 0.00019999141147256476, + "loss": 4.9264, + "step": 27600 + }, + { + "epoch": 0.05159797433630589, + "grad_norm": 1.6052281856536865, + "learning_rate": 0.00019999135632692687, + "loss": 5.0712, + "step": 27650 + }, + { + "epoch": 0.05169127989568439, + "grad_norm": 1.3022325038909912, + "learning_rate": 0.00019999130100482182, + "loss": 5.0866, + "step": 27700 + }, + { + "epoch": 0.05178458545506288, + "grad_norm": 1.5663292407989502, + "learning_rate": 0.00019999124550624972, + "loss": 5.2636, + "step": 27750 + }, + { + "epoch": 0.05187789101444137, + "grad_norm": 1.6265084743499756, + "learning_rate": 0.00019999118983121065, + "loss": 5.0156, + "step": 27800 + }, + { + "epoch": 0.05197119657381986, + "grad_norm": 1.2428834438323975, + "learning_rate": 0.00019999113397970475, + "loss": 5.0459, + "step": 27850 + }, + { + "epoch": 0.052064502133198354, + "grad_norm": 1.7661765813827515, + "learning_rate": 0.0001999910779517321, + "loss": 4.9641, + "step": 27900 + }, + { + "epoch": 0.052157807692576844, + "grad_norm": 1.2354525327682495, + "learning_rate": 0.00019999102174729277, + "loss": 4.8599, + "step": 27950 + }, + { + "epoch": 0.05225111325195533, + "grad_norm": 1.1708762645721436, + "learning_rate": 0.0001999909653663869, + "loss": 4.8535, + "step": 28000 + }, + { + "epoch": 0.05234441881133382, + "grad_norm": 1.4422293901443481, + "learning_rate": 0.00019999090880901455, + "loss": 4.8957, + "step": 28050 + }, + { + "epoch": 0.05243772437071232, + "grad_norm": 1.058984637260437, + "learning_rate": 0.00019999085207517584, + "loss": 5.0249, + "step": 28100 + }, + { + "epoch": 0.05253102993009081, + "grad_norm": 1.315977692604065, + "learning_rate": 0.00019999079516487087, + "loss": 5.0057, + "step": 28150 + }, + { + "epoch": 0.0526243354894693, + "grad_norm": 1.4370442628860474, + "learning_rate": 0.00019999073807809974, + "loss": 5.0101, + "step": 28200 + }, + { + "epoch": 0.05271764104884779, + "grad_norm": 1.54827880859375, + "learning_rate": 0.00019999068081486256, + "loss": 5.054, + "step": 28250 + }, + { + "epoch": 0.052810946608226286, + "grad_norm": 1.4660943746566772, + "learning_rate": 0.00019999062337515945, + "loss": 5.1078, + "step": 28300 + }, + { + "epoch": 0.052904252167604776, + "grad_norm": 1.483725666999817, + "learning_rate": 0.00019999056575899043, + "loss": 4.935, + "step": 28350 + }, + { + "epoch": 0.052997557726983266, + "grad_norm": 1.4430571794509888, + "learning_rate": 0.00019999050796635568, + "loss": 4.9998, + "step": 28400 + }, + { + "epoch": 0.05309086328636176, + "grad_norm": 1.6983498334884644, + "learning_rate": 0.00019999044999725527, + "loss": 5.2524, + "step": 28450 + }, + { + "epoch": 0.05318416884574025, + "grad_norm": 1.6908702850341797, + "learning_rate": 0.0001999903918516893, + "loss": 5.0378, + "step": 28500 + }, + { + "epoch": 0.05327747440511874, + "grad_norm": 1.5972379446029663, + "learning_rate": 0.00019999033352965793, + "loss": 4.9017, + "step": 28550 + }, + { + "epoch": 0.05337077996449723, + "grad_norm": 1.5607515573501587, + "learning_rate": 0.00019999027503116116, + "loss": 4.9577, + "step": 28600 + }, + { + "epoch": 0.05346408552387573, + "grad_norm": 1.344556450843811, + "learning_rate": 0.00019999021635619917, + "loss": 5.0421, + "step": 28650 + }, + { + "epoch": 0.05355739108325422, + "grad_norm": 1.3197835683822632, + "learning_rate": 0.00019999015750477207, + "loss": 5.1273, + "step": 28700 + }, + { + "epoch": 0.05365069664263271, + "grad_norm": 1.4137849807739258, + "learning_rate": 0.00019999009847687988, + "loss": 4.8728, + "step": 28750 + }, + { + "epoch": 0.0537440022020112, + "grad_norm": 1.2608588933944702, + "learning_rate": 0.00019999003927252278, + "loss": 5.0248, + "step": 28800 + }, + { + "epoch": 0.053837307761389695, + "grad_norm": 1.033444881439209, + "learning_rate": 0.00019998997989170087, + "loss": 5.1288, + "step": 28850 + }, + { + "epoch": 0.053930613320768185, + "grad_norm": 1.6949776411056519, + "learning_rate": 0.00019998992033441423, + "loss": 5.0369, + "step": 28900 + }, + { + "epoch": 0.054023918880146675, + "grad_norm": 1.2738184928894043, + "learning_rate": 0.00019998986060066297, + "loss": 5.0106, + "step": 28950 + }, + { + "epoch": 0.054117224439525165, + "grad_norm": 1.1779210567474365, + "learning_rate": 0.0001999898006904472, + "loss": 4.9383, + "step": 29000 + }, + { + "epoch": 0.05421052999890366, + "grad_norm": 1.4241044521331787, + "learning_rate": 0.00019998974060376706, + "loss": 5.1722, + "step": 29050 + }, + { + "epoch": 0.05430383555828215, + "grad_norm": 1.421637773513794, + "learning_rate": 0.00019998968034062257, + "loss": 5.1138, + "step": 29100 + }, + { + "epoch": 0.05439714111766064, + "grad_norm": 1.2639997005462646, + "learning_rate": 0.00019998961990101393, + "loss": 5.1452, + "step": 29150 + }, + { + "epoch": 0.05449044667703914, + "grad_norm": 1.241308569908142, + "learning_rate": 0.0001999895592849412, + "loss": 5.0645, + "step": 29200 + }, + { + "epoch": 0.05458375223641763, + "grad_norm": 1.6818753480911255, + "learning_rate": 0.00019998949849240444, + "loss": 5.2461, + "step": 29250 + }, + { + "epoch": 0.05467705779579612, + "grad_norm": 0.8954434990882874, + "learning_rate": 0.00019998943752340387, + "loss": 4.9691, + "step": 29300 + }, + { + "epoch": 0.05477036335517461, + "grad_norm": 1.670721411705017, + "learning_rate": 0.0001999893763779395, + "loss": 5.0303, + "step": 29350 + }, + { + "epoch": 0.054863668914553104, + "grad_norm": 1.2590948343276978, + "learning_rate": 0.00019998931505601147, + "loss": 4.961, + "step": 29400 + }, + { + "epoch": 0.054956974473931594, + "grad_norm": 1.306104302406311, + "learning_rate": 0.00019998925355761994, + "loss": 4.889, + "step": 29450 + }, + { + "epoch": 0.055050280033310084, + "grad_norm": 1.4491585493087769, + "learning_rate": 0.00019998919188276494, + "loss": 4.8524, + "step": 29500 + }, + { + "epoch": 0.055143585592688574, + "grad_norm": 1.5823389291763306, + "learning_rate": 0.0001999891300314466, + "loss": 4.994, + "step": 29550 + }, + { + "epoch": 0.05523689115206707, + "grad_norm": 1.5305702686309814, + "learning_rate": 0.00019998906800366506, + "loss": 4.8367, + "step": 29600 + }, + { + "epoch": 0.05533019671144556, + "grad_norm": 1.538707971572876, + "learning_rate": 0.0001999890057994204, + "loss": 4.9698, + "step": 29650 + }, + { + "epoch": 0.05542350227082405, + "grad_norm": 1.4228105545043945, + "learning_rate": 0.00019998894341871274, + "loss": 5.1362, + "step": 29700 + }, + { + "epoch": 0.05551680783020254, + "grad_norm": 1.1229313611984253, + "learning_rate": 0.00019998888086154217, + "loss": 4.9158, + "step": 29750 + }, + { + "epoch": 0.05561011338958104, + "grad_norm": 1.2107417583465576, + "learning_rate": 0.00019998881812790883, + "loss": 5.0441, + "step": 29800 + }, + { + "epoch": 0.05570341894895953, + "grad_norm": 1.2901204824447632, + "learning_rate": 0.00019998875521781283, + "loss": 5.0762, + "step": 29850 + }, + { + "epoch": 0.055796724508338016, + "grad_norm": 1.3414353132247925, + "learning_rate": 0.00019998869213125428, + "loss": 4.9442, + "step": 29900 + }, + { + "epoch": 0.055890030067716506, + "grad_norm": 1.3812546730041504, + "learning_rate": 0.00019998862886823327, + "loss": 4.8799, + "step": 29950 + }, + { + "epoch": 0.055983335627095, + "grad_norm": 1.2533543109893799, + "learning_rate": 0.00019998856542874992, + "loss": 4.8482, + "step": 30000 + }, + { + "epoch": 0.055983335627095, + "eval_loss": 5.270205020904541, + "eval_runtime": 231.0564, + "eval_samples_per_second": 11.287, + "eval_steps_per_second": 11.287, + "eval_tts_loss": 7.119214073906258, + "step": 30000 + }, + { + "epoch": 0.05607664118647349, + "grad_norm": 1.4817043542861938, + "learning_rate": 0.00019998850181280432, + "loss": 4.9294, + "step": 30050 + }, + { + "epoch": 0.05616994674585198, + "grad_norm": 1.3725059032440186, + "learning_rate": 0.00019998843802039663, + "loss": 4.7408, + "step": 30100 + }, + { + "epoch": 0.05626325230523048, + "grad_norm": 1.1192152500152588, + "learning_rate": 0.00019998837405152696, + "loss": 4.975, + "step": 30150 + }, + { + "epoch": 0.05635655786460897, + "grad_norm": 1.4426796436309814, + "learning_rate": 0.0001999883099061954, + "loss": 5.1216, + "step": 30200 + }, + { + "epoch": 0.05644986342398746, + "grad_norm": 1.060693383216858, + "learning_rate": 0.00019998824558440203, + "loss": 4.8414, + "step": 30250 + }, + { + "epoch": 0.05654316898336595, + "grad_norm": 1.2075954675674438, + "learning_rate": 0.00019998818108614704, + "loss": 4.8929, + "step": 30300 + }, + { + "epoch": 0.056636474542744446, + "grad_norm": 1.2428367137908936, + "learning_rate": 0.00019998811641143047, + "loss": 5.1015, + "step": 30350 + }, + { + "epoch": 0.056729780102122936, + "grad_norm": 1.2443081140518188, + "learning_rate": 0.0001999880515602525, + "loss": 5.0629, + "step": 30400 + }, + { + "epoch": 0.056823085661501425, + "grad_norm": 1.4452126026153564, + "learning_rate": 0.0001999879865326132, + "loss": 4.942, + "step": 30450 + }, + { + "epoch": 0.056916391220879915, + "grad_norm": 0.8212096691131592, + "learning_rate": 0.0001999879213285127, + "loss": 4.9346, + "step": 30500 + }, + { + "epoch": 0.05700969678025841, + "grad_norm": 1.2501893043518066, + "learning_rate": 0.0001999878559479511, + "loss": 4.8447, + "step": 30550 + }, + { + "epoch": 0.0571030023396369, + "grad_norm": 1.0458223819732666, + "learning_rate": 0.00019998779039092854, + "loss": 4.929, + "step": 30600 + }, + { + "epoch": 0.05719630789901539, + "grad_norm": 1.5575039386749268, + "learning_rate": 0.0001999877246574451, + "loss": 5.0728, + "step": 30650 + }, + { + "epoch": 0.05728961345839388, + "grad_norm": 0.9992212057113647, + "learning_rate": 0.00019998765874750096, + "loss": 5.0842, + "step": 30700 + }, + { + "epoch": 0.05738291901777238, + "grad_norm": 1.637300729751587, + "learning_rate": 0.00019998759266109617, + "loss": 4.9798, + "step": 30750 + }, + { + "epoch": 0.05747622457715087, + "grad_norm": 1.372078537940979, + "learning_rate": 0.00019998752639823088, + "loss": 5.1645, + "step": 30800 + }, + { + "epoch": 0.05756953013652936, + "grad_norm": 1.0558440685272217, + "learning_rate": 0.0001999874599589052, + "loss": 4.833, + "step": 30850 + }, + { + "epoch": 0.057662835695907855, + "grad_norm": 1.3635761737823486, + "learning_rate": 0.00019998739334311922, + "loss": 4.7573, + "step": 30900 + }, + { + "epoch": 0.057756141255286345, + "grad_norm": 1.2139973640441895, + "learning_rate": 0.00019998732655087312, + "loss": 5.1266, + "step": 30950 + }, + { + "epoch": 0.057849446814664834, + "grad_norm": 1.2504178285598755, + "learning_rate": 0.00019998725958216695, + "loss": 5.1571, + "step": 31000 + }, + { + "epoch": 0.057942752374043324, + "grad_norm": 1.4248679876327515, + "learning_rate": 0.00019998719243700087, + "loss": 5.1027, + "step": 31050 + }, + { + "epoch": 0.05803605793342182, + "grad_norm": 4.532711029052734, + "learning_rate": 0.000199987125115375, + "loss": 5.1239, + "step": 31100 + }, + { + "epoch": 0.05812936349280031, + "grad_norm": 1.717026710510254, + "learning_rate": 0.00019998705761728943, + "loss": 5.1622, + "step": 31150 + }, + { + "epoch": 0.0582226690521788, + "grad_norm": 1.3576337099075317, + "learning_rate": 0.00019998698994274435, + "loss": 4.9084, + "step": 31200 + }, + { + "epoch": 0.05831597461155729, + "grad_norm": 1.2413636445999146, + "learning_rate": 0.00019998692209173977, + "loss": 4.791, + "step": 31250 + }, + { + "epoch": 0.05840928017093579, + "grad_norm": 1.3183064460754395, + "learning_rate": 0.00019998685406427588, + "loss": 5.1716, + "step": 31300 + }, + { + "epoch": 0.05850258573031428, + "grad_norm": 1.0643527507781982, + "learning_rate": 0.0001999867858603528, + "loss": 4.8196, + "step": 31350 + }, + { + "epoch": 0.05859589128969277, + "grad_norm": 1.1903074979782104, + "learning_rate": 0.00019998671747997062, + "loss": 4.8136, + "step": 31400 + }, + { + "epoch": 0.05868919684907126, + "grad_norm": 1.2945053577423096, + "learning_rate": 0.00019998664892312948, + "loss": 4.9986, + "step": 31450 + }, + { + "epoch": 0.058782502408449754, + "grad_norm": 1.5691232681274414, + "learning_rate": 0.0001999865801898295, + "loss": 5.0726, + "step": 31500 + }, + { + "epoch": 0.05887580796782824, + "grad_norm": 1.3560112714767456, + "learning_rate": 0.0001999865112800708, + "loss": 4.9443, + "step": 31550 + }, + { + "epoch": 0.05896911352720673, + "grad_norm": 1.1290074586868286, + "learning_rate": 0.0001999864421938535, + "loss": 4.9773, + "step": 31600 + }, + { + "epoch": 0.05906241908658522, + "grad_norm": 1.4924343824386597, + "learning_rate": 0.00019998637293117773, + "loss": 5.1788, + "step": 31650 + }, + { + "epoch": 0.05915572464596372, + "grad_norm": 1.2043700218200684, + "learning_rate": 0.0001999863034920436, + "loss": 4.8067, + "step": 31700 + }, + { + "epoch": 0.05924903020534221, + "grad_norm": 1.2821531295776367, + "learning_rate": 0.00019998623387645122, + "loss": 5.01, + "step": 31750 + }, + { + "epoch": 0.0593423357647207, + "grad_norm": 0.9705774188041687, + "learning_rate": 0.00019998616408440075, + "loss": 5.0704, + "step": 31800 + }, + { + "epoch": 0.059435641324099196, + "grad_norm": 1.632771372795105, + "learning_rate": 0.00019998609411589228, + "loss": 5.1367, + "step": 31850 + }, + { + "epoch": 0.059528946883477686, + "grad_norm": 1.5198719501495361, + "learning_rate": 0.00019998602397092597, + "loss": 4.806, + "step": 31900 + }, + { + "epoch": 0.059622252442856176, + "grad_norm": 1.2326031923294067, + "learning_rate": 0.00019998595364950191, + "loss": 4.9858, + "step": 31950 + }, + { + "epoch": 0.059715558002234666, + "grad_norm": 1.4651892185211182, + "learning_rate": 0.00019998588315162018, + "loss": 4.9011, + "step": 32000 + }, + { + "epoch": 0.05980886356161316, + "grad_norm": 1.704825520515442, + "learning_rate": 0.00019998581247728103, + "loss": 5.1689, + "step": 32050 + }, + { + "epoch": 0.05990216912099165, + "grad_norm": 1.4185510873794556, + "learning_rate": 0.0001999857416264845, + "loss": 4.984, + "step": 32100 + }, + { + "epoch": 0.05999547468037014, + "grad_norm": 1.7740285396575928, + "learning_rate": 0.00019998567059923073, + "loss": 5.0549, + "step": 32150 + }, + { + "epoch": 0.06008878023974863, + "grad_norm": 1.117332100868225, + "learning_rate": 0.00019998559939551983, + "loss": 4.9824, + "step": 32200 + }, + { + "epoch": 0.06018208579912713, + "grad_norm": 1.4967530965805054, + "learning_rate": 0.00019998552801535192, + "loss": 5.0119, + "step": 32250 + }, + { + "epoch": 0.06027539135850562, + "grad_norm": 1.1387840509414673, + "learning_rate": 0.0001999854564587272, + "loss": 5.0042, + "step": 32300 + }, + { + "epoch": 0.06036869691788411, + "grad_norm": 1.2850226163864136, + "learning_rate": 0.0001999853847256457, + "loss": 4.7995, + "step": 32350 + }, + { + "epoch": 0.0604620024772626, + "grad_norm": 1.3279659748077393, + "learning_rate": 0.00019998531281610762, + "loss": 4.9891, + "step": 32400 + }, + { + "epoch": 0.060555308036641095, + "grad_norm": 1.3600449562072754, + "learning_rate": 0.00019998524073011305, + "loss": 4.9334, + "step": 32450 + }, + { + "epoch": 0.060648613596019585, + "grad_norm": 1.053346872329712, + "learning_rate": 0.0001999851684676621, + "loss": 4.8668, + "step": 32500 + }, + { + "epoch": 0.060741919155398075, + "grad_norm": 1.311558485031128, + "learning_rate": 0.00019998509602875495, + "loss": 4.9149, + "step": 32550 + }, + { + "epoch": 0.06083522471477657, + "grad_norm": 1.0660821199417114, + "learning_rate": 0.00019998502341339166, + "loss": 4.8516, + "step": 32600 + }, + { + "epoch": 0.06092853027415506, + "grad_norm": 1.450120210647583, + "learning_rate": 0.00019998495062157243, + "loss": 5.0558, + "step": 32650 + }, + { + "epoch": 0.06102183583353355, + "grad_norm": 1.1899629831314087, + "learning_rate": 0.00019998487765329734, + "loss": 4.9774, + "step": 32700 + }, + { + "epoch": 0.06111514139291204, + "grad_norm": 1.3648253679275513, + "learning_rate": 0.00019998480450856654, + "loss": 4.9348, + "step": 32750 + }, + { + "epoch": 0.06120844695229054, + "grad_norm": 1.1677742004394531, + "learning_rate": 0.00019998473118738015, + "loss": 4.9243, + "step": 32800 + }, + { + "epoch": 0.06130175251166903, + "grad_norm": 1.0506433248519897, + "learning_rate": 0.0001999846576897383, + "loss": 4.9175, + "step": 32850 + }, + { + "epoch": 0.06139505807104752, + "grad_norm": 1.6663371324539185, + "learning_rate": 0.00019998458401564113, + "loss": 5.0618, + "step": 32900 + }, + { + "epoch": 0.06148836363042601, + "grad_norm": 1.1988695859909058, + "learning_rate": 0.00019998451016508875, + "loss": 4.9605, + "step": 32950 + }, + { + "epoch": 0.061581669189804504, + "grad_norm": 1.0045593976974487, + "learning_rate": 0.0001999844361380813, + "loss": 4.9856, + "step": 33000 + }, + { + "epoch": 0.061581669189804504, + "eval_loss": 5.222363471984863, + "eval_runtime": 230.7018, + "eval_samples_per_second": 11.305, + "eval_steps_per_second": 11.305, + "eval_tts_loss": 7.172317200288908, + "step": 33000 + }, + { + "epoch": 0.061674974749182994, + "grad_norm": 1.099878191947937, + "learning_rate": 0.00019998436193461894, + "loss": 4.8131, + "step": 33050 + }, + { + "epoch": 0.061768280308561484, + "grad_norm": 1.5296263694763184, + "learning_rate": 0.00019998428755470176, + "loss": 4.8085, + "step": 33100 + }, + { + "epoch": 0.061861585867939974, + "grad_norm": 1.1066418886184692, + "learning_rate": 0.0001999842129983299, + "loss": 4.9998, + "step": 33150 + }, + { + "epoch": 0.06195489142731847, + "grad_norm": 1.0647575855255127, + "learning_rate": 0.00019998413826550349, + "loss": 4.9304, + "step": 33200 + }, + { + "epoch": 0.06204819698669696, + "grad_norm": 1.4591343402862549, + "learning_rate": 0.0001999840633562227, + "loss": 4.7926, + "step": 33250 + }, + { + "epoch": 0.06214150254607545, + "grad_norm": 1.5085718631744385, + "learning_rate": 0.0001999839882704876, + "loss": 4.9535, + "step": 33300 + }, + { + "epoch": 0.06223480810545394, + "grad_norm": 1.4653303623199463, + "learning_rate": 0.00019998391300829834, + "loss": 5.0559, + "step": 33350 + }, + { + "epoch": 0.06232811366483244, + "grad_norm": 1.4487497806549072, + "learning_rate": 0.00019998383756965508, + "loss": 4.7687, + "step": 33400 + }, + { + "epoch": 0.062421419224210926, + "grad_norm": 1.0603256225585938, + "learning_rate": 0.00019998376195455797, + "loss": 5.0553, + "step": 33450 + }, + { + "epoch": 0.06251472478358942, + "grad_norm": 1.2172232866287231, + "learning_rate": 0.0001999836861630071, + "loss": 5.016, + "step": 33500 + }, + { + "epoch": 0.06260803034296791, + "grad_norm": 1.0388457775115967, + "learning_rate": 0.0001999836101950026, + "loss": 4.9595, + "step": 33550 + }, + { + "epoch": 0.0627013359023464, + "grad_norm": 1.064448595046997, + "learning_rate": 0.00019998353405054463, + "loss": 4.9284, + "step": 33600 + }, + { + "epoch": 0.06279464146172489, + "grad_norm": 1.3554415702819824, + "learning_rate": 0.0001999834577296333, + "loss": 4.9094, + "step": 33650 + }, + { + "epoch": 0.06288794702110338, + "grad_norm": 1.323470950126648, + "learning_rate": 0.00019998338123226878, + "loss": 5.2709, + "step": 33700 + }, + { + "epoch": 0.06298125258048187, + "grad_norm": 1.378735899925232, + "learning_rate": 0.00019998330455845114, + "loss": 5.002, + "step": 33750 + }, + { + "epoch": 0.06307455813986036, + "grad_norm": 1.3667746782302856, + "learning_rate": 0.00019998322770818064, + "loss": 5.029, + "step": 33800 + }, + { + "epoch": 0.06316786369923887, + "grad_norm": 1.2016565799713135, + "learning_rate": 0.0001999831506814573, + "loss": 4.7827, + "step": 33850 + }, + { + "epoch": 0.06326116925861736, + "grad_norm": 1.6369880437850952, + "learning_rate": 0.00019998307347828128, + "loss": 4.9345, + "step": 33900 + }, + { + "epoch": 0.06335447481799585, + "grad_norm": 1.6404528617858887, + "learning_rate": 0.00019998299609865272, + "loss": 4.9889, + "step": 33950 + }, + { + "epoch": 0.06344778037737434, + "grad_norm": 1.322618007659912, + "learning_rate": 0.00019998291854257175, + "loss": 4.9924, + "step": 34000 + }, + { + "epoch": 0.06354108593675283, + "grad_norm": 1.2779415845870972, + "learning_rate": 0.00019998284081003856, + "loss": 4.7519, + "step": 34050 + }, + { + "epoch": 0.06363439149613132, + "grad_norm": 1.0011979341506958, + "learning_rate": 0.00019998276290105325, + "loss": 5.0784, + "step": 34100 + }, + { + "epoch": 0.0637276970555098, + "grad_norm": 1.2990458011627197, + "learning_rate": 0.00019998268481561594, + "loss": 4.85, + "step": 34150 + }, + { + "epoch": 0.0638210026148883, + "grad_norm": 0.8773101568222046, + "learning_rate": 0.00019998260655372678, + "loss": 4.9803, + "step": 34200 + }, + { + "epoch": 0.0639143081742668, + "grad_norm": 0.890830934047699, + "learning_rate": 0.00019998252811538595, + "loss": 5.0072, + "step": 34250 + }, + { + "epoch": 0.06400761373364529, + "grad_norm": 1.118335485458374, + "learning_rate": 0.00019998244950059348, + "loss": 4.9289, + "step": 34300 + }, + { + "epoch": 0.06410091929302378, + "grad_norm": 1.284605860710144, + "learning_rate": 0.00019998237070934964, + "loss": 5.1104, + "step": 34350 + }, + { + "epoch": 0.06419422485240227, + "grad_norm": 1.3694264888763428, + "learning_rate": 0.00019998229174165446, + "loss": 5.0689, + "step": 34400 + }, + { + "epoch": 0.06428753041178076, + "grad_norm": 1.252570390701294, + "learning_rate": 0.00019998221259750814, + "loss": 4.9514, + "step": 34450 + }, + { + "epoch": 0.06438083597115925, + "grad_norm": 1.339966893196106, + "learning_rate": 0.00019998213327691085, + "loss": 5.0985, + "step": 34500 + }, + { + "epoch": 0.06447414153053774, + "grad_norm": 1.0073368549346924, + "learning_rate": 0.00019998205377986267, + "loss": 4.7595, + "step": 34550 + }, + { + "epoch": 0.06456744708991624, + "grad_norm": 1.4048619270324707, + "learning_rate": 0.00019998197410636372, + "loss": 4.9045, + "step": 34600 + }, + { + "epoch": 0.06466075264929473, + "grad_norm": 0.9788991808891296, + "learning_rate": 0.00019998189425641423, + "loss": 4.4886, + "step": 34650 + }, + { + "epoch": 0.06475405820867322, + "grad_norm": 1.4313668012619019, + "learning_rate": 0.00019998181423001423, + "loss": 4.7508, + "step": 34700 + }, + { + "epoch": 0.06484736376805171, + "grad_norm": 1.3244096040725708, + "learning_rate": 0.00019998173402716396, + "loss": 5.0475, + "step": 34750 + }, + { + "epoch": 0.0649406693274302, + "grad_norm": 1.0660874843597412, + "learning_rate": 0.00019998165364786352, + "loss": 4.9674, + "step": 34800 + }, + { + "epoch": 0.06503397488680869, + "grad_norm": 1.2351161241531372, + "learning_rate": 0.00019998157309211305, + "loss": 5.0454, + "step": 34850 + }, + { + "epoch": 0.06512728044618718, + "grad_norm": 1.4965345859527588, + "learning_rate": 0.00019998149235991267, + "loss": 4.8981, + "step": 34900 + }, + { + "epoch": 0.06522058600556567, + "grad_norm": 1.113448143005371, + "learning_rate": 0.00019998141145126258, + "loss": 5.1095, + "step": 34950 + }, + { + "epoch": 0.06531389156494417, + "grad_norm": 1.1176252365112305, + "learning_rate": 0.0001999813303661629, + "loss": 5.0817, + "step": 35000 + }, + { + "epoch": 0.06540719712432266, + "grad_norm": 1.1764382123947144, + "learning_rate": 0.00019998124910461376, + "loss": 4.8815, + "step": 35050 + }, + { + "epoch": 0.06550050268370115, + "grad_norm": 1.3247041702270508, + "learning_rate": 0.00019998116766661527, + "loss": 4.7398, + "step": 35100 + }, + { + "epoch": 0.06559380824307964, + "grad_norm": 1.155849575996399, + "learning_rate": 0.00019998108605216762, + "loss": 4.7499, + "step": 35150 + }, + { + "epoch": 0.06568711380245813, + "grad_norm": 1.5121906995773315, + "learning_rate": 0.000199981004261271, + "loss": 4.8656, + "step": 35200 + }, + { + "epoch": 0.06578041936183662, + "grad_norm": 1.2283687591552734, + "learning_rate": 0.00019998092229392544, + "loss": 5.0256, + "step": 35250 + }, + { + "epoch": 0.06587372492121511, + "grad_norm": 1.1128344535827637, + "learning_rate": 0.00019998084015013115, + "loss": 5.0056, + "step": 35300 + }, + { + "epoch": 0.06596703048059362, + "grad_norm": 0.7951078414916992, + "learning_rate": 0.0001999807578298883, + "loss": 4.8792, + "step": 35350 + }, + { + "epoch": 0.0660603360399721, + "grad_norm": 1.197862148284912, + "learning_rate": 0.000199980675333197, + "loss": 4.7815, + "step": 35400 + }, + { + "epoch": 0.0661536415993506, + "grad_norm": 1.3143230676651, + "learning_rate": 0.0001999805926600574, + "loss": 4.8473, + "step": 35450 + }, + { + "epoch": 0.06624694715872909, + "grad_norm": 1.100462555885315, + "learning_rate": 0.00019998050981046963, + "loss": 5.1148, + "step": 35500 + }, + { + "epoch": 0.06634025271810758, + "grad_norm": 0.8429204225540161, + "learning_rate": 0.00019998042678443385, + "loss": 5.0559, + "step": 35550 + }, + { + "epoch": 0.06643355827748607, + "grad_norm": 1.61745023727417, + "learning_rate": 0.0001999803435819502, + "loss": 4.8, + "step": 35600 + }, + { + "epoch": 0.06652686383686456, + "grad_norm": 1.1721575260162354, + "learning_rate": 0.00019998026020301887, + "loss": 4.746, + "step": 35650 + }, + { + "epoch": 0.06662016939624305, + "grad_norm": 1.6032483577728271, + "learning_rate": 0.00019998017664763995, + "loss": 5.0657, + "step": 35700 + }, + { + "epoch": 0.06671347495562155, + "grad_norm": 1.3949735164642334, + "learning_rate": 0.0001999800929158136, + "loss": 4.8643, + "step": 35750 + }, + { + "epoch": 0.06680678051500004, + "grad_norm": 1.1598511934280396, + "learning_rate": 0.00019998000900754001, + "loss": 4.8064, + "step": 35800 + }, + { + "epoch": 0.06690008607437853, + "grad_norm": 1.3976051807403564, + "learning_rate": 0.00019997992492281927, + "loss": 4.9548, + "step": 35850 + }, + { + "epoch": 0.06699339163375702, + "grad_norm": 1.2537288665771484, + "learning_rate": 0.0001999798406616516, + "loss": 5.0992, + "step": 35900 + }, + { + "epoch": 0.06708669719313551, + "grad_norm": 0.9580097198486328, + "learning_rate": 0.00019997975622403704, + "loss": 4.7681, + "step": 35950 + }, + { + "epoch": 0.067180002752514, + "grad_norm": 1.2984302043914795, + "learning_rate": 0.0001999796716099758, + "loss": 4.7869, + "step": 36000 + }, + { + "epoch": 0.067180002752514, + "eval_loss": 5.1954779624938965, + "eval_runtime": 233.7952, + "eval_samples_per_second": 11.155, + "eval_steps_per_second": 11.155, + "eval_tts_loss": 7.21724026617337, + "step": 36000 + }, + { + "epoch": 0.06727330831189249, + "grad_norm": 1.1834443807601929, + "learning_rate": 0.00019997958681946808, + "loss": 4.9615, + "step": 36050 + }, + { + "epoch": 0.06736661387127099, + "grad_norm": 1.4487770795822144, + "learning_rate": 0.00019997950185251398, + "loss": 4.9015, + "step": 36100 + }, + { + "epoch": 0.06745991943064948, + "grad_norm": 1.003225326538086, + "learning_rate": 0.00019997941670911362, + "loss": 5.1782, + "step": 36150 + }, + { + "epoch": 0.06755322499002797, + "grad_norm": 1.2664480209350586, + "learning_rate": 0.0001999793313892672, + "loss": 4.8188, + "step": 36200 + }, + { + "epoch": 0.06764653054940646, + "grad_norm": 0.9945356845855713, + "learning_rate": 0.00019997924589297482, + "loss": 5.017, + "step": 36250 + }, + { + "epoch": 0.06773983610878495, + "grad_norm": 1.232276201248169, + "learning_rate": 0.00019997916022023674, + "loss": 4.8427, + "step": 36300 + }, + { + "epoch": 0.06783314166816344, + "grad_norm": 1.5773524045944214, + "learning_rate": 0.00019997907437105297, + "loss": 4.8048, + "step": 36350 + }, + { + "epoch": 0.06792644722754193, + "grad_norm": 1.3550626039505005, + "learning_rate": 0.00019997898834542372, + "loss": 4.9573, + "step": 36400 + }, + { + "epoch": 0.06801975278692042, + "grad_norm": 1.5464149713516235, + "learning_rate": 0.0001999789021433492, + "loss": 4.9219, + "step": 36450 + }, + { + "epoch": 0.06811305834629892, + "grad_norm": 1.3397859334945679, + "learning_rate": 0.00019997881576482945, + "loss": 5.1099, + "step": 36500 + }, + { + "epoch": 0.06820636390567741, + "grad_norm": 1.3835015296936035, + "learning_rate": 0.0001999787292098647, + "loss": 4.9419, + "step": 36550 + }, + { + "epoch": 0.0682996694650559, + "grad_norm": 0.9244956374168396, + "learning_rate": 0.0001999786424784551, + "loss": 5.0481, + "step": 36600 + }, + { + "epoch": 0.0683929750244344, + "grad_norm": 1.0417383909225464, + "learning_rate": 0.00019997855557060077, + "loss": 4.8458, + "step": 36650 + }, + { + "epoch": 0.06848628058381288, + "grad_norm": 1.2598869800567627, + "learning_rate": 0.0001999784684863019, + "loss": 4.8585, + "step": 36700 + }, + { + "epoch": 0.06857958614319137, + "grad_norm": 1.0697765350341797, + "learning_rate": 0.00019997838122555862, + "loss": 4.7396, + "step": 36750 + }, + { + "epoch": 0.06867289170256986, + "grad_norm": 1.1050491333007812, + "learning_rate": 0.0001999782937883711, + "loss": 4.8543, + "step": 36800 + }, + { + "epoch": 0.06876619726194837, + "grad_norm": 1.0602041482925415, + "learning_rate": 0.00019997820617473946, + "loss": 4.8054, + "step": 36850 + }, + { + "epoch": 0.06885950282132686, + "grad_norm": 1.0901793241500854, + "learning_rate": 0.0001999781183846639, + "loss": 4.943, + "step": 36900 + }, + { + "epoch": 0.06895280838070535, + "grad_norm": 1.1608269214630127, + "learning_rate": 0.00019997803041814453, + "loss": 4.8669, + "step": 36950 + }, + { + "epoch": 0.06904611394008384, + "grad_norm": 1.428856611251831, + "learning_rate": 0.00019997794227518152, + "loss": 4.9257, + "step": 37000 + }, + { + "epoch": 0.06913941949946233, + "grad_norm": 1.2141627073287964, + "learning_rate": 0.00019997785395577507, + "loss": 4.8884, + "step": 37050 + }, + { + "epoch": 0.06923272505884082, + "grad_norm": 0.917578935623169, + "learning_rate": 0.00019997776545992526, + "loss": 4.7185, + "step": 37100 + }, + { + "epoch": 0.0693260306182193, + "grad_norm": 1.1432592868804932, + "learning_rate": 0.00019997767678763234, + "loss": 5.1003, + "step": 37150 + }, + { + "epoch": 0.0694193361775978, + "grad_norm": 1.5270577669143677, + "learning_rate": 0.00019997758793889635, + "loss": 4.7651, + "step": 37200 + }, + { + "epoch": 0.0695126417369763, + "grad_norm": 0.8794492483139038, + "learning_rate": 0.00019997749891371752, + "loss": 4.9522, + "step": 37250 + }, + { + "epoch": 0.06960594729635479, + "grad_norm": 1.5430872440338135, + "learning_rate": 0.000199977409712096, + "loss": 4.8259, + "step": 37300 + }, + { + "epoch": 0.06969925285573328, + "grad_norm": 1.2974966764450073, + "learning_rate": 0.000199977320334032, + "loss": 5.0415, + "step": 37350 + }, + { + "epoch": 0.06979255841511177, + "grad_norm": 1.3754868507385254, + "learning_rate": 0.00019997723077952556, + "loss": 4.9111, + "step": 37400 + }, + { + "epoch": 0.06988586397449026, + "grad_norm": 1.1165885925292969, + "learning_rate": 0.00019997714104857693, + "loss": 5.0136, + "step": 37450 + }, + { + "epoch": 0.06997916953386875, + "grad_norm": 1.127763032913208, + "learning_rate": 0.0001999770511411862, + "loss": 4.6376, + "step": 37500 + }, + { + "epoch": 0.07007247509324724, + "grad_norm": 1.1348410844802856, + "learning_rate": 0.00019997696105735358, + "loss": 4.7574, + "step": 37550 + }, + { + "epoch": 0.07016578065262573, + "grad_norm": 1.012832522392273, + "learning_rate": 0.00019997687079707926, + "loss": 4.9673, + "step": 37600 + }, + { + "epoch": 0.07025908621200423, + "grad_norm": 0.8529366254806519, + "learning_rate": 0.0001999767803603633, + "loss": 4.9508, + "step": 37650 + }, + { + "epoch": 0.07035239177138272, + "grad_norm": 1.2480065822601318, + "learning_rate": 0.00019997668974720595, + "loss": 4.8772, + "step": 37700 + }, + { + "epoch": 0.07044569733076121, + "grad_norm": 1.318292498588562, + "learning_rate": 0.00019997659895760732, + "loss": 4.8596, + "step": 37750 + }, + { + "epoch": 0.0705390028901397, + "grad_norm": 1.3873937129974365, + "learning_rate": 0.0001999765079915676, + "loss": 4.8429, + "step": 37800 + }, + { + "epoch": 0.07063230844951819, + "grad_norm": 1.1860456466674805, + "learning_rate": 0.0001999764168490869, + "loss": 4.8605, + "step": 37850 + }, + { + "epoch": 0.07072561400889668, + "grad_norm": 1.0608043670654297, + "learning_rate": 0.00019997632553016544, + "loss": 5.0347, + "step": 37900 + }, + { + "epoch": 0.07081891956827517, + "grad_norm": 1.168198585510254, + "learning_rate": 0.00019997623403480335, + "loss": 4.7621, + "step": 37950 + }, + { + "epoch": 0.07091222512765367, + "grad_norm": 0.9395182132720947, + "learning_rate": 0.0001999761423630008, + "loss": 4.9286, + "step": 38000 + }, + { + "epoch": 0.07100553068703216, + "grad_norm": 1.2564831972122192, + "learning_rate": 0.00019997605051475794, + "loss": 4.9218, + "step": 38050 + }, + { + "epoch": 0.07109883624641065, + "grad_norm": 1.497332215309143, + "learning_rate": 0.00019997595849007496, + "loss": 5.0628, + "step": 38100 + }, + { + "epoch": 0.07119214180578914, + "grad_norm": 1.2820568084716797, + "learning_rate": 0.00019997586628895197, + "loss": 4.8333, + "step": 38150 + }, + { + "epoch": 0.07128544736516763, + "grad_norm": 1.5082170963287354, + "learning_rate": 0.0001999757739113892, + "loss": 5.0072, + "step": 38200 + }, + { + "epoch": 0.07137875292454612, + "grad_norm": 1.0686591863632202, + "learning_rate": 0.00019997568135738677, + "loss": 4.8872, + "step": 38250 + }, + { + "epoch": 0.07147205848392461, + "grad_norm": 1.14346444606781, + "learning_rate": 0.00019997558862694484, + "loss": 4.8167, + "step": 38300 + }, + { + "epoch": 0.0715653640433031, + "grad_norm": 0.6724060773849487, + "learning_rate": 0.00019997549572006365, + "loss": 4.8189, + "step": 38350 + }, + { + "epoch": 0.07165866960268161, + "grad_norm": 1.5035301446914673, + "learning_rate": 0.00019997540263674322, + "loss": 4.9308, + "step": 38400 + }, + { + "epoch": 0.0717519751620601, + "grad_norm": 1.1256290674209595, + "learning_rate": 0.00019997530937698382, + "loss": 4.8645, + "step": 38450 + }, + { + "epoch": 0.07184528072143859, + "grad_norm": 0.9030829668045044, + "learning_rate": 0.00019997521594078562, + "loss": 4.9792, + "step": 38500 + }, + { + "epoch": 0.07193858628081708, + "grad_norm": 1.1105825901031494, + "learning_rate": 0.00019997512232814875, + "loss": 4.9466, + "step": 38550 + }, + { + "epoch": 0.07203189184019557, + "grad_norm": 1.272040843963623, + "learning_rate": 0.00019997502853907336, + "loss": 4.9014, + "step": 38600 + }, + { + "epoch": 0.07212519739957406, + "grad_norm": 1.1471481323242188, + "learning_rate": 0.00019997493457355963, + "loss": 4.9466, + "step": 38650 + }, + { + "epoch": 0.07221850295895255, + "grad_norm": 1.134016990661621, + "learning_rate": 0.00019997484043160775, + "loss": 4.8908, + "step": 38700 + }, + { + "epoch": 0.07231180851833105, + "grad_norm": 1.214316964149475, + "learning_rate": 0.00019997474611321787, + "loss": 4.9729, + "step": 38750 + }, + { + "epoch": 0.07240511407770954, + "grad_norm": 0.9872856140136719, + "learning_rate": 0.00019997465161839014, + "loss": 4.9633, + "step": 38800 + }, + { + "epoch": 0.07249841963708803, + "grad_norm": 1.3392361402511597, + "learning_rate": 0.00019997455694712474, + "loss": 5.0015, + "step": 38850 + }, + { + "epoch": 0.07259172519646652, + "grad_norm": 1.0813117027282715, + "learning_rate": 0.00019997446209942186, + "loss": 5.0432, + "step": 38900 + }, + { + "epoch": 0.07268503075584501, + "grad_norm": 1.2037447690963745, + "learning_rate": 0.0001999743670752816, + "loss": 4.8759, + "step": 38950 + }, + { + "epoch": 0.0727783363152235, + "grad_norm": 1.0193177461624146, + "learning_rate": 0.00019997427187470423, + "loss": 4.9456, + "step": 39000 + }, + { + "epoch": 0.0727783363152235, + "eval_loss": 5.16133975982666, + "eval_runtime": 230.6318, + "eval_samples_per_second": 11.308, + "eval_steps_per_second": 11.308, + "eval_tts_loss": 7.213367073268178, + "step": 39000 + }, + { + "epoch": 0.07287164187460199, + "grad_norm": 1.2097446918487549, + "learning_rate": 0.00019997417649768982, + "loss": 5.0498, + "step": 39050 + }, + { + "epoch": 0.07296494743398048, + "grad_norm": 1.0760691165924072, + "learning_rate": 0.00019997408094423862, + "loss": 4.6899, + "step": 39100 + }, + { + "epoch": 0.07305825299335898, + "grad_norm": 1.1207256317138672, + "learning_rate": 0.0001999739852143507, + "loss": 4.9026, + "step": 39150 + }, + { + "epoch": 0.07315155855273747, + "grad_norm": 0.801072359085083, + "learning_rate": 0.00019997388930802635, + "loss": 4.8494, + "step": 39200 + }, + { + "epoch": 0.07324486411211596, + "grad_norm": 1.1047637462615967, + "learning_rate": 0.00019997379322526563, + "loss": 4.7812, + "step": 39250 + }, + { + "epoch": 0.07333816967149445, + "grad_norm": 1.152491807937622, + "learning_rate": 0.00019997369696606878, + "loss": 4.9095, + "step": 39300 + }, + { + "epoch": 0.07343147523087294, + "grad_norm": 1.1126306056976318, + "learning_rate": 0.00019997360053043592, + "loss": 4.9798, + "step": 39350 + }, + { + "epoch": 0.07352478079025143, + "grad_norm": 1.156316876411438, + "learning_rate": 0.00019997350391836726, + "loss": 4.9773, + "step": 39400 + }, + { + "epoch": 0.07361808634962992, + "grad_norm": 1.0668503046035767, + "learning_rate": 0.00019997340712986296, + "loss": 5.1012, + "step": 39450 + }, + { + "epoch": 0.07371139190900843, + "grad_norm": 1.3585624694824219, + "learning_rate": 0.0001999733101649232, + "loss": 4.6893, + "step": 39500 + }, + { + "epoch": 0.07380469746838692, + "grad_norm": 1.5277858972549438, + "learning_rate": 0.00019997321302354812, + "loss": 4.8931, + "step": 39550 + }, + { + "epoch": 0.0738980030277654, + "grad_norm": 1.2020436525344849, + "learning_rate": 0.00019997311570573792, + "loss": 5.1182, + "step": 39600 + }, + { + "epoch": 0.0739913085871439, + "grad_norm": 1.1372538805007935, + "learning_rate": 0.00019997301821149274, + "loss": 4.9238, + "step": 39650 + }, + { + "epoch": 0.07408461414652238, + "grad_norm": 0.7125515341758728, + "learning_rate": 0.0001999729205408128, + "loss": 4.8202, + "step": 39700 + }, + { + "epoch": 0.07417791970590087, + "grad_norm": 0.9166264533996582, + "learning_rate": 0.00019997282269369824, + "loss": 4.8832, + "step": 39750 + }, + { + "epoch": 0.07427122526527936, + "grad_norm": 1.099700927734375, + "learning_rate": 0.00019997272467014922, + "loss": 4.9073, + "step": 39800 + }, + { + "epoch": 0.07436453082465785, + "grad_norm": 1.3260917663574219, + "learning_rate": 0.00019997262647016597, + "loss": 4.8271, + "step": 39850 + }, + { + "epoch": 0.07445783638403636, + "grad_norm": 1.2185454368591309, + "learning_rate": 0.0001999725280937486, + "loss": 4.8525, + "step": 39900 + }, + { + "epoch": 0.07455114194341485, + "grad_norm": 1.0137861967086792, + "learning_rate": 0.0001999724295408973, + "loss": 4.749, + "step": 39950 + }, + { + "epoch": 0.07464444750279334, + "grad_norm": 0.8680387139320374, + "learning_rate": 0.00019997233081161226, + "loss": 5.0129, + "step": 40000 + }, + { + "epoch": 0.07473775306217183, + "grad_norm": 1.314736008644104, + "learning_rate": 0.00019997223190589366, + "loss": 5.0179, + "step": 40050 + }, + { + "epoch": 0.07483105862155032, + "grad_norm": 1.209912657737732, + "learning_rate": 0.00019997213282374165, + "loss": 5.0474, + "step": 40100 + }, + { + "epoch": 0.0749243641809288, + "grad_norm": 0.9931349158287048, + "learning_rate": 0.0001999720335651564, + "loss": 4.8296, + "step": 40150 + }, + { + "epoch": 0.0750176697403073, + "grad_norm": 0.9877680540084839, + "learning_rate": 0.00019997193413013814, + "loss": 4.7287, + "step": 40200 + }, + { + "epoch": 0.0751109752996858, + "grad_norm": 1.2788828611373901, + "learning_rate": 0.00019997183451868697, + "loss": 4.7418, + "step": 40250 + }, + { + "epoch": 0.07520428085906429, + "grad_norm": 1.1528019905090332, + "learning_rate": 0.00019997173473080312, + "loss": 4.7367, + "step": 40300 + }, + { + "epoch": 0.07529758641844278, + "grad_norm": 1.3090732097625732, + "learning_rate": 0.00019997163476648676, + "loss": 4.8197, + "step": 40350 + }, + { + "epoch": 0.07539089197782127, + "grad_norm": 1.2664506435394287, + "learning_rate": 0.00019997153462573804, + "loss": 4.6739, + "step": 40400 + }, + { + "epoch": 0.07548419753719976, + "grad_norm": 1.1950277090072632, + "learning_rate": 0.00019997143430855715, + "loss": 5.007, + "step": 40450 + }, + { + "epoch": 0.07557750309657825, + "grad_norm": 0.9949437975883484, + "learning_rate": 0.00019997133381494426, + "loss": 4.9069, + "step": 40500 + }, + { + "epoch": 0.07567080865595674, + "grad_norm": 1.1204107999801636, + "learning_rate": 0.00019997123314489956, + "loss": 4.8598, + "step": 40550 + }, + { + "epoch": 0.07576411421533523, + "grad_norm": 1.2911077737808228, + "learning_rate": 0.00019997113229842323, + "loss": 4.9724, + "step": 40600 + }, + { + "epoch": 0.07585741977471373, + "grad_norm": 0.9205583930015564, + "learning_rate": 0.00019997103127551548, + "loss": 4.8516, + "step": 40650 + }, + { + "epoch": 0.07595072533409222, + "grad_norm": 1.0593533515930176, + "learning_rate": 0.0001999709300761764, + "loss": 4.8309, + "step": 40700 + }, + { + "epoch": 0.07604403089347071, + "grad_norm": 1.1807661056518555, + "learning_rate": 0.00019997082870040625, + "loss": 4.9056, + "step": 40750 + }, + { + "epoch": 0.0761373364528492, + "grad_norm": 1.105985164642334, + "learning_rate": 0.00019997072714820514, + "loss": 4.8024, + "step": 40800 + }, + { + "epoch": 0.07623064201222769, + "grad_norm": 1.2069274187088013, + "learning_rate": 0.00019997062541957333, + "loss": 4.9473, + "step": 40850 + }, + { + "epoch": 0.07632394757160618, + "grad_norm": 1.3253543376922607, + "learning_rate": 0.00019997052351451093, + "loss": 4.9783, + "step": 40900 + }, + { + "epoch": 0.07641725313098467, + "grad_norm": 1.22372567653656, + "learning_rate": 0.00019997042143301815, + "loss": 4.9517, + "step": 40950 + }, + { + "epoch": 0.07651055869036316, + "grad_norm": 1.091848611831665, + "learning_rate": 0.00019997031917509515, + "loss": 4.7118, + "step": 41000 + }, + { + "epoch": 0.07660386424974167, + "grad_norm": 1.0672491788864136, + "learning_rate": 0.00019997021674074213, + "loss": 4.9462, + "step": 41050 + }, + { + "epoch": 0.07669716980912016, + "grad_norm": 1.0972808599472046, + "learning_rate": 0.0001999701141299593, + "loss": 4.8651, + "step": 41100 + }, + { + "epoch": 0.07679047536849865, + "grad_norm": 1.0306609869003296, + "learning_rate": 0.0001999700113427468, + "loss": 4.9256, + "step": 41150 + }, + { + "epoch": 0.07688378092787714, + "grad_norm": 1.1696428060531616, + "learning_rate": 0.0001999699083791048, + "loss": 4.7898, + "step": 41200 + }, + { + "epoch": 0.07697708648725562, + "grad_norm": 1.2328746318817139, + "learning_rate": 0.00019996980523903353, + "loss": 4.942, + "step": 41250 + }, + { + "epoch": 0.07707039204663411, + "grad_norm": 1.0950030088424683, + "learning_rate": 0.00019996970192253313, + "loss": 4.8404, + "step": 41300 + }, + { + "epoch": 0.0771636976060126, + "grad_norm": 1.0388070344924927, + "learning_rate": 0.00019996959842960379, + "loss": 5.0406, + "step": 41350 + }, + { + "epoch": 0.07725700316539111, + "grad_norm": 1.3518260717391968, + "learning_rate": 0.0001999694947602457, + "loss": 4.7539, + "step": 41400 + }, + { + "epoch": 0.0773503087247696, + "grad_norm": 0.8718711733818054, + "learning_rate": 0.00019996939091445904, + "loss": 4.8804, + "step": 41450 + }, + { + "epoch": 0.07744361428414809, + "grad_norm": 1.1106172800064087, + "learning_rate": 0.00019996928689224402, + "loss": 4.9871, + "step": 41500 + }, + { + "epoch": 0.07753691984352658, + "grad_norm": 1.3443810939788818, + "learning_rate": 0.00019996918269360076, + "loss": 4.7499, + "step": 41550 + }, + { + "epoch": 0.07763022540290507, + "grad_norm": 0.968607485294342, + "learning_rate": 0.00019996907831852951, + "loss": 4.7705, + "step": 41600 + }, + { + "epoch": 0.07772353096228356, + "grad_norm": 1.1009609699249268, + "learning_rate": 0.0001999689737670304, + "loss": 4.8043, + "step": 41650 + }, + { + "epoch": 0.07781683652166205, + "grad_norm": 0.9938008785247803, + "learning_rate": 0.00019996886903910367, + "loss": 4.9927, + "step": 41700 + }, + { + "epoch": 0.07791014208104054, + "grad_norm": 1.0750339031219482, + "learning_rate": 0.00019996876413474947, + "loss": 4.9449, + "step": 41750 + }, + { + "epoch": 0.07800344764041904, + "grad_norm": 1.2172019481658936, + "learning_rate": 0.000199968659053968, + "loss": 5.0049, + "step": 41800 + }, + { + "epoch": 0.07809675319979753, + "grad_norm": 1.288533329963684, + "learning_rate": 0.0001999685537967594, + "loss": 4.6487, + "step": 41850 + }, + { + "epoch": 0.07819005875917602, + "grad_norm": 1.087883710861206, + "learning_rate": 0.00019996844836312394, + "loss": 4.8504, + "step": 41900 + }, + { + "epoch": 0.07828336431855451, + "grad_norm": 1.2223601341247559, + "learning_rate": 0.00019996834275306174, + "loss": 4.8588, + "step": 41950 + }, + { + "epoch": 0.078376669877933, + "grad_norm": 0.8469075560569763, + "learning_rate": 0.00019996823696657299, + "loss": 4.9408, + "step": 42000 + }, + { + "epoch": 0.078376669877933, + "eval_loss": 5.132350444793701, + "eval_runtime": 230.8355, + "eval_samples_per_second": 11.298, + "eval_steps_per_second": 11.298, + "eval_tts_loss": 7.264853509248697, + "step": 42000 + }, + { + "epoch": 0.07846997543731149, + "grad_norm": 1.1458607912063599, + "learning_rate": 0.0001999681310036579, + "loss": 4.9473, + "step": 42050 + }, + { + "epoch": 0.07856328099668998, + "grad_norm": 0.9783209562301636, + "learning_rate": 0.00019996802486431665, + "loss": 4.9335, + "step": 42100 + }, + { + "epoch": 0.07865658655606848, + "grad_norm": 1.3118724822998047, + "learning_rate": 0.00019996791854854943, + "loss": 4.9134, + "step": 42150 + }, + { + "epoch": 0.07874989211544697, + "grad_norm": 1.4106804132461548, + "learning_rate": 0.0001999678120563564, + "loss": 4.9419, + "step": 42200 + }, + { + "epoch": 0.07884319767482546, + "grad_norm": 1.3338770866394043, + "learning_rate": 0.0001999677053877378, + "loss": 4.9377, + "step": 42250 + }, + { + "epoch": 0.07893650323420395, + "grad_norm": 1.1981934309005737, + "learning_rate": 0.00019996759854269377, + "loss": 4.9804, + "step": 42300 + }, + { + "epoch": 0.07902980879358244, + "grad_norm": 1.1797490119934082, + "learning_rate": 0.00019996749152122455, + "loss": 4.86, + "step": 42350 + }, + { + "epoch": 0.07912311435296093, + "grad_norm": 0.7988554239273071, + "learning_rate": 0.0001999673843233303, + "loss": 5.0499, + "step": 42400 + }, + { + "epoch": 0.07921641991233942, + "grad_norm": 0.9866506457328796, + "learning_rate": 0.00019996727694901117, + "loss": 4.8936, + "step": 42450 + }, + { + "epoch": 0.07930972547171791, + "grad_norm": 1.507678747177124, + "learning_rate": 0.0001999671693982674, + "loss": 4.7716, + "step": 42500 + }, + { + "epoch": 0.07940303103109642, + "grad_norm": 1.2090741395950317, + "learning_rate": 0.00019996706167109917, + "loss": 4.7974, + "step": 42550 + }, + { + "epoch": 0.0794963365904749, + "grad_norm": 0.9674336314201355, + "learning_rate": 0.00019996695376750667, + "loss": 4.8457, + "step": 42600 + }, + { + "epoch": 0.0795896421498534, + "grad_norm": 0.8843348622322083, + "learning_rate": 0.00019996684568749008, + "loss": 4.8981, + "step": 42650 + }, + { + "epoch": 0.07968294770923189, + "grad_norm": 1.0419225692749023, + "learning_rate": 0.00019996673743104956, + "loss": 4.6177, + "step": 42700 + }, + { + "epoch": 0.07977625326861038, + "grad_norm": 0.8469592928886414, + "learning_rate": 0.0001999666289981854, + "loss": 4.8767, + "step": 42750 + }, + { + "epoch": 0.07986955882798887, + "grad_norm": 0.8583770394325256, + "learning_rate": 0.00019996652038889768, + "loss": 4.9546, + "step": 42800 + }, + { + "epoch": 0.07996286438736736, + "grad_norm": 0.9213618636131287, + "learning_rate": 0.00019996641160318666, + "loss": 4.5851, + "step": 42850 + }, + { + "epoch": 0.08005616994674586, + "grad_norm": 1.247279405593872, + "learning_rate": 0.00019996630264105253, + "loss": 4.8712, + "step": 42900 + }, + { + "epoch": 0.08014947550612435, + "grad_norm": 1.3103673458099365, + "learning_rate": 0.00019996619350249546, + "loss": 4.9501, + "step": 42950 + }, + { + "epoch": 0.08024278106550284, + "grad_norm": 0.9502874612808228, + "learning_rate": 0.00019996608418751564, + "loss": 4.9463, + "step": 43000 + }, + { + "epoch": 0.08033608662488133, + "grad_norm": 1.1887316703796387, + "learning_rate": 0.0001999659746961133, + "loss": 4.7633, + "step": 43050 + }, + { + "epoch": 0.08042939218425982, + "grad_norm": 1.132875680923462, + "learning_rate": 0.00019996586502828856, + "loss": 4.7873, + "step": 43100 + }, + { + "epoch": 0.08052269774363831, + "grad_norm": 1.1734470129013062, + "learning_rate": 0.00019996575518404166, + "loss": 4.9746, + "step": 43150 + }, + { + "epoch": 0.0806160033030168, + "grad_norm": 1.2493457794189453, + "learning_rate": 0.0001999656451633728, + "loss": 4.7152, + "step": 43200 + }, + { + "epoch": 0.08070930886239529, + "grad_norm": 1.2277987003326416, + "learning_rate": 0.00019996553496628216, + "loss": 4.8031, + "step": 43250 + }, + { + "epoch": 0.08080261442177379, + "grad_norm": 0.9987390041351318, + "learning_rate": 0.00019996542459276996, + "loss": 4.6355, + "step": 43300 + }, + { + "epoch": 0.08089591998115228, + "grad_norm": 0.8293343186378479, + "learning_rate": 0.0001999653140428364, + "loss": 4.845, + "step": 43350 + }, + { + "epoch": 0.08098922554053077, + "grad_norm": 1.0015592575073242, + "learning_rate": 0.0001999652033164816, + "loss": 4.8523, + "step": 43400 + }, + { + "epoch": 0.08108253109990926, + "grad_norm": 1.1842169761657715, + "learning_rate": 0.0001999650924137058, + "loss": 5.0227, + "step": 43450 + }, + { + "epoch": 0.08117583665928775, + "grad_norm": 1.0604639053344727, + "learning_rate": 0.00019996498133450924, + "loss": 4.9266, + "step": 43500 + }, + { + "epoch": 0.08126914221866624, + "grad_norm": 1.1228868961334229, + "learning_rate": 0.00019996487007889206, + "loss": 4.9329, + "step": 43550 + }, + { + "epoch": 0.08136244777804473, + "grad_norm": 1.149283766746521, + "learning_rate": 0.0001999647586468545, + "loss": 4.6841, + "step": 43600 + }, + { + "epoch": 0.08145575333742322, + "grad_norm": 1.1383576393127441, + "learning_rate": 0.00019996464703839667, + "loss": 4.9698, + "step": 43650 + }, + { + "epoch": 0.08154905889680172, + "grad_norm": 1.0777636766433716, + "learning_rate": 0.00019996453525351887, + "loss": 4.7305, + "step": 43700 + }, + { + "epoch": 0.08164236445618021, + "grad_norm": 1.4025020599365234, + "learning_rate": 0.00019996442329222125, + "loss": 4.9614, + "step": 43750 + }, + { + "epoch": 0.0817356700155587, + "grad_norm": 1.1779139041900635, + "learning_rate": 0.00019996431115450403, + "loss": 4.8422, + "step": 43800 + }, + { + "epoch": 0.0818289755749372, + "grad_norm": 0.9133360981941223, + "learning_rate": 0.00019996419884036733, + "loss": 5.0592, + "step": 43850 + }, + { + "epoch": 0.08192228113431568, + "grad_norm": 1.0266224145889282, + "learning_rate": 0.00019996408634981148, + "loss": 4.9319, + "step": 43900 + }, + { + "epoch": 0.08201558669369417, + "grad_norm": 0.882277250289917, + "learning_rate": 0.00019996397368283656, + "loss": 4.8332, + "step": 43950 + }, + { + "epoch": 0.08210889225307266, + "grad_norm": 1.1271204948425293, + "learning_rate": 0.00019996386083944283, + "loss": 5.1547, + "step": 44000 + }, + { + "epoch": 0.08220219781245117, + "grad_norm": 1.0688495635986328, + "learning_rate": 0.0001999637478196305, + "loss": 5.0013, + "step": 44050 + }, + { + "epoch": 0.08229550337182966, + "grad_norm": 1.0313043594360352, + "learning_rate": 0.0001999636346233997, + "loss": 4.9081, + "step": 44100 + }, + { + "epoch": 0.08238880893120815, + "grad_norm": 0.9132486581802368, + "learning_rate": 0.00019996352125075067, + "loss": 4.5762, + "step": 44150 + }, + { + "epoch": 0.08248211449058664, + "grad_norm": 1.090657353401184, + "learning_rate": 0.00019996340770168363, + "loss": 4.9538, + "step": 44200 + }, + { + "epoch": 0.08257542004996513, + "grad_norm": 1.004305124282837, + "learning_rate": 0.0001999632939761988, + "loss": 4.9216, + "step": 44250 + }, + { + "epoch": 0.08266872560934362, + "grad_norm": 1.309733510017395, + "learning_rate": 0.00019996318007429632, + "loss": 4.8428, + "step": 44300 + }, + { + "epoch": 0.0827620311687221, + "grad_norm": 1.159638524055481, + "learning_rate": 0.0001999630659959764, + "loss": 4.9621, + "step": 44350 + }, + { + "epoch": 0.0828553367281006, + "grad_norm": 1.147161602973938, + "learning_rate": 0.00019996295174123927, + "loss": 4.7534, + "step": 44400 + }, + { + "epoch": 0.0829486422874791, + "grad_norm": 0.9718197584152222, + "learning_rate": 0.00019996283731008513, + "loss": 4.8755, + "step": 44450 + }, + { + "epoch": 0.08304194784685759, + "grad_norm": 0.9277188181877136, + "learning_rate": 0.00019996272270251416, + "loss": 4.8344, + "step": 44500 + }, + { + "epoch": 0.08313525340623608, + "grad_norm": 1.071356177330017, + "learning_rate": 0.00019996260791852656, + "loss": 4.8703, + "step": 44550 + }, + { + "epoch": 0.08322855896561457, + "grad_norm": 1.029938817024231, + "learning_rate": 0.00019996249295812257, + "loss": 4.8064, + "step": 44600 + }, + { + "epoch": 0.08332186452499306, + "grad_norm": 1.397524356842041, + "learning_rate": 0.00019996237782130236, + "loss": 4.8744, + "step": 44650 + }, + { + "epoch": 0.08341517008437155, + "grad_norm": 1.0888935327529907, + "learning_rate": 0.00019996226250806615, + "loss": 4.8386, + "step": 44700 + }, + { + "epoch": 0.08350847564375004, + "grad_norm": 0.727277934551239, + "learning_rate": 0.0001999621470184141, + "loss": 4.8482, + "step": 44750 + }, + { + "epoch": 0.08360178120312854, + "grad_norm": 1.148036241531372, + "learning_rate": 0.00019996203135234648, + "loss": 4.8495, + "step": 44800 + }, + { + "epoch": 0.08369508676250703, + "grad_norm": 1.073508858680725, + "learning_rate": 0.00019996191550986347, + "loss": 4.8289, + "step": 44850 + }, + { + "epoch": 0.08378839232188552, + "grad_norm": 0.7582327723503113, + "learning_rate": 0.00019996179949096528, + "loss": 4.7814, + "step": 44900 + }, + { + "epoch": 0.08388169788126401, + "grad_norm": 0.9114712476730347, + "learning_rate": 0.00019996168329565207, + "loss": 4.8783, + "step": 44950 + }, + { + "epoch": 0.0839750034406425, + "grad_norm": 1.14763343334198, + "learning_rate": 0.00019996156692392408, + "loss": 4.6919, + "step": 45000 + }, + { + "epoch": 0.0839750034406425, + "eval_loss": 5.113004684448242, + "eval_runtime": 231.8555, + "eval_samples_per_second": 11.248, + "eval_steps_per_second": 11.248, + "eval_tts_loss": 7.2781233883782805, + "step": 45000 + }, + { + "epoch": 0.08406830900002099, + "grad_norm": 1.0536847114562988, + "learning_rate": 0.00019996145037578153, + "loss": 4.741, + "step": 45050 + }, + { + "epoch": 0.08416161455939948, + "grad_norm": 1.0102460384368896, + "learning_rate": 0.0001999613336512246, + "loss": 4.9963, + "step": 45100 + }, + { + "epoch": 0.08425492011877797, + "grad_norm": 1.3230600357055664, + "learning_rate": 0.0001999612167502535, + "loss": 4.8845, + "step": 45150 + }, + { + "epoch": 0.08434822567815647, + "grad_norm": 1.181218147277832, + "learning_rate": 0.00019996109967286845, + "loss": 4.7497, + "step": 45200 + }, + { + "epoch": 0.08444153123753496, + "grad_norm": 1.2599127292633057, + "learning_rate": 0.00019996098241906963, + "loss": 4.9527, + "step": 45250 + }, + { + "epoch": 0.08453483679691345, + "grad_norm": 1.0429056882858276, + "learning_rate": 0.0001999608649888573, + "loss": 4.8438, + "step": 45300 + }, + { + "epoch": 0.08462814235629194, + "grad_norm": 0.9174293279647827, + "learning_rate": 0.00019996074738223157, + "loss": 4.885, + "step": 45350 + }, + { + "epoch": 0.08472144791567043, + "grad_norm": 0.7659981846809387, + "learning_rate": 0.00019996062959919277, + "loss": 4.8716, + "step": 45400 + }, + { + "epoch": 0.08481475347504892, + "grad_norm": 1.2928154468536377, + "learning_rate": 0.00019996051163974102, + "loss": 4.9047, + "step": 45450 + }, + { + "epoch": 0.08490805903442741, + "grad_norm": 1.145384669303894, + "learning_rate": 0.00019996039350387657, + "loss": 4.9983, + "step": 45500 + }, + { + "epoch": 0.08500136459380592, + "grad_norm": 1.0447450876235962, + "learning_rate": 0.0001999602751915996, + "loss": 4.7498, + "step": 45550 + }, + { + "epoch": 0.0850946701531844, + "grad_norm": 1.108702301979065, + "learning_rate": 0.00019996015670291036, + "loss": 5.0015, + "step": 45600 + }, + { + "epoch": 0.0851879757125629, + "grad_norm": 1.2329564094543457, + "learning_rate": 0.00019996003803780902, + "loss": 4.7922, + "step": 45650 + }, + { + "epoch": 0.08528128127194139, + "grad_norm": 1.1614669561386108, + "learning_rate": 0.00019995991919629578, + "loss": 5.0659, + "step": 45700 + }, + { + "epoch": 0.08537458683131988, + "grad_norm": 1.1069921255111694, + "learning_rate": 0.00019995980017837087, + "loss": 5.134, + "step": 45750 + }, + { + "epoch": 0.08546789239069837, + "grad_norm": 1.1076229810714722, + "learning_rate": 0.00019995968098403453, + "loss": 4.7386, + "step": 45800 + }, + { + "epoch": 0.08556119795007686, + "grad_norm": 0.8227840662002563, + "learning_rate": 0.00019995956161328692, + "loss": 4.854, + "step": 45850 + }, + { + "epoch": 0.08565450350945535, + "grad_norm": 0.9372345805168152, + "learning_rate": 0.0001999594420661283, + "loss": 4.8924, + "step": 45900 + }, + { + "epoch": 0.08574780906883385, + "grad_norm": 1.0700907707214355, + "learning_rate": 0.00019995932234255883, + "loss": 4.7795, + "step": 45950 + }, + { + "epoch": 0.08584111462821234, + "grad_norm": 1.173011064529419, + "learning_rate": 0.00019995920244257872, + "loss": 4.8779, + "step": 46000 + }, + { + "epoch": 0.08593442018759083, + "grad_norm": 1.0453685522079468, + "learning_rate": 0.00019995908236618824, + "loss": 4.7893, + "step": 46050 + }, + { + "epoch": 0.08602772574696932, + "grad_norm": 1.3147802352905273, + "learning_rate": 0.00019995896211338757, + "loss": 4.8669, + "step": 46100 + }, + { + "epoch": 0.08612103130634781, + "grad_norm": 1.0655571222305298, + "learning_rate": 0.00019995884168417692, + "loss": 4.8534, + "step": 46150 + }, + { + "epoch": 0.0862143368657263, + "grad_norm": 1.1845111846923828, + "learning_rate": 0.00019995872107855648, + "loss": 4.8177, + "step": 46200 + }, + { + "epoch": 0.08630764242510479, + "grad_norm": 1.0939445495605469, + "learning_rate": 0.0001999586002965265, + "loss": 4.7689, + "step": 46250 + }, + { + "epoch": 0.08640094798448329, + "grad_norm": 1.0770061016082764, + "learning_rate": 0.00019995847933808718, + "loss": 4.7746, + "step": 46300 + }, + { + "epoch": 0.08649425354386178, + "grad_norm": 1.2694488763809204, + "learning_rate": 0.00019995835820323872, + "loss": 4.7283, + "step": 46350 + }, + { + "epoch": 0.08658755910324027, + "grad_norm": 1.181217908859253, + "learning_rate": 0.00019995823689198137, + "loss": 4.9545, + "step": 46400 + }, + { + "epoch": 0.08668086466261876, + "grad_norm": 1.1785484552383423, + "learning_rate": 0.0001999581154043153, + "loss": 4.7946, + "step": 46450 + }, + { + "epoch": 0.08677417022199725, + "grad_norm": 0.8478593826293945, + "learning_rate": 0.00019995799374024077, + "loss": 4.5888, + "step": 46500 + }, + { + "epoch": 0.08686747578137574, + "grad_norm": 0.7958502173423767, + "learning_rate": 0.00019995787189975794, + "loss": 4.7813, + "step": 46550 + }, + { + "epoch": 0.08696078134075423, + "grad_norm": 1.2592016458511353, + "learning_rate": 0.00019995774988286707, + "loss": 4.7929, + "step": 46600 + }, + { + "epoch": 0.08705408690013272, + "grad_norm": 0.9072364568710327, + "learning_rate": 0.00019995762768956833, + "loss": 4.7389, + "step": 46650 + }, + { + "epoch": 0.08714739245951122, + "grad_norm": 0.8660083413124084, + "learning_rate": 0.000199957505319862, + "loss": 4.9873, + "step": 46700 + }, + { + "epoch": 0.08724069801888971, + "grad_norm": 1.458755373954773, + "learning_rate": 0.00019995738277374825, + "loss": 4.851, + "step": 46750 + }, + { + "epoch": 0.0873340035782682, + "grad_norm": 1.0809298753738403, + "learning_rate": 0.0001999572600512273, + "loss": 4.8688, + "step": 46800 + }, + { + "epoch": 0.0874273091376467, + "grad_norm": 1.176781415939331, + "learning_rate": 0.00019995713715229937, + "loss": 4.848, + "step": 46850 + }, + { + "epoch": 0.08752061469702518, + "grad_norm": 1.1064727306365967, + "learning_rate": 0.0001999570140769647, + "loss": 4.9472, + "step": 46900 + }, + { + "epoch": 0.08761392025640367, + "grad_norm": 1.0191171169281006, + "learning_rate": 0.00019995689082522348, + "loss": 5.0196, + "step": 46950 + }, + { + "epoch": 0.08770722581578216, + "grad_norm": 1.0902825593948364, + "learning_rate": 0.00019995676739707597, + "loss": 4.8434, + "step": 47000 + }, + { + "epoch": 0.08780053137516065, + "grad_norm": 1.0799312591552734, + "learning_rate": 0.0001999566437925223, + "loss": 4.7215, + "step": 47050 + }, + { + "epoch": 0.08789383693453916, + "grad_norm": 0.7429112792015076, + "learning_rate": 0.00019995652001156278, + "loss": 4.7088, + "step": 47100 + }, + { + "epoch": 0.08798714249391765, + "grad_norm": 1.215327262878418, + "learning_rate": 0.00019995639605419757, + "loss": 4.7287, + "step": 47150 + }, + { + "epoch": 0.08808044805329614, + "grad_norm": 0.9942157864570618, + "learning_rate": 0.0001999562719204269, + "loss": 4.8316, + "step": 47200 + }, + { + "epoch": 0.08817375361267463, + "grad_norm": 1.1454399824142456, + "learning_rate": 0.00019995614761025098, + "loss": 4.9424, + "step": 47250 + }, + { + "epoch": 0.08826705917205312, + "grad_norm": 1.0674301385879517, + "learning_rate": 0.0001999560231236701, + "loss": 4.8136, + "step": 47300 + }, + { + "epoch": 0.0883603647314316, + "grad_norm": 1.0504412651062012, + "learning_rate": 0.0001999558984606844, + "loss": 4.8764, + "step": 47350 + }, + { + "epoch": 0.0884536702908101, + "grad_norm": 1.0923010110855103, + "learning_rate": 0.00019995577362129412, + "loss": 4.8347, + "step": 47400 + }, + { + "epoch": 0.0885469758501886, + "grad_norm": 0.7745321989059448, + "learning_rate": 0.0001999556486054995, + "loss": 4.9508, + "step": 47450 + }, + { + "epoch": 0.08864028140956709, + "grad_norm": 0.992335319519043, + "learning_rate": 0.00019995552341330074, + "loss": 4.8245, + "step": 47500 + }, + { + "epoch": 0.08873358696894558, + "grad_norm": 1.1324732303619385, + "learning_rate": 0.00019995539804469807, + "loss": 4.7327, + "step": 47550 + }, + { + "epoch": 0.08882689252832407, + "grad_norm": 1.0124810934066772, + "learning_rate": 0.00019995527249969174, + "loss": 4.7606, + "step": 47600 + }, + { + "epoch": 0.08892019808770256, + "grad_norm": 1.1177074909210205, + "learning_rate": 0.00019995514677828192, + "loss": 4.9257, + "step": 47650 + }, + { + "epoch": 0.08901350364708105, + "grad_norm": 1.0227179527282715, + "learning_rate": 0.00019995502088046885, + "loss": 4.7303, + "step": 47700 + }, + { + "epoch": 0.08910680920645954, + "grad_norm": 0.9861817359924316, + "learning_rate": 0.00019995489480625277, + "loss": 4.932, + "step": 47750 + }, + { + "epoch": 0.08920011476583803, + "grad_norm": 1.1230319738388062, + "learning_rate": 0.00019995476855563384, + "loss": 4.7706, + "step": 47800 + }, + { + "epoch": 0.08929342032521653, + "grad_norm": 0.8386600613594055, + "learning_rate": 0.00019995464212861237, + "loss": 4.7139, + "step": 47850 + }, + { + "epoch": 0.08938672588459502, + "grad_norm": 1.322847604751587, + "learning_rate": 0.00019995451552518853, + "loss": 4.8769, + "step": 47900 + }, + { + "epoch": 0.08948003144397351, + "grad_norm": 1.3985576629638672, + "learning_rate": 0.00019995438874536258, + "loss": 4.9127, + "step": 47950 + }, + { + "epoch": 0.089573337003352, + "grad_norm": 1.1547143459320068, + "learning_rate": 0.0001999542617891347, + "loss": 4.8883, + "step": 48000 + }, + { + "epoch": 0.089573337003352, + "eval_loss": 5.081268310546875, + "eval_runtime": 231.9586, + "eval_samples_per_second": 11.243, + "eval_steps_per_second": 11.243, + "eval_tts_loss": 7.271998688881428, + "step": 48000 + }, + { + "epoch": 0.08966664256273049, + "grad_norm": 0.7190650701522827, + "learning_rate": 0.00019995413465650515, + "loss": 4.971, + "step": 48050 + }, + { + "epoch": 0.08975994812210898, + "grad_norm": 1.1452605724334717, + "learning_rate": 0.00019995400734747413, + "loss": 4.9515, + "step": 48100 + }, + { + "epoch": 0.08985325368148747, + "grad_norm": 0.7332000136375427, + "learning_rate": 0.00019995387986204187, + "loss": 4.7901, + "step": 48150 + }, + { + "epoch": 0.08994655924086598, + "grad_norm": 1.2664438486099243, + "learning_rate": 0.00019995375220020862, + "loss": 4.7153, + "step": 48200 + }, + { + "epoch": 0.09003986480024447, + "grad_norm": 0.9586982727050781, + "learning_rate": 0.00019995362436197459, + "loss": 4.8614, + "step": 48250 + }, + { + "epoch": 0.09013317035962295, + "grad_norm": 1.139235019683838, + "learning_rate": 0.00019995349634733994, + "loss": 4.8637, + "step": 48300 + }, + { + "epoch": 0.09022647591900144, + "grad_norm": 1.2145622968673706, + "learning_rate": 0.000199953368156305, + "loss": 4.8257, + "step": 48350 + }, + { + "epoch": 0.09031978147837993, + "grad_norm": 1.1296002864837646, + "learning_rate": 0.00019995323978886997, + "loss": 4.8457, + "step": 48400 + }, + { + "epoch": 0.09041308703775842, + "grad_norm": 0.7904325723648071, + "learning_rate": 0.00019995311124503505, + "loss": 4.8158, + "step": 48450 + }, + { + "epoch": 0.09050639259713691, + "grad_norm": 1.2136282920837402, + "learning_rate": 0.00019995298252480046, + "loss": 4.7482, + "step": 48500 + }, + { + "epoch": 0.0905996981565154, + "grad_norm": 1.312458872795105, + "learning_rate": 0.00019995285362816643, + "loss": 4.7005, + "step": 48550 + }, + { + "epoch": 0.09069300371589391, + "grad_norm": 1.1623774766921997, + "learning_rate": 0.00019995272455513324, + "loss": 4.8313, + "step": 48600 + }, + { + "epoch": 0.0907863092752724, + "grad_norm": 1.2712230682373047, + "learning_rate": 0.00019995259530570104, + "loss": 5.055, + "step": 48650 + }, + { + "epoch": 0.09087961483465089, + "grad_norm": 1.1107847690582275, + "learning_rate": 0.00019995246587987012, + "loss": 4.9028, + "step": 48700 + }, + { + "epoch": 0.09097292039402938, + "grad_norm": 1.1616772413253784, + "learning_rate": 0.00019995233627764067, + "loss": 4.7912, + "step": 48750 + }, + { + "epoch": 0.09106622595340787, + "grad_norm": 0.872082531452179, + "learning_rate": 0.00019995220649901297, + "loss": 4.8532, + "step": 48800 + }, + { + "epoch": 0.09115953151278636, + "grad_norm": 0.9554139971733093, + "learning_rate": 0.00019995207654398714, + "loss": 4.7248, + "step": 48850 + }, + { + "epoch": 0.09125283707216485, + "grad_norm": 1.1058579683303833, + "learning_rate": 0.00019995194641256355, + "loss": 4.6829, + "step": 48900 + }, + { + "epoch": 0.09134614263154335, + "grad_norm": 1.0344289541244507, + "learning_rate": 0.00019995181610474238, + "loss": 4.9281, + "step": 48950 + }, + { + "epoch": 0.09143944819092184, + "grad_norm": 1.0128055810928345, + "learning_rate": 0.00019995168562052379, + "loss": 4.8823, + "step": 49000 + }, + { + "epoch": 0.09153275375030033, + "grad_norm": 1.189562201499939, + "learning_rate": 0.00019995155495990805, + "loss": 4.8325, + "step": 49050 + }, + { + "epoch": 0.09162605930967882, + "grad_norm": 1.1200377941131592, + "learning_rate": 0.00019995142412289544, + "loss": 5.0455, + "step": 49100 + }, + { + "epoch": 0.09171936486905731, + "grad_norm": 1.298416256904602, + "learning_rate": 0.00019995129310948612, + "loss": 4.8515, + "step": 49150 + }, + { + "epoch": 0.0918126704284358, + "grad_norm": 1.1552833318710327, + "learning_rate": 0.00019995116191968038, + "loss": 4.8612, + "step": 49200 + }, + { + "epoch": 0.09190597598781429, + "grad_norm": 1.0328021049499512, + "learning_rate": 0.00019995103055347842, + "loss": 5.0486, + "step": 49250 + }, + { + "epoch": 0.09199928154719278, + "grad_norm": 0.7570762038230896, + "learning_rate": 0.00019995089901088048, + "loss": 4.7099, + "step": 49300 + }, + { + "epoch": 0.09209258710657128, + "grad_norm": 0.9117104411125183, + "learning_rate": 0.00019995076729188677, + "loss": 4.8546, + "step": 49350 + }, + { + "epoch": 0.09218589266594977, + "grad_norm": 1.0194332599639893, + "learning_rate": 0.00019995063539649758, + "loss": 4.8198, + "step": 49400 + }, + { + "epoch": 0.09227919822532826, + "grad_norm": 0.7058835625648499, + "learning_rate": 0.00019995050332471308, + "loss": 4.8416, + "step": 49450 + }, + { + "epoch": 0.09237250378470675, + "grad_norm": 1.046873688697815, + "learning_rate": 0.00019995037107653355, + "loss": 4.7131, + "step": 49500 + }, + { + "epoch": 0.09246580934408524, + "grad_norm": 1.1325424909591675, + "learning_rate": 0.00019995023865195918, + "loss": 4.8868, + "step": 49550 + }, + { + "epoch": 0.09255911490346373, + "grad_norm": 0.933991551399231, + "learning_rate": 0.00019995010605099024, + "loss": 4.8612, + "step": 49600 + }, + { + "epoch": 0.09265242046284222, + "grad_norm": 1.2743916511535645, + "learning_rate": 0.0001999499732736269, + "loss": 4.7381, + "step": 49650 + }, + { + "epoch": 0.09274572602222073, + "grad_norm": 0.9663635492324829, + "learning_rate": 0.0001999498403198695, + "loss": 4.8429, + "step": 49700 + }, + { + "epoch": 0.09283903158159922, + "grad_norm": 1.0755882263183594, + "learning_rate": 0.0001999497071897182, + "loss": 4.8957, + "step": 49750 + }, + { + "epoch": 0.0929323371409777, + "grad_norm": 1.0110505819320679, + "learning_rate": 0.0001999495738831733, + "loss": 5.0335, + "step": 49800 + }, + { + "epoch": 0.0930256427003562, + "grad_norm": 1.1059298515319824, + "learning_rate": 0.0001999494404002349, + "loss": 4.9632, + "step": 49850 + }, + { + "epoch": 0.09311894825973469, + "grad_norm": 1.1788870096206665, + "learning_rate": 0.00019994930674090337, + "loss": 5.0718, + "step": 49900 + }, + { + "epoch": 0.09321225381911317, + "grad_norm": 1.0631682872772217, + "learning_rate": 0.0001999491729051789, + "loss": 4.6773, + "step": 49950 + }, + { + "epoch": 0.09330555937849166, + "grad_norm": 0.9250020384788513, + "learning_rate": 0.00019994903889306172, + "loss": 4.7364, + "step": 50000 + }, + { + "epoch": 0.09339886493787015, + "grad_norm": 1.3172533512115479, + "learning_rate": 0.0001999489047045521, + "loss": 4.8955, + "step": 50050 + }, + { + "epoch": 0.09349217049724866, + "grad_norm": 1.0621302127838135, + "learning_rate": 0.00019994877033965023, + "loss": 4.7293, + "step": 50100 + }, + { + "epoch": 0.09358547605662715, + "grad_norm": 1.2123887538909912, + "learning_rate": 0.00019994863579835638, + "loss": 4.7187, + "step": 50150 + }, + { + "epoch": 0.09367878161600564, + "grad_norm": 1.2413897514343262, + "learning_rate": 0.00019994850108067075, + "loss": 4.9169, + "step": 50200 + }, + { + "epoch": 0.09377208717538413, + "grad_norm": 1.3045127391815186, + "learning_rate": 0.0001999483661865936, + "loss": 4.8983, + "step": 50250 + }, + { + "epoch": 0.09386539273476262, + "grad_norm": 1.0260050296783447, + "learning_rate": 0.00019994823111612518, + "loss": 4.8541, + "step": 50300 + }, + { + "epoch": 0.09395869829414111, + "grad_norm": 0.8468745350837708, + "learning_rate": 0.00019994809586926576, + "loss": 4.7221, + "step": 50350 + }, + { + "epoch": 0.0940520038535196, + "grad_norm": 0.558533251285553, + "learning_rate": 0.00019994796044601549, + "loss": 4.8051, + "step": 50400 + }, + { + "epoch": 0.09414530941289809, + "grad_norm": 1.0962811708450317, + "learning_rate": 0.00019994782484637465, + "loss": 5.0269, + "step": 50450 + }, + { + "epoch": 0.09423861497227659, + "grad_norm": 1.1004583835601807, + "learning_rate": 0.00019994768907034352, + "loss": 4.6689, + "step": 50500 + }, + { + "epoch": 0.09433192053165508, + "grad_norm": 0.585641086101532, + "learning_rate": 0.00019994755311792227, + "loss": 4.913, + "step": 50550 + }, + { + "epoch": 0.09442522609103357, + "grad_norm": 1.0821267366409302, + "learning_rate": 0.0001999474169891112, + "loss": 4.8242, + "step": 50600 + }, + { + "epoch": 0.09451853165041206, + "grad_norm": 0.7899723649024963, + "learning_rate": 0.0001999472806839105, + "loss": 4.8794, + "step": 50650 + }, + { + "epoch": 0.09461183720979055, + "grad_norm": 1.1543865203857422, + "learning_rate": 0.00019994714420232046, + "loss": 4.9543, + "step": 50700 + }, + { + "epoch": 0.09470514276916904, + "grad_norm": 0.8708122372627258, + "learning_rate": 0.00019994700754434128, + "loss": 4.6829, + "step": 50750 + }, + { + "epoch": 0.09479844832854753, + "grad_norm": 1.07050359249115, + "learning_rate": 0.00019994687070997325, + "loss": 4.8473, + "step": 50800 + }, + { + "epoch": 0.09489175388792603, + "grad_norm": 0.9256060123443604, + "learning_rate": 0.00019994673369921656, + "loss": 4.6233, + "step": 50850 + }, + { + "epoch": 0.09498505944730452, + "grad_norm": 1.1149330139160156, + "learning_rate": 0.00019994659651207143, + "loss": 4.88, + "step": 50900 + }, + { + "epoch": 0.09507836500668301, + "grad_norm": 1.1947017908096313, + "learning_rate": 0.00019994645914853817, + "loss": 4.7873, + "step": 50950 + }, + { + "epoch": 0.0951716705660615, + "grad_norm": 0.9794861674308777, + "learning_rate": 0.00019994632160861704, + "loss": 4.8523, + "step": 51000 + }, + { + "epoch": 0.0951716705660615, + "eval_loss": 5.061774730682373, + "eval_runtime": 231.5203, + "eval_samples_per_second": 11.265, + "eval_steps_per_second": 11.265, + "eval_tts_loss": 7.302327345133947, + "step": 51000 + }, + { + "epoch": 0.09526497612543999, + "grad_norm": 1.0766355991363525, + "learning_rate": 0.0001999461838923082, + "loss": 4.662, + "step": 51050 + }, + { + "epoch": 0.09535828168481848, + "grad_norm": 1.242478609085083, + "learning_rate": 0.00019994604599961194, + "loss": 5.0057, + "step": 51100 + }, + { + "epoch": 0.09545158724419697, + "grad_norm": 1.0459357500076294, + "learning_rate": 0.0001999459079305285, + "loss": 4.8375, + "step": 51150 + }, + { + "epoch": 0.09554489280357546, + "grad_norm": 1.0175572633743286, + "learning_rate": 0.0001999457696850581, + "loss": 4.7238, + "step": 51200 + }, + { + "epoch": 0.09563819836295397, + "grad_norm": 1.033231258392334, + "learning_rate": 0.000199945631263201, + "loss": 5.1248, + "step": 51250 + }, + { + "epoch": 0.09573150392233246, + "grad_norm": 1.0977270603179932, + "learning_rate": 0.00019994549266495745, + "loss": 4.7472, + "step": 51300 + }, + { + "epoch": 0.09582480948171095, + "grad_norm": 1.0846363306045532, + "learning_rate": 0.00019994535389032772, + "loss": 4.7239, + "step": 51350 + }, + { + "epoch": 0.09591811504108944, + "grad_norm": 0.9102774858474731, + "learning_rate": 0.000199945214939312, + "loss": 4.9151, + "step": 51400 + }, + { + "epoch": 0.09601142060046793, + "grad_norm": 1.0620869398117065, + "learning_rate": 0.00019994507581191058, + "loss": 4.5926, + "step": 51450 + }, + { + "epoch": 0.09610472615984642, + "grad_norm": 1.1233978271484375, + "learning_rate": 0.00019994493650812368, + "loss": 5.0843, + "step": 51500 + }, + { + "epoch": 0.0961980317192249, + "grad_norm": 1.2142945528030396, + "learning_rate": 0.00019994479702795155, + "loss": 4.874, + "step": 51550 + }, + { + "epoch": 0.09629133727860341, + "grad_norm": 1.21846342086792, + "learning_rate": 0.00019994465737139443, + "loss": 4.888, + "step": 51600 + }, + { + "epoch": 0.0963846428379819, + "grad_norm": 1.1796367168426514, + "learning_rate": 0.00019994451753845257, + "loss": 4.9034, + "step": 51650 + }, + { + "epoch": 0.09647794839736039, + "grad_norm": 0.9840373992919922, + "learning_rate": 0.00019994437752912623, + "loss": 4.759, + "step": 51700 + }, + { + "epoch": 0.09657125395673888, + "grad_norm": 1.2641807794570923, + "learning_rate": 0.00019994423734341567, + "loss": 4.7452, + "step": 51750 + }, + { + "epoch": 0.09666455951611737, + "grad_norm": 1.1618438959121704, + "learning_rate": 0.0001999440969813211, + "loss": 4.6881, + "step": 51800 + }, + { + "epoch": 0.09675786507549586, + "grad_norm": 0.8106015920639038, + "learning_rate": 0.0001999439564428428, + "loss": 5.0901, + "step": 51850 + }, + { + "epoch": 0.09685117063487435, + "grad_norm": 1.3673611879348755, + "learning_rate": 0.00019994381572798096, + "loss": 4.9073, + "step": 51900 + }, + { + "epoch": 0.09694447619425284, + "grad_norm": 0.9756952524185181, + "learning_rate": 0.0001999436748367359, + "loss": 4.9472, + "step": 51950 + }, + { + "epoch": 0.09703778175363134, + "grad_norm": 1.3207862377166748, + "learning_rate": 0.00019994353376910783, + "loss": 4.7792, + "step": 52000 + }, + { + "epoch": 0.09713108731300983, + "grad_norm": 1.0687203407287598, + "learning_rate": 0.00019994339252509704, + "loss": 4.7979, + "step": 52050 + }, + { + "epoch": 0.09722439287238832, + "grad_norm": 1.2489949464797974, + "learning_rate": 0.00019994325110470371, + "loss": 4.8534, + "step": 52100 + }, + { + "epoch": 0.09731769843176681, + "grad_norm": 1.4121779203414917, + "learning_rate": 0.00019994310950792815, + "loss": 4.8141, + "step": 52150 + }, + { + "epoch": 0.0974110039911453, + "grad_norm": 1.0948699712753296, + "learning_rate": 0.00019994296773477056, + "loss": 4.9626, + "step": 52200 + }, + { + "epoch": 0.09750430955052379, + "grad_norm": 0.8050052523612976, + "learning_rate": 0.00019994282578523126, + "loss": 4.838, + "step": 52250 + }, + { + "epoch": 0.09759761510990228, + "grad_norm": 1.2219855785369873, + "learning_rate": 0.00019994268365931044, + "loss": 5.0231, + "step": 52300 + }, + { + "epoch": 0.09769092066928078, + "grad_norm": 1.0798107385635376, + "learning_rate": 0.00019994254135700837, + "loss": 4.9951, + "step": 52350 + }, + { + "epoch": 0.09778422622865927, + "grad_norm": 1.1264784336090088, + "learning_rate": 0.0001999423988783253, + "loss": 4.9835, + "step": 52400 + }, + { + "epoch": 0.09787753178803776, + "grad_norm": 1.0738651752471924, + "learning_rate": 0.00019994225622326143, + "loss": 4.6703, + "step": 52450 + }, + { + "epoch": 0.09797083734741625, + "grad_norm": 0.9834763407707214, + "learning_rate": 0.0001999421133918171, + "loss": 4.8748, + "step": 52500 + }, + { + "epoch": 0.09806414290679474, + "grad_norm": 0.9889846444129944, + "learning_rate": 0.00019994197038399256, + "loss": 5.0041, + "step": 52550 + }, + { + "epoch": 0.09815744846617323, + "grad_norm": 0.6598125696182251, + "learning_rate": 0.000199941827199788, + "loss": 4.7931, + "step": 52600 + }, + { + "epoch": 0.09825075402555172, + "grad_norm": 0.7698989510536194, + "learning_rate": 0.00019994168383920372, + "loss": 4.6956, + "step": 52650 + }, + { + "epoch": 0.09834405958493021, + "grad_norm": 0.9986603260040283, + "learning_rate": 0.00019994154030223994, + "loss": 4.7647, + "step": 52700 + }, + { + "epoch": 0.09843736514430872, + "grad_norm": 0.9387907385826111, + "learning_rate": 0.0001999413965888969, + "loss": 4.7919, + "step": 52750 + }, + { + "epoch": 0.0985306707036872, + "grad_norm": 0.997485339641571, + "learning_rate": 0.0001999412526991749, + "loss": 4.864, + "step": 52800 + }, + { + "epoch": 0.0986239762630657, + "grad_norm": 1.0263787508010864, + "learning_rate": 0.0001999411086330742, + "loss": 5.0764, + "step": 52850 + }, + { + "epoch": 0.09871728182244419, + "grad_norm": 1.1817213296890259, + "learning_rate": 0.000199940964390595, + "loss": 4.9443, + "step": 52900 + }, + { + "epoch": 0.09881058738182268, + "grad_norm": 1.2216764688491821, + "learning_rate": 0.00019994081997173758, + "loss": 4.9013, + "step": 52950 + }, + { + "epoch": 0.09890389294120117, + "grad_norm": 1.020540475845337, + "learning_rate": 0.00019994067537650221, + "loss": 4.9208, + "step": 53000 + }, + { + "epoch": 0.09899719850057966, + "grad_norm": 1.07760488986969, + "learning_rate": 0.00019994053060488911, + "loss": 4.8805, + "step": 53050 + }, + { + "epoch": 0.09909050405995816, + "grad_norm": 0.9263754487037659, + "learning_rate": 0.00019994038565689857, + "loss": 4.6882, + "step": 53100 + }, + { + "epoch": 0.09918380961933665, + "grad_norm": 1.0581586360931396, + "learning_rate": 0.00019994024053253084, + "loss": 4.7098, + "step": 53150 + }, + { + "epoch": 0.09927711517871514, + "grad_norm": 0.9992542266845703, + "learning_rate": 0.00019994009523178617, + "loss": 4.8672, + "step": 53200 + }, + { + "epoch": 0.09937042073809363, + "grad_norm": 0.9917731285095215, + "learning_rate": 0.00019993994975466483, + "loss": 4.7594, + "step": 53250 + }, + { + "epoch": 0.09946372629747212, + "grad_norm": 0.8953468799591064, + "learning_rate": 0.00019993980410116705, + "loss": 4.9375, + "step": 53300 + }, + { + "epoch": 0.09955703185685061, + "grad_norm": 1.0633111000061035, + "learning_rate": 0.0001999396582712931, + "loss": 4.7604, + "step": 53350 + }, + { + "epoch": 0.0996503374162291, + "grad_norm": 0.9700153470039368, + "learning_rate": 0.00019993951226504323, + "loss": 4.8346, + "step": 53400 + }, + { + "epoch": 0.09974364297560759, + "grad_norm": 1.198554277420044, + "learning_rate": 0.00019993936608241771, + "loss": 4.7809, + "step": 53450 + }, + { + "epoch": 0.09983694853498609, + "grad_norm": 0.8726997971534729, + "learning_rate": 0.0001999392197234168, + "loss": 4.9039, + "step": 53500 + }, + { + "epoch": 0.09993025409436458, + "grad_norm": 1.296087384223938, + "learning_rate": 0.0001999390731880408, + "loss": 4.7456, + "step": 53550 + }, + { + "epoch": 0.10002355965374307, + "grad_norm": 0.9055943489074707, + "learning_rate": 0.00019993892647628987, + "loss": 4.964, + "step": 53600 + }, + { + "epoch": 0.10011686521312156, + "grad_norm": 0.900660514831543, + "learning_rate": 0.0001999387795881643, + "loss": 4.92, + "step": 53650 + }, + { + "epoch": 0.10021017077250005, + "grad_norm": 1.2182655334472656, + "learning_rate": 0.0001999386325236644, + "loss": 4.8738, + "step": 53700 + }, + { + "epoch": 0.10030347633187854, + "grad_norm": 1.0977277755737305, + "learning_rate": 0.0001999384852827904, + "loss": 4.4823, + "step": 53750 + }, + { + "epoch": 0.10039678189125703, + "grad_norm": 0.7490143775939941, + "learning_rate": 0.00019993833786554252, + "loss": 4.8447, + "step": 53800 + }, + { + "epoch": 0.10049008745063552, + "grad_norm": 1.216125726699829, + "learning_rate": 0.0001999381902719211, + "loss": 4.7476, + "step": 53850 + }, + { + "epoch": 0.10058339301001402, + "grad_norm": 0.9461069703102112, + "learning_rate": 0.00019993804250192633, + "loss": 4.7481, + "step": 53900 + }, + { + "epoch": 0.10067669856939251, + "grad_norm": 0.9821112751960754, + "learning_rate": 0.00019993789455555853, + "loss": 4.6914, + "step": 53950 + }, + { + "epoch": 0.100770004128771, + "grad_norm": 1.069184422492981, + "learning_rate": 0.00019993774643281792, + "loss": 4.8143, + "step": 54000 + }, + { + "epoch": 0.100770004128771, + "eval_loss": 5.050092697143555, + "eval_runtime": 233.5139, + "eval_samples_per_second": 11.169, + "eval_steps_per_second": 11.169, + "eval_tts_loss": 7.309272037400504, + "step": 54000 + }, + { + "epoch": 0.1008633096881495, + "grad_norm": 0.8678284883499146, + "learning_rate": 0.00019993759813370475, + "loss": 4.7695, + "step": 54050 + }, + { + "epoch": 0.10095661524752798, + "grad_norm": 1.0255569219589233, + "learning_rate": 0.00019993744965821932, + "loss": 4.7333, + "step": 54100 + }, + { + "epoch": 0.10104992080690647, + "grad_norm": 0.9796172976493835, + "learning_rate": 0.0001999373010063619, + "loss": 4.9283, + "step": 54150 + }, + { + "epoch": 0.10114322636628496, + "grad_norm": 0.9378018975257874, + "learning_rate": 0.00019993715217813272, + "loss": 4.7371, + "step": 54200 + }, + { + "epoch": 0.10123653192566347, + "grad_norm": 1.0029010772705078, + "learning_rate": 0.00019993700317353201, + "loss": 4.6258, + "step": 54250 + }, + { + "epoch": 0.10132983748504196, + "grad_norm": 0.922479510307312, + "learning_rate": 0.0001999368539925601, + "loss": 5.0026, + "step": 54300 + }, + { + "epoch": 0.10142314304442045, + "grad_norm": 1.0813497304916382, + "learning_rate": 0.00019993670463521723, + "loss": 4.8323, + "step": 54350 + }, + { + "epoch": 0.10151644860379894, + "grad_norm": 1.0883569717407227, + "learning_rate": 0.00019993655510150367, + "loss": 4.8957, + "step": 54400 + }, + { + "epoch": 0.10160975416317743, + "grad_norm": 0.792852520942688, + "learning_rate": 0.00019993640539141966, + "loss": 5.0127, + "step": 54450 + }, + { + "epoch": 0.10170305972255592, + "grad_norm": 0.8241887092590332, + "learning_rate": 0.0001999362555049655, + "loss": 4.7821, + "step": 54500 + }, + { + "epoch": 0.1017963652819344, + "grad_norm": 1.088869571685791, + "learning_rate": 0.0001999361054421414, + "loss": 4.8543, + "step": 54550 + }, + { + "epoch": 0.1018896708413129, + "grad_norm": 1.0283081531524658, + "learning_rate": 0.00019993595520294765, + "loss": 4.7311, + "step": 54600 + }, + { + "epoch": 0.1019829764006914, + "grad_norm": 0.7650019526481628, + "learning_rate": 0.00019993580478738456, + "loss": 4.7729, + "step": 54650 + }, + { + "epoch": 0.10207628196006989, + "grad_norm": 0.8107566237449646, + "learning_rate": 0.00019993565419545236, + "loss": 5.0273, + "step": 54700 + }, + { + "epoch": 0.10216958751944838, + "grad_norm": 1.2203528881072998, + "learning_rate": 0.00019993550342715127, + "loss": 4.6369, + "step": 54750 + }, + { + "epoch": 0.10226289307882687, + "grad_norm": 0.9938797354698181, + "learning_rate": 0.00019993535248248164, + "loss": 4.7657, + "step": 54800 + }, + { + "epoch": 0.10235619863820536, + "grad_norm": 1.1002684831619263, + "learning_rate": 0.0001999352013614437, + "loss": 4.8818, + "step": 54850 + }, + { + "epoch": 0.10244950419758385, + "grad_norm": 0.9734085202217102, + "learning_rate": 0.0001999350500640377, + "loss": 4.8906, + "step": 54900 + }, + { + "epoch": 0.10254280975696234, + "grad_norm": 0.9943709373474121, + "learning_rate": 0.0001999348985902639, + "loss": 4.8368, + "step": 54950 + }, + { + "epoch": 0.10263611531634084, + "grad_norm": 0.8544298410415649, + "learning_rate": 0.00019993474694012263, + "loss": 4.62, + "step": 55000 + }, + { + "epoch": 0.10272942087571933, + "grad_norm": 0.7274461388587952, + "learning_rate": 0.0001999345951136141, + "loss": 4.7548, + "step": 55050 + }, + { + "epoch": 0.10282272643509782, + "grad_norm": 1.0700294971466064, + "learning_rate": 0.00019993444311073859, + "loss": 4.7764, + "step": 55100 + }, + { + "epoch": 0.10291603199447631, + "grad_norm": 1.211059808731079, + "learning_rate": 0.00019993429093149637, + "loss": 4.7714, + "step": 55150 + }, + { + "epoch": 0.1030093375538548, + "grad_norm": 1.0298805236816406, + "learning_rate": 0.00019993413857588772, + "loss": 4.8341, + "step": 55200 + }, + { + "epoch": 0.10310264311323329, + "grad_norm": 1.2335697412490845, + "learning_rate": 0.0001999339860439129, + "loss": 4.7979, + "step": 55250 + }, + { + "epoch": 0.10319594867261178, + "grad_norm": 1.05863618850708, + "learning_rate": 0.00019993383333557216, + "loss": 5.0299, + "step": 55300 + }, + { + "epoch": 0.10328925423199027, + "grad_norm": 0.8664848804473877, + "learning_rate": 0.0001999336804508658, + "loss": 4.7784, + "step": 55350 + }, + { + "epoch": 0.10338255979136877, + "grad_norm": 1.1443610191345215, + "learning_rate": 0.00019993352738979407, + "loss": 4.93, + "step": 55400 + }, + { + "epoch": 0.10347586535074726, + "grad_norm": 1.0898007154464722, + "learning_rate": 0.00019993337415235725, + "loss": 4.6334, + "step": 55450 + }, + { + "epoch": 0.10356917091012575, + "grad_norm": 0.9437859654426575, + "learning_rate": 0.00019993322073855562, + "loss": 4.8104, + "step": 55500 + }, + { + "epoch": 0.10366247646950424, + "grad_norm": 1.0893656015396118, + "learning_rate": 0.00019993306714838943, + "loss": 4.8592, + "step": 55550 + }, + { + "epoch": 0.10375578202888273, + "grad_norm": 1.0553542375564575, + "learning_rate": 0.00019993291338185897, + "loss": 4.8595, + "step": 55600 + }, + { + "epoch": 0.10384908758826122, + "grad_norm": 0.9406054019927979, + "learning_rate": 0.00019993275943896448, + "loss": 4.7845, + "step": 55650 + }, + { + "epoch": 0.10394239314763971, + "grad_norm": 0.9452017545700073, + "learning_rate": 0.00019993260531970627, + "loss": 4.6355, + "step": 55700 + }, + { + "epoch": 0.10403569870701822, + "grad_norm": 0.8077996969223022, + "learning_rate": 0.00019993245102408459, + "loss": 4.9368, + "step": 55750 + }, + { + "epoch": 0.10412900426639671, + "grad_norm": 0.9253317713737488, + "learning_rate": 0.00019993229655209974, + "loss": 4.6939, + "step": 55800 + }, + { + "epoch": 0.1042223098257752, + "grad_norm": 1.2367135286331177, + "learning_rate": 0.0001999321419037519, + "loss": 5.0054, + "step": 55850 + }, + { + "epoch": 0.10431561538515369, + "grad_norm": 0.9509602189064026, + "learning_rate": 0.00019993198707904148, + "loss": 4.7925, + "step": 55900 + }, + { + "epoch": 0.10440892094453218, + "grad_norm": 0.9606555700302124, + "learning_rate": 0.00019993183207796868, + "loss": 4.7034, + "step": 55950 + }, + { + "epoch": 0.10450222650391067, + "grad_norm": 0.7478787899017334, + "learning_rate": 0.00019993167690053376, + "loss": 4.9531, + "step": 56000 + }, + { + "epoch": 0.10459553206328916, + "grad_norm": 0.9634212851524353, + "learning_rate": 0.00019993152154673704, + "loss": 4.9075, + "step": 56050 + }, + { + "epoch": 0.10468883762266765, + "grad_norm": 0.83271723985672, + "learning_rate": 0.00019993136601657873, + "loss": 4.6116, + "step": 56100 + }, + { + "epoch": 0.10478214318204615, + "grad_norm": 1.1285144090652466, + "learning_rate": 0.0001999312103100592, + "loss": 4.6186, + "step": 56150 + }, + { + "epoch": 0.10487544874142464, + "grad_norm": 0.8766820430755615, + "learning_rate": 0.00019993105442717862, + "loss": 4.7467, + "step": 56200 + }, + { + "epoch": 0.10496875430080313, + "grad_norm": 1.0494366884231567, + "learning_rate": 0.00019993089836793732, + "loss": 4.7853, + "step": 56250 + }, + { + "epoch": 0.10506205986018162, + "grad_norm": 0.9477689862251282, + "learning_rate": 0.00019993074213233556, + "loss": 4.8802, + "step": 56300 + }, + { + "epoch": 0.10515536541956011, + "grad_norm": 1.2510788440704346, + "learning_rate": 0.00019993058572037364, + "loss": 4.8521, + "step": 56350 + }, + { + "epoch": 0.1052486709789386, + "grad_norm": 1.0136951208114624, + "learning_rate": 0.0001999304291320518, + "loss": 4.6612, + "step": 56400 + }, + { + "epoch": 0.10534197653831709, + "grad_norm": 1.1237655878067017, + "learning_rate": 0.00019993027236737038, + "loss": 5.043, + "step": 56450 + }, + { + "epoch": 0.10543528209769558, + "grad_norm": 0.8546143770217896, + "learning_rate": 0.00019993011542632956, + "loss": 4.6913, + "step": 56500 + }, + { + "epoch": 0.10552858765707408, + "grad_norm": 1.2368345260620117, + "learning_rate": 0.00019992995830892972, + "loss": 4.9539, + "step": 56550 + }, + { + "epoch": 0.10562189321645257, + "grad_norm": 1.055706262588501, + "learning_rate": 0.00019992980101517102, + "loss": 4.8411, + "step": 56600 + }, + { + "epoch": 0.10571519877583106, + "grad_norm": 0.8533573746681213, + "learning_rate": 0.00019992964354505387, + "loss": 4.6453, + "step": 56650 + }, + { + "epoch": 0.10580850433520955, + "grad_norm": 1.2660565376281738, + "learning_rate": 0.00019992948589857843, + "loss": 4.9287, + "step": 56700 + }, + { + "epoch": 0.10590180989458804, + "grad_norm": 1.2691367864608765, + "learning_rate": 0.00019992932807574507, + "loss": 4.7645, + "step": 56750 + }, + { + "epoch": 0.10599511545396653, + "grad_norm": 1.0898069143295288, + "learning_rate": 0.00019992917007655403, + "loss": 4.924, + "step": 56800 + }, + { + "epoch": 0.10608842101334502, + "grad_norm": 1.1141910552978516, + "learning_rate": 0.00019992901190100555, + "loss": 4.7186, + "step": 56850 + }, + { + "epoch": 0.10618172657272353, + "grad_norm": 1.047428846359253, + "learning_rate": 0.00019992885354909998, + "loss": 4.5938, + "step": 56900 + }, + { + "epoch": 0.10627503213210202, + "grad_norm": 1.3936903476715088, + "learning_rate": 0.00019992869502083755, + "loss": 4.661, + "step": 56950 + }, + { + "epoch": 0.1063683376914805, + "grad_norm": 1.2814745903015137, + "learning_rate": 0.00019992853631621858, + "loss": 5.1075, + "step": 57000 + }, + { + "epoch": 0.1063683376914805, + "eval_loss": 5.03396463394165, + "eval_runtime": 231.8405, + "eval_samples_per_second": 11.249, + "eval_steps_per_second": 11.249, + "eval_tts_loss": 7.338024720789556, + "step": 57000 + }, + { + "epoch": 0.106461643250859, + "grad_norm": 0.9817240834236145, + "learning_rate": 0.0001999283774352433, + "loss": 4.6815, + "step": 57050 + }, + { + "epoch": 0.10655494881023748, + "grad_norm": 0.8881374001502991, + "learning_rate": 0.000199928218377912, + "loss": 4.7038, + "step": 57100 + }, + { + "epoch": 0.10664825436961597, + "grad_norm": 0.8835790157318115, + "learning_rate": 0.00019992805914422501, + "loss": 4.805, + "step": 57150 + }, + { + "epoch": 0.10674155992899446, + "grad_norm": 1.0517323017120361, + "learning_rate": 0.0001999278997341826, + "loss": 4.8259, + "step": 57200 + }, + { + "epoch": 0.10683486548837295, + "grad_norm": 0.9587489366531372, + "learning_rate": 0.000199927740147785, + "loss": 4.7815, + "step": 57250 + }, + { + "epoch": 0.10692817104775146, + "grad_norm": 1.1249165534973145, + "learning_rate": 0.00019992758038503249, + "loss": 4.5363, + "step": 57300 + }, + { + "epoch": 0.10702147660712995, + "grad_norm": 1.0124765634536743, + "learning_rate": 0.00019992742044592542, + "loss": 4.6893, + "step": 57350 + }, + { + "epoch": 0.10711478216650844, + "grad_norm": 0.969052255153656, + "learning_rate": 0.00019992726033046405, + "loss": 4.7008, + "step": 57400 + }, + { + "epoch": 0.10720808772588693, + "grad_norm": 1.0661418437957764, + "learning_rate": 0.00019992710003864864, + "loss": 4.9145, + "step": 57450 + }, + { + "epoch": 0.10730139328526542, + "grad_norm": 0.9736279845237732, + "learning_rate": 0.00019992693957047948, + "loss": 4.5799, + "step": 57500 + }, + { + "epoch": 0.10739469884464391, + "grad_norm": 0.8981762528419495, + "learning_rate": 0.0001999267789259568, + "loss": 4.9882, + "step": 57550 + }, + { + "epoch": 0.1074880044040224, + "grad_norm": 1.3107281923294067, + "learning_rate": 0.000199926618105081, + "loss": 4.5749, + "step": 57600 + }, + { + "epoch": 0.1075813099634009, + "grad_norm": 0.8268171548843384, + "learning_rate": 0.00019992645710785228, + "loss": 4.7451, + "step": 57650 + }, + { + "epoch": 0.10767461552277939, + "grad_norm": 0.8941455483436584, + "learning_rate": 0.00019992629593427097, + "loss": 4.7114, + "step": 57700 + }, + { + "epoch": 0.10776792108215788, + "grad_norm": 1.0125592947006226, + "learning_rate": 0.0001999261345843373, + "loss": 4.7143, + "step": 57750 + }, + { + "epoch": 0.10786122664153637, + "grad_norm": 1.0623835325241089, + "learning_rate": 0.0001999259730580516, + "loss": 4.8096, + "step": 57800 + }, + { + "epoch": 0.10795453220091486, + "grad_norm": 0.9189125299453735, + "learning_rate": 0.00019992581135541414, + "loss": 4.5356, + "step": 57850 + }, + { + "epoch": 0.10804783776029335, + "grad_norm": 1.0694884061813354, + "learning_rate": 0.0001999256494764252, + "loss": 4.902, + "step": 57900 + }, + { + "epoch": 0.10814114331967184, + "grad_norm": 1.200424313545227, + "learning_rate": 0.00019992548742108507, + "loss": 4.6708, + "step": 57950 + }, + { + "epoch": 0.10823444887905033, + "grad_norm": 1.0127471685409546, + "learning_rate": 0.00019992532518939404, + "loss": 4.6113, + "step": 58000 + }, + { + "epoch": 0.10832775443842883, + "grad_norm": 1.081398367881775, + "learning_rate": 0.0001999251627813524, + "loss": 4.7227, + "step": 58050 + }, + { + "epoch": 0.10842105999780732, + "grad_norm": 1.0527719259262085, + "learning_rate": 0.00019992500019696042, + "loss": 4.7023, + "step": 58100 + }, + { + "epoch": 0.10851436555718581, + "grad_norm": 0.9371632933616638, + "learning_rate": 0.0001999248374362184, + "loss": 4.7177, + "step": 58150 + }, + { + "epoch": 0.1086076711165643, + "grad_norm": 1.2352396249771118, + "learning_rate": 0.00019992467449912664, + "loss": 4.7167, + "step": 58200 + }, + { + "epoch": 0.10870097667594279, + "grad_norm": 1.295750379562378, + "learning_rate": 0.0001999245113856854, + "loss": 4.6888, + "step": 58250 + }, + { + "epoch": 0.10879428223532128, + "grad_norm": 0.9914165735244751, + "learning_rate": 0.00019992434809589494, + "loss": 4.6626, + "step": 58300 + }, + { + "epoch": 0.10888758779469977, + "grad_norm": 1.038947582244873, + "learning_rate": 0.00019992418462975566, + "loss": 4.8624, + "step": 58350 + }, + { + "epoch": 0.10898089335407828, + "grad_norm": 0.9562987685203552, + "learning_rate": 0.00019992402098726776, + "loss": 4.8079, + "step": 58400 + }, + { + "epoch": 0.10907419891345677, + "grad_norm": 1.1640965938568115, + "learning_rate": 0.0001999238571684315, + "loss": 4.749, + "step": 58450 + }, + { + "epoch": 0.10916750447283526, + "grad_norm": 0.8574897646903992, + "learning_rate": 0.00019992369317324725, + "loss": 4.5478, + "step": 58500 + }, + { + "epoch": 0.10926081003221375, + "grad_norm": 1.1693881750106812, + "learning_rate": 0.00019992352900171523, + "loss": 4.8086, + "step": 58550 + }, + { + "epoch": 0.10935411559159224, + "grad_norm": 1.0062192678451538, + "learning_rate": 0.0001999233646538358, + "loss": 4.7885, + "step": 58600 + }, + { + "epoch": 0.10944742115097073, + "grad_norm": 1.1878728866577148, + "learning_rate": 0.0001999232001296092, + "loss": 4.7585, + "step": 58650 + }, + { + "epoch": 0.10954072671034921, + "grad_norm": 0.6779685020446777, + "learning_rate": 0.00019992303542903575, + "loss": 4.8446, + "step": 58700 + }, + { + "epoch": 0.1096340322697277, + "grad_norm": 1.022136926651001, + "learning_rate": 0.0001999228705521157, + "loss": 4.7668, + "step": 58750 + }, + { + "epoch": 0.10972733782910621, + "grad_norm": 0.8543698787689209, + "learning_rate": 0.00019992270549884936, + "loss": 4.9229, + "step": 58800 + }, + { + "epoch": 0.1098206433884847, + "grad_norm": 1.0954478979110718, + "learning_rate": 0.00019992254026923704, + "loss": 4.7663, + "step": 58850 + }, + { + "epoch": 0.10991394894786319, + "grad_norm": 1.051382303237915, + "learning_rate": 0.00019992237486327904, + "loss": 4.9512, + "step": 58900 + }, + { + "epoch": 0.11000725450724168, + "grad_norm": 1.2128174304962158, + "learning_rate": 0.0001999222092809756, + "loss": 4.677, + "step": 58950 + }, + { + "epoch": 0.11010056006662017, + "grad_norm": 0.8626731038093567, + "learning_rate": 0.00019992204352232707, + "loss": 4.6943, + "step": 59000 + }, + { + "epoch": 0.11019386562599866, + "grad_norm": 1.2140212059020996, + "learning_rate": 0.00019992187758733368, + "loss": 4.8939, + "step": 59050 + }, + { + "epoch": 0.11028717118537715, + "grad_norm": 1.127719759941101, + "learning_rate": 0.00019992171147599578, + "loss": 4.8684, + "step": 59100 + }, + { + "epoch": 0.11038047674475565, + "grad_norm": 1.0840810537338257, + "learning_rate": 0.00019992154518831364, + "loss": 4.947, + "step": 59150 + }, + { + "epoch": 0.11047378230413414, + "grad_norm": 1.0713996887207031, + "learning_rate": 0.00019992137872428754, + "loss": 4.7424, + "step": 59200 + }, + { + "epoch": 0.11056708786351263, + "grad_norm": 0.8163297176361084, + "learning_rate": 0.0001999212120839178, + "loss": 4.8426, + "step": 59250 + }, + { + "epoch": 0.11066039342289112, + "grad_norm": 0.9037017226219177, + "learning_rate": 0.0001999210452672047, + "loss": 5.0381, + "step": 59300 + }, + { + "epoch": 0.11075369898226961, + "grad_norm": 1.091113567352295, + "learning_rate": 0.00019992087827414853, + "loss": 4.5411, + "step": 59350 + }, + { + "epoch": 0.1108470045416481, + "grad_norm": 0.872543454170227, + "learning_rate": 0.0001999207111047496, + "loss": 4.6965, + "step": 59400 + }, + { + "epoch": 0.11094031010102659, + "grad_norm": 0.9569900035858154, + "learning_rate": 0.0001999205437590082, + "loss": 5.0155, + "step": 59450 + }, + { + "epoch": 0.11103361566040508, + "grad_norm": 0.8814337253570557, + "learning_rate": 0.00019992037623692461, + "loss": 4.9238, + "step": 59500 + }, + { + "epoch": 0.11112692121978358, + "grad_norm": 1.0400547981262207, + "learning_rate": 0.00019992020853849916, + "loss": 4.7072, + "step": 59550 + }, + { + "epoch": 0.11122022677916207, + "grad_norm": 1.0725575685501099, + "learning_rate": 0.0001999200406637321, + "loss": 4.7132, + "step": 59600 + }, + { + "epoch": 0.11131353233854056, + "grad_norm": 1.1183878183364868, + "learning_rate": 0.00019991987261262377, + "loss": 4.8993, + "step": 59650 + }, + { + "epoch": 0.11140683789791905, + "grad_norm": 1.005928874015808, + "learning_rate": 0.0001999197043851744, + "loss": 4.7833, + "step": 59700 + }, + { + "epoch": 0.11150014345729754, + "grad_norm": 0.9358251690864563, + "learning_rate": 0.0001999195359813844, + "loss": 4.6399, + "step": 59750 + }, + { + "epoch": 0.11159344901667603, + "grad_norm": 0.9199875593185425, + "learning_rate": 0.00019991936740125396, + "loss": 4.9291, + "step": 59800 + }, + { + "epoch": 0.11168675457605452, + "grad_norm": 1.1673732995986938, + "learning_rate": 0.00019991919864478344, + "loss": 4.8272, + "step": 59850 + }, + { + "epoch": 0.11178006013543301, + "grad_norm": 1.0224826335906982, + "learning_rate": 0.0001999190297119731, + "loss": 4.6601, + "step": 59900 + }, + { + "epoch": 0.11187336569481152, + "grad_norm": 1.0551832914352417, + "learning_rate": 0.00019991886060282324, + "loss": 4.9511, + "step": 59950 + }, + { + "epoch": 0.11196667125419, + "grad_norm": 0.7255629301071167, + "learning_rate": 0.00019991869131733418, + "loss": 4.8392, + "step": 60000 + }, + { + "epoch": 0.11196667125419, + "eval_loss": 5.020105838775635, + "eval_runtime": 233.6602, + "eval_samples_per_second": 11.162, + "eval_steps_per_second": 11.162, + "eval_tts_loss": 7.413437219613741, + "step": 60000 + }, + { + "epoch": 0.1120599768135685, + "grad_norm": 1.0283355712890625, + "learning_rate": 0.00019991852185550623, + "loss": 4.8832, + "step": 60050 + }, + { + "epoch": 0.11215328237294699, + "grad_norm": 0.8838055729866028, + "learning_rate": 0.00019991835221733965, + "loss": 4.6478, + "step": 60100 + }, + { + "epoch": 0.11224658793232548, + "grad_norm": 1.2853875160217285, + "learning_rate": 0.00019991818240283477, + "loss": 4.7012, + "step": 60150 + }, + { + "epoch": 0.11233989349170397, + "grad_norm": 1.3384721279144287, + "learning_rate": 0.0001999180124119919, + "loss": 4.8132, + "step": 60200 + }, + { + "epoch": 0.11243319905108246, + "grad_norm": 1.0799473524093628, + "learning_rate": 0.00019991784224481128, + "loss": 4.793, + "step": 60250 + }, + { + "epoch": 0.11252650461046096, + "grad_norm": 0.8523072004318237, + "learning_rate": 0.00019991767190129326, + "loss": 4.9316, + "step": 60300 + }, + { + "epoch": 0.11261981016983945, + "grad_norm": 0.9551059007644653, + "learning_rate": 0.00019991750138143813, + "loss": 4.8224, + "step": 60350 + }, + { + "epoch": 0.11271311572921794, + "grad_norm": 1.0353354215621948, + "learning_rate": 0.0001999173306852462, + "loss": 4.6761, + "step": 60400 + }, + { + "epoch": 0.11280642128859643, + "grad_norm": 0.6829168796539307, + "learning_rate": 0.00019991715981271778, + "loss": 4.8235, + "step": 60450 + }, + { + "epoch": 0.11289972684797492, + "grad_norm": 0.7613895535469055, + "learning_rate": 0.00019991698876385314, + "loss": 4.759, + "step": 60500 + }, + { + "epoch": 0.11299303240735341, + "grad_norm": 0.9478106498718262, + "learning_rate": 0.0001999168175386526, + "loss": 4.6671, + "step": 60550 + }, + { + "epoch": 0.1130863379667319, + "grad_norm": 0.9536855220794678, + "learning_rate": 0.00019991664613711643, + "loss": 4.8579, + "step": 60600 + }, + { + "epoch": 0.11317964352611039, + "grad_norm": 1.144857406616211, + "learning_rate": 0.00019991647455924497, + "loss": 4.9233, + "step": 60650 + }, + { + "epoch": 0.11327294908548889, + "grad_norm": 1.1187453269958496, + "learning_rate": 0.00019991630280503852, + "loss": 4.7061, + "step": 60700 + }, + { + "epoch": 0.11336625464486738, + "grad_norm": 1.1460309028625488, + "learning_rate": 0.0001999161308744974, + "loss": 4.8344, + "step": 60750 + }, + { + "epoch": 0.11345956020424587, + "grad_norm": 0.9266796112060547, + "learning_rate": 0.00019991595876762185, + "loss": 4.6502, + "step": 60800 + }, + { + "epoch": 0.11355286576362436, + "grad_norm": 0.9658082127571106, + "learning_rate": 0.00019991578648441224, + "loss": 4.9107, + "step": 60850 + }, + { + "epoch": 0.11364617132300285, + "grad_norm": 0.9839101433753967, + "learning_rate": 0.00019991561402486885, + "loss": 4.6822, + "step": 60900 + }, + { + "epoch": 0.11373947688238134, + "grad_norm": 1.0026122331619263, + "learning_rate": 0.00019991544138899195, + "loss": 4.6888, + "step": 60950 + }, + { + "epoch": 0.11383278244175983, + "grad_norm": 1.139105200767517, + "learning_rate": 0.0001999152685767819, + "loss": 4.6663, + "step": 61000 + }, + { + "epoch": 0.11392608800113833, + "grad_norm": 0.9084557294845581, + "learning_rate": 0.00019991509558823898, + "loss": 4.77, + "step": 61050 + }, + { + "epoch": 0.11401939356051682, + "grad_norm": 1.2279844284057617, + "learning_rate": 0.0001999149224233635, + "loss": 4.8361, + "step": 61100 + }, + { + "epoch": 0.11411269911989531, + "grad_norm": 0.8564290404319763, + "learning_rate": 0.00019991474908215574, + "loss": 4.8206, + "step": 61150 + }, + { + "epoch": 0.1142060046792738, + "grad_norm": 0.92253577709198, + "learning_rate": 0.00019991457556461605, + "loss": 4.8973, + "step": 61200 + }, + { + "epoch": 0.1142993102386523, + "grad_norm": 1.0969637632369995, + "learning_rate": 0.0001999144018707447, + "loss": 4.9673, + "step": 61250 + }, + { + "epoch": 0.11439261579803078, + "grad_norm": 1.1410199403762817, + "learning_rate": 0.00019991422800054204, + "loss": 4.6975, + "step": 61300 + }, + { + "epoch": 0.11448592135740927, + "grad_norm": 1.0572748184204102, + "learning_rate": 0.00019991405395400835, + "loss": 4.7044, + "step": 61350 + }, + { + "epoch": 0.11457922691678776, + "grad_norm": 1.115541696548462, + "learning_rate": 0.0001999138797311439, + "loss": 4.7546, + "step": 61400 + }, + { + "epoch": 0.11467253247616627, + "grad_norm": 0.9594268798828125, + "learning_rate": 0.00019991370533194904, + "loss": 4.7057, + "step": 61450 + }, + { + "epoch": 0.11476583803554476, + "grad_norm": 1.0258264541625977, + "learning_rate": 0.00019991353075642408, + "loss": 4.7853, + "step": 61500 + }, + { + "epoch": 0.11485914359492325, + "grad_norm": 1.0233960151672363, + "learning_rate": 0.00019991335600456932, + "loss": 4.7917, + "step": 61550 + }, + { + "epoch": 0.11495244915430174, + "grad_norm": 0.901678204536438, + "learning_rate": 0.00019991318107638506, + "loss": 4.9225, + "step": 61600 + }, + { + "epoch": 0.11504575471368023, + "grad_norm": 0.7477099299430847, + "learning_rate": 0.00019991300597187165, + "loss": 4.7892, + "step": 61650 + }, + { + "epoch": 0.11513906027305872, + "grad_norm": 1.0212405920028687, + "learning_rate": 0.00019991283069102932, + "loss": 4.789, + "step": 61700 + }, + { + "epoch": 0.1152323658324372, + "grad_norm": 0.9930858016014099, + "learning_rate": 0.00019991265523385842, + "loss": 4.7956, + "step": 61750 + }, + { + "epoch": 0.11532567139181571, + "grad_norm": 0.9863542318344116, + "learning_rate": 0.0001999124796003593, + "loss": 4.7178, + "step": 61800 + }, + { + "epoch": 0.1154189769511942, + "grad_norm": 0.8697104454040527, + "learning_rate": 0.0001999123037905322, + "loss": 4.7792, + "step": 61850 + }, + { + "epoch": 0.11551228251057269, + "grad_norm": 1.0396831035614014, + "learning_rate": 0.0001999121278043775, + "loss": 4.8915, + "step": 61900 + }, + { + "epoch": 0.11560558806995118, + "grad_norm": 1.0583255290985107, + "learning_rate": 0.00019991195164189546, + "loss": 4.7479, + "step": 61950 + }, + { + "epoch": 0.11569889362932967, + "grad_norm": 0.8265174627304077, + "learning_rate": 0.0001999117753030864, + "loss": 4.7255, + "step": 62000 + }, + { + "epoch": 0.11579219918870816, + "grad_norm": 1.1002154350280762, + "learning_rate": 0.00019991159878795064, + "loss": 4.8505, + "step": 62050 + }, + { + "epoch": 0.11588550474808665, + "grad_norm": 1.1061955690383911, + "learning_rate": 0.00019991142209648848, + "loss": 4.6528, + "step": 62100 + }, + { + "epoch": 0.11597881030746514, + "grad_norm": 1.06044340133667, + "learning_rate": 0.00019991124522870024, + "loss": 4.8613, + "step": 62150 + }, + { + "epoch": 0.11607211586684364, + "grad_norm": 1.1321583986282349, + "learning_rate": 0.00019991106818458622, + "loss": 4.6643, + "step": 62200 + }, + { + "epoch": 0.11616542142622213, + "grad_norm": 1.0335965156555176, + "learning_rate": 0.00019991089096414678, + "loss": 4.7951, + "step": 62250 + }, + { + "epoch": 0.11625872698560062, + "grad_norm": 1.1257051229476929, + "learning_rate": 0.00019991071356738216, + "loss": 4.8458, + "step": 62300 + }, + { + "epoch": 0.11635203254497911, + "grad_norm": 0.9923104047775269, + "learning_rate": 0.00019991053599429273, + "loss": 4.7476, + "step": 62350 + }, + { + "epoch": 0.1164453381043576, + "grad_norm": 1.1084821224212646, + "learning_rate": 0.00019991035824487878, + "loss": 4.6966, + "step": 62400 + }, + { + "epoch": 0.11653864366373609, + "grad_norm": 0.9355782866477966, + "learning_rate": 0.00019991018031914064, + "loss": 4.7396, + "step": 62450 + }, + { + "epoch": 0.11663194922311458, + "grad_norm": 1.1987919807434082, + "learning_rate": 0.0001999100022170786, + "loss": 4.7583, + "step": 62500 + }, + { + "epoch": 0.11672525478249308, + "grad_norm": 1.2184990644454956, + "learning_rate": 0.000199909823938693, + "loss": 5.0186, + "step": 62550 + }, + { + "epoch": 0.11681856034187157, + "grad_norm": 1.0517642498016357, + "learning_rate": 0.00019990964548398412, + "loss": 4.9411, + "step": 62600 + }, + { + "epoch": 0.11691186590125006, + "grad_norm": 1.2433594465255737, + "learning_rate": 0.00019990946685295228, + "loss": 4.7959, + "step": 62650 + }, + { + "epoch": 0.11700517146062855, + "grad_norm": 1.1244089603424072, + "learning_rate": 0.00019990928804559786, + "loss": 4.6779, + "step": 62700 + }, + { + "epoch": 0.11709847702000704, + "grad_norm": 1.0606389045715332, + "learning_rate": 0.00019990910906192107, + "loss": 4.8322, + "step": 62750 + }, + { + "epoch": 0.11719178257938553, + "grad_norm": 1.1183406114578247, + "learning_rate": 0.00019990892990192233, + "loss": 4.7931, + "step": 62800 + }, + { + "epoch": 0.11728508813876402, + "grad_norm": 0.8485277891159058, + "learning_rate": 0.0001999087505656019, + "loss": 4.8164, + "step": 62850 + }, + { + "epoch": 0.11737839369814251, + "grad_norm": 0.9464021921157837, + "learning_rate": 0.0001999085710529601, + "loss": 4.7665, + "step": 62900 + }, + { + "epoch": 0.11747169925752102, + "grad_norm": 0.7202494144439697, + "learning_rate": 0.00019990839136399725, + "loss": 4.7638, + "step": 62950 + }, + { + "epoch": 0.11756500481689951, + "grad_norm": 1.0719358921051025, + "learning_rate": 0.00019990821149871364, + "loss": 4.8894, + "step": 63000 + }, + { + "epoch": 0.11756500481689951, + "eval_loss": 5.0077104568481445, + "eval_runtime": 233.0923, + "eval_samples_per_second": 11.189, + "eval_steps_per_second": 11.189, + "eval_tts_loss": 7.356751820346786, + "step": 63000 + }, + { + "epoch": 0.117658310376278, + "grad_norm": 1.0613762140274048, + "learning_rate": 0.00019990803145710965, + "loss": 4.8346, + "step": 63050 + }, + { + "epoch": 0.11775161593565649, + "grad_norm": 0.9940460324287415, + "learning_rate": 0.00019990785123918557, + "loss": 4.8047, + "step": 63100 + }, + { + "epoch": 0.11784492149503498, + "grad_norm": 1.0382481813430786, + "learning_rate": 0.0001999076708449417, + "loss": 4.6179, + "step": 63150 + }, + { + "epoch": 0.11793822705441347, + "grad_norm": 1.2504392862319946, + "learning_rate": 0.00019990749027437837, + "loss": 4.7242, + "step": 63200 + }, + { + "epoch": 0.11803153261379196, + "grad_norm": 1.3020495176315308, + "learning_rate": 0.00019990730952749587, + "loss": 4.7754, + "step": 63250 + }, + { + "epoch": 0.11812483817317045, + "grad_norm": 0.9937262535095215, + "learning_rate": 0.00019990712860429457, + "loss": 4.6131, + "step": 63300 + }, + { + "epoch": 0.11821814373254895, + "grad_norm": 0.7087301015853882, + "learning_rate": 0.0001999069475047748, + "loss": 4.8183, + "step": 63350 + }, + { + "epoch": 0.11831144929192744, + "grad_norm": 1.2310640811920166, + "learning_rate": 0.0001999067662289368, + "loss": 4.7773, + "step": 63400 + }, + { + "epoch": 0.11840475485130593, + "grad_norm": 1.2428086996078491, + "learning_rate": 0.00019990658477678097, + "loss": 4.7297, + "step": 63450 + }, + { + "epoch": 0.11849806041068442, + "grad_norm": 0.7657245397567749, + "learning_rate": 0.00019990640314830756, + "loss": 4.9017, + "step": 63500 + }, + { + "epoch": 0.11859136597006291, + "grad_norm": 1.0270503759384155, + "learning_rate": 0.00019990622134351694, + "loss": 4.7397, + "step": 63550 + }, + { + "epoch": 0.1186846715294414, + "grad_norm": 0.7919912338256836, + "learning_rate": 0.00019990603936240943, + "loss": 4.8849, + "step": 63600 + }, + { + "epoch": 0.11877797708881989, + "grad_norm": 1.0639008283615112, + "learning_rate": 0.00019990585720498536, + "loss": 4.8832, + "step": 63650 + }, + { + "epoch": 0.11887128264819839, + "grad_norm": 1.0238081216812134, + "learning_rate": 0.00019990567487124502, + "loss": 4.6403, + "step": 63700 + }, + { + "epoch": 0.11896458820757688, + "grad_norm": 1.0058157444000244, + "learning_rate": 0.00019990549236118872, + "loss": 4.5824, + "step": 63750 + }, + { + "epoch": 0.11905789376695537, + "grad_norm": 0.7168673276901245, + "learning_rate": 0.00019990530967481683, + "loss": 4.8433, + "step": 63800 + }, + { + "epoch": 0.11915119932633386, + "grad_norm": 0.9038125276565552, + "learning_rate": 0.0001999051268121296, + "loss": 4.8536, + "step": 63850 + }, + { + "epoch": 0.11924450488571235, + "grad_norm": 0.9761154651641846, + "learning_rate": 0.00019990494377312745, + "loss": 4.7876, + "step": 63900 + }, + { + "epoch": 0.11933781044509084, + "grad_norm": 1.0363209247589111, + "learning_rate": 0.00019990476055781065, + "loss": 4.8438, + "step": 63950 + }, + { + "epoch": 0.11943111600446933, + "grad_norm": 0.9672982096672058, + "learning_rate": 0.0001999045771661795, + "loss": 4.7065, + "step": 64000 + }, + { + "epoch": 0.11952442156384782, + "grad_norm": 1.009544014930725, + "learning_rate": 0.00019990439359823437, + "loss": 4.8137, + "step": 64050 + }, + { + "epoch": 0.11961772712322633, + "grad_norm": 0.8747443556785583, + "learning_rate": 0.00019990420985397552, + "loss": 4.6859, + "step": 64100 + }, + { + "epoch": 0.11971103268260481, + "grad_norm": 1.121934413909912, + "learning_rate": 0.0001999040259334034, + "loss": 4.8379, + "step": 64150 + }, + { + "epoch": 0.1198043382419833, + "grad_norm": 0.8262189030647278, + "learning_rate": 0.0001999038418365182, + "loss": 4.7602, + "step": 64200 + }, + { + "epoch": 0.1198976438013618, + "grad_norm": 0.9570197463035583, + "learning_rate": 0.0001999036575633203, + "loss": 4.8409, + "step": 64250 + }, + { + "epoch": 0.11999094936074028, + "grad_norm": 0.7180740833282471, + "learning_rate": 0.00019990347311381004, + "loss": 4.9055, + "step": 64300 + }, + { + "epoch": 0.12008425492011877, + "grad_norm": 0.7505408525466919, + "learning_rate": 0.0001999032884879877, + "loss": 4.8526, + "step": 64350 + }, + { + "epoch": 0.12017756047949726, + "grad_norm": 0.9490662813186646, + "learning_rate": 0.00019990310368585366, + "loss": 4.6729, + "step": 64400 + }, + { + "epoch": 0.12027086603887577, + "grad_norm": 1.0772948265075684, + "learning_rate": 0.0001999029187074082, + "loss": 4.6781, + "step": 64450 + }, + { + "epoch": 0.12036417159825426, + "grad_norm": 0.6764106154441833, + "learning_rate": 0.00019990273355265168, + "loss": 4.8261, + "step": 64500 + }, + { + "epoch": 0.12045747715763275, + "grad_norm": 0.8827162981033325, + "learning_rate": 0.00019990254822158443, + "loss": 4.8375, + "step": 64550 + }, + { + "epoch": 0.12055078271701124, + "grad_norm": 0.9409701228141785, + "learning_rate": 0.00019990236271420674, + "loss": 4.8701, + "step": 64600 + }, + { + "epoch": 0.12064408827638973, + "grad_norm": 0.8678632974624634, + "learning_rate": 0.00019990217703051894, + "loss": 4.9678, + "step": 64650 + }, + { + "epoch": 0.12073739383576822, + "grad_norm": 1.1174262762069702, + "learning_rate": 0.00019990199117052141, + "loss": 4.8362, + "step": 64700 + }, + { + "epoch": 0.1208306993951467, + "grad_norm": 0.8138838410377502, + "learning_rate": 0.00019990180513421445, + "loss": 4.9131, + "step": 64750 + }, + { + "epoch": 0.1209240049545252, + "grad_norm": 1.0765495300292969, + "learning_rate": 0.00019990161892159835, + "loss": 4.8251, + "step": 64800 + }, + { + "epoch": 0.1210173105139037, + "grad_norm": 1.0338910818099976, + "learning_rate": 0.0001999014325326735, + "loss": 4.4701, + "step": 64850 + }, + { + "epoch": 0.12111061607328219, + "grad_norm": 0.7882349491119385, + "learning_rate": 0.00019990124596744018, + "loss": 4.6385, + "step": 64900 + }, + { + "epoch": 0.12120392163266068, + "grad_norm": 1.1244629621505737, + "learning_rate": 0.00019990105922589876, + "loss": 4.6897, + "step": 64950 + }, + { + "epoch": 0.12129722719203917, + "grad_norm": 1.0500319004058838, + "learning_rate": 0.0001999008723080495, + "loss": 4.6929, + "step": 65000 + }, + { + "epoch": 0.12139053275141766, + "grad_norm": 0.9561808705329895, + "learning_rate": 0.0001999006852138928, + "loss": 4.866, + "step": 65050 + }, + { + "epoch": 0.12148383831079615, + "grad_norm": 1.0104138851165771, + "learning_rate": 0.000199900497943429, + "loss": 4.8589, + "step": 65100 + }, + { + "epoch": 0.12157714387017464, + "grad_norm": 0.975612998008728, + "learning_rate": 0.00019990031049665837, + "loss": 4.9448, + "step": 65150 + }, + { + "epoch": 0.12167044942955314, + "grad_norm": 0.8425891399383545, + "learning_rate": 0.0001999001228735813, + "loss": 4.6693, + "step": 65200 + }, + { + "epoch": 0.12176375498893163, + "grad_norm": 1.091673493385315, + "learning_rate": 0.00019989993507419805, + "loss": 4.7452, + "step": 65250 + }, + { + "epoch": 0.12185706054831012, + "grad_norm": 1.0499212741851807, + "learning_rate": 0.00019989974709850903, + "loss": 4.9866, + "step": 65300 + }, + { + "epoch": 0.12195036610768861, + "grad_norm": 0.9292690753936768, + "learning_rate": 0.0001998995589465145, + "loss": 4.9873, + "step": 65350 + }, + { + "epoch": 0.1220436716670671, + "grad_norm": 0.9211663603782654, + "learning_rate": 0.00019989937061821485, + "loss": 4.8513, + "step": 65400 + }, + { + "epoch": 0.12213697722644559, + "grad_norm": 1.0245435237884521, + "learning_rate": 0.00019989918211361037, + "loss": 4.813, + "step": 65450 + }, + { + "epoch": 0.12223028278582408, + "grad_norm": 0.8180379867553711, + "learning_rate": 0.00019989899343270145, + "loss": 4.8255, + "step": 65500 + }, + { + "epoch": 0.12232358834520257, + "grad_norm": 1.1613210439682007, + "learning_rate": 0.00019989880457548832, + "loss": 5.14, + "step": 65550 + }, + { + "epoch": 0.12241689390458108, + "grad_norm": 1.1680033206939697, + "learning_rate": 0.00019989861554197143, + "loss": 4.7817, + "step": 65600 + }, + { + "epoch": 0.12251019946395957, + "grad_norm": 1.1515231132507324, + "learning_rate": 0.00019989842633215106, + "loss": 4.8963, + "step": 65650 + }, + { + "epoch": 0.12260350502333806, + "grad_norm": 0.9831023216247559, + "learning_rate": 0.00019989823694602753, + "loss": 4.613, + "step": 65700 + }, + { + "epoch": 0.12269681058271655, + "grad_norm": 1.052646279335022, + "learning_rate": 0.0001998980473836012, + "loss": 4.7509, + "step": 65750 + }, + { + "epoch": 0.12279011614209503, + "grad_norm": 0.8736461400985718, + "learning_rate": 0.00019989785764487237, + "loss": 4.8964, + "step": 65800 + }, + { + "epoch": 0.12288342170147352, + "grad_norm": 0.9044458866119385, + "learning_rate": 0.0001998976677298414, + "loss": 4.8151, + "step": 65850 + }, + { + "epoch": 0.12297672726085201, + "grad_norm": 0.8736430406570435, + "learning_rate": 0.00019989747763850865, + "loss": 4.6468, + "step": 65900 + }, + { + "epoch": 0.12307003282023052, + "grad_norm": 1.0747185945510864, + "learning_rate": 0.0001998972873708744, + "loss": 4.8394, + "step": 65950 + }, + { + "epoch": 0.12316333837960901, + "grad_norm": 1.0368112325668335, + "learning_rate": 0.00019989709692693906, + "loss": 4.7296, + "step": 66000 + }, + { + "epoch": 0.12316333837960901, + "eval_loss": 4.993486404418945, + "eval_runtime": 231.2413, + "eval_samples_per_second": 11.278, + "eval_steps_per_second": 11.278, + "eval_tts_loss": 7.411762504489875, + "step": 66000 + }, + { + "epoch": 0.1232566439389875, + "grad_norm": 1.0206661224365234, + "learning_rate": 0.0001998969063067029, + "loss": 4.8384, + "step": 66050 + }, + { + "epoch": 0.12334994949836599, + "grad_norm": 1.2967489957809448, + "learning_rate": 0.00019989671551016625, + "loss": 4.6992, + "step": 66100 + }, + { + "epoch": 0.12344325505774448, + "grad_norm": 0.8370565176010132, + "learning_rate": 0.00019989652453732948, + "loss": 4.7676, + "step": 66150 + }, + { + "epoch": 0.12353656061712297, + "grad_norm": 1.1219327449798584, + "learning_rate": 0.00019989633338819295, + "loss": 4.913, + "step": 66200 + }, + { + "epoch": 0.12362986617650146, + "grad_norm": 1.0357359647750854, + "learning_rate": 0.00019989614206275696, + "loss": 4.7011, + "step": 66250 + }, + { + "epoch": 0.12372317173587995, + "grad_norm": 1.0418895483016968, + "learning_rate": 0.00019989595056102183, + "loss": 4.858, + "step": 66300 + }, + { + "epoch": 0.12381647729525845, + "grad_norm": 0.9631625413894653, + "learning_rate": 0.00019989575888298795, + "loss": 4.9325, + "step": 66350 + }, + { + "epoch": 0.12390978285463694, + "grad_norm": 1.2581260204315186, + "learning_rate": 0.00019989556702865565, + "loss": 4.7477, + "step": 66400 + }, + { + "epoch": 0.12400308841401543, + "grad_norm": 0.7702711224555969, + "learning_rate": 0.0001998953749980252, + "loss": 4.9714, + "step": 66450 + }, + { + "epoch": 0.12409639397339392, + "grad_norm": 1.0711308717727661, + "learning_rate": 0.00019989518279109705, + "loss": 4.836, + "step": 66500 + }, + { + "epoch": 0.12418969953277241, + "grad_norm": 1.082642912864685, + "learning_rate": 0.00019989499040787146, + "loss": 4.7201, + "step": 66550 + }, + { + "epoch": 0.1242830050921509, + "grad_norm": 1.1112911701202393, + "learning_rate": 0.00019989479784834877, + "loss": 4.6956, + "step": 66600 + }, + { + "epoch": 0.12437631065152939, + "grad_norm": 0.9402649998664856, + "learning_rate": 0.00019989460511252936, + "loss": 4.6172, + "step": 66650 + }, + { + "epoch": 0.12446961621090788, + "grad_norm": 1.1066251993179321, + "learning_rate": 0.00019989441220041353, + "loss": 4.7511, + "step": 66700 + }, + { + "epoch": 0.12456292177028638, + "grad_norm": 1.2885663509368896, + "learning_rate": 0.00019989421911200162, + "loss": 4.8006, + "step": 66750 + }, + { + "epoch": 0.12465622732966487, + "grad_norm": 0.717598557472229, + "learning_rate": 0.00019989402584729405, + "loss": 4.6387, + "step": 66800 + }, + { + "epoch": 0.12474953288904336, + "grad_norm": 1.0635030269622803, + "learning_rate": 0.00019989383240629106, + "loss": 4.6984, + "step": 66850 + }, + { + "epoch": 0.12484283844842185, + "grad_norm": 1.0408589839935303, + "learning_rate": 0.00019989363878899306, + "loss": 4.696, + "step": 66900 + }, + { + "epoch": 0.12493614400780034, + "grad_norm": 0.8848728537559509, + "learning_rate": 0.00019989344499540032, + "loss": 4.6144, + "step": 66950 + }, + { + "epoch": 0.12502944956717885, + "grad_norm": 0.9865880608558655, + "learning_rate": 0.00019989325102551326, + "loss": 4.8072, + "step": 67000 + }, + { + "epoch": 0.12512275512655732, + "grad_norm": 2.2153608798980713, + "learning_rate": 0.00019989305687933217, + "loss": 4.7434, + "step": 67050 + }, + { + "epoch": 0.12521606068593583, + "grad_norm": 0.9598948955535889, + "learning_rate": 0.00019989286255685742, + "loss": 4.7729, + "step": 67100 + }, + { + "epoch": 0.1253093662453143, + "grad_norm": 1.0703397989273071, + "learning_rate": 0.00019989266805808936, + "loss": 4.9212, + "step": 67150 + }, + { + "epoch": 0.1254026718046928, + "grad_norm": 0.954094648361206, + "learning_rate": 0.0001998924733830283, + "loss": 4.7929, + "step": 67200 + }, + { + "epoch": 0.12549597736407128, + "grad_norm": 0.7662084102630615, + "learning_rate": 0.0001998922785316746, + "loss": 4.9457, + "step": 67250 + }, + { + "epoch": 0.12558928292344979, + "grad_norm": 1.1596436500549316, + "learning_rate": 0.0001998920835040286, + "loss": 4.658, + "step": 67300 + }, + { + "epoch": 0.1256825884828283, + "grad_norm": 1.0783514976501465, + "learning_rate": 0.00019989188830009068, + "loss": 4.9466, + "step": 67350 + }, + { + "epoch": 0.12577589404220677, + "grad_norm": 0.8832924365997314, + "learning_rate": 0.00019989169291986114, + "loss": 4.7456, + "step": 67400 + }, + { + "epoch": 0.12586919960158527, + "grad_norm": 0.9739583730697632, + "learning_rate": 0.00019989149736334032, + "loss": 4.9385, + "step": 67450 + }, + { + "epoch": 0.12596250516096374, + "grad_norm": 1.0089017152786255, + "learning_rate": 0.0001998913016305286, + "loss": 4.6565, + "step": 67500 + }, + { + "epoch": 0.12605581072034225, + "grad_norm": 0.9786615371704102, + "learning_rate": 0.0001998911057214263, + "loss": 4.7881, + "step": 67550 + }, + { + "epoch": 0.12614911627972072, + "grad_norm": 0.9184353351593018, + "learning_rate": 0.00019989090963603377, + "loss": 4.779, + "step": 67600 + }, + { + "epoch": 0.12624242183909923, + "grad_norm": 0.8026026487350464, + "learning_rate": 0.00019989071337435136, + "loss": 4.6157, + "step": 67650 + }, + { + "epoch": 0.12633572739847773, + "grad_norm": 0.8393520712852478, + "learning_rate": 0.00019989051693637943, + "loss": 4.5899, + "step": 67700 + }, + { + "epoch": 0.1264290329578562, + "grad_norm": 0.9405346512794495, + "learning_rate": 0.00019989032032211834, + "loss": 4.5016, + "step": 67750 + }, + { + "epoch": 0.1265223385172347, + "grad_norm": 1.0097512006759644, + "learning_rate": 0.00019989012353156834, + "loss": 4.8901, + "step": 67800 + }, + { + "epoch": 0.1266156440766132, + "grad_norm": 1.1094014644622803, + "learning_rate": 0.0001998899265647299, + "loss": 4.8235, + "step": 67850 + }, + { + "epoch": 0.1267089496359917, + "grad_norm": 1.045980453491211, + "learning_rate": 0.00019988972942160333, + "loss": 4.8714, + "step": 67900 + }, + { + "epoch": 0.12680225519537017, + "grad_norm": 0.897870659828186, + "learning_rate": 0.0001998895321021889, + "loss": 4.78, + "step": 67950 + }, + { + "epoch": 0.12689556075474867, + "grad_norm": 0.8801417946815491, + "learning_rate": 0.0001998893346064871, + "loss": 4.7189, + "step": 68000 + }, + { + "epoch": 0.12698886631412717, + "grad_norm": 0.8801950812339783, + "learning_rate": 0.00019988913693449816, + "loss": 4.817, + "step": 68050 + }, + { + "epoch": 0.12708217187350565, + "grad_norm": 1.0675042867660522, + "learning_rate": 0.00019988893908622245, + "loss": 4.9578, + "step": 68100 + }, + { + "epoch": 0.12717547743288415, + "grad_norm": 0.8822778463363647, + "learning_rate": 0.00019988874106166037, + "loss": 4.9018, + "step": 68150 + }, + { + "epoch": 0.12726878299226263, + "grad_norm": 0.8060069680213928, + "learning_rate": 0.00019988854286081225, + "loss": 4.6307, + "step": 68200 + }, + { + "epoch": 0.12736208855164113, + "grad_norm": 1.0484051704406738, + "learning_rate": 0.00019988834448367838, + "loss": 4.928, + "step": 68250 + }, + { + "epoch": 0.1274553941110196, + "grad_norm": 1.0150614976882935, + "learning_rate": 0.0001998881459302592, + "loss": 4.7119, + "step": 68300 + }, + { + "epoch": 0.1275486996703981, + "grad_norm": 0.8359283208847046, + "learning_rate": 0.000199887947200555, + "loss": 4.6128, + "step": 68350 + }, + { + "epoch": 0.1276420052297766, + "grad_norm": 0.9599153399467468, + "learning_rate": 0.00019988774829456617, + "loss": 4.7391, + "step": 68400 + }, + { + "epoch": 0.1277353107891551, + "grad_norm": 1.055501103401184, + "learning_rate": 0.000199887549212293, + "loss": 4.7485, + "step": 68450 + }, + { + "epoch": 0.1278286163485336, + "grad_norm": 0.6903030872344971, + "learning_rate": 0.0001998873499537359, + "loss": 4.8311, + "step": 68500 + }, + { + "epoch": 0.12792192190791207, + "grad_norm": 1.1893553733825684, + "learning_rate": 0.0001998871505188952, + "loss": 4.8558, + "step": 68550 + }, + { + "epoch": 0.12801522746729058, + "grad_norm": 1.0602223873138428, + "learning_rate": 0.00019988695090777126, + "loss": 4.8572, + "step": 68600 + }, + { + "epoch": 0.12810853302666905, + "grad_norm": 1.0478551387786865, + "learning_rate": 0.00019988675112036442, + "loss": 4.8788, + "step": 68650 + }, + { + "epoch": 0.12820183858604756, + "grad_norm": 0.6287881731987, + "learning_rate": 0.00019988655115667507, + "loss": 4.8575, + "step": 68700 + }, + { + "epoch": 0.12829514414542603, + "grad_norm": 1.2025055885314941, + "learning_rate": 0.0001998863510167035, + "loss": 4.5946, + "step": 68750 + }, + { + "epoch": 0.12838844970480454, + "grad_norm": 1.10360586643219, + "learning_rate": 0.0001998861507004501, + "loss": 4.8477, + "step": 68800 + }, + { + "epoch": 0.12848175526418304, + "grad_norm": 1.1787991523742676, + "learning_rate": 0.00019988595020791524, + "loss": 4.5538, + "step": 68850 + }, + { + "epoch": 0.12857506082356152, + "grad_norm": 1.1096701622009277, + "learning_rate": 0.00019988574953909923, + "loss": 4.8392, + "step": 68900 + }, + { + "epoch": 0.12866836638294002, + "grad_norm": 1.0045229196548462, + "learning_rate": 0.00019988554869400247, + "loss": 4.8437, + "step": 68950 + }, + { + "epoch": 0.1287616719423185, + "grad_norm": 0.7254719734191895, + "learning_rate": 0.00019988534767262527, + "loss": 4.6926, + "step": 69000 + }, + { + "epoch": 0.1287616719423185, + "eval_loss": 4.980347156524658, + "eval_runtime": 233.3792, + "eval_samples_per_second": 11.175, + "eval_steps_per_second": 11.175, + "eval_tts_loss": 7.437576909478114, + "step": 69000 + }, + { + "epoch": 0.128854977501697, + "grad_norm": 1.043537974357605, + "learning_rate": 0.00019988514647496802, + "loss": 4.7274, + "step": 69050 + }, + { + "epoch": 0.12894828306107547, + "grad_norm": 1.02115797996521, + "learning_rate": 0.00019988494510103104, + "loss": 4.7884, + "step": 69100 + }, + { + "epoch": 0.12904158862045398, + "grad_norm": 1.1048721075057983, + "learning_rate": 0.00019988474355081475, + "loss": 4.4432, + "step": 69150 + }, + { + "epoch": 0.12913489417983248, + "grad_norm": 0.9006786942481995, + "learning_rate": 0.0001998845418243194, + "loss": 4.703, + "step": 69200 + }, + { + "epoch": 0.12922819973921096, + "grad_norm": 0.9208207130432129, + "learning_rate": 0.00019988433992154545, + "loss": 4.6751, + "step": 69250 + }, + { + "epoch": 0.12932150529858946, + "grad_norm": 0.9180870652198792, + "learning_rate": 0.00019988413784249322, + "loss": 4.5272, + "step": 69300 + }, + { + "epoch": 0.12941481085796794, + "grad_norm": 1.102773904800415, + "learning_rate": 0.00019988393558716306, + "loss": 4.7114, + "step": 69350 + }, + { + "epoch": 0.12950811641734644, + "grad_norm": 0.9914569854736328, + "learning_rate": 0.00019988373315555532, + "loss": 4.8268, + "step": 69400 + }, + { + "epoch": 0.12960142197672492, + "grad_norm": 0.9935908913612366, + "learning_rate": 0.00019988353054767037, + "loss": 4.8687, + "step": 69450 + }, + { + "epoch": 0.12969472753610342, + "grad_norm": 0.8525168895721436, + "learning_rate": 0.00019988332776350856, + "loss": 4.7935, + "step": 69500 + }, + { + "epoch": 0.12978803309548193, + "grad_norm": 0.8857572674751282, + "learning_rate": 0.00019988312480307026, + "loss": 4.7981, + "step": 69550 + }, + { + "epoch": 0.1298813386548604, + "grad_norm": 1.1465469598770142, + "learning_rate": 0.0001998829216663558, + "loss": 4.6701, + "step": 69600 + }, + { + "epoch": 0.1299746442142389, + "grad_norm": 0.91568523645401, + "learning_rate": 0.00019988271835336558, + "loss": 4.7749, + "step": 69650 + }, + { + "epoch": 0.13006794977361738, + "grad_norm": 0.9274788498878479, + "learning_rate": 0.00019988251486409992, + "loss": 4.6415, + "step": 69700 + }, + { + "epoch": 0.13016125533299588, + "grad_norm": 1.1520894765853882, + "learning_rate": 0.0001998823111985592, + "loss": 4.5849, + "step": 69750 + }, + { + "epoch": 0.13025456089237436, + "grad_norm": 1.3699066638946533, + "learning_rate": 0.0001998821073567438, + "loss": 4.7611, + "step": 69800 + }, + { + "epoch": 0.13034786645175286, + "grad_norm": 0.9259943962097168, + "learning_rate": 0.00019988190333865402, + "loss": 4.8313, + "step": 69850 + }, + { + "epoch": 0.13044117201113134, + "grad_norm": 0.8618115782737732, + "learning_rate": 0.00019988169914429026, + "loss": 4.6103, + "step": 69900 + }, + { + "epoch": 0.13053447757050984, + "grad_norm": 1.0242559909820557, + "learning_rate": 0.0001998814947736529, + "loss": 4.9478, + "step": 69950 + }, + { + "epoch": 0.13062778312988835, + "grad_norm": 1.2048753499984741, + "learning_rate": 0.00019988129022674228, + "loss": 4.9015, + "step": 70000 + }, + { + "epoch": 0.13072108868926682, + "grad_norm": 0.8471056818962097, + "learning_rate": 0.0001998810855035587, + "loss": 4.5826, + "step": 70050 + }, + { + "epoch": 0.13081439424864533, + "grad_norm": 0.9561209082603455, + "learning_rate": 0.00019988088060410265, + "loss": 4.7941, + "step": 70100 + }, + { + "epoch": 0.1309076998080238, + "grad_norm": 0.6860484480857849, + "learning_rate": 0.00019988067552837439, + "loss": 4.6454, + "step": 70150 + }, + { + "epoch": 0.1310010053674023, + "grad_norm": 1.028444528579712, + "learning_rate": 0.0001998804702763743, + "loss": 4.8362, + "step": 70200 + }, + { + "epoch": 0.13109431092678078, + "grad_norm": 0.9688596129417419, + "learning_rate": 0.00019988026484810275, + "loss": 4.8212, + "step": 70250 + }, + { + "epoch": 0.1311876164861593, + "grad_norm": 1.0312488079071045, + "learning_rate": 0.00019988005924356013, + "loss": 4.8123, + "step": 70300 + }, + { + "epoch": 0.1312809220455378, + "grad_norm": 0.8787838220596313, + "learning_rate": 0.0001998798534627468, + "loss": 4.6147, + "step": 70350 + }, + { + "epoch": 0.13137422760491627, + "grad_norm": 1.1073654890060425, + "learning_rate": 0.00019987964750566305, + "loss": 4.7933, + "step": 70400 + }, + { + "epoch": 0.13146753316429477, + "grad_norm": 1.085288643836975, + "learning_rate": 0.00019987944137230934, + "loss": 4.8248, + "step": 70450 + }, + { + "epoch": 0.13156083872367325, + "grad_norm": 1.2170988321304321, + "learning_rate": 0.00019987923506268595, + "loss": 4.783, + "step": 70500 + }, + { + "epoch": 0.13165414428305175, + "grad_norm": 1.0648763179779053, + "learning_rate": 0.00019987902857679333, + "loss": 4.9425, + "step": 70550 + }, + { + "epoch": 0.13174744984243023, + "grad_norm": 1.1931159496307373, + "learning_rate": 0.00019987882191463177, + "loss": 4.6804, + "step": 70600 + }, + { + "epoch": 0.13184075540180873, + "grad_norm": 0.9888035655021667, + "learning_rate": 0.00019987861507620168, + "loss": 4.7612, + "step": 70650 + }, + { + "epoch": 0.13193406096118723, + "grad_norm": 0.9140545725822449, + "learning_rate": 0.0001998784080615034, + "loss": 4.6891, + "step": 70700 + }, + { + "epoch": 0.1320273665205657, + "grad_norm": 1.096142053604126, + "learning_rate": 0.00019987820087053728, + "loss": 4.9494, + "step": 70750 + }, + { + "epoch": 0.1321206720799442, + "grad_norm": 1.1193305253982544, + "learning_rate": 0.00019987799350330374, + "loss": 4.8832, + "step": 70800 + }, + { + "epoch": 0.1322139776393227, + "grad_norm": 1.2808094024658203, + "learning_rate": 0.0001998777859598031, + "loss": 4.9684, + "step": 70850 + }, + { + "epoch": 0.1323072831987012, + "grad_norm": 0.852861225605011, + "learning_rate": 0.00019987757824003577, + "loss": 4.911, + "step": 70900 + }, + { + "epoch": 0.13240058875807967, + "grad_norm": 0.8314254283905029, + "learning_rate": 0.00019987737034400204, + "loss": 4.6268, + "step": 70950 + }, + { + "epoch": 0.13249389431745817, + "grad_norm": 1.0888328552246094, + "learning_rate": 0.00019987716227170234, + "loss": 4.837, + "step": 71000 + }, + { + "epoch": 0.13258719987683665, + "grad_norm": 0.9562662243843079, + "learning_rate": 0.00019987695402313706, + "loss": 4.8852, + "step": 71050 + }, + { + "epoch": 0.13268050543621515, + "grad_norm": 1.0483977794647217, + "learning_rate": 0.00019987674559830652, + "loss": 4.6314, + "step": 71100 + }, + { + "epoch": 0.13277381099559366, + "grad_norm": 1.0416483879089355, + "learning_rate": 0.00019987653699721106, + "loss": 4.6485, + "step": 71150 + }, + { + "epoch": 0.13286711655497213, + "grad_norm": 0.9139002561569214, + "learning_rate": 0.00019987632821985113, + "loss": 4.4885, + "step": 71200 + }, + { + "epoch": 0.13296042211435063, + "grad_norm": 1.0126796960830688, + "learning_rate": 0.00019987611926622703, + "loss": 4.7368, + "step": 71250 + }, + { + "epoch": 0.1330537276737291, + "grad_norm": 0.8071941137313843, + "learning_rate": 0.00019987591013633912, + "loss": 4.7529, + "step": 71300 + }, + { + "epoch": 0.13314703323310761, + "grad_norm": 0.8318772912025452, + "learning_rate": 0.00019987570083018788, + "loss": 4.686, + "step": 71350 + }, + { + "epoch": 0.1332403387924861, + "grad_norm": 1.0917198657989502, + "learning_rate": 0.00019987549134777355, + "loss": 4.6129, + "step": 71400 + }, + { + "epoch": 0.1333336443518646, + "grad_norm": 1.0430625677108765, + "learning_rate": 0.00019987528168909655, + "loss": 4.5787, + "step": 71450 + }, + { + "epoch": 0.1334269499112431, + "grad_norm": 1.307949423789978, + "learning_rate": 0.00019987507185415727, + "loss": 4.7455, + "step": 71500 + }, + { + "epoch": 0.13352025547062157, + "grad_norm": 0.9685766696929932, + "learning_rate": 0.00019987486184295606, + "loss": 4.7564, + "step": 71550 + }, + { + "epoch": 0.13361356103000008, + "grad_norm": 0.9537215232849121, + "learning_rate": 0.00019987465165549327, + "loss": 4.7439, + "step": 71600 + }, + { + "epoch": 0.13370686658937855, + "grad_norm": 0.8062765598297119, + "learning_rate": 0.00019987444129176933, + "loss": 4.6103, + "step": 71650 + }, + { + "epoch": 0.13380017214875706, + "grad_norm": 1.0107309818267822, + "learning_rate": 0.00019987423075178455, + "loss": 4.8355, + "step": 71700 + }, + { + "epoch": 0.13389347770813553, + "grad_norm": 0.8485686779022217, + "learning_rate": 0.00019987402003553934, + "loss": 4.7971, + "step": 71750 + }, + { + "epoch": 0.13398678326751404, + "grad_norm": 1.2249001264572144, + "learning_rate": 0.00019987380914303403, + "loss": 4.823, + "step": 71800 + }, + { + "epoch": 0.13408008882689254, + "grad_norm": 1.0090044736862183, + "learning_rate": 0.00019987359807426905, + "loss": 4.9343, + "step": 71850 + }, + { + "epoch": 0.13417339438627102, + "grad_norm": 1.018228530883789, + "learning_rate": 0.00019987338682924476, + "loss": 4.8365, + "step": 71900 + }, + { + "epoch": 0.13426669994564952, + "grad_norm": 0.947043240070343, + "learning_rate": 0.00019987317540796147, + "loss": 4.7334, + "step": 71950 + }, + { + "epoch": 0.134360005505028, + "grad_norm": 1.1589524745941162, + "learning_rate": 0.00019987296381041963, + "loss": 4.6444, + "step": 72000 + }, + { + "epoch": 0.134360005505028, + "eval_loss": 4.982792377471924, + "eval_runtime": 233.4346, + "eval_samples_per_second": 11.172, + "eval_steps_per_second": 11.172, + "eval_tts_loss": 7.417768215367777, + "step": 72000 + }, + { + "epoch": 0.1344533110644065, + "grad_norm": 1.0631495714187622, + "learning_rate": 0.00019987275203661956, + "loss": 4.8445, + "step": 72050 + }, + { + "epoch": 0.13454661662378498, + "grad_norm": 1.056353211402893, + "learning_rate": 0.00019987254008656165, + "loss": 4.8737, + "step": 72100 + }, + { + "epoch": 0.13463992218316348, + "grad_norm": 1.192908525466919, + "learning_rate": 0.0001998723279602463, + "loss": 4.7873, + "step": 72150 + }, + { + "epoch": 0.13473322774254198, + "grad_norm": 1.1714168787002563, + "learning_rate": 0.00019987211565767384, + "loss": 4.7542, + "step": 72200 + }, + { + "epoch": 0.13482653330192046, + "grad_norm": 0.7499253153800964, + "learning_rate": 0.00019987190317884469, + "loss": 4.8558, + "step": 72250 + }, + { + "epoch": 0.13491983886129896, + "grad_norm": 1.1434695720672607, + "learning_rate": 0.0001998716905237592, + "loss": 4.684, + "step": 72300 + }, + { + "epoch": 0.13501314442067744, + "grad_norm": 0.9762905836105347, + "learning_rate": 0.00019987147769241775, + "loss": 4.6689, + "step": 72350 + }, + { + "epoch": 0.13510644998005594, + "grad_norm": 1.05728018283844, + "learning_rate": 0.00019987126468482073, + "loss": 4.7129, + "step": 72400 + }, + { + "epoch": 0.13519975553943442, + "grad_norm": 1.0053168535232544, + "learning_rate": 0.00019987105150096849, + "loss": 4.6524, + "step": 72450 + }, + { + "epoch": 0.13529306109881292, + "grad_norm": 0.9576205611228943, + "learning_rate": 0.0001998708381408614, + "loss": 4.6339, + "step": 72500 + }, + { + "epoch": 0.1353863666581914, + "grad_norm": 1.0552775859832764, + "learning_rate": 0.00019987062460449984, + "loss": 4.6316, + "step": 72550 + }, + { + "epoch": 0.1354796722175699, + "grad_norm": 1.1562297344207764, + "learning_rate": 0.00019987041089188426, + "loss": 4.7064, + "step": 72600 + }, + { + "epoch": 0.1355729777769484, + "grad_norm": 1.0458017587661743, + "learning_rate": 0.00019987019700301494, + "loss": 4.8765, + "step": 72650 + }, + { + "epoch": 0.13566628333632688, + "grad_norm": 0.8299524784088135, + "learning_rate": 0.00019986998293789233, + "loss": 4.8115, + "step": 72700 + }, + { + "epoch": 0.13575958889570539, + "grad_norm": 1.1313036680221558, + "learning_rate": 0.0001998697686965167, + "loss": 4.7096, + "step": 72750 + }, + { + "epoch": 0.13585289445508386, + "grad_norm": 1.0760210752487183, + "learning_rate": 0.00019986955427888855, + "loss": 4.886, + "step": 72800 + }, + { + "epoch": 0.13594620001446237, + "grad_norm": 1.1115272045135498, + "learning_rate": 0.0001998693396850082, + "loss": 4.8623, + "step": 72850 + }, + { + "epoch": 0.13603950557384084, + "grad_norm": 0.6817601919174194, + "learning_rate": 0.00019986912491487603, + "loss": 4.7888, + "step": 72900 + }, + { + "epoch": 0.13613281113321934, + "grad_norm": 1.2269861698150635, + "learning_rate": 0.00019986890996849245, + "loss": 4.8229, + "step": 72950 + }, + { + "epoch": 0.13622611669259785, + "grad_norm": 1.13284170627594, + "learning_rate": 0.00019986869484585782, + "loss": 4.7043, + "step": 73000 + }, + { + "epoch": 0.13631942225197632, + "grad_norm": 1.085242748260498, + "learning_rate": 0.0001998684795469725, + "loss": 4.8635, + "step": 73050 + }, + { + "epoch": 0.13641272781135483, + "grad_norm": 0.9383891224861145, + "learning_rate": 0.00019986826407183687, + "loss": 4.7206, + "step": 73100 + }, + { + "epoch": 0.1365060333707333, + "grad_norm": 0.9130173921585083, + "learning_rate": 0.00019986804842045134, + "loss": 4.783, + "step": 73150 + }, + { + "epoch": 0.1365993389301118, + "grad_norm": 0.9875626564025879, + "learning_rate": 0.0001998678325928163, + "loss": 4.6706, + "step": 73200 + }, + { + "epoch": 0.13669264448949028, + "grad_norm": 0.775663435459137, + "learning_rate": 0.00019986761658893207, + "loss": 4.5753, + "step": 73250 + }, + { + "epoch": 0.1367859500488688, + "grad_norm": 0.9947422742843628, + "learning_rate": 0.00019986740040879908, + "loss": 4.59, + "step": 73300 + }, + { + "epoch": 0.1368792556082473, + "grad_norm": 0.7354251742362976, + "learning_rate": 0.00019986718405241773, + "loss": 4.5268, + "step": 73350 + }, + { + "epoch": 0.13697256116762577, + "grad_norm": 0.9858702421188354, + "learning_rate": 0.00019986696751978834, + "loss": 4.7284, + "step": 73400 + }, + { + "epoch": 0.13706586672700427, + "grad_norm": 1.1084253787994385, + "learning_rate": 0.00019986675081091134, + "loss": 4.864, + "step": 73450 + }, + { + "epoch": 0.13715917228638275, + "grad_norm": 1.0216785669326782, + "learning_rate": 0.00019986653392578706, + "loss": 4.6343, + "step": 73500 + }, + { + "epoch": 0.13725247784576125, + "grad_norm": 1.0096267461776733, + "learning_rate": 0.00019986631686441596, + "loss": 4.7856, + "step": 73550 + }, + { + "epoch": 0.13734578340513973, + "grad_norm": 0.9455108642578125, + "learning_rate": 0.0001998660996267984, + "loss": 4.6775, + "step": 73600 + }, + { + "epoch": 0.13743908896451823, + "grad_norm": 1.4153708219528198, + "learning_rate": 0.0001998658822129347, + "loss": 4.6638, + "step": 73650 + }, + { + "epoch": 0.13753239452389673, + "grad_norm": 0.8518409729003906, + "learning_rate": 0.00019986566462282533, + "loss": 4.9213, + "step": 73700 + }, + { + "epoch": 0.1376257000832752, + "grad_norm": 1.1150931119918823, + "learning_rate": 0.0001998654468564706, + "loss": 4.7676, + "step": 73750 + }, + { + "epoch": 0.1377190056426537, + "grad_norm": 0.9590360522270203, + "learning_rate": 0.00019986522891387093, + "loss": 4.9231, + "step": 73800 + }, + { + "epoch": 0.1378123112020322, + "grad_norm": 0.7029619216918945, + "learning_rate": 0.0001998650107950267, + "loss": 4.692, + "step": 73850 + }, + { + "epoch": 0.1379056167614107, + "grad_norm": 0.8161832690238953, + "learning_rate": 0.00019986479249993832, + "loss": 4.7669, + "step": 73900 + }, + { + "epoch": 0.13799892232078917, + "grad_norm": 1.2208623886108398, + "learning_rate": 0.00019986457402860618, + "loss": 4.774, + "step": 73950 + }, + { + "epoch": 0.13809222788016767, + "grad_norm": 0.6693832278251648, + "learning_rate": 0.00019986435538103058, + "loss": 4.6447, + "step": 74000 + }, + { + "epoch": 0.13818553343954615, + "grad_norm": 1.1013367176055908, + "learning_rate": 0.000199864136557212, + "loss": 4.9094, + "step": 74050 + }, + { + "epoch": 0.13827883899892465, + "grad_norm": 0.6470247507095337, + "learning_rate": 0.00019986391755715075, + "loss": 4.7323, + "step": 74100 + }, + { + "epoch": 0.13837214455830316, + "grad_norm": 1.0333960056304932, + "learning_rate": 0.0001998636983808473, + "loss": 4.6899, + "step": 74150 + }, + { + "epoch": 0.13846545011768163, + "grad_norm": 1.0309245586395264, + "learning_rate": 0.00019986347902830196, + "loss": 4.8887, + "step": 74200 + }, + { + "epoch": 0.13855875567706014, + "grad_norm": 1.0553001165390015, + "learning_rate": 0.00019986325949951516, + "loss": 4.654, + "step": 74250 + }, + { + "epoch": 0.1386520612364386, + "grad_norm": 1.3011856079101562, + "learning_rate": 0.0001998630397944873, + "loss": 4.8259, + "step": 74300 + }, + { + "epoch": 0.13874536679581712, + "grad_norm": 1.04082190990448, + "learning_rate": 0.00019986281991321873, + "loss": 4.9014, + "step": 74350 + }, + { + "epoch": 0.1388386723551956, + "grad_norm": 0.9634092450141907, + "learning_rate": 0.00019986259985570987, + "loss": 4.7242, + "step": 74400 + }, + { + "epoch": 0.1389319779145741, + "grad_norm": 1.2973626852035522, + "learning_rate": 0.00019986237962196107, + "loss": 4.7606, + "step": 74450 + }, + { + "epoch": 0.1390252834739526, + "grad_norm": 0.8108509182929993, + "learning_rate": 0.00019986215921197277, + "loss": 4.7773, + "step": 74500 + }, + { + "epoch": 0.13911858903333107, + "grad_norm": 1.048374891281128, + "learning_rate": 0.0001998619386257453, + "loss": 4.8176, + "step": 74550 + }, + { + "epoch": 0.13921189459270958, + "grad_norm": 0.9757275581359863, + "learning_rate": 0.0001998617178632791, + "loss": 4.5193, + "step": 74600 + }, + { + "epoch": 0.13930520015208805, + "grad_norm": 1.088984727859497, + "learning_rate": 0.00019986149692457453, + "loss": 4.738, + "step": 74650 + }, + { + "epoch": 0.13939850571146656, + "grad_norm": 1.0206042528152466, + "learning_rate": 0.00019986127580963199, + "loss": 4.7663, + "step": 74700 + }, + { + "epoch": 0.13949181127084503, + "grad_norm": 0.859443724155426, + "learning_rate": 0.00019986105451845186, + "loss": 4.7207, + "step": 74750 + }, + { + "epoch": 0.13958511683022354, + "grad_norm": 0.9786943793296814, + "learning_rate": 0.00019986083305103457, + "loss": 4.8639, + "step": 74800 + }, + { + "epoch": 0.13967842238960204, + "grad_norm": 1.2023348808288574, + "learning_rate": 0.00019986061140738043, + "loss": 4.7091, + "step": 74850 + }, + { + "epoch": 0.13977172794898052, + "grad_norm": 1.1484274864196777, + "learning_rate": 0.00019986038958748993, + "loss": 4.6658, + "step": 74900 + }, + { + "epoch": 0.13986503350835902, + "grad_norm": 1.0033177137374878, + "learning_rate": 0.0001998601675913634, + "loss": 4.8025, + "step": 74950 + }, + { + "epoch": 0.1399583390677375, + "grad_norm": 1.1093839406967163, + "learning_rate": 0.00019985994541900124, + "loss": 4.7705, + "step": 75000 + }, + { + "epoch": 0.1399583390677375, + "eval_loss": 4.963533878326416, + "eval_runtime": 229.2936, + "eval_samples_per_second": 11.374, + "eval_steps_per_second": 11.374, + "eval_tts_loss": 7.357835572455764, + "step": 75000 + }, + { + "epoch": 0.140051644627116, + "grad_norm": 0.8389537930488586, + "learning_rate": 0.00019985972307040383, + "loss": 4.8843, + "step": 75050 + }, + { + "epoch": 0.14014495018649448, + "grad_norm": 1.009690761566162, + "learning_rate": 0.00019985950054557164, + "loss": 4.6147, + "step": 75100 + }, + { + "epoch": 0.14023825574587298, + "grad_norm": 1.0867621898651123, + "learning_rate": 0.00019985927784450493, + "loss": 4.7178, + "step": 75150 + }, + { + "epoch": 0.14033156130525146, + "grad_norm": 0.7208017706871033, + "learning_rate": 0.0001998590549672042, + "loss": 4.918, + "step": 75200 + }, + { + "epoch": 0.14042486686462996, + "grad_norm": 0.9712778925895691, + "learning_rate": 0.0001998588319136698, + "loss": 4.8301, + "step": 75250 + }, + { + "epoch": 0.14051817242400846, + "grad_norm": 0.7589443922042847, + "learning_rate": 0.00019985860868390213, + "loss": 4.7432, + "step": 75300 + }, + { + "epoch": 0.14061147798338694, + "grad_norm": 0.7034851312637329, + "learning_rate": 0.0001998583852779016, + "loss": 4.6202, + "step": 75350 + }, + { + "epoch": 0.14070478354276544, + "grad_norm": 1.2004735469818115, + "learning_rate": 0.0001998581616956686, + "loss": 4.4097, + "step": 75400 + }, + { + "epoch": 0.14079808910214392, + "grad_norm": 1.1603310108184814, + "learning_rate": 0.0001998579379372035, + "loss": 4.8106, + "step": 75450 + }, + { + "epoch": 0.14089139466152242, + "grad_norm": 0.9474809169769287, + "learning_rate": 0.00019985771400250668, + "loss": 4.6189, + "step": 75500 + }, + { + "epoch": 0.1409847002209009, + "grad_norm": 0.9576531052589417, + "learning_rate": 0.0001998574898915786, + "loss": 4.6265, + "step": 75550 + }, + { + "epoch": 0.1410780057802794, + "grad_norm": 0.7832580804824829, + "learning_rate": 0.00019985726560441962, + "loss": 4.656, + "step": 75600 + }, + { + "epoch": 0.1411713113396579, + "grad_norm": 1.3878700733184814, + "learning_rate": 0.00019985704114103015, + "loss": 4.5926, + "step": 75650 + }, + { + "epoch": 0.14126461689903638, + "grad_norm": 0.8612073659896851, + "learning_rate": 0.00019985681650141053, + "loss": 4.8437, + "step": 75700 + }, + { + "epoch": 0.1413579224584149, + "grad_norm": 1.2866979837417603, + "learning_rate": 0.00019985659168556124, + "loss": 4.9809, + "step": 75750 + }, + { + "epoch": 0.14145122801779336, + "grad_norm": 1.1674981117248535, + "learning_rate": 0.0001998563666934826, + "loss": 4.7633, + "step": 75800 + }, + { + "epoch": 0.14154453357717187, + "grad_norm": 1.0047423839569092, + "learning_rate": 0.0001998561415251751, + "loss": 4.6555, + "step": 75850 + }, + { + "epoch": 0.14163783913655034, + "grad_norm": 0.8399094343185425, + "learning_rate": 0.00019985591618063903, + "loss": 5.0474, + "step": 75900 + }, + { + "epoch": 0.14173114469592885, + "grad_norm": 1.3186641931533813, + "learning_rate": 0.00019985569065987484, + "loss": 4.7141, + "step": 75950 + }, + { + "epoch": 0.14182445025530735, + "grad_norm": 1.2126065492630005, + "learning_rate": 0.00019985546496288295, + "loss": 4.5208, + "step": 76000 + }, + { + "epoch": 0.14191775581468583, + "grad_norm": 1.0247917175292969, + "learning_rate": 0.0001998552390896637, + "loss": 4.6857, + "step": 76050 + }, + { + "epoch": 0.14201106137406433, + "grad_norm": 0.8075127005577087, + "learning_rate": 0.00019985501304021756, + "loss": 4.6593, + "step": 76100 + }, + { + "epoch": 0.1421043669334428, + "grad_norm": 0.8528395295143127, + "learning_rate": 0.00019985478681454487, + "loss": 4.7271, + "step": 76150 + }, + { + "epoch": 0.1421976724928213, + "grad_norm": 0.9807457327842712, + "learning_rate": 0.00019985456041264605, + "loss": 4.5915, + "step": 76200 + }, + { + "epoch": 0.14229097805219978, + "grad_norm": 1.1197192668914795, + "learning_rate": 0.0001998543338345215, + "loss": 4.4433, + "step": 76250 + }, + { + "epoch": 0.1423842836115783, + "grad_norm": 1.044595718383789, + "learning_rate": 0.00019985410708017165, + "loss": 4.7313, + "step": 76300 + }, + { + "epoch": 0.1424775891709568, + "grad_norm": 0.8226972818374634, + "learning_rate": 0.00019985388014959684, + "loss": 4.5888, + "step": 76350 + }, + { + "epoch": 0.14257089473033527, + "grad_norm": 0.9106892943382263, + "learning_rate": 0.00019985365304279752, + "loss": 4.7119, + "step": 76400 + }, + { + "epoch": 0.14266420028971377, + "grad_norm": 1.2753225564956665, + "learning_rate": 0.00019985342575977406, + "loss": 4.7376, + "step": 76450 + }, + { + "epoch": 0.14275750584909225, + "grad_norm": 0.9028472304344177, + "learning_rate": 0.00019985319830052688, + "loss": 4.6413, + "step": 76500 + }, + { + "epoch": 0.14285081140847075, + "grad_norm": 1.0299334526062012, + "learning_rate": 0.00019985297066505637, + "loss": 4.6844, + "step": 76550 + }, + { + "epoch": 0.14294411696784923, + "grad_norm": 1.1871273517608643, + "learning_rate": 0.00019985274285336293, + "loss": 4.8803, + "step": 76600 + }, + { + "epoch": 0.14303742252722773, + "grad_norm": 0.5859182476997375, + "learning_rate": 0.000199852514865447, + "loss": 4.627, + "step": 76650 + }, + { + "epoch": 0.1431307280866062, + "grad_norm": 0.8444706201553345, + "learning_rate": 0.00019985228670130894, + "loss": 4.7149, + "step": 76700 + }, + { + "epoch": 0.1432240336459847, + "grad_norm": 0.889392614364624, + "learning_rate": 0.0001998520583609491, + "loss": 4.6281, + "step": 76750 + }, + { + "epoch": 0.14331733920536321, + "grad_norm": 0.9411704540252686, + "learning_rate": 0.00019985182984436802, + "loss": 4.6086, + "step": 76800 + }, + { + "epoch": 0.1434106447647417, + "grad_norm": 0.8570734262466431, + "learning_rate": 0.000199851601151566, + "loss": 4.7565, + "step": 76850 + }, + { + "epoch": 0.1435039503241202, + "grad_norm": 0.8784626722335815, + "learning_rate": 0.0001998513722825435, + "loss": 4.891, + "step": 76900 + }, + { + "epoch": 0.14359725588349867, + "grad_norm": 1.0024194717407227, + "learning_rate": 0.00019985114323730087, + "loss": 4.7093, + "step": 76950 + }, + { + "epoch": 0.14369056144287717, + "grad_norm": 1.1379250288009644, + "learning_rate": 0.00019985091401583855, + "loss": 4.8616, + "step": 77000 + }, + { + "epoch": 0.14378386700225565, + "grad_norm": 0.9281609058380127, + "learning_rate": 0.00019985068461815693, + "loss": 4.7413, + "step": 77050 + }, + { + "epoch": 0.14387717256163415, + "grad_norm": 1.4726687669754028, + "learning_rate": 0.00019985045504425643, + "loss": 4.7395, + "step": 77100 + }, + { + "epoch": 0.14397047812101266, + "grad_norm": 1.1294130086898804, + "learning_rate": 0.00019985022529413744, + "loss": 4.8413, + "step": 77150 + }, + { + "epoch": 0.14406378368039113, + "grad_norm": 0.7833269834518433, + "learning_rate": 0.00019984999536780038, + "loss": 4.7995, + "step": 77200 + }, + { + "epoch": 0.14415708923976964, + "grad_norm": 0.861925482749939, + "learning_rate": 0.00019984976526524567, + "loss": 4.7661, + "step": 77250 + }, + { + "epoch": 0.1442503947991481, + "grad_norm": 1.15879225730896, + "learning_rate": 0.00019984953498647365, + "loss": 4.8043, + "step": 77300 + }, + { + "epoch": 0.14434370035852662, + "grad_norm": 0.854045569896698, + "learning_rate": 0.0001998493045314848, + "loss": 4.6388, + "step": 77350 + }, + { + "epoch": 0.1444370059179051, + "grad_norm": 1.0074321031570435, + "learning_rate": 0.00019984907390027944, + "loss": 4.7215, + "step": 77400 + }, + { + "epoch": 0.1445303114772836, + "grad_norm": 0.9455142617225647, + "learning_rate": 0.0001998488430928581, + "loss": 4.7325, + "step": 77450 + }, + { + "epoch": 0.1446236170366621, + "grad_norm": 1.0225214958190918, + "learning_rate": 0.00019984861210922109, + "loss": 4.7662, + "step": 77500 + }, + { + "epoch": 0.14471692259604058, + "grad_norm": 0.8790587186813354, + "learning_rate": 0.00019984838094936884, + "loss": 4.7582, + "step": 77550 + }, + { + "epoch": 0.14481022815541908, + "grad_norm": 0.7881041169166565, + "learning_rate": 0.00019984814961330178, + "loss": 4.7529, + "step": 77600 + }, + { + "epoch": 0.14490353371479756, + "grad_norm": 0.9009997844696045, + "learning_rate": 0.00019984791810102032, + "loss": 4.8476, + "step": 77650 + }, + { + "epoch": 0.14499683927417606, + "grad_norm": 1.1333011388778687, + "learning_rate": 0.00019984768641252484, + "loss": 4.7743, + "step": 77700 + }, + { + "epoch": 0.14509014483355454, + "grad_norm": 1.0094295740127563, + "learning_rate": 0.00019984745454781577, + "loss": 4.7856, + "step": 77750 + }, + { + "epoch": 0.14518345039293304, + "grad_norm": 1.1273713111877441, + "learning_rate": 0.00019984722250689348, + "loss": 4.6921, + "step": 77800 + }, + { + "epoch": 0.14527675595231151, + "grad_norm": 1.0432093143463135, + "learning_rate": 0.00019984699028975845, + "loss": 4.6715, + "step": 77850 + }, + { + "epoch": 0.14537006151169002, + "grad_norm": 1.0306214094161987, + "learning_rate": 0.00019984675789641102, + "loss": 4.9655, + "step": 77900 + }, + { + "epoch": 0.14546336707106852, + "grad_norm": 0.5953212380409241, + "learning_rate": 0.00019984652532685165, + "loss": 4.6442, + "step": 77950 + }, + { + "epoch": 0.145556672630447, + "grad_norm": 0.900357723236084, + "learning_rate": 0.00019984629258108073, + "loss": 4.6404, + "step": 78000 + }, + { + "epoch": 0.145556672630447, + "eval_loss": 4.966691017150879, + "eval_runtime": 229.295, + "eval_samples_per_second": 11.374, + "eval_steps_per_second": 11.374, + "eval_tts_loss": 7.41557245793541, + "step": 78000 + }, + { + "epoch": 0.1456499781898255, + "grad_norm": 0.9374669194221497, + "learning_rate": 0.00019984605965909865, + "loss": 4.6393, + "step": 78050 + }, + { + "epoch": 0.14574328374920398, + "grad_norm": 0.7869539856910706, + "learning_rate": 0.00019984582656090586, + "loss": 4.5879, + "step": 78100 + }, + { + "epoch": 0.14583658930858248, + "grad_norm": 0.6721176505088806, + "learning_rate": 0.00019984559328650278, + "loss": 4.8905, + "step": 78150 + }, + { + "epoch": 0.14592989486796096, + "grad_norm": 0.9904831647872925, + "learning_rate": 0.00019984535983588977, + "loss": 4.8523, + "step": 78200 + }, + { + "epoch": 0.14602320042733946, + "grad_norm": 0.9806076884269714, + "learning_rate": 0.00019984512620906727, + "loss": 4.8636, + "step": 78250 + }, + { + "epoch": 0.14611650598671796, + "grad_norm": 1.2187144756317139, + "learning_rate": 0.0001998448924060357, + "loss": 4.7684, + "step": 78300 + }, + { + "epoch": 0.14620981154609644, + "grad_norm": 0.9885228276252747, + "learning_rate": 0.00019984465842679545, + "loss": 4.743, + "step": 78350 + }, + { + "epoch": 0.14630311710547494, + "grad_norm": 0.8706178665161133, + "learning_rate": 0.00019984442427134694, + "loss": 4.7663, + "step": 78400 + }, + { + "epoch": 0.14639642266485342, + "grad_norm": 0.9276357889175415, + "learning_rate": 0.0001998441899396906, + "loss": 4.9867, + "step": 78450 + }, + { + "epoch": 0.14648972822423192, + "grad_norm": 1.1178739070892334, + "learning_rate": 0.00019984395543182687, + "loss": 4.7049, + "step": 78500 + }, + { + "epoch": 0.1465830337836104, + "grad_norm": 0.9989400506019592, + "learning_rate": 0.00019984372074775605, + "loss": 4.6692, + "step": 78550 + }, + { + "epoch": 0.1466763393429889, + "grad_norm": 1.0636882781982422, + "learning_rate": 0.0001998434858874787, + "loss": 4.8389, + "step": 78600 + }, + { + "epoch": 0.1467696449023674, + "grad_norm": 1.0234490633010864, + "learning_rate": 0.00019984325085099515, + "loss": 4.7642, + "step": 78650 + }, + { + "epoch": 0.14686295046174588, + "grad_norm": 0.9406095743179321, + "learning_rate": 0.00019984301563830579, + "loss": 4.8147, + "step": 78700 + }, + { + "epoch": 0.1469562560211244, + "grad_norm": 1.0061767101287842, + "learning_rate": 0.0001998427802494111, + "loss": 4.8082, + "step": 78750 + }, + { + "epoch": 0.14704956158050286, + "grad_norm": 0.9918909668922424, + "learning_rate": 0.00019984254468431148, + "loss": 4.7704, + "step": 78800 + }, + { + "epoch": 0.14714286713988137, + "grad_norm": 1.0421675443649292, + "learning_rate": 0.00019984230894300731, + "loss": 4.916, + "step": 78850 + }, + { + "epoch": 0.14723617269925984, + "grad_norm": 1.1399683952331543, + "learning_rate": 0.00019984207302549907, + "loss": 4.7578, + "step": 78900 + }, + { + "epoch": 0.14732947825863835, + "grad_norm": 1.0678455829620361, + "learning_rate": 0.0001998418369317871, + "loss": 4.838, + "step": 78950 + }, + { + "epoch": 0.14742278381801685, + "grad_norm": 0.9881036877632141, + "learning_rate": 0.00019984160066187188, + "loss": 4.8102, + "step": 79000 + }, + { + "epoch": 0.14751608937739533, + "grad_norm": 1.053001880645752, + "learning_rate": 0.0001998413642157538, + "loss": 4.7473, + "step": 79050 + }, + { + "epoch": 0.14760939493677383, + "grad_norm": 0.9560543298721313, + "learning_rate": 0.00019984112759343327, + "loss": 4.7536, + "step": 79100 + }, + { + "epoch": 0.1477027004961523, + "grad_norm": 0.8099980354309082, + "learning_rate": 0.00019984089079491075, + "loss": 4.5886, + "step": 79150 + }, + { + "epoch": 0.1477960060555308, + "grad_norm": 0.8475550413131714, + "learning_rate": 0.0001998406538201866, + "loss": 4.628, + "step": 79200 + }, + { + "epoch": 0.14788931161490929, + "grad_norm": 1.233866572380066, + "learning_rate": 0.00019984041666926125, + "loss": 4.6666, + "step": 79250 + }, + { + "epoch": 0.1479826171742878, + "grad_norm": 1.088966727256775, + "learning_rate": 0.00019984017934213515, + "loss": 4.7315, + "step": 79300 + }, + { + "epoch": 0.14807592273366627, + "grad_norm": 1.3375529050827026, + "learning_rate": 0.0001998399418388087, + "loss": 4.6797, + "step": 79350 + }, + { + "epoch": 0.14816922829304477, + "grad_norm": 0.8874202370643616, + "learning_rate": 0.00019983970415928234, + "loss": 4.8035, + "step": 79400 + }, + { + "epoch": 0.14826253385242327, + "grad_norm": 0.9835208058357239, + "learning_rate": 0.00019983946630355642, + "loss": 4.8454, + "step": 79450 + }, + { + "epoch": 0.14835583941180175, + "grad_norm": 1.30135977268219, + "learning_rate": 0.00019983922827163144, + "loss": 4.9588, + "step": 79500 + }, + { + "epoch": 0.14844914497118025, + "grad_norm": 0.8822344541549683, + "learning_rate": 0.00019983899006350777, + "loss": 4.6737, + "step": 79550 + }, + { + "epoch": 0.14854245053055873, + "grad_norm": 0.812828779220581, + "learning_rate": 0.0001998387516791859, + "loss": 4.7829, + "step": 79600 + }, + { + "epoch": 0.14863575608993723, + "grad_norm": 0.9404298663139343, + "learning_rate": 0.00019983851311866616, + "loss": 4.4065, + "step": 79650 + }, + { + "epoch": 0.1487290616493157, + "grad_norm": 1.5870780944824219, + "learning_rate": 0.000199838274381949, + "loss": 4.8494, + "step": 79700 + }, + { + "epoch": 0.1488223672086942, + "grad_norm": 0.9650738835334778, + "learning_rate": 0.0001998380354690349, + "loss": 4.7546, + "step": 79750 + }, + { + "epoch": 0.14891567276807272, + "grad_norm": 1.1705528497695923, + "learning_rate": 0.00019983779637992418, + "loss": 4.9383, + "step": 79800 + }, + { + "epoch": 0.1490089783274512, + "grad_norm": 1.1494523286819458, + "learning_rate": 0.00019983755711461734, + "loss": 4.7803, + "step": 79850 + }, + { + "epoch": 0.1491022838868297, + "grad_norm": 0.7461511492729187, + "learning_rate": 0.00019983731767311477, + "loss": 4.7261, + "step": 79900 + }, + { + "epoch": 0.14919558944620817, + "grad_norm": 1.043479323387146, + "learning_rate": 0.0001998370780554169, + "loss": 4.8492, + "step": 79950 + }, + { + "epoch": 0.14928889500558667, + "grad_norm": 1.0631600618362427, + "learning_rate": 0.0001998368382615242, + "loss": 4.7619, + "step": 80000 + }, + { + "epoch": 0.14938220056496515, + "grad_norm": 1.0687527656555176, + "learning_rate": 0.00019983659829143698, + "loss": 4.9802, + "step": 80050 + }, + { + "epoch": 0.14947550612434365, + "grad_norm": 0.7543812394142151, + "learning_rate": 0.0001998363581451558, + "loss": 5.0034, + "step": 80100 + }, + { + "epoch": 0.14956881168372216, + "grad_norm": 0.6798197627067566, + "learning_rate": 0.00019983611782268095, + "loss": 4.7076, + "step": 80150 + }, + { + "epoch": 0.14966211724310063, + "grad_norm": 0.8588387370109558, + "learning_rate": 0.00019983587732401293, + "loss": 4.8063, + "step": 80200 + }, + { + "epoch": 0.14975542280247914, + "grad_norm": 1.1029499769210815, + "learning_rate": 0.0001998356366491522, + "loss": 4.6521, + "step": 80250 + }, + { + "epoch": 0.1498487283618576, + "grad_norm": 0.8040598034858704, + "learning_rate": 0.0001998353957980991, + "loss": 4.6823, + "step": 80300 + }, + { + "epoch": 0.14994203392123612, + "grad_norm": 1.1279397010803223, + "learning_rate": 0.00019983515477085408, + "loss": 4.7486, + "step": 80350 + }, + { + "epoch": 0.1500353394806146, + "grad_norm": 1.1090079545974731, + "learning_rate": 0.00019983491356741762, + "loss": 4.7731, + "step": 80400 + }, + { + "epoch": 0.1501286450399931, + "grad_norm": 1.3547964096069336, + "learning_rate": 0.00019983467218779007, + "loss": 4.8618, + "step": 80450 + }, + { + "epoch": 0.1502219505993716, + "grad_norm": 1.263022780418396, + "learning_rate": 0.00019983443063197192, + "loss": 4.8745, + "step": 80500 + }, + { + "epoch": 0.15031525615875008, + "grad_norm": 1.056709885597229, + "learning_rate": 0.0001998341888999635, + "loss": 4.7292, + "step": 80550 + }, + { + "epoch": 0.15040856171812858, + "grad_norm": 1.0344963073730469, + "learning_rate": 0.0001998339469917654, + "loss": 4.5219, + "step": 80600 + }, + { + "epoch": 0.15050186727750706, + "grad_norm": 0.7098541259765625, + "learning_rate": 0.0001998337049073779, + "loss": 4.6419, + "step": 80650 + }, + { + "epoch": 0.15059517283688556, + "grad_norm": 1.0774918794631958, + "learning_rate": 0.00019983346264680147, + "loss": 4.7146, + "step": 80700 + }, + { + "epoch": 0.15068847839626404, + "grad_norm": 0.8869603872299194, + "learning_rate": 0.00019983322021003655, + "loss": 4.8143, + "step": 80750 + }, + { + "epoch": 0.15078178395564254, + "grad_norm": 1.0924726724624634, + "learning_rate": 0.00019983297759708357, + "loss": 4.7092, + "step": 80800 + }, + { + "epoch": 0.15087508951502102, + "grad_norm": 1.0143225193023682, + "learning_rate": 0.00019983273480794295, + "loss": 4.6599, + "step": 80850 + }, + { + "epoch": 0.15096839507439952, + "grad_norm": 1.0644515752792358, + "learning_rate": 0.0001998324918426151, + "loss": 4.6655, + "step": 80900 + }, + { + "epoch": 0.15106170063377802, + "grad_norm": 1.2146971225738525, + "learning_rate": 0.00019983224870110048, + "loss": 4.7266, + "step": 80950 + }, + { + "epoch": 0.1511550061931565, + "grad_norm": 0.9861900210380554, + "learning_rate": 0.00019983200538339953, + "loss": 4.8545, + "step": 81000 + }, + { + "epoch": 0.1511550061931565, + "eval_loss": 4.951722145080566, + "eval_runtime": 230.9557, + "eval_samples_per_second": 11.292, + "eval_steps_per_second": 11.292, + "eval_tts_loss": 7.390448370325648, + "step": 81000 + }, + { + "epoch": 0.151248311752535, + "grad_norm": 1.09833824634552, + "learning_rate": 0.00019983176188951264, + "loss": 4.7289, + "step": 81050 + }, + { + "epoch": 0.15134161731191348, + "grad_norm": 1.1554728746414185, + "learning_rate": 0.00019983151821944027, + "loss": 4.7042, + "step": 81100 + }, + { + "epoch": 0.15143492287129198, + "grad_norm": 0.9898795485496521, + "learning_rate": 0.00019983127437318282, + "loss": 4.7149, + "step": 81150 + }, + { + "epoch": 0.15152822843067046, + "grad_norm": 0.9649356603622437, + "learning_rate": 0.00019983103035074072, + "loss": 4.9477, + "step": 81200 + }, + { + "epoch": 0.15162153399004896, + "grad_norm": 0.9268784523010254, + "learning_rate": 0.00019983078615211442, + "loss": 4.6484, + "step": 81250 + }, + { + "epoch": 0.15171483954942747, + "grad_norm": 1.1820745468139648, + "learning_rate": 0.00019983054177730435, + "loss": 4.6602, + "step": 81300 + }, + { + "epoch": 0.15180814510880594, + "grad_norm": 0.8073734045028687, + "learning_rate": 0.00019983029722631096, + "loss": 4.6794, + "step": 81350 + }, + { + "epoch": 0.15190145066818445, + "grad_norm": 0.8954252600669861, + "learning_rate": 0.00019983005249913467, + "loss": 4.6629, + "step": 81400 + }, + { + "epoch": 0.15199475622756292, + "grad_norm": 1.1588051319122314, + "learning_rate": 0.0001998298075957759, + "loss": 4.9659, + "step": 81450 + }, + { + "epoch": 0.15208806178694143, + "grad_norm": 1.1768730878829956, + "learning_rate": 0.00019982956251623506, + "loss": 4.7299, + "step": 81500 + }, + { + "epoch": 0.1521813673463199, + "grad_norm": 0.9447085857391357, + "learning_rate": 0.00019982931726051262, + "loss": 4.7048, + "step": 81550 + }, + { + "epoch": 0.1522746729056984, + "grad_norm": 0.8608984351158142, + "learning_rate": 0.000199829071828609, + "loss": 4.7718, + "step": 81600 + }, + { + "epoch": 0.1523679784650769, + "grad_norm": 0.7421265840530396, + "learning_rate": 0.0001998288262205246, + "loss": 4.8859, + "step": 81650 + }, + { + "epoch": 0.15246128402445538, + "grad_norm": 0.9788615107536316, + "learning_rate": 0.00019982858043625994, + "loss": 4.8217, + "step": 81700 + }, + { + "epoch": 0.1525545895838339, + "grad_norm": 0.8593958616256714, + "learning_rate": 0.00019982833447581538, + "loss": 4.918, + "step": 81750 + }, + { + "epoch": 0.15264789514321236, + "grad_norm": 1.2187819480895996, + "learning_rate": 0.0001998280883391914, + "loss": 4.6866, + "step": 81800 + }, + { + "epoch": 0.15274120070259087, + "grad_norm": 1.0192663669586182, + "learning_rate": 0.0001998278420263884, + "loss": 4.7098, + "step": 81850 + }, + { + "epoch": 0.15283450626196934, + "grad_norm": 1.1101956367492676, + "learning_rate": 0.00019982759553740677, + "loss": 4.6928, + "step": 81900 + }, + { + "epoch": 0.15292781182134785, + "grad_norm": 0.8426311016082764, + "learning_rate": 0.00019982734887224703, + "loss": 4.7077, + "step": 81950 + }, + { + "epoch": 0.15302111738072632, + "grad_norm": 1.0216084718704224, + "learning_rate": 0.00019982710203090962, + "loss": 4.822, + "step": 82000 + }, + { + "epoch": 0.15311442294010483, + "grad_norm": 1.1248880624771118, + "learning_rate": 0.00019982685501339492, + "loss": 4.7781, + "step": 82050 + }, + { + "epoch": 0.15320772849948333, + "grad_norm": 0.8825070261955261, + "learning_rate": 0.00019982660781970335, + "loss": 4.7426, + "step": 82100 + }, + { + "epoch": 0.1533010340588618, + "grad_norm": 1.2149336338043213, + "learning_rate": 0.00019982636044983543, + "loss": 4.7091, + "step": 82150 + }, + { + "epoch": 0.1533943396182403, + "grad_norm": 1.1825565099716187, + "learning_rate": 0.00019982611290379152, + "loss": 4.7934, + "step": 82200 + }, + { + "epoch": 0.1534876451776188, + "grad_norm": 0.9500203728675842, + "learning_rate": 0.0001998258651815721, + "loss": 4.5296, + "step": 82250 + }, + { + "epoch": 0.1535809507369973, + "grad_norm": 1.058222770690918, + "learning_rate": 0.00019982561728317758, + "loss": 4.7336, + "step": 82300 + }, + { + "epoch": 0.15367425629637577, + "grad_norm": 1.1481443643569946, + "learning_rate": 0.00019982536920860842, + "loss": 4.6843, + "step": 82350 + }, + { + "epoch": 0.15376756185575427, + "grad_norm": 1.009384036064148, + "learning_rate": 0.00019982512095786503, + "loss": 4.6789, + "step": 82400 + }, + { + "epoch": 0.15386086741513277, + "grad_norm": 1.0169785022735596, + "learning_rate": 0.00019982487253094791, + "loss": 4.8183, + "step": 82450 + }, + { + "epoch": 0.15395417297451125, + "grad_norm": 1.4280728101730347, + "learning_rate": 0.0001998246239278574, + "loss": 4.7322, + "step": 82500 + }, + { + "epoch": 0.15404747853388975, + "grad_norm": 0.9956715106964111, + "learning_rate": 0.00019982437514859402, + "loss": 4.467, + "step": 82550 + }, + { + "epoch": 0.15414078409326823, + "grad_norm": 0.9345201253890991, + "learning_rate": 0.0001998241261931582, + "loss": 4.8818, + "step": 82600 + }, + { + "epoch": 0.15423408965264673, + "grad_norm": 0.8383892774581909, + "learning_rate": 0.00019982387706155035, + "loss": 5.0506, + "step": 82650 + }, + { + "epoch": 0.1543273952120252, + "grad_norm": 1.0288281440734863, + "learning_rate": 0.00019982362775377094, + "loss": 4.7276, + "step": 82700 + }, + { + "epoch": 0.1544207007714037, + "grad_norm": 1.5287654399871826, + "learning_rate": 0.00019982337826982035, + "loss": 4.7515, + "step": 82750 + }, + { + "epoch": 0.15451400633078222, + "grad_norm": 1.1659990549087524, + "learning_rate": 0.00019982312860969908, + "loss": 4.7312, + "step": 82800 + }, + { + "epoch": 0.1546073118901607, + "grad_norm": 1.0841422080993652, + "learning_rate": 0.00019982287877340756, + "loss": 4.6208, + "step": 82850 + }, + { + "epoch": 0.1547006174495392, + "grad_norm": 0.9620239734649658, + "learning_rate": 0.0001998226287609462, + "loss": 4.7568, + "step": 82900 + }, + { + "epoch": 0.15479392300891767, + "grad_norm": 1.0376754999160767, + "learning_rate": 0.0001998223785723155, + "loss": 4.9583, + "step": 82950 + }, + { + "epoch": 0.15488722856829618, + "grad_norm": 0.8780238032341003, + "learning_rate": 0.00019982212820751584, + "loss": 4.7517, + "step": 83000 + }, + { + "epoch": 0.15498053412767465, + "grad_norm": 0.9946292042732239, + "learning_rate": 0.00019982187766654768, + "loss": 4.7237, + "step": 83050 + }, + { + "epoch": 0.15507383968705316, + "grad_norm": 0.7709692716598511, + "learning_rate": 0.00019982162694941148, + "loss": 4.9062, + "step": 83100 + }, + { + "epoch": 0.15516714524643166, + "grad_norm": 1.010310411453247, + "learning_rate": 0.00019982137605610768, + "loss": 4.7754, + "step": 83150 + }, + { + "epoch": 0.15526045080581014, + "grad_norm": 0.752420961856842, + "learning_rate": 0.00019982112498663672, + "loss": 4.762, + "step": 83200 + }, + { + "epoch": 0.15535375636518864, + "grad_norm": 0.9181608557701111, + "learning_rate": 0.00019982087374099906, + "loss": 4.6671, + "step": 83250 + }, + { + "epoch": 0.15544706192456711, + "grad_norm": 1.0234555006027222, + "learning_rate": 0.00019982062231919509, + "loss": 4.963, + "step": 83300 + }, + { + "epoch": 0.15554036748394562, + "grad_norm": 1.1121389865875244, + "learning_rate": 0.00019982037072122525, + "loss": 4.5522, + "step": 83350 + }, + { + "epoch": 0.1556336730433241, + "grad_norm": 1.1406534910202026, + "learning_rate": 0.00019982011894709007, + "loss": 4.8627, + "step": 83400 + }, + { + "epoch": 0.1557269786027026, + "grad_norm": 0.9664844870567322, + "learning_rate": 0.00019981986699678993, + "loss": 4.6783, + "step": 83450 + }, + { + "epoch": 0.15582028416208107, + "grad_norm": 0.931291937828064, + "learning_rate": 0.0001998196148703253, + "loss": 4.6384, + "step": 83500 + }, + { + "epoch": 0.15591358972145958, + "grad_norm": 1.102245569229126, + "learning_rate": 0.0001998193625676966, + "loss": 4.5632, + "step": 83550 + }, + { + "epoch": 0.15600689528083808, + "grad_norm": 0.9392174482345581, + "learning_rate": 0.00019981911008890428, + "loss": 4.71, + "step": 83600 + }, + { + "epoch": 0.15610020084021656, + "grad_norm": 0.7961680293083191, + "learning_rate": 0.00019981885743394884, + "loss": 4.7738, + "step": 83650 + }, + { + "epoch": 0.15619350639959506, + "grad_norm": 0.9825389385223389, + "learning_rate": 0.00019981860460283063, + "loss": 4.4743, + "step": 83700 + }, + { + "epoch": 0.15628681195897354, + "grad_norm": 0.8377106189727783, + "learning_rate": 0.00019981835159555016, + "loss": 4.7894, + "step": 83750 + }, + { + "epoch": 0.15638011751835204, + "grad_norm": 0.9133685231208801, + "learning_rate": 0.00019981809841210788, + "loss": 4.7747, + "step": 83800 + }, + { + "epoch": 0.15647342307773052, + "grad_norm": 0.8195970058441162, + "learning_rate": 0.0001998178450525042, + "loss": 4.592, + "step": 83850 + }, + { + "epoch": 0.15656672863710902, + "grad_norm": 1.0992616415023804, + "learning_rate": 0.0001998175915167396, + "loss": 4.8078, + "step": 83900 + }, + { + "epoch": 0.15666003419648752, + "grad_norm": 1.0138832330703735, + "learning_rate": 0.0001998173378048145, + "loss": 4.6467, + "step": 83950 + }, + { + "epoch": 0.156753339755866, + "grad_norm": 0.8786870837211609, + "learning_rate": 0.0001998170839167294, + "loss": 4.7446, + "step": 84000 + }, + { + "epoch": 0.156753339755866, + "eval_loss": 4.946982383728027, + "eval_runtime": 229.0908, + "eval_samples_per_second": 11.384, + "eval_steps_per_second": 11.384, + "eval_tts_loss": 7.394625851078862, + "step": 84000 + }, + { + "epoch": 0.1568466453152445, + "grad_norm": 1.0230584144592285, + "learning_rate": 0.00019981682985248465, + "loss": 4.6526, + "step": 84050 + }, + { + "epoch": 0.15693995087462298, + "grad_norm": 0.7539162039756775, + "learning_rate": 0.0001998165756120808, + "loss": 4.6949, + "step": 84100 + }, + { + "epoch": 0.15703325643400148, + "grad_norm": 1.1452572345733643, + "learning_rate": 0.00019981632119551822, + "loss": 4.6745, + "step": 84150 + }, + { + "epoch": 0.15712656199337996, + "grad_norm": 0.9784168601036072, + "learning_rate": 0.00019981606660279745, + "loss": 4.9249, + "step": 84200 + }, + { + "epoch": 0.15721986755275846, + "grad_norm": 1.1359683275222778, + "learning_rate": 0.00019981581183391885, + "loss": 4.761, + "step": 84250 + }, + { + "epoch": 0.15731317311213697, + "grad_norm": 1.1261489391326904, + "learning_rate": 0.0001998155568888829, + "loss": 4.6123, + "step": 84300 + }, + { + "epoch": 0.15740647867151544, + "grad_norm": 1.306156039237976, + "learning_rate": 0.00019981530176769007, + "loss": 4.6882, + "step": 84350 + }, + { + "epoch": 0.15749978423089395, + "grad_norm": 0.861440896987915, + "learning_rate": 0.0001998150464703408, + "loss": 4.6409, + "step": 84400 + }, + { + "epoch": 0.15759308979027242, + "grad_norm": 0.6180737018585205, + "learning_rate": 0.00019981479099683553, + "loss": 4.6318, + "step": 84450 + }, + { + "epoch": 0.15768639534965093, + "grad_norm": 1.0346062183380127, + "learning_rate": 0.00019981453534717475, + "loss": 4.8614, + "step": 84500 + }, + { + "epoch": 0.1577797009090294, + "grad_norm": 1.3572213649749756, + "learning_rate": 0.00019981427952135884, + "loss": 4.7108, + "step": 84550 + }, + { + "epoch": 0.1578730064684079, + "grad_norm": 0.7147801518440247, + "learning_rate": 0.0001998140235193883, + "loss": 4.6901, + "step": 84600 + }, + { + "epoch": 0.15796631202778638, + "grad_norm": 1.1168407201766968, + "learning_rate": 0.00019981376734126355, + "loss": 4.7444, + "step": 84650 + }, + { + "epoch": 0.15805961758716489, + "grad_norm": 1.0936293601989746, + "learning_rate": 0.00019981351098698509, + "loss": 4.8869, + "step": 84700 + }, + { + "epoch": 0.1581529231465434, + "grad_norm": 0.9468141198158264, + "learning_rate": 0.00019981325445655331, + "loss": 4.7834, + "step": 84750 + }, + { + "epoch": 0.15824622870592187, + "grad_norm": 1.188549518585205, + "learning_rate": 0.00019981299774996875, + "loss": 4.7705, + "step": 84800 + }, + { + "epoch": 0.15833953426530037, + "grad_norm": 1.0123662948608398, + "learning_rate": 0.0001998127408672318, + "loss": 4.7251, + "step": 84850 + }, + { + "epoch": 0.15843283982467884, + "grad_norm": 1.1224631071090698, + "learning_rate": 0.0001998124838083429, + "loss": 4.7438, + "step": 84900 + }, + { + "epoch": 0.15852614538405735, + "grad_norm": 0.9828144311904907, + "learning_rate": 0.00019981222657330255, + "loss": 4.7185, + "step": 84950 + }, + { + "epoch": 0.15861945094343582, + "grad_norm": 0.932032585144043, + "learning_rate": 0.00019981196916211115, + "loss": 4.7946, + "step": 85000 + }, + { + "epoch": 0.15871275650281433, + "grad_norm": 1.082952618598938, + "learning_rate": 0.0001998117115747692, + "loss": 4.7448, + "step": 85050 + }, + { + "epoch": 0.15880606206219283, + "grad_norm": 1.2148911952972412, + "learning_rate": 0.00019981145381127715, + "loss": 4.6534, + "step": 85100 + }, + { + "epoch": 0.1588993676215713, + "grad_norm": 0.8082234859466553, + "learning_rate": 0.00019981119587163547, + "loss": 4.6324, + "step": 85150 + }, + { + "epoch": 0.1589926731809498, + "grad_norm": 1.083858847618103, + "learning_rate": 0.00019981093775584457, + "loss": 4.6805, + "step": 85200 + }, + { + "epoch": 0.1590859787403283, + "grad_norm": 1.0355510711669922, + "learning_rate": 0.00019981067946390493, + "loss": 5.026, + "step": 85250 + }, + { + "epoch": 0.1591792842997068, + "grad_norm": 1.0597902536392212, + "learning_rate": 0.00019981042099581698, + "loss": 4.8371, + "step": 85300 + }, + { + "epoch": 0.15927258985908527, + "grad_norm": 0.8655616641044617, + "learning_rate": 0.0001998101623515812, + "loss": 4.8269, + "step": 85350 + }, + { + "epoch": 0.15936589541846377, + "grad_norm": 1.1475633382797241, + "learning_rate": 0.00019980990353119809, + "loss": 4.6525, + "step": 85400 + }, + { + "epoch": 0.15945920097784227, + "grad_norm": 0.9865615963935852, + "learning_rate": 0.00019980964453466803, + "loss": 4.6893, + "step": 85450 + }, + { + "epoch": 0.15955250653722075, + "grad_norm": 0.8663655519485474, + "learning_rate": 0.00019980938536199151, + "loss": 4.736, + "step": 85500 + }, + { + "epoch": 0.15964581209659925, + "grad_norm": 0.9567622542381287, + "learning_rate": 0.000199809126013169, + "loss": 4.5879, + "step": 85550 + }, + { + "epoch": 0.15973911765597773, + "grad_norm": 0.9500153660774231, + "learning_rate": 0.00019980886648820093, + "loss": 4.7534, + "step": 85600 + }, + { + "epoch": 0.15983242321535623, + "grad_norm": 0.9333567023277283, + "learning_rate": 0.0001998086067870878, + "loss": 4.671, + "step": 85650 + }, + { + "epoch": 0.1599257287747347, + "grad_norm": 0.9707118272781372, + "learning_rate": 0.00019980834690983, + "loss": 4.7252, + "step": 85700 + }, + { + "epoch": 0.1600190343341132, + "grad_norm": 0.801749587059021, + "learning_rate": 0.00019980808685642805, + "loss": 4.7634, + "step": 85750 + }, + { + "epoch": 0.16011233989349172, + "grad_norm": 1.0327675342559814, + "learning_rate": 0.0001998078266268824, + "loss": 4.7954, + "step": 85800 + }, + { + "epoch": 0.1602056454528702, + "grad_norm": 0.9381875991821289, + "learning_rate": 0.00019980756622119347, + "loss": 4.6373, + "step": 85850 + }, + { + "epoch": 0.1602989510122487, + "grad_norm": 0.9350003600120544, + "learning_rate": 0.00019980730563936175, + "loss": 4.7086, + "step": 85900 + }, + { + "epoch": 0.16039225657162717, + "grad_norm": 1.1545840501785278, + "learning_rate": 0.00019980704488138776, + "loss": 4.7647, + "step": 85950 + }, + { + "epoch": 0.16048556213100568, + "grad_norm": 1.1162843704223633, + "learning_rate": 0.00019980678394727184, + "loss": 4.701, + "step": 86000 + }, + { + "epoch": 0.16057886769038415, + "grad_norm": 0.9487619400024414, + "learning_rate": 0.0001998065228370145, + "loss": 4.847, + "step": 86050 + }, + { + "epoch": 0.16067217324976266, + "grad_norm": 1.1424440145492554, + "learning_rate": 0.00019980626155061623, + "loss": 4.7775, + "step": 86100 + }, + { + "epoch": 0.16076547880914113, + "grad_norm": 1.0836292505264282, + "learning_rate": 0.00019980600008807746, + "loss": 4.7517, + "step": 86150 + }, + { + "epoch": 0.16085878436851964, + "grad_norm": 1.026230812072754, + "learning_rate": 0.00019980573844939866, + "loss": 4.6033, + "step": 86200 + }, + { + "epoch": 0.16095208992789814, + "grad_norm": 0.7633348107337952, + "learning_rate": 0.0001998054766345803, + "loss": 4.7064, + "step": 86250 + }, + { + "epoch": 0.16104539548727662, + "grad_norm": 0.9830362796783447, + "learning_rate": 0.00019980521464362284, + "loss": 4.6753, + "step": 86300 + }, + { + "epoch": 0.16113870104665512, + "grad_norm": 0.7929043173789978, + "learning_rate": 0.00019980495247652673, + "loss": 4.4158, + "step": 86350 + }, + { + "epoch": 0.1612320066060336, + "grad_norm": 0.906753420829773, + "learning_rate": 0.00019980469013329243, + "loss": 4.5392, + "step": 86400 + }, + { + "epoch": 0.1613253121654121, + "grad_norm": 0.841719388961792, + "learning_rate": 0.00019980442761392041, + "loss": 4.7356, + "step": 86450 + }, + { + "epoch": 0.16141861772479058, + "grad_norm": 0.9761390686035156, + "learning_rate": 0.00019980416491841116, + "loss": 4.8662, + "step": 86500 + }, + { + "epoch": 0.16151192328416908, + "grad_norm": 1.2850239276885986, + "learning_rate": 0.0001998039020467651, + "loss": 4.6927, + "step": 86550 + }, + { + "epoch": 0.16160522884354758, + "grad_norm": 0.9129481911659241, + "learning_rate": 0.00019980363899898277, + "loss": 4.855, + "step": 86600 + }, + { + "epoch": 0.16169853440292606, + "grad_norm": 1.1330456733703613, + "learning_rate": 0.0001998033757750645, + "loss": 5.0053, + "step": 86650 + }, + { + "epoch": 0.16179183996230456, + "grad_norm": 1.0761449337005615, + "learning_rate": 0.00019980311237501085, + "loss": 4.6144, + "step": 86700 + }, + { + "epoch": 0.16188514552168304, + "grad_norm": 0.9955719709396362, + "learning_rate": 0.0001998028487988223, + "loss": 4.7949, + "step": 86750 + }, + { + "epoch": 0.16197845108106154, + "grad_norm": 1.190447211265564, + "learning_rate": 0.00019980258504649927, + "loss": 4.7975, + "step": 86800 + }, + { + "epoch": 0.16207175664044002, + "grad_norm": 1.0047932863235474, + "learning_rate": 0.00019980232111804224, + "loss": 4.7366, + "step": 86850 + }, + { + "epoch": 0.16216506219981852, + "grad_norm": 0.9887164831161499, + "learning_rate": 0.00019980205701345164, + "loss": 4.7679, + "step": 86900 + }, + { + "epoch": 0.16225836775919703, + "grad_norm": 1.305762529373169, + "learning_rate": 0.00019980179273272803, + "loss": 4.8636, + "step": 86950 + }, + { + "epoch": 0.1623516733185755, + "grad_norm": 1.0135672092437744, + "learning_rate": 0.00019980152827587177, + "loss": 4.7288, + "step": 87000 + }, + { + "epoch": 0.1623516733185755, + "eval_loss": 4.931732654571533, + "eval_runtime": 230.4634, + "eval_samples_per_second": 11.316, + "eval_steps_per_second": 11.316, + "eval_tts_loss": 7.40806279529566, + "step": 87000 + }, + { + "epoch": 0.162444978877954, + "grad_norm": 1.1869860887527466, + "learning_rate": 0.0001998012636428834, + "loss": 4.6017, + "step": 87050 + }, + { + "epoch": 0.16253828443733248, + "grad_norm": 0.9774503707885742, + "learning_rate": 0.00019980099883376334, + "loss": 4.7084, + "step": 87100 + }, + { + "epoch": 0.16263158999671098, + "grad_norm": 0.8578800559043884, + "learning_rate": 0.00019980073384851207, + "loss": 4.9125, + "step": 87150 + }, + { + "epoch": 0.16272489555608946, + "grad_norm": 1.0579946041107178, + "learning_rate": 0.00019980046868713013, + "loss": 4.7317, + "step": 87200 + }, + { + "epoch": 0.16281820111546796, + "grad_norm": 0.6333214044570923, + "learning_rate": 0.00019980020334961783, + "loss": 4.5953, + "step": 87250 + }, + { + "epoch": 0.16291150667484644, + "grad_norm": 1.0423671007156372, + "learning_rate": 0.0001997999378359758, + "loss": 4.7238, + "step": 87300 + }, + { + "epoch": 0.16300481223422494, + "grad_norm": 0.7020459175109863, + "learning_rate": 0.0001997996721462044, + "loss": 4.6665, + "step": 87350 + }, + { + "epoch": 0.16309811779360345, + "grad_norm": 0.9732704162597656, + "learning_rate": 0.00019979940628030415, + "loss": 4.6746, + "step": 87400 + }, + { + "epoch": 0.16319142335298192, + "grad_norm": 0.8247692584991455, + "learning_rate": 0.00019979914023827552, + "loss": 4.8856, + "step": 87450 + }, + { + "epoch": 0.16328472891236043, + "grad_norm": 1.1054822206497192, + "learning_rate": 0.00019979887402011898, + "loss": 4.639, + "step": 87500 + }, + { + "epoch": 0.1633780344717389, + "grad_norm": 1.0070406198501587, + "learning_rate": 0.00019979860762583496, + "loss": 4.7724, + "step": 87550 + }, + { + "epoch": 0.1634713400311174, + "grad_norm": 0.9917934536933899, + "learning_rate": 0.00019979834105542397, + "loss": 4.8537, + "step": 87600 + }, + { + "epoch": 0.16356464559049588, + "grad_norm": 1.4356805086135864, + "learning_rate": 0.00019979807430888644, + "loss": 4.816, + "step": 87650 + }, + { + "epoch": 0.1636579511498744, + "grad_norm": 1.1016113758087158, + "learning_rate": 0.0001997978073862229, + "loss": 4.8199, + "step": 87700 + }, + { + "epoch": 0.1637512567092529, + "grad_norm": 1.1834055185317993, + "learning_rate": 0.00019979754028743378, + "loss": 4.5439, + "step": 87750 + }, + { + "epoch": 0.16384456226863137, + "grad_norm": 1.2742072343826294, + "learning_rate": 0.00019979727301251959, + "loss": 4.7865, + "step": 87800 + }, + { + "epoch": 0.16393786782800987, + "grad_norm": 1.1467939615249634, + "learning_rate": 0.0001997970055614807, + "loss": 4.848, + "step": 87850 + }, + { + "epoch": 0.16403117338738835, + "grad_norm": 0.9691332578659058, + "learning_rate": 0.00019979673793431773, + "loss": 4.866, + "step": 87900 + }, + { + "epoch": 0.16412447894676685, + "grad_norm": 1.061712622642517, + "learning_rate": 0.00019979647013103105, + "loss": 4.5297, + "step": 87950 + }, + { + "epoch": 0.16421778450614533, + "grad_norm": 0.9618381857872009, + "learning_rate": 0.00019979620215162115, + "loss": 4.6956, + "step": 88000 + }, + { + "epoch": 0.16431109006552383, + "grad_norm": 1.0956761837005615, + "learning_rate": 0.0001997959339960885, + "loss": 4.7234, + "step": 88050 + }, + { + "epoch": 0.16440439562490233, + "grad_norm": 1.304779291152954, + "learning_rate": 0.00019979566566443363, + "loss": 4.7132, + "step": 88100 + }, + { + "epoch": 0.1644977011842808, + "grad_norm": 0.8119908571243286, + "learning_rate": 0.00019979539715665693, + "loss": 4.7707, + "step": 88150 + }, + { + "epoch": 0.1645910067436593, + "grad_norm": 0.9809343218803406, + "learning_rate": 0.00019979512847275897, + "loss": 4.6912, + "step": 88200 + }, + { + "epoch": 0.1646843123030378, + "grad_norm": 1.1159480810165405, + "learning_rate": 0.00019979485961274008, + "loss": 4.7362, + "step": 88250 + }, + { + "epoch": 0.1647776178624163, + "grad_norm": 1.0415050983428955, + "learning_rate": 0.0001997945905766009, + "loss": 4.8182, + "step": 88300 + }, + { + "epoch": 0.16487092342179477, + "grad_norm": 0.856876015663147, + "learning_rate": 0.00019979432136434179, + "loss": 4.475, + "step": 88350 + }, + { + "epoch": 0.16496422898117327, + "grad_norm": 0.9265991449356079, + "learning_rate": 0.0001997940519759633, + "loss": 4.5788, + "step": 88400 + }, + { + "epoch": 0.16505753454055178, + "grad_norm": 1.082276701927185, + "learning_rate": 0.00019979378241146583, + "loss": 4.658, + "step": 88450 + }, + { + "epoch": 0.16515084009993025, + "grad_norm": 0.916746199131012, + "learning_rate": 0.0001997935126708499, + "loss": 4.7118, + "step": 88500 + }, + { + "epoch": 0.16524414565930876, + "grad_norm": 0.7613538503646851, + "learning_rate": 0.00019979324275411597, + "loss": 4.6433, + "step": 88550 + }, + { + "epoch": 0.16533745121868723, + "grad_norm": 0.9194437861442566, + "learning_rate": 0.00019979297266126453, + "loss": 4.8783, + "step": 88600 + }, + { + "epoch": 0.16543075677806574, + "grad_norm": 0.907049298286438, + "learning_rate": 0.00019979270239229606, + "loss": 5.006, + "step": 88650 + }, + { + "epoch": 0.1655240623374442, + "grad_norm": 0.8706242442131042, + "learning_rate": 0.00019979243194721103, + "loss": 4.6975, + "step": 88700 + }, + { + "epoch": 0.16561736789682271, + "grad_norm": 1.136927843093872, + "learning_rate": 0.0001997921613260099, + "loss": 4.7098, + "step": 88750 + }, + { + "epoch": 0.1657106734562012, + "grad_norm": 0.9624984264373779, + "learning_rate": 0.0001997918905286932, + "loss": 4.7689, + "step": 88800 + }, + { + "epoch": 0.1658039790155797, + "grad_norm": 1.003145694732666, + "learning_rate": 0.00019979161955526134, + "loss": 4.6874, + "step": 88850 + }, + { + "epoch": 0.1658972845749582, + "grad_norm": 0.9634168148040771, + "learning_rate": 0.00019979134840571484, + "loss": 4.7637, + "step": 88900 + }, + { + "epoch": 0.16599059013433667, + "grad_norm": 1.0608584880828857, + "learning_rate": 0.00019979107708005417, + "loss": 4.6663, + "step": 88950 + }, + { + "epoch": 0.16608389569371518, + "grad_norm": 0.7554059028625488, + "learning_rate": 0.00019979080557827982, + "loss": 4.7785, + "step": 89000 + }, + { + "epoch": 0.16617720125309365, + "grad_norm": 1.0240588188171387, + "learning_rate": 0.00019979053390039225, + "loss": 4.7544, + "step": 89050 + }, + { + "epoch": 0.16627050681247216, + "grad_norm": 1.253793478012085, + "learning_rate": 0.00019979026204639192, + "loss": 4.5589, + "step": 89100 + }, + { + "epoch": 0.16636381237185063, + "grad_norm": 0.729597270488739, + "learning_rate": 0.00019978999001627937, + "loss": 4.5692, + "step": 89150 + }, + { + "epoch": 0.16645711793122914, + "grad_norm": 0.9666997194290161, + "learning_rate": 0.00019978971781005504, + "loss": 4.7101, + "step": 89200 + }, + { + "epoch": 0.16655042349060764, + "grad_norm": 1.3623840808868408, + "learning_rate": 0.0001997894454277194, + "loss": 4.6743, + "step": 89250 + }, + { + "epoch": 0.16664372904998612, + "grad_norm": 0.9426568746566772, + "learning_rate": 0.00019978917286927295, + "loss": 4.6291, + "step": 89300 + }, + { + "epoch": 0.16673703460936462, + "grad_norm": 0.9860442876815796, + "learning_rate": 0.00019978890013471617, + "loss": 4.7023, + "step": 89350 + }, + { + "epoch": 0.1668303401687431, + "grad_norm": 0.9578588008880615, + "learning_rate": 0.00019978862722404956, + "loss": 4.8009, + "step": 89400 + }, + { + "epoch": 0.1669236457281216, + "grad_norm": 1.0634305477142334, + "learning_rate": 0.00019978835413727358, + "loss": 4.8068, + "step": 89450 + }, + { + "epoch": 0.16701695128750008, + "grad_norm": 0.5965625643730164, + "learning_rate": 0.0001997880808743887, + "loss": 4.5365, + "step": 89500 + }, + { + "epoch": 0.16711025684687858, + "grad_norm": 0.9046986699104309, + "learning_rate": 0.0001997878074353954, + "loss": 4.7543, + "step": 89550 + }, + { + "epoch": 0.16720356240625708, + "grad_norm": 1.0531772375106812, + "learning_rate": 0.0001997875338202942, + "loss": 4.7865, + "step": 89600 + }, + { + "epoch": 0.16729686796563556, + "grad_norm": 1.0829286575317383, + "learning_rate": 0.00019978726002908555, + "loss": 4.5878, + "step": 89650 + }, + { + "epoch": 0.16739017352501406, + "grad_norm": 0.9807512164115906, + "learning_rate": 0.00019978698606176994, + "loss": 4.7111, + "step": 89700 + }, + { + "epoch": 0.16748347908439254, + "grad_norm": 0.7498921155929565, + "learning_rate": 0.00019978671191834787, + "loss": 4.5111, + "step": 89750 + }, + { + "epoch": 0.16757678464377104, + "grad_norm": 1.3092355728149414, + "learning_rate": 0.0001997864375988198, + "loss": 4.6699, + "step": 89800 + }, + { + "epoch": 0.16767009020314952, + "grad_norm": 1.1617666482925415, + "learning_rate": 0.00019978616310318623, + "loss": 4.7742, + "step": 89850 + }, + { + "epoch": 0.16776339576252802, + "grad_norm": 1.0421655178070068, + "learning_rate": 0.00019978588843144767, + "loss": 4.8136, + "step": 89900 + }, + { + "epoch": 0.16785670132190653, + "grad_norm": 1.0589338541030884, + "learning_rate": 0.00019978561358360454, + "loss": 4.961, + "step": 89950 + }, + { + "epoch": 0.167950006881285, + "grad_norm": 0.8682652115821838, + "learning_rate": 0.00019978533855965735, + "loss": 4.6367, + "step": 90000 + }, + { + "epoch": 0.167950006881285, + "eval_loss": 4.925217628479004, + "eval_runtime": 229.1885, + "eval_samples_per_second": 11.379, + "eval_steps_per_second": 11.379, + "eval_tts_loss": 7.469426870473198, + "step": 90000 + }, + { + "epoch": 0.1680433124406635, + "grad_norm": 1.0179359912872314, + "learning_rate": 0.00019978506335960664, + "loss": 4.7742, + "step": 90050 + }, + { + "epoch": 0.16813661800004198, + "grad_norm": 1.0394161939620972, + "learning_rate": 0.00019978478798345282, + "loss": 4.7354, + "step": 90100 + }, + { + "epoch": 0.16822992355942049, + "grad_norm": 1.118033766746521, + "learning_rate": 0.0001997845124311964, + "loss": 4.6653, + "step": 90150 + }, + { + "epoch": 0.16832322911879896, + "grad_norm": 0.7835417985916138, + "learning_rate": 0.00019978423670283788, + "loss": 4.7592, + "step": 90200 + }, + { + "epoch": 0.16841653467817747, + "grad_norm": 1.0521881580352783, + "learning_rate": 0.00019978396079837777, + "loss": 4.7853, + "step": 90250 + }, + { + "epoch": 0.16850984023755594, + "grad_norm": 0.9498222470283508, + "learning_rate": 0.0001997836847178165, + "loss": 4.7828, + "step": 90300 + }, + { + "epoch": 0.16860314579693444, + "grad_norm": 0.9034340381622314, + "learning_rate": 0.00019978340846115456, + "loss": 4.6206, + "step": 90350 + }, + { + "epoch": 0.16869645135631295, + "grad_norm": 1.0530623197555542, + "learning_rate": 0.00019978313202839249, + "loss": 4.6888, + "step": 90400 + }, + { + "epoch": 0.16878975691569142, + "grad_norm": 1.2802088260650635, + "learning_rate": 0.00019978285541953077, + "loss": 4.665, + "step": 90450 + }, + { + "epoch": 0.16888306247506993, + "grad_norm": 0.7782902717590332, + "learning_rate": 0.00019978257863456983, + "loss": 4.7043, + "step": 90500 + }, + { + "epoch": 0.1689763680344484, + "grad_norm": 0.930324912071228, + "learning_rate": 0.00019978230167351022, + "loss": 4.7948, + "step": 90550 + }, + { + "epoch": 0.1690696735938269, + "grad_norm": 1.2882273197174072, + "learning_rate": 0.00019978202453635238, + "loss": 4.8561, + "step": 90600 + }, + { + "epoch": 0.16916297915320538, + "grad_norm": 0.7437873482704163, + "learning_rate": 0.00019978174722309685, + "loss": 4.8459, + "step": 90650 + }, + { + "epoch": 0.1692562847125839, + "grad_norm": 0.9222044944763184, + "learning_rate": 0.00019978146973374407, + "loss": 4.581, + "step": 90700 + }, + { + "epoch": 0.1693495902719624, + "grad_norm": 1.0944164991378784, + "learning_rate": 0.00019978119206829454, + "loss": 4.86, + "step": 90750 + }, + { + "epoch": 0.16944289583134087, + "grad_norm": 0.8687542080879211, + "learning_rate": 0.00019978091422674882, + "loss": 4.7375, + "step": 90800 + }, + { + "epoch": 0.16953620139071937, + "grad_norm": 1.013555884361267, + "learning_rate": 0.0001997806362091073, + "loss": 4.8501, + "step": 90850 + }, + { + "epoch": 0.16962950695009785, + "grad_norm": 0.9529797434806824, + "learning_rate": 0.00019978035801537054, + "loss": 4.9599, + "step": 90900 + }, + { + "epoch": 0.16972281250947635, + "grad_norm": 1.0254067182540894, + "learning_rate": 0.00019978007964553899, + "loss": 4.6487, + "step": 90950 + }, + { + "epoch": 0.16981611806885483, + "grad_norm": 1.2346069812774658, + "learning_rate": 0.00019977980109961313, + "loss": 4.9191, + "step": 91000 + }, + { + "epoch": 0.16990942362823333, + "grad_norm": 0.7727384567260742, + "learning_rate": 0.00019977952237759352, + "loss": 4.676, + "step": 91050 + }, + { + "epoch": 0.17000272918761183, + "grad_norm": 0.8196634650230408, + "learning_rate": 0.00019977924347948058, + "loss": 4.7945, + "step": 91100 + }, + { + "epoch": 0.1700960347469903, + "grad_norm": 0.937310516834259, + "learning_rate": 0.00019977896440527486, + "loss": 4.8049, + "step": 91150 + }, + { + "epoch": 0.1701893403063688, + "grad_norm": 0.6780749559402466, + "learning_rate": 0.0001997786851549768, + "loss": 4.9489, + "step": 91200 + }, + { + "epoch": 0.1702826458657473, + "grad_norm": 1.1923000812530518, + "learning_rate": 0.00019977840572858691, + "loss": 4.6109, + "step": 91250 + }, + { + "epoch": 0.1703759514251258, + "grad_norm": 1.0224426984786987, + "learning_rate": 0.00019977812612610572, + "loss": 4.7127, + "step": 91300 + }, + { + "epoch": 0.17046925698450427, + "grad_norm": 0.8383764624595642, + "learning_rate": 0.0001997778463475337, + "loss": 4.6056, + "step": 91350 + }, + { + "epoch": 0.17056256254388277, + "grad_norm": 1.2207326889038086, + "learning_rate": 0.00019977756639287132, + "loss": 4.7927, + "step": 91400 + }, + { + "epoch": 0.17065586810326125, + "grad_norm": 1.1539119482040405, + "learning_rate": 0.0001997772862621191, + "loss": 4.7997, + "step": 91450 + }, + { + "epoch": 0.17074917366263975, + "grad_norm": 0.9894577860832214, + "learning_rate": 0.0001997770059552775, + "loss": 4.7092, + "step": 91500 + }, + { + "epoch": 0.17084247922201826, + "grad_norm": 1.1220641136169434, + "learning_rate": 0.00019977672547234705, + "loss": 4.7147, + "step": 91550 + }, + { + "epoch": 0.17093578478139673, + "grad_norm": 0.8501759171485901, + "learning_rate": 0.00019977644481332824, + "loss": 4.7409, + "step": 91600 + }, + { + "epoch": 0.17102909034077524, + "grad_norm": 0.859946608543396, + "learning_rate": 0.00019977616397822158, + "loss": 4.5836, + "step": 91650 + }, + { + "epoch": 0.1711223959001537, + "grad_norm": 0.7580205798149109, + "learning_rate": 0.00019977588296702753, + "loss": 4.6373, + "step": 91700 + }, + { + "epoch": 0.17121570145953222, + "grad_norm": 1.3309026956558228, + "learning_rate": 0.0001997756017797466, + "loss": 4.6238, + "step": 91750 + }, + { + "epoch": 0.1713090070189107, + "grad_norm": 0.965422511100769, + "learning_rate": 0.0001997753204163793, + "loss": 4.8401, + "step": 91800 + }, + { + "epoch": 0.1714023125782892, + "grad_norm": 0.9164948463439941, + "learning_rate": 0.0001997750388769261, + "loss": 4.8671, + "step": 91850 + }, + { + "epoch": 0.1714956181376677, + "grad_norm": 0.8471038937568665, + "learning_rate": 0.00019977475716138752, + "loss": 4.7976, + "step": 91900 + }, + { + "epoch": 0.17158892369704618, + "grad_norm": 1.1867235898971558, + "learning_rate": 0.00019977447526976406, + "loss": 4.8716, + "step": 91950 + }, + { + "epoch": 0.17168222925642468, + "grad_norm": 1.0447583198547363, + "learning_rate": 0.00019977419320205616, + "loss": 4.5796, + "step": 92000 + }, + { + "epoch": 0.17177553481580315, + "grad_norm": 0.894728422164917, + "learning_rate": 0.00019977391095826443, + "loss": 4.7049, + "step": 92050 + }, + { + "epoch": 0.17186884037518166, + "grad_norm": 1.03791344165802, + "learning_rate": 0.0001997736285383893, + "loss": 4.6183, + "step": 92100 + }, + { + "epoch": 0.17196214593456013, + "grad_norm": 0.7112425565719604, + "learning_rate": 0.00019977334594243124, + "loss": 4.5619, + "step": 92150 + }, + { + "epoch": 0.17205545149393864, + "grad_norm": 0.7274053692817688, + "learning_rate": 0.00019977306317039078, + "loss": 4.7356, + "step": 92200 + }, + { + "epoch": 0.17214875705331714, + "grad_norm": 0.9669164419174194, + "learning_rate": 0.0001997727802222684, + "loss": 4.5581, + "step": 92250 + }, + { + "epoch": 0.17224206261269562, + "grad_norm": 0.9389071464538574, + "learning_rate": 0.00019977249709806464, + "loss": 4.7506, + "step": 92300 + }, + { + "epoch": 0.17233536817207412, + "grad_norm": 0.9951257109642029, + "learning_rate": 0.00019977221379778, + "loss": 4.9339, + "step": 92350 + }, + { + "epoch": 0.1724286737314526, + "grad_norm": 1.1136345863342285, + "learning_rate": 0.00019977193032141494, + "loss": 4.8545, + "step": 92400 + }, + { + "epoch": 0.1725219792908311, + "grad_norm": 0.7983369827270508, + "learning_rate": 0.00019977164666896998, + "loss": 4.6367, + "step": 92450 + }, + { + "epoch": 0.17261528485020958, + "grad_norm": 0.905382513999939, + "learning_rate": 0.00019977136284044563, + "loss": 4.4375, + "step": 92500 + }, + { + "epoch": 0.17270859040958808, + "grad_norm": 1.1273249387741089, + "learning_rate": 0.00019977107883584235, + "loss": 4.777, + "step": 92550 + }, + { + "epoch": 0.17280189596896658, + "grad_norm": 0.7400512099266052, + "learning_rate": 0.00019977079465516068, + "loss": 4.6677, + "step": 92600 + }, + { + "epoch": 0.17289520152834506, + "grad_norm": 1.0424542427062988, + "learning_rate": 0.00019977051029840116, + "loss": 4.6719, + "step": 92650 + }, + { + "epoch": 0.17298850708772356, + "grad_norm": 1.0340840816497803, + "learning_rate": 0.00019977022576556417, + "loss": 4.7587, + "step": 92700 + }, + { + "epoch": 0.17308181264710204, + "grad_norm": 1.0534294843673706, + "learning_rate": 0.00019976994105665033, + "loss": 4.7536, + "step": 92750 + }, + { + "epoch": 0.17317511820648054, + "grad_norm": 0.754612922668457, + "learning_rate": 0.0001997696561716601, + "loss": 4.5673, + "step": 92800 + }, + { + "epoch": 0.17326842376585902, + "grad_norm": 1.1167819499969482, + "learning_rate": 0.00019976937111059395, + "loss": 4.735, + "step": 92850 + }, + { + "epoch": 0.17336172932523752, + "grad_norm": 1.0311816930770874, + "learning_rate": 0.00019976908587345245, + "loss": 4.8155, + "step": 92900 + }, + { + "epoch": 0.173455034884616, + "grad_norm": 1.6865220069885254, + "learning_rate": 0.00019976880046023606, + "loss": 4.5625, + "step": 92950 + }, + { + "epoch": 0.1735483404439945, + "grad_norm": 1.0632766485214233, + "learning_rate": 0.0001997685148709453, + "loss": 4.6878, + "step": 93000 + }, + { + "epoch": 0.1735483404439945, + "eval_loss": 4.916426658630371, + "eval_runtime": 229.9701, + "eval_samples_per_second": 11.341, + "eval_steps_per_second": 11.341, + "eval_tts_loss": 7.440112192040892, + "step": 93000 + }, + { + "epoch": 0.173641646003373, + "grad_norm": 0.9786635637283325, + "learning_rate": 0.00019976822910558063, + "loss": 4.7599, + "step": 93050 + }, + { + "epoch": 0.17373495156275148, + "grad_norm": 0.9855509400367737, + "learning_rate": 0.00019976794316414265, + "loss": 4.4347, + "step": 93100 + }, + { + "epoch": 0.17382825712213, + "grad_norm": 0.8809786438941956, + "learning_rate": 0.00019976765704663176, + "loss": 4.6688, + "step": 93150 + }, + { + "epoch": 0.17392156268150846, + "grad_norm": 1.0891468524932861, + "learning_rate": 0.00019976737075304851, + "loss": 4.7232, + "step": 93200 + }, + { + "epoch": 0.17401486824088697, + "grad_norm": 1.038535237312317, + "learning_rate": 0.00019976708428339342, + "loss": 4.7486, + "step": 93250 + }, + { + "epoch": 0.17410817380026544, + "grad_norm": 1.0178803205490112, + "learning_rate": 0.00019976679763766697, + "loss": 4.6426, + "step": 93300 + }, + { + "epoch": 0.17420147935964395, + "grad_norm": 0.7876224517822266, + "learning_rate": 0.00019976651081586968, + "loss": 4.7747, + "step": 93350 + }, + { + "epoch": 0.17429478491902245, + "grad_norm": 0.6796728372573853, + "learning_rate": 0.00019976622381800207, + "loss": 4.8263, + "step": 93400 + }, + { + "epoch": 0.17438809047840093, + "grad_norm": 0.796763002872467, + "learning_rate": 0.0001997659366440646, + "loss": 4.5471, + "step": 93450 + }, + { + "epoch": 0.17448139603777943, + "grad_norm": 0.7914952039718628, + "learning_rate": 0.00019976564929405783, + "loss": 4.8249, + "step": 93500 + }, + { + "epoch": 0.1745747015971579, + "grad_norm": 0.7674325704574585, + "learning_rate": 0.00019976536176798224, + "loss": 4.6139, + "step": 93550 + }, + { + "epoch": 0.1746680071565364, + "grad_norm": 0.8790803551673889, + "learning_rate": 0.00019976507406583834, + "loss": 4.6569, + "step": 93600 + }, + { + "epoch": 0.17476131271591488, + "grad_norm": 1.362616777420044, + "learning_rate": 0.00019976478618762662, + "loss": 4.7218, + "step": 93650 + }, + { + "epoch": 0.1748546182752934, + "grad_norm": 1.1918220520019531, + "learning_rate": 0.00019976449813334762, + "loss": 4.9003, + "step": 93700 + }, + { + "epoch": 0.1749479238346719, + "grad_norm": 0.9692675471305847, + "learning_rate": 0.00019976420990300183, + "loss": 4.6867, + "step": 93750 + }, + { + "epoch": 0.17504122939405037, + "grad_norm": 1.2216622829437256, + "learning_rate": 0.00019976392149658977, + "loss": 4.6528, + "step": 93800 + }, + { + "epoch": 0.17513453495342887, + "grad_norm": 0.9711513519287109, + "learning_rate": 0.00019976363291411195, + "loss": 4.7519, + "step": 93850 + }, + { + "epoch": 0.17522784051280735, + "grad_norm": 1.0663491487503052, + "learning_rate": 0.00019976334415556886, + "loss": 4.5517, + "step": 93900 + }, + { + "epoch": 0.17532114607218585, + "grad_norm": 0.9361185431480408, + "learning_rate": 0.00019976305522096101, + "loss": 4.7161, + "step": 93950 + }, + { + "epoch": 0.17541445163156433, + "grad_norm": 0.7986595630645752, + "learning_rate": 0.00019976276611028895, + "loss": 4.7228, + "step": 94000 + }, + { + "epoch": 0.17550775719094283, + "grad_norm": 0.9233583211898804, + "learning_rate": 0.00019976247682355316, + "loss": 4.8464, + "step": 94050 + }, + { + "epoch": 0.1756010627503213, + "grad_norm": 1.2323817014694214, + "learning_rate": 0.00019976218736075414, + "loss": 4.5892, + "step": 94100 + }, + { + "epoch": 0.1756943683096998, + "grad_norm": 1.0528844594955444, + "learning_rate": 0.00019976189772189242, + "loss": 4.8237, + "step": 94150 + }, + { + "epoch": 0.17578767386907831, + "grad_norm": 1.1424999237060547, + "learning_rate": 0.00019976160790696848, + "loss": 4.6996, + "step": 94200 + }, + { + "epoch": 0.1758809794284568, + "grad_norm": 0.8401368856430054, + "learning_rate": 0.00019976131791598286, + "loss": 4.8017, + "step": 94250 + }, + { + "epoch": 0.1759742849878353, + "grad_norm": 0.7714797854423523, + "learning_rate": 0.00019976102774893608, + "loss": 4.5253, + "step": 94300 + }, + { + "epoch": 0.17606759054721377, + "grad_norm": 1.129595398902893, + "learning_rate": 0.00019976073740582864, + "loss": 4.8271, + "step": 94350 + }, + { + "epoch": 0.17616089610659227, + "grad_norm": 0.9854161739349365, + "learning_rate": 0.00019976044688666106, + "loss": 4.6552, + "step": 94400 + }, + { + "epoch": 0.17625420166597075, + "grad_norm": 1.1386884450912476, + "learning_rate": 0.00019976015619143384, + "loss": 4.6417, + "step": 94450 + }, + { + "epoch": 0.17634750722534925, + "grad_norm": 1.0306202173233032, + "learning_rate": 0.0001997598653201475, + "loss": 4.7774, + "step": 94500 + }, + { + "epoch": 0.17644081278472776, + "grad_norm": 1.3340245485305786, + "learning_rate": 0.00019975957427280255, + "loss": 4.9127, + "step": 94550 + }, + { + "epoch": 0.17653411834410623, + "grad_norm": 1.3886502981185913, + "learning_rate": 0.0001997592830493995, + "loss": 4.5873, + "step": 94600 + }, + { + "epoch": 0.17662742390348474, + "grad_norm": 1.117301344871521, + "learning_rate": 0.00019975899164993886, + "loss": 4.7842, + "step": 94650 + }, + { + "epoch": 0.1767207294628632, + "grad_norm": 1.071517825126648, + "learning_rate": 0.00019975870007442117, + "loss": 4.6684, + "step": 94700 + }, + { + "epoch": 0.17681403502224172, + "grad_norm": 1.0764856338500977, + "learning_rate": 0.0001997584083228469, + "loss": 4.6823, + "step": 94750 + }, + { + "epoch": 0.1769073405816202, + "grad_norm": 0.9086782932281494, + "learning_rate": 0.00019975811639521662, + "loss": 4.863, + "step": 94800 + }, + { + "epoch": 0.1770006461409987, + "grad_norm": 0.9636552929878235, + "learning_rate": 0.00019975782429153082, + "loss": 4.685, + "step": 94850 + }, + { + "epoch": 0.1770939517003772, + "grad_norm": 0.620227038860321, + "learning_rate": 0.00019975753201179, + "loss": 4.6816, + "step": 94900 + }, + { + "epoch": 0.17718725725975568, + "grad_norm": 0.8294452428817749, + "learning_rate": 0.00019975723955599468, + "loss": 4.6858, + "step": 94950 + }, + { + "epoch": 0.17728056281913418, + "grad_norm": 1.1841310262680054, + "learning_rate": 0.0001997569469241454, + "loss": 4.6845, + "step": 95000 + }, + { + "epoch": 0.17737386837851266, + "grad_norm": 1.1353343725204468, + "learning_rate": 0.00019975665411624266, + "loss": 4.9758, + "step": 95050 + }, + { + "epoch": 0.17746717393789116, + "grad_norm": 1.098388910293579, + "learning_rate": 0.00019975636113228696, + "loss": 4.807, + "step": 95100 + }, + { + "epoch": 0.17756047949726964, + "grad_norm": 1.1415269374847412, + "learning_rate": 0.00019975606797227886, + "loss": 4.6393, + "step": 95150 + }, + { + "epoch": 0.17765378505664814, + "grad_norm": 1.01333487033844, + "learning_rate": 0.00019975577463621883, + "loss": 4.7383, + "step": 95200 + }, + { + "epoch": 0.17774709061602664, + "grad_norm": 0.9192553758621216, + "learning_rate": 0.00019975548112410743, + "loss": 4.7246, + "step": 95250 + }, + { + "epoch": 0.17784039617540512, + "grad_norm": 1.233699917793274, + "learning_rate": 0.00019975518743594516, + "loss": 4.6757, + "step": 95300 + }, + { + "epoch": 0.17793370173478362, + "grad_norm": 1.0001840591430664, + "learning_rate": 0.00019975489357173252, + "loss": 4.6488, + "step": 95350 + }, + { + "epoch": 0.1780270072941621, + "grad_norm": 0.9523444771766663, + "learning_rate": 0.00019975459953147005, + "loss": 4.6393, + "step": 95400 + }, + { + "epoch": 0.1781203128535406, + "grad_norm": 1.081119179725647, + "learning_rate": 0.00019975430531515826, + "loss": 4.583, + "step": 95450 + }, + { + "epoch": 0.17821361841291908, + "grad_norm": 0.9086116552352905, + "learning_rate": 0.00019975401092279767, + "loss": 4.659, + "step": 95500 + }, + { + "epoch": 0.17830692397229758, + "grad_norm": 0.9077746868133545, + "learning_rate": 0.00019975371635438884, + "loss": 4.5218, + "step": 95550 + }, + { + "epoch": 0.17840022953167606, + "grad_norm": 1.0118244886398315, + "learning_rate": 0.00019975342160993224, + "loss": 4.9855, + "step": 95600 + }, + { + "epoch": 0.17849353509105456, + "grad_norm": 0.7483125329017639, + "learning_rate": 0.0001997531266894284, + "loss": 4.5592, + "step": 95650 + }, + { + "epoch": 0.17858684065043307, + "grad_norm": 0.9252362847328186, + "learning_rate": 0.00019975283159287782, + "loss": 4.6356, + "step": 95700 + }, + { + "epoch": 0.17868014620981154, + "grad_norm": 0.7764643430709839, + "learning_rate": 0.00019975253632028105, + "loss": 4.6681, + "step": 95750 + }, + { + "epoch": 0.17877345176919004, + "grad_norm": 0.9048830270767212, + "learning_rate": 0.00019975224087163861, + "loss": 4.5292, + "step": 95800 + }, + { + "epoch": 0.17886675732856852, + "grad_norm": 0.9486568570137024, + "learning_rate": 0.00019975194524695102, + "loss": 4.7868, + "step": 95850 + }, + { + "epoch": 0.17896006288794702, + "grad_norm": 0.8098161816596985, + "learning_rate": 0.0001997516494462188, + "loss": 4.6501, + "step": 95900 + }, + { + "epoch": 0.1790533684473255, + "grad_norm": 1.2166732549667358, + "learning_rate": 0.00019975135346944248, + "loss": 4.4709, + "step": 95950 + }, + { + "epoch": 0.179146674006704, + "grad_norm": 0.9364936351776123, + "learning_rate": 0.00019975105731662255, + "loss": 4.592, + "step": 96000 + }, + { + "epoch": 0.179146674006704, + "eval_loss": 4.917341232299805, + "eval_runtime": 231.2993, + "eval_samples_per_second": 11.275, + "eval_steps_per_second": 11.275, + "eval_tts_loss": 7.391886383147933, + "step": 96000 + }, + { + "epoch": 0.1792399795660825, + "grad_norm": 1.0819836854934692, + "learning_rate": 0.00019975076098775957, + "loss": 4.6746, + "step": 96050 + }, + { + "epoch": 0.17933328512546098, + "grad_norm": 1.1346968412399292, + "learning_rate": 0.00019975046448285407, + "loss": 4.7887, + "step": 96100 + }, + { + "epoch": 0.1794265906848395, + "grad_norm": 1.0858914852142334, + "learning_rate": 0.00019975016780190652, + "loss": 4.5072, + "step": 96150 + }, + { + "epoch": 0.17951989624421796, + "grad_norm": 1.002199649810791, + "learning_rate": 0.0001997498709449175, + "loss": 4.7632, + "step": 96200 + }, + { + "epoch": 0.17961320180359647, + "grad_norm": 0.8308196067810059, + "learning_rate": 0.00019974957391188746, + "loss": 4.6145, + "step": 96250 + }, + { + "epoch": 0.17970650736297494, + "grad_norm": 0.9789503216743469, + "learning_rate": 0.00019974927670281703, + "loss": 4.6052, + "step": 96300 + }, + { + "epoch": 0.17979981292235345, + "grad_norm": 1.1850559711456299, + "learning_rate": 0.00019974897931770663, + "loss": 4.6967, + "step": 96350 + }, + { + "epoch": 0.17989311848173195, + "grad_norm": 1.0554805994033813, + "learning_rate": 0.00019974868175655686, + "loss": 4.8473, + "step": 96400 + }, + { + "epoch": 0.17998642404111043, + "grad_norm": 1.0979632139205933, + "learning_rate": 0.00019974838401936822, + "loss": 4.8164, + "step": 96450 + }, + { + "epoch": 0.18007972960048893, + "grad_norm": 1.1470582485198975, + "learning_rate": 0.00019974808610614123, + "loss": 4.674, + "step": 96500 + }, + { + "epoch": 0.1801730351598674, + "grad_norm": 0.7371369004249573, + "learning_rate": 0.00019974778801687643, + "loss": 4.7965, + "step": 96550 + }, + { + "epoch": 0.1802663407192459, + "grad_norm": 0.9566873908042908, + "learning_rate": 0.00019974748975157432, + "loss": 4.8604, + "step": 96600 + }, + { + "epoch": 0.18035964627862439, + "grad_norm": 1.2158596515655518, + "learning_rate": 0.00019974719131023543, + "loss": 4.7986, + "step": 96650 + }, + { + "epoch": 0.1804529518380029, + "grad_norm": 1.0148197412490845, + "learning_rate": 0.0001997468926928603, + "loss": 4.9964, + "step": 96700 + }, + { + "epoch": 0.1805462573973814, + "grad_norm": 1.0894036293029785, + "learning_rate": 0.00019974659389944947, + "loss": 4.7985, + "step": 96750 + }, + { + "epoch": 0.18063956295675987, + "grad_norm": 1.0293515920639038, + "learning_rate": 0.00019974629493000348, + "loss": 4.7646, + "step": 96800 + }, + { + "epoch": 0.18073286851613837, + "grad_norm": 1.054319977760315, + "learning_rate": 0.00019974599578452277, + "loss": 4.6976, + "step": 96850 + }, + { + "epoch": 0.18082617407551685, + "grad_norm": 1.062825083732605, + "learning_rate": 0.00019974569646300799, + "loss": 4.8446, + "step": 96900 + }, + { + "epoch": 0.18091947963489535, + "grad_norm": 0.7005245089530945, + "learning_rate": 0.00019974539696545956, + "loss": 4.5875, + "step": 96950 + }, + { + "epoch": 0.18101278519427383, + "grad_norm": 1.2943309545516968, + "learning_rate": 0.0001997450972918781, + "loss": 4.6447, + "step": 97000 + }, + { + "epoch": 0.18110609075365233, + "grad_norm": 0.8457721471786499, + "learning_rate": 0.00019974479744226404, + "loss": 4.8779, + "step": 97050 + }, + { + "epoch": 0.1811993963130308, + "grad_norm": 1.1781150102615356, + "learning_rate": 0.000199744497416618, + "loss": 4.7332, + "step": 97100 + }, + { + "epoch": 0.1812927018724093, + "grad_norm": 0.9106166958808899, + "learning_rate": 0.00019974419721494044, + "loss": 4.5838, + "step": 97150 + }, + { + "epoch": 0.18138600743178782, + "grad_norm": 1.1041089296340942, + "learning_rate": 0.00019974389683723195, + "loss": 4.5042, + "step": 97200 + }, + { + "epoch": 0.1814793129911663, + "grad_norm": 0.9294777512550354, + "learning_rate": 0.00019974359628349304, + "loss": 4.8125, + "step": 97250 + }, + { + "epoch": 0.1815726185505448, + "grad_norm": 1.158308506011963, + "learning_rate": 0.0001997432955537242, + "loss": 4.9712, + "step": 97300 + }, + { + "epoch": 0.18166592410992327, + "grad_norm": 0.9689257740974426, + "learning_rate": 0.00019974299464792602, + "loss": 4.7698, + "step": 97350 + }, + { + "epoch": 0.18175922966930178, + "grad_norm": 1.2290277481079102, + "learning_rate": 0.00019974269356609902, + "loss": 4.585, + "step": 97400 + }, + { + "epoch": 0.18185253522868025, + "grad_norm": 1.2505154609680176, + "learning_rate": 0.00019974239230824368, + "loss": 4.7192, + "step": 97450 + }, + { + "epoch": 0.18194584078805875, + "grad_norm": 1.1231937408447266, + "learning_rate": 0.00019974209087436057, + "loss": 4.6296, + "step": 97500 + }, + { + "epoch": 0.18203914634743726, + "grad_norm": 1.0043781995773315, + "learning_rate": 0.00019974178926445023, + "loss": 4.7468, + "step": 97550 + }, + { + "epoch": 0.18213245190681573, + "grad_norm": 0.986024796962738, + "learning_rate": 0.0001997414874785132, + "loss": 4.6579, + "step": 97600 + }, + { + "epoch": 0.18222575746619424, + "grad_norm": 0.9532257914543152, + "learning_rate": 0.00019974118551654998, + "loss": 4.8355, + "step": 97650 + }, + { + "epoch": 0.18231906302557271, + "grad_norm": 1.6420865058898926, + "learning_rate": 0.0001997408833785611, + "loss": 4.793, + "step": 97700 + }, + { + "epoch": 0.18241236858495122, + "grad_norm": 1.155208706855774, + "learning_rate": 0.00019974058106454713, + "loss": 4.7287, + "step": 97750 + }, + { + "epoch": 0.1825056741443297, + "grad_norm": 0.7323354482650757, + "learning_rate": 0.00019974027857450858, + "loss": 4.7684, + "step": 97800 + }, + { + "epoch": 0.1825989797037082, + "grad_norm": 0.9008813500404358, + "learning_rate": 0.000199739975908446, + "loss": 4.858, + "step": 97850 + }, + { + "epoch": 0.1826922852630867, + "grad_norm": 1.181888461112976, + "learning_rate": 0.0001997396730663599, + "loss": 4.7723, + "step": 97900 + }, + { + "epoch": 0.18278559082246518, + "grad_norm": 0.7667856812477112, + "learning_rate": 0.00019973937004825082, + "loss": 4.5993, + "step": 97950 + }, + { + "epoch": 0.18287889638184368, + "grad_norm": 1.1447443962097168, + "learning_rate": 0.00019973906685411932, + "loss": 4.5683, + "step": 98000 + }, + { + "epoch": 0.18297220194122216, + "grad_norm": 0.7537021040916443, + "learning_rate": 0.00019973876348396589, + "loss": 4.7089, + "step": 98050 + }, + { + "epoch": 0.18306550750060066, + "grad_norm": 0.937552809715271, + "learning_rate": 0.00019973845993779112, + "loss": 4.8778, + "step": 98100 + }, + { + "epoch": 0.18315881305997914, + "grad_norm": 1.2293739318847656, + "learning_rate": 0.0001997381562155955, + "loss": 4.762, + "step": 98150 + }, + { + "epoch": 0.18325211861935764, + "grad_norm": 1.0932244062423706, + "learning_rate": 0.0001997378523173796, + "loss": 4.8538, + "step": 98200 + }, + { + "epoch": 0.18334542417873612, + "grad_norm": 1.1234608888626099, + "learning_rate": 0.00019973754824314393, + "loss": 4.6881, + "step": 98250 + }, + { + "epoch": 0.18343872973811462, + "grad_norm": 0.9274517893791199, + "learning_rate": 0.00019973724399288903, + "loss": 4.8478, + "step": 98300 + }, + { + "epoch": 0.18353203529749312, + "grad_norm": 0.9792783260345459, + "learning_rate": 0.00019973693956661545, + "loss": 4.5753, + "step": 98350 + }, + { + "epoch": 0.1836253408568716, + "grad_norm": 1.3715656995773315, + "learning_rate": 0.00019973663496432374, + "loss": 4.6483, + "step": 98400 + }, + { + "epoch": 0.1837186464162501, + "grad_norm": 0.7978438138961792, + "learning_rate": 0.00019973633018601438, + "loss": 4.6874, + "step": 98450 + }, + { + "epoch": 0.18381195197562858, + "grad_norm": 0.9657984375953674, + "learning_rate": 0.00019973602523168797, + "loss": 4.7344, + "step": 98500 + }, + { + "epoch": 0.18390525753500708, + "grad_norm": 1.0131194591522217, + "learning_rate": 0.00019973572010134502, + "loss": 4.6663, + "step": 98550 + }, + { + "epoch": 0.18399856309438556, + "grad_norm": 1.2889825105667114, + "learning_rate": 0.00019973541479498608, + "loss": 4.6407, + "step": 98600 + }, + { + "epoch": 0.18409186865376406, + "grad_norm": 1.0131268501281738, + "learning_rate": 0.00019973510931261168, + "loss": 4.8937, + "step": 98650 + }, + { + "epoch": 0.18418517421314257, + "grad_norm": 0.9884664416313171, + "learning_rate": 0.00019973480365422235, + "loss": 4.7954, + "step": 98700 + }, + { + "epoch": 0.18427847977252104, + "grad_norm": 1.0592337846755981, + "learning_rate": 0.00019973449781981865, + "loss": 4.7603, + "step": 98750 + }, + { + "epoch": 0.18437178533189955, + "grad_norm": 0.9447229504585266, + "learning_rate": 0.00019973419180940112, + "loss": 4.5579, + "step": 98800 + }, + { + "epoch": 0.18446509089127802, + "grad_norm": 1.186189889907837, + "learning_rate": 0.00019973388562297026, + "loss": 4.6064, + "step": 98850 + }, + { + "epoch": 0.18455839645065653, + "grad_norm": 1.156333088874817, + "learning_rate": 0.00019973357926052665, + "loss": 4.6359, + "step": 98900 + }, + { + "epoch": 0.184651702010035, + "grad_norm": 1.2002015113830566, + "learning_rate": 0.00019973327272207082, + "loss": 4.5993, + "step": 98950 + }, + { + "epoch": 0.1847450075694135, + "grad_norm": 1.1841974258422852, + "learning_rate": 0.00019973296600760333, + "loss": 4.6125, + "step": 99000 + }, + { + "epoch": 0.1847450075694135, + "eval_loss": 4.913722991943359, + "eval_runtime": 229.7391, + "eval_samples_per_second": 11.352, + "eval_steps_per_second": 11.352, + "eval_tts_loss": 7.435562890975742, + "step": 99000 + }, + { + "epoch": 0.184838313128792, + "grad_norm": 1.0555998086929321, + "learning_rate": 0.00019973265911712472, + "loss": 4.7896, + "step": 99050 + }, + { + "epoch": 0.18493161868817048, + "grad_norm": 1.1442583799362183, + "learning_rate": 0.00019973235205063546, + "loss": 4.695, + "step": 99100 + }, + { + "epoch": 0.185024924247549, + "grad_norm": 1.149874210357666, + "learning_rate": 0.0001997320448081362, + "loss": 4.7661, + "step": 99150 + }, + { + "epoch": 0.18511822980692746, + "grad_norm": 0.9762941002845764, + "learning_rate": 0.00019973173738962742, + "loss": 4.6591, + "step": 99200 + }, + { + "epoch": 0.18521153536630597, + "grad_norm": 1.0889984369277954, + "learning_rate": 0.00019973142979510963, + "loss": 4.6857, + "step": 99250 + }, + { + "epoch": 0.18530484092568444, + "grad_norm": 1.1313124895095825, + "learning_rate": 0.00019973112202458345, + "loss": 4.5218, + "step": 99300 + }, + { + "epoch": 0.18539814648506295, + "grad_norm": 0.9728614687919617, + "learning_rate": 0.0001997308140780494, + "loss": 4.6974, + "step": 99350 + }, + { + "epoch": 0.18549145204444145, + "grad_norm": 1.0745929479599, + "learning_rate": 0.000199730505955508, + "loss": 4.6735, + "step": 99400 + }, + { + "epoch": 0.18558475760381993, + "grad_norm": 0.6565387845039368, + "learning_rate": 0.00019973019765695977, + "loss": 4.6161, + "step": 99450 + }, + { + "epoch": 0.18567806316319843, + "grad_norm": 1.0615601539611816, + "learning_rate": 0.00019972988918240533, + "loss": 4.8634, + "step": 99500 + }, + { + "epoch": 0.1857713687225769, + "grad_norm": 0.948047399520874, + "learning_rate": 0.00019972958053184517, + "loss": 4.8276, + "step": 99550 + }, + { + "epoch": 0.1858646742819554, + "grad_norm": 1.0971624851226807, + "learning_rate": 0.00019972927170527983, + "loss": 4.9053, + "step": 99600 + }, + { + "epoch": 0.1859579798413339, + "grad_norm": 0.9675891399383545, + "learning_rate": 0.0001997289627027099, + "loss": 4.7391, + "step": 99650 + }, + { + "epoch": 0.1860512854007124, + "grad_norm": 0.8968614339828491, + "learning_rate": 0.0001997286535241359, + "loss": 4.6259, + "step": 99700 + }, + { + "epoch": 0.18614459096009087, + "grad_norm": 0.772883951663971, + "learning_rate": 0.00019972834416955837, + "loss": 4.6393, + "step": 99750 + }, + { + "epoch": 0.18623789651946937, + "grad_norm": 0.9643422365188599, + "learning_rate": 0.00019972803463897784, + "loss": 4.793, + "step": 99800 + }, + { + "epoch": 0.18633120207884787, + "grad_norm": 1.0831654071807861, + "learning_rate": 0.00019972772493239492, + "loss": 4.7557, + "step": 99850 + }, + { + "epoch": 0.18642450763822635, + "grad_norm": 1.0079478025436401, + "learning_rate": 0.00019972741504981008, + "loss": 4.4274, + "step": 99900 + }, + { + "epoch": 0.18651781319760485, + "grad_norm": 0.9231737852096558, + "learning_rate": 0.00019972710499122388, + "loss": 4.8235, + "step": 99950 + }, + { + "epoch": 0.18661111875698333, + "grad_norm": 1.2413074970245361, + "learning_rate": 0.00019972679475663694, + "loss": 4.676, + "step": 100000 + }, + { + "epoch": 0.18670442431636183, + "grad_norm": 1.0384629964828491, + "learning_rate": 0.0001997264843460497, + "loss": 4.7109, + "step": 100050 + }, + { + "epoch": 0.1867977298757403, + "grad_norm": 1.113856554031372, + "learning_rate": 0.0001997261737594628, + "loss": 4.5818, + "step": 100100 + }, + { + "epoch": 0.1868910354351188, + "grad_norm": 1.0480083227157593, + "learning_rate": 0.00019972586299687675, + "loss": 4.6197, + "step": 100150 + }, + { + "epoch": 0.18698434099449732, + "grad_norm": 1.1027523279190063, + "learning_rate": 0.00019972555205829208, + "loss": 4.714, + "step": 100200 + }, + { + "epoch": 0.1870776465538758, + "grad_norm": 1.1065394878387451, + "learning_rate": 0.00019972524094370936, + "loss": 4.6669, + "step": 100250 + }, + { + "epoch": 0.1871709521132543, + "grad_norm": 1.1278355121612549, + "learning_rate": 0.00019972492965312916, + "loss": 4.7749, + "step": 100300 + }, + { + "epoch": 0.18726425767263277, + "grad_norm": 0.9157338738441467, + "learning_rate": 0.00019972461818655197, + "loss": 4.886, + "step": 100350 + }, + { + "epoch": 0.18735756323201128, + "grad_norm": 0.8030517101287842, + "learning_rate": 0.0001997243065439784, + "loss": 4.7571, + "step": 100400 + }, + { + "epoch": 0.18745086879138975, + "grad_norm": 0.9438421726226807, + "learning_rate": 0.000199723994725409, + "loss": 4.8845, + "step": 100450 + }, + { + "epoch": 0.18754417435076826, + "grad_norm": 1.0755733251571655, + "learning_rate": 0.00019972368273084426, + "loss": 4.8225, + "step": 100500 + }, + { + "epoch": 0.18763747991014676, + "grad_norm": 1.0262365341186523, + "learning_rate": 0.00019972337056028475, + "loss": 4.7597, + "step": 100550 + }, + { + "epoch": 0.18773078546952524, + "grad_norm": 0.8555694222450256, + "learning_rate": 0.00019972305821373108, + "loss": 4.6938, + "step": 100600 + }, + { + "epoch": 0.18782409102890374, + "grad_norm": 0.9016522169113159, + "learning_rate": 0.00019972274569118374, + "loss": 4.5223, + "step": 100650 + }, + { + "epoch": 0.18791739658828222, + "grad_norm": 1.187323808670044, + "learning_rate": 0.0001997224329926433, + "loss": 4.7224, + "step": 100700 + }, + { + "epoch": 0.18801070214766072, + "grad_norm": 1.1199045181274414, + "learning_rate": 0.0001997221201181103, + "loss": 4.5454, + "step": 100750 + }, + { + "epoch": 0.1881040077070392, + "grad_norm": 0.8698449730873108, + "learning_rate": 0.00019972180706758535, + "loss": 4.7941, + "step": 100800 + }, + { + "epoch": 0.1881973132664177, + "grad_norm": 1.17020583152771, + "learning_rate": 0.00019972149384106893, + "loss": 4.5653, + "step": 100850 + }, + { + "epoch": 0.18829061882579617, + "grad_norm": 1.278030514717102, + "learning_rate": 0.00019972118043856162, + "loss": 4.4897, + "step": 100900 + }, + { + "epoch": 0.18838392438517468, + "grad_norm": 0.8267766237258911, + "learning_rate": 0.00019972086686006395, + "loss": 4.5364, + "step": 100950 + }, + { + "epoch": 0.18847722994455318, + "grad_norm": 1.0478912591934204, + "learning_rate": 0.00019972055310557652, + "loss": 4.8908, + "step": 101000 + }, + { + "epoch": 0.18857053550393166, + "grad_norm": 1.0098049640655518, + "learning_rate": 0.00019972023917509986, + "loss": 4.6964, + "step": 101050 + }, + { + "epoch": 0.18866384106331016, + "grad_norm": 0.9657384753227234, + "learning_rate": 0.00019971992506863452, + "loss": 4.5589, + "step": 101100 + }, + { + "epoch": 0.18875714662268864, + "grad_norm": 1.05173921585083, + "learning_rate": 0.00019971961078618108, + "loss": 4.5888, + "step": 101150 + }, + { + "epoch": 0.18885045218206714, + "grad_norm": 0.9653334617614746, + "learning_rate": 0.00019971929632774002, + "loss": 4.4529, + "step": 101200 + }, + { + "epoch": 0.18894375774144562, + "grad_norm": 0.8235491514205933, + "learning_rate": 0.000199718981693312, + "loss": 4.6389, + "step": 101250 + }, + { + "epoch": 0.18903706330082412, + "grad_norm": 0.921193540096283, + "learning_rate": 0.0001997186668828975, + "loss": 4.7759, + "step": 101300 + }, + { + "epoch": 0.18913036886020262, + "grad_norm": 0.6879051327705383, + "learning_rate": 0.0001997183518964971, + "loss": 4.9006, + "step": 101350 + }, + { + "epoch": 0.1892236744195811, + "grad_norm": 1.0606837272644043, + "learning_rate": 0.00019971803673411135, + "loss": 4.7399, + "step": 101400 + }, + { + "epoch": 0.1893169799789596, + "grad_norm": 0.8906022906303406, + "learning_rate": 0.00019971772139574083, + "loss": 4.5369, + "step": 101450 + }, + { + "epoch": 0.18941028553833808, + "grad_norm": 0.6738720536231995, + "learning_rate": 0.0001997174058813861, + "loss": 4.7231, + "step": 101500 + }, + { + "epoch": 0.18950359109771658, + "grad_norm": 1.0936765670776367, + "learning_rate": 0.00019971709019104764, + "loss": 4.6172, + "step": 101550 + }, + { + "epoch": 0.18959689665709506, + "grad_norm": 0.9944111108779907, + "learning_rate": 0.0001997167743247261, + "loss": 4.7858, + "step": 101600 + }, + { + "epoch": 0.18969020221647356, + "grad_norm": 0.7380058765411377, + "learning_rate": 0.00019971645828242195, + "loss": 4.8101, + "step": 101650 + }, + { + "epoch": 0.18978350777585207, + "grad_norm": 1.133588194847107, + "learning_rate": 0.00019971614206413586, + "loss": 4.5909, + "step": 101700 + }, + { + "epoch": 0.18987681333523054, + "grad_norm": 0.9205106496810913, + "learning_rate": 0.00019971582566986827, + "loss": 4.4921, + "step": 101750 + }, + { + "epoch": 0.18997011889460905, + "grad_norm": 1.0584709644317627, + "learning_rate": 0.0001997155090996198, + "loss": 4.7779, + "step": 101800 + }, + { + "epoch": 0.19006342445398752, + "grad_norm": 1.113847017288208, + "learning_rate": 0.00019971519235339105, + "loss": 4.8153, + "step": 101850 + }, + { + "epoch": 0.19015673001336603, + "grad_norm": 1.1247295141220093, + "learning_rate": 0.00019971487543118247, + "loss": 4.8936, + "step": 101900 + }, + { + "epoch": 0.1902500355727445, + "grad_norm": 1.1859774589538574, + "learning_rate": 0.00019971455833299473, + "loss": 4.5792, + "step": 101950 + }, + { + "epoch": 0.190343341132123, + "grad_norm": 0.8291439414024353, + "learning_rate": 0.00019971424105882831, + "loss": 4.5756, + "step": 102000 + }, + { + "epoch": 0.190343341132123, + "eval_loss": 4.920861721038818, + "eval_runtime": 229.1235, + "eval_samples_per_second": 11.383, + "eval_steps_per_second": 11.383, + "eval_tts_loss": 7.3611460567746265, + "step": 102000 + }, + { + "epoch": 0.1904366466915015, + "grad_norm": 0.6810999512672424, + "learning_rate": 0.0001997139236086838, + "loss": 4.8005, + "step": 102050 + }, + { + "epoch": 0.19052995225087999, + "grad_norm": 1.0597984790802002, + "learning_rate": 0.00019971360598256178, + "loss": 4.7247, + "step": 102100 + }, + { + "epoch": 0.1906232578102585, + "grad_norm": 0.9405797719955444, + "learning_rate": 0.00019971328818046277, + "loss": 4.628, + "step": 102150 + }, + { + "epoch": 0.19071656336963697, + "grad_norm": 1.037429928779602, + "learning_rate": 0.00019971297020238737, + "loss": 4.8803, + "step": 102200 + }, + { + "epoch": 0.19080986892901547, + "grad_norm": 1.3440206050872803, + "learning_rate": 0.00019971265204833612, + "loss": 4.7855, + "step": 102250 + }, + { + "epoch": 0.19090317448839395, + "grad_norm": 1.0140005350112915, + "learning_rate": 0.00019971233371830958, + "loss": 4.6445, + "step": 102300 + }, + { + "epoch": 0.19099648004777245, + "grad_norm": 1.2359912395477295, + "learning_rate": 0.0001997120152123083, + "loss": 4.8863, + "step": 102350 + }, + { + "epoch": 0.19108978560715092, + "grad_norm": 0.9746628403663635, + "learning_rate": 0.00019971169653033288, + "loss": 4.7716, + "step": 102400 + }, + { + "epoch": 0.19118309116652943, + "grad_norm": 1.2258669137954712, + "learning_rate": 0.00019971137767238386, + "loss": 4.9284, + "step": 102450 + }, + { + "epoch": 0.19127639672590793, + "grad_norm": 0.9276184439659119, + "learning_rate": 0.0001997110586384618, + "loss": 4.5106, + "step": 102500 + }, + { + "epoch": 0.1913697022852864, + "grad_norm": 1.0533397197723389, + "learning_rate": 0.00019971073942856723, + "loss": 4.8912, + "step": 102550 + }, + { + "epoch": 0.1914630078446649, + "grad_norm": 1.0632177591323853, + "learning_rate": 0.00019971042004270081, + "loss": 4.764, + "step": 102600 + }, + { + "epoch": 0.1915563134040434, + "grad_norm": 1.2086039781570435, + "learning_rate": 0.000199710100480863, + "loss": 4.8496, + "step": 102650 + }, + { + "epoch": 0.1916496189634219, + "grad_norm": 1.030631184577942, + "learning_rate": 0.00019970978074305444, + "loss": 4.6516, + "step": 102700 + }, + { + "epoch": 0.19174292452280037, + "grad_norm": 1.213368535041809, + "learning_rate": 0.00019970946082927565, + "loss": 4.5534, + "step": 102750 + }, + { + "epoch": 0.19183623008217887, + "grad_norm": 0.9166072607040405, + "learning_rate": 0.0001997091407395272, + "loss": 4.6505, + "step": 102800 + }, + { + "epoch": 0.19192953564155738, + "grad_norm": 1.179938793182373, + "learning_rate": 0.00019970882047380965, + "loss": 4.8351, + "step": 102850 + }, + { + "epoch": 0.19202284120093585, + "grad_norm": 0.961173415184021, + "learning_rate": 0.00019970850003212358, + "loss": 4.7488, + "step": 102900 + }, + { + "epoch": 0.19211614676031435, + "grad_norm": 0.8187737464904785, + "learning_rate": 0.00019970817941446955, + "loss": 4.8423, + "step": 102950 + }, + { + "epoch": 0.19220945231969283, + "grad_norm": 1.1100882291793823, + "learning_rate": 0.00019970785862084814, + "loss": 4.7505, + "step": 103000 + }, + { + "epoch": 0.19230275787907133, + "grad_norm": 0.842174232006073, + "learning_rate": 0.00019970753765125992, + "loss": 4.4804, + "step": 103050 + }, + { + "epoch": 0.1923960634384498, + "grad_norm": 1.1668494939804077, + "learning_rate": 0.0001997072165057054, + "loss": 4.6164, + "step": 103100 + }, + { + "epoch": 0.19248936899782831, + "grad_norm": 1.0573556423187256, + "learning_rate": 0.0001997068951841852, + "loss": 4.7643, + "step": 103150 + }, + { + "epoch": 0.19258267455720682, + "grad_norm": 1.0298389196395874, + "learning_rate": 0.00019970657368669989, + "loss": 4.5873, + "step": 103200 + }, + { + "epoch": 0.1926759801165853, + "grad_norm": 1.0277085304260254, + "learning_rate": 0.00019970625201325002, + "loss": 4.6273, + "step": 103250 + }, + { + "epoch": 0.1927692856759638, + "grad_norm": 0.9712157249450684, + "learning_rate": 0.00019970593016383617, + "loss": 4.4374, + "step": 103300 + }, + { + "epoch": 0.19286259123534227, + "grad_norm": 0.8751323223114014, + "learning_rate": 0.00019970560813845886, + "loss": 4.6056, + "step": 103350 + }, + { + "epoch": 0.19295589679472078, + "grad_norm": 1.0707224607467651, + "learning_rate": 0.00019970528593711874, + "loss": 4.9276, + "step": 103400 + }, + { + "epoch": 0.19304920235409925, + "grad_norm": 1.120894193649292, + "learning_rate": 0.00019970496355981628, + "loss": 4.5104, + "step": 103450 + }, + { + "epoch": 0.19314250791347776, + "grad_norm": 0.9309062361717224, + "learning_rate": 0.00019970464100655213, + "loss": 4.775, + "step": 103500 + }, + { + "epoch": 0.19323581347285623, + "grad_norm": 1.098646879196167, + "learning_rate": 0.00019970431827732684, + "loss": 4.6697, + "step": 103550 + }, + { + "epoch": 0.19332911903223474, + "grad_norm": 1.0255775451660156, + "learning_rate": 0.00019970399537214096, + "loss": 4.7094, + "step": 103600 + }, + { + "epoch": 0.19342242459161324, + "grad_norm": 0.9653803110122681, + "learning_rate": 0.00019970367229099506, + "loss": 4.6016, + "step": 103650 + }, + { + "epoch": 0.19351573015099172, + "grad_norm": 1.1465661525726318, + "learning_rate": 0.00019970334903388976, + "loss": 4.892, + "step": 103700 + }, + { + "epoch": 0.19360903571037022, + "grad_norm": 1.0665031671524048, + "learning_rate": 0.00019970302560082557, + "loss": 4.8088, + "step": 103750 + }, + { + "epoch": 0.1937023412697487, + "grad_norm": 1.180091381072998, + "learning_rate": 0.00019970270199180307, + "loss": 4.7068, + "step": 103800 + }, + { + "epoch": 0.1937956468291272, + "grad_norm": 0.9207305312156677, + "learning_rate": 0.00019970237820682285, + "loss": 4.6804, + "step": 103850 + }, + { + "epoch": 0.19388895238850568, + "grad_norm": 0.6405048370361328, + "learning_rate": 0.0001997020542458855, + "loss": 4.6396, + "step": 103900 + }, + { + "epoch": 0.19398225794788418, + "grad_norm": 0.9103243947029114, + "learning_rate": 0.00019970173010899154, + "loss": 4.7101, + "step": 103950 + }, + { + "epoch": 0.19407556350726268, + "grad_norm": 1.086352825164795, + "learning_rate": 0.00019970140579614158, + "loss": 4.8022, + "step": 104000 + }, + { + "epoch": 0.19416886906664116, + "grad_norm": 0.9352219104766846, + "learning_rate": 0.0001997010813073362, + "loss": 4.759, + "step": 104050 + }, + { + "epoch": 0.19426217462601966, + "grad_norm": 0.8962497115135193, + "learning_rate": 0.00019970075664257593, + "loss": 4.7071, + "step": 104100 + }, + { + "epoch": 0.19435548018539814, + "grad_norm": 1.1224924325942993, + "learning_rate": 0.00019970043180186139, + "loss": 4.8363, + "step": 104150 + }, + { + "epoch": 0.19444878574477664, + "grad_norm": 0.8941819071769714, + "learning_rate": 0.00019970010678519308, + "loss": 4.7968, + "step": 104200 + }, + { + "epoch": 0.19454209130415512, + "grad_norm": 1.0338774919509888, + "learning_rate": 0.00019969978159257168, + "loss": 4.7946, + "step": 104250 + }, + { + "epoch": 0.19463539686353362, + "grad_norm": 1.0611014366149902, + "learning_rate": 0.0001996994562239977, + "loss": 4.7344, + "step": 104300 + }, + { + "epoch": 0.19472870242291213, + "grad_norm": 0.9688970446586609, + "learning_rate": 0.00019969913067947168, + "loss": 4.6926, + "step": 104350 + }, + { + "epoch": 0.1948220079822906, + "grad_norm": 0.716977059841156, + "learning_rate": 0.00019969880495899426, + "loss": 4.775, + "step": 104400 + }, + { + "epoch": 0.1949153135416691, + "grad_norm": 1.210655927658081, + "learning_rate": 0.000199698479062566, + "loss": 4.7247, + "step": 104450 + }, + { + "epoch": 0.19500861910104758, + "grad_norm": 1.2916840314865112, + "learning_rate": 0.0001996981529901875, + "loss": 4.6479, + "step": 104500 + }, + { + "epoch": 0.19510192466042608, + "grad_norm": 1.0678060054779053, + "learning_rate": 0.00019969782674185923, + "loss": 4.7438, + "step": 104550 + }, + { + "epoch": 0.19519523021980456, + "grad_norm": 0.8926165699958801, + "learning_rate": 0.0001996975003175819, + "loss": 4.7003, + "step": 104600 + }, + { + "epoch": 0.19528853577918306, + "grad_norm": 0.8851156234741211, + "learning_rate": 0.000199697173717356, + "loss": 4.7207, + "step": 104650 + }, + { + "epoch": 0.19538184133856157, + "grad_norm": 0.8675005435943604, + "learning_rate": 0.0001996968469411821, + "loss": 4.4109, + "step": 104700 + }, + { + "epoch": 0.19547514689794004, + "grad_norm": 0.9853196740150452, + "learning_rate": 0.00019969651998906084, + "loss": 4.6939, + "step": 104750 + }, + { + "epoch": 0.19556845245731855, + "grad_norm": 0.8662623167037964, + "learning_rate": 0.0001996961928609928, + "loss": 4.763, + "step": 104800 + }, + { + "epoch": 0.19566175801669702, + "grad_norm": 1.1778194904327393, + "learning_rate": 0.00019969586555697845, + "loss": 4.6761, + "step": 104850 + }, + { + "epoch": 0.19575506357607553, + "grad_norm": 1.2361232042312622, + "learning_rate": 0.0001996955380770185, + "loss": 4.7453, + "step": 104900 + }, + { + "epoch": 0.195848369135454, + "grad_norm": 1.2205801010131836, + "learning_rate": 0.00019969521042111343, + "loss": 4.8602, + "step": 104950 + }, + { + "epoch": 0.1959416746948325, + "grad_norm": 1.0522173643112183, + "learning_rate": 0.00019969488258926384, + "loss": 4.8686, + "step": 105000 + }, + { + "epoch": 0.1959416746948325, + "eval_loss": 4.90353536605835, + "eval_runtime": 230.4752, + "eval_samples_per_second": 11.316, + "eval_steps_per_second": 11.316, + "eval_tts_loss": 7.433045758504386, + "step": 105000 + }, + { + "epoch": 0.19603498025421098, + "grad_norm": 0.8923590183258057, + "learning_rate": 0.00019969455458147036, + "loss": 4.6476, + "step": 105050 + }, + { + "epoch": 0.1961282858135895, + "grad_norm": 1.0533981323242188, + "learning_rate": 0.00019969422639773353, + "loss": 4.7786, + "step": 105100 + }, + { + "epoch": 0.196221591372968, + "grad_norm": 1.0477826595306396, + "learning_rate": 0.00019969389803805391, + "loss": 4.6004, + "step": 105150 + }, + { + "epoch": 0.19631489693234647, + "grad_norm": 1.089428186416626, + "learning_rate": 0.00019969356950243214, + "loss": 4.6546, + "step": 105200 + }, + { + "epoch": 0.19640820249172497, + "grad_norm": 0.7010145783424377, + "learning_rate": 0.00019969324079086872, + "loss": 4.6761, + "step": 105250 + }, + { + "epoch": 0.19650150805110345, + "grad_norm": 0.9401247501373291, + "learning_rate": 0.0001996929119033643, + "loss": 4.5199, + "step": 105300 + }, + { + "epoch": 0.19659481361048195, + "grad_norm": 1.1464163064956665, + "learning_rate": 0.00019969258283991943, + "loss": 4.7447, + "step": 105350 + }, + { + "epoch": 0.19668811916986043, + "grad_norm": 0.9506011605262756, + "learning_rate": 0.0001996922536005347, + "loss": 4.9001, + "step": 105400 + }, + { + "epoch": 0.19678142472923893, + "grad_norm": 1.0676487684249878, + "learning_rate": 0.00019969192418521064, + "loss": 4.5418, + "step": 105450 + }, + { + "epoch": 0.19687473028861743, + "grad_norm": 1.0197359323501587, + "learning_rate": 0.0001996915945939479, + "loss": 4.706, + "step": 105500 + }, + { + "epoch": 0.1969680358479959, + "grad_norm": 1.120069980621338, + "learning_rate": 0.00019969126482674706, + "loss": 4.6804, + "step": 105550 + }, + { + "epoch": 0.1970613414073744, + "grad_norm": 0.9666004776954651, + "learning_rate": 0.00019969093488360863, + "loss": 4.792, + "step": 105600 + }, + { + "epoch": 0.1971546469667529, + "grad_norm": 1.1865931749343872, + "learning_rate": 0.0001996906047645333, + "loss": 4.6457, + "step": 105650 + }, + { + "epoch": 0.1972479525261314, + "grad_norm": 0.9605655074119568, + "learning_rate": 0.00019969027446952155, + "loss": 4.8505, + "step": 105700 + }, + { + "epoch": 0.19734125808550987, + "grad_norm": 0.6585858464241028, + "learning_rate": 0.00019968994399857404, + "loss": 4.5811, + "step": 105750 + }, + { + "epoch": 0.19743456364488837, + "grad_norm": 1.0499614477157593, + "learning_rate": 0.0001996896133516913, + "loss": 4.661, + "step": 105800 + }, + { + "epoch": 0.19752786920426688, + "grad_norm": 1.0251386165618896, + "learning_rate": 0.0001996892825288739, + "loss": 4.8558, + "step": 105850 + }, + { + "epoch": 0.19762117476364535, + "grad_norm": 1.2540901899337769, + "learning_rate": 0.00019968895153012252, + "loss": 4.5407, + "step": 105900 + }, + { + "epoch": 0.19771448032302386, + "grad_norm": 0.6922102570533752, + "learning_rate": 0.00019968862035543765, + "loss": 4.5388, + "step": 105950 + }, + { + "epoch": 0.19780778588240233, + "grad_norm": 1.0659449100494385, + "learning_rate": 0.00019968828900481992, + "loss": 4.6475, + "step": 106000 + }, + { + "epoch": 0.19790109144178084, + "grad_norm": 0.8389386534690857, + "learning_rate": 0.00019968795747826986, + "loss": 4.555, + "step": 106050 + }, + { + "epoch": 0.1979943970011593, + "grad_norm": 0.8430420160293579, + "learning_rate": 0.00019968762577578814, + "loss": 4.7839, + "step": 106100 + }, + { + "epoch": 0.19808770256053781, + "grad_norm": 1.0360347032546997, + "learning_rate": 0.00019968729389737528, + "loss": 4.7264, + "step": 106150 + }, + { + "epoch": 0.19818100811991632, + "grad_norm": 0.9396027326583862, + "learning_rate": 0.0001996869618430319, + "loss": 4.5931, + "step": 106200 + }, + { + "epoch": 0.1982743136792948, + "grad_norm": 0.8887374997138977, + "learning_rate": 0.00019968662961275855, + "loss": 4.6601, + "step": 106250 + }, + { + "epoch": 0.1983676192386733, + "grad_norm": 1.1754348278045654, + "learning_rate": 0.00019968629720655586, + "loss": 4.7664, + "step": 106300 + }, + { + "epoch": 0.19846092479805177, + "grad_norm": 0.6698839068412781, + "learning_rate": 0.0001996859646244244, + "loss": 4.5077, + "step": 106350 + }, + { + "epoch": 0.19855423035743028, + "grad_norm": 0.944202184677124, + "learning_rate": 0.00019968563186636474, + "loss": 4.6335, + "step": 106400 + }, + { + "epoch": 0.19864753591680875, + "grad_norm": 1.1213611364364624, + "learning_rate": 0.00019968529893237745, + "loss": 4.6235, + "step": 106450 + }, + { + "epoch": 0.19874084147618726, + "grad_norm": 1.068108081817627, + "learning_rate": 0.0001996849658224632, + "loss": 4.6891, + "step": 106500 + }, + { + "epoch": 0.19883414703556573, + "grad_norm": 1.1877700090408325, + "learning_rate": 0.0001996846325366225, + "loss": 4.5616, + "step": 106550 + }, + { + "epoch": 0.19892745259494424, + "grad_norm": 0.9596836566925049, + "learning_rate": 0.00019968429907485597, + "loss": 4.612, + "step": 106600 + }, + { + "epoch": 0.19902075815432274, + "grad_norm": 0.8319620490074158, + "learning_rate": 0.0001996839654371642, + "loss": 4.6712, + "step": 106650 + }, + { + "epoch": 0.19911406371370122, + "grad_norm": 0.921432375907898, + "learning_rate": 0.00019968363162354774, + "loss": 4.7998, + "step": 106700 + }, + { + "epoch": 0.19920736927307972, + "grad_norm": 1.089003324508667, + "learning_rate": 0.00019968329763400726, + "loss": 4.5917, + "step": 106750 + }, + { + "epoch": 0.1993006748324582, + "grad_norm": 1.233829140663147, + "learning_rate": 0.00019968296346854327, + "loss": 4.8402, + "step": 106800 + }, + { + "epoch": 0.1993939803918367, + "grad_norm": 1.0184001922607422, + "learning_rate": 0.0001996826291271564, + "loss": 4.7715, + "step": 106850 + }, + { + "epoch": 0.19948728595121518, + "grad_norm": 1.058369517326355, + "learning_rate": 0.0001996822946098472, + "loss": 4.6193, + "step": 106900 + }, + { + "epoch": 0.19958059151059368, + "grad_norm": 1.2557109594345093, + "learning_rate": 0.00019968195991661633, + "loss": 4.9265, + "step": 106950 + }, + { + "epoch": 0.19967389706997218, + "grad_norm": 0.7956662178039551, + "learning_rate": 0.00019968162504746434, + "loss": 4.8568, + "step": 107000 + }, + { + "epoch": 0.19976720262935066, + "grad_norm": 0.9638285040855408, + "learning_rate": 0.00019968129000239179, + "loss": 4.6949, + "step": 107050 + }, + { + "epoch": 0.19986050818872916, + "grad_norm": 1.0805922746658325, + "learning_rate": 0.0001996809547813993, + "loss": 4.7621, + "step": 107100 + }, + { + "epoch": 0.19995381374810764, + "grad_norm": 0.9019108414649963, + "learning_rate": 0.00019968061938448748, + "loss": 4.4901, + "step": 107150 + }, + { + "epoch": 0.20004711930748614, + "grad_norm": 0.7746037840843201, + "learning_rate": 0.0001996802838116569, + "loss": 4.4827, + "step": 107200 + }, + { + "epoch": 0.20014042486686462, + "grad_norm": 1.065016508102417, + "learning_rate": 0.00019967994806290818, + "loss": 4.8291, + "step": 107250 + }, + { + "epoch": 0.20023373042624312, + "grad_norm": 1.0848886966705322, + "learning_rate": 0.00019967961213824187, + "loss": 4.7838, + "step": 107300 + }, + { + "epoch": 0.20032703598562163, + "grad_norm": 0.9382323026657104, + "learning_rate": 0.0001996792760376586, + "loss": 4.6825, + "step": 107350 + }, + { + "epoch": 0.2004203415450001, + "grad_norm": 1.0932012796401978, + "learning_rate": 0.00019967893976115893, + "loss": 4.8105, + "step": 107400 + }, + { + "epoch": 0.2005136471043786, + "grad_norm": 0.8511140942573547, + "learning_rate": 0.00019967860330874344, + "loss": 4.6117, + "step": 107450 + }, + { + "epoch": 0.20060695266375708, + "grad_norm": 1.1722475290298462, + "learning_rate": 0.00019967826668041281, + "loss": 4.6226, + "step": 107500 + }, + { + "epoch": 0.20070025822313559, + "grad_norm": 1.120883822441101, + "learning_rate": 0.00019967792987616757, + "loss": 4.8838, + "step": 107550 + }, + { + "epoch": 0.20079356378251406, + "grad_norm": 0.9528012871742249, + "learning_rate": 0.0001996775928960083, + "loss": 4.7664, + "step": 107600 + }, + { + "epoch": 0.20088686934189257, + "grad_norm": 0.9668023586273193, + "learning_rate": 0.0001996772557399356, + "loss": 4.6156, + "step": 107650 + }, + { + "epoch": 0.20098017490127104, + "grad_norm": 0.8092545866966248, + "learning_rate": 0.00019967691840795014, + "loss": 4.8754, + "step": 107700 + }, + { + "epoch": 0.20107348046064955, + "grad_norm": 1.1513761281967163, + "learning_rate": 0.0001996765809000524, + "loss": 4.7403, + "step": 107750 + }, + { + "epoch": 0.20116678602002805, + "grad_norm": 1.2313563823699951, + "learning_rate": 0.00019967624321624307, + "loss": 4.7554, + "step": 107800 + }, + { + "epoch": 0.20126009157940652, + "grad_norm": 1.2222445011138916, + "learning_rate": 0.0001996759053565227, + "loss": 4.6705, + "step": 107850 + }, + { + "epoch": 0.20135339713878503, + "grad_norm": 1.0731276273727417, + "learning_rate": 0.00019967556732089189, + "loss": 4.7103, + "step": 107900 + }, + { + "epoch": 0.2014467026981635, + "grad_norm": 0.8914058804512024, + "learning_rate": 0.00019967522910935127, + "loss": 4.7109, + "step": 107950 + }, + { + "epoch": 0.201540008257542, + "grad_norm": 0.9752347469329834, + "learning_rate": 0.00019967489072190137, + "loss": 4.8808, + "step": 108000 + }, + { + "epoch": 0.201540008257542, + "eval_loss": 4.898900508880615, + "eval_runtime": 231.0932, + "eval_samples_per_second": 11.285, + "eval_steps_per_second": 11.285, + "eval_tts_loss": 7.3902602159986674, + "step": 108000 + }, + { + "epoch": 0.20163331381692048, + "grad_norm": 1.1584863662719727, + "learning_rate": 0.00019967455215854285, + "loss": 4.6683, + "step": 108050 + }, + { + "epoch": 0.201726619376299, + "grad_norm": 1.2001816034317017, + "learning_rate": 0.00019967421341927628, + "loss": 4.6347, + "step": 108100 + }, + { + "epoch": 0.2018199249356775, + "grad_norm": 1.0059622526168823, + "learning_rate": 0.00019967387450410225, + "loss": 4.9153, + "step": 108150 + }, + { + "epoch": 0.20191323049505597, + "grad_norm": 0.6907665729522705, + "learning_rate": 0.00019967353541302138, + "loss": 4.6052, + "step": 108200 + }, + { + "epoch": 0.20200653605443447, + "grad_norm": 0.9193375706672668, + "learning_rate": 0.00019967319614603427, + "loss": 4.7746, + "step": 108250 + }, + { + "epoch": 0.20209984161381295, + "grad_norm": 0.8793426752090454, + "learning_rate": 0.00019967285670314147, + "loss": 4.6992, + "step": 108300 + }, + { + "epoch": 0.20219314717319145, + "grad_norm": 1.1513392925262451, + "learning_rate": 0.00019967251708434365, + "loss": 4.9056, + "step": 108350 + }, + { + "epoch": 0.20228645273256993, + "grad_norm": 1.0788843631744385, + "learning_rate": 0.00019967217728964135, + "loss": 4.6675, + "step": 108400 + }, + { + "epoch": 0.20237975829194843, + "grad_norm": 0.9058928489685059, + "learning_rate": 0.0001996718373190352, + "loss": 4.7488, + "step": 108450 + }, + { + "epoch": 0.20247306385132693, + "grad_norm": 0.7392362952232361, + "learning_rate": 0.0001996714971725258, + "loss": 4.6251, + "step": 108500 + }, + { + "epoch": 0.2025663694107054, + "grad_norm": 0.9372434616088867, + "learning_rate": 0.00019967115685011377, + "loss": 4.9667, + "step": 108550 + }, + { + "epoch": 0.20265967497008391, + "grad_norm": 1.1257874965667725, + "learning_rate": 0.00019967081635179968, + "loss": 4.7603, + "step": 108600 + }, + { + "epoch": 0.2027529805294624, + "grad_norm": 0.8157814741134644, + "learning_rate": 0.0001996704756775841, + "loss": 4.5966, + "step": 108650 + }, + { + "epoch": 0.2028462860888409, + "grad_norm": 1.0224437713623047, + "learning_rate": 0.0001996701348274677, + "loss": 4.6797, + "step": 108700 + }, + { + "epoch": 0.20293959164821937, + "grad_norm": 1.2594748735427856, + "learning_rate": 0.00019966979380145103, + "loss": 4.6246, + "step": 108750 + }, + { + "epoch": 0.20303289720759787, + "grad_norm": 0.936982274055481, + "learning_rate": 0.00019966945259953473, + "loss": 4.7162, + "step": 108800 + }, + { + "epoch": 0.20312620276697638, + "grad_norm": 1.1596720218658447, + "learning_rate": 0.00019966911122171936, + "loss": 4.6277, + "step": 108850 + }, + { + "epoch": 0.20321950832635485, + "grad_norm": 0.9620667099952698, + "learning_rate": 0.00019966876966800556, + "loss": 4.7468, + "step": 108900 + }, + { + "epoch": 0.20331281388573336, + "grad_norm": 1.2302641868591309, + "learning_rate": 0.00019966842793839394, + "loss": 4.6094, + "step": 108950 + }, + { + "epoch": 0.20340611944511183, + "grad_norm": 1.131081461906433, + "learning_rate": 0.00019966808603288504, + "loss": 4.628, + "step": 109000 + }, + { + "epoch": 0.20349942500449034, + "grad_norm": 0.9959622621536255, + "learning_rate": 0.00019966774395147955, + "loss": 4.8578, + "step": 109050 + }, + { + "epoch": 0.2035927305638688, + "grad_norm": 1.1797744035720825, + "learning_rate": 0.00019966740169417803, + "loss": 4.8179, + "step": 109100 + }, + { + "epoch": 0.20368603612324732, + "grad_norm": 1.0603262186050415, + "learning_rate": 0.00019966705926098102, + "loss": 4.6757, + "step": 109150 + }, + { + "epoch": 0.2037793416826258, + "grad_norm": 1.2537606954574585, + "learning_rate": 0.00019966671665188924, + "loss": 4.5657, + "step": 109200 + }, + { + "epoch": 0.2038726472420043, + "grad_norm": 0.8469638228416443, + "learning_rate": 0.0001996663738669032, + "loss": 4.5299, + "step": 109250 + }, + { + "epoch": 0.2039659528013828, + "grad_norm": 1.1526724100112915, + "learning_rate": 0.0001996660309060236, + "loss": 4.6851, + "step": 109300 + }, + { + "epoch": 0.20405925836076128, + "grad_norm": 0.9065315127372742, + "learning_rate": 0.00019966568776925097, + "loss": 4.6405, + "step": 109350 + }, + { + "epoch": 0.20415256392013978, + "grad_norm": 1.0660070180892944, + "learning_rate": 0.00019966534445658591, + "loss": 4.8246, + "step": 109400 + }, + { + "epoch": 0.20424586947951825, + "grad_norm": 0.9208210110664368, + "learning_rate": 0.0001996650009680291, + "loss": 4.8364, + "step": 109450 + }, + { + "epoch": 0.20433917503889676, + "grad_norm": 1.1117137670516968, + "learning_rate": 0.00019966465730358106, + "loss": 4.6232, + "step": 109500 + }, + { + "epoch": 0.20443248059827523, + "grad_norm": 0.7682850360870361, + "learning_rate": 0.00019966431346324246, + "loss": 4.6112, + "step": 109550 + }, + { + "epoch": 0.20452578615765374, + "grad_norm": 0.9791049957275391, + "learning_rate": 0.0001996639694470139, + "loss": 4.739, + "step": 109600 + }, + { + "epoch": 0.20461909171703224, + "grad_norm": 0.7025151252746582, + "learning_rate": 0.00019966362525489592, + "loss": 4.625, + "step": 109650 + }, + { + "epoch": 0.20471239727641072, + "grad_norm": 0.9386808276176453, + "learning_rate": 0.00019966328088688923, + "loss": 4.7254, + "step": 109700 + }, + { + "epoch": 0.20480570283578922, + "grad_norm": 0.9682460427284241, + "learning_rate": 0.00019966293634299435, + "loss": 4.6955, + "step": 109750 + }, + { + "epoch": 0.2048990083951677, + "grad_norm": 0.9403902292251587, + "learning_rate": 0.00019966259162321196, + "loss": 4.648, + "step": 109800 + }, + { + "epoch": 0.2049923139545462, + "grad_norm": 0.8865294456481934, + "learning_rate": 0.00019966224672754259, + "loss": 4.6593, + "step": 109850 + }, + { + "epoch": 0.20508561951392468, + "grad_norm": 0.9685643315315247, + "learning_rate": 0.0001996619016559869, + "loss": 4.8078, + "step": 109900 + }, + { + "epoch": 0.20517892507330318, + "grad_norm": 1.1610651016235352, + "learning_rate": 0.00019966155640854552, + "loss": 4.7153, + "step": 109950 + }, + { + "epoch": 0.20527223063268168, + "grad_norm": 1.3731842041015625, + "learning_rate": 0.000199661210985219, + "loss": 4.6649, + "step": 110000 + }, + { + "epoch": 0.20536553619206016, + "grad_norm": 0.9859686493873596, + "learning_rate": 0.00019966086538600797, + "loss": 4.649, + "step": 110050 + }, + { + "epoch": 0.20545884175143866, + "grad_norm": 0.9828516244888306, + "learning_rate": 0.00019966051961091305, + "loss": 4.6669, + "step": 110100 + }, + { + "epoch": 0.20555214731081714, + "grad_norm": 1.1860650777816772, + "learning_rate": 0.00019966017365993487, + "loss": 4.6617, + "step": 110150 + }, + { + "epoch": 0.20564545287019564, + "grad_norm": 0.7568049430847168, + "learning_rate": 0.00019965982753307403, + "loss": 4.5716, + "step": 110200 + }, + { + "epoch": 0.20573875842957412, + "grad_norm": 1.4290099143981934, + "learning_rate": 0.0001996594812303311, + "loss": 4.7415, + "step": 110250 + }, + { + "epoch": 0.20583206398895262, + "grad_norm": 1.090683937072754, + "learning_rate": 0.00019965913475170673, + "loss": 4.7548, + "step": 110300 + }, + { + "epoch": 0.2059253695483311, + "grad_norm": 1.1351553201675415, + "learning_rate": 0.00019965878809720153, + "loss": 4.6024, + "step": 110350 + }, + { + "epoch": 0.2060186751077096, + "grad_norm": 0.8278642296791077, + "learning_rate": 0.0001996584412668161, + "loss": 4.7692, + "step": 110400 + }, + { + "epoch": 0.2061119806670881, + "grad_norm": 0.9527729749679565, + "learning_rate": 0.00019965809426055104, + "loss": 4.6836, + "step": 110450 + }, + { + "epoch": 0.20620528622646658, + "grad_norm": 0.9294393062591553, + "learning_rate": 0.000199657747078407, + "loss": 4.7799, + "step": 110500 + }, + { + "epoch": 0.2062985917858451, + "grad_norm": 0.8470353484153748, + "learning_rate": 0.00019965739972038456, + "loss": 4.6028, + "step": 110550 + }, + { + "epoch": 0.20639189734522356, + "grad_norm": 1.0058228969573975, + "learning_rate": 0.00019965705218648436, + "loss": 4.7502, + "step": 110600 + }, + { + "epoch": 0.20648520290460207, + "grad_norm": 1.0412211418151855, + "learning_rate": 0.000199656704476707, + "loss": 4.7988, + "step": 110650 + }, + { + "epoch": 0.20657850846398054, + "grad_norm": 1.1575844287872314, + "learning_rate": 0.00019965635659105306, + "loss": 4.905, + "step": 110700 + }, + { + "epoch": 0.20667181402335905, + "grad_norm": 1.1507608890533447, + "learning_rate": 0.0001996560085295232, + "loss": 4.708, + "step": 110750 + }, + { + "epoch": 0.20676511958273755, + "grad_norm": 1.1214720010757446, + "learning_rate": 0.00019965566029211807, + "loss": 4.7185, + "step": 110800 + }, + { + "epoch": 0.20685842514211603, + "grad_norm": 0.8996137380599976, + "learning_rate": 0.00019965531187883816, + "loss": 4.6913, + "step": 110850 + }, + { + "epoch": 0.20695173070149453, + "grad_norm": 1.0486303567886353, + "learning_rate": 0.0001996549632896842, + "loss": 4.9886, + "step": 110900 + }, + { + "epoch": 0.207045036260873, + "grad_norm": 0.9184983968734741, + "learning_rate": 0.00019965461452465676, + "loss": 4.4322, + "step": 110950 + }, + { + "epoch": 0.2071383418202515, + "grad_norm": 1.004927396774292, + "learning_rate": 0.00019965426558375647, + "loss": 4.7627, + "step": 111000 + }, + { + "epoch": 0.2071383418202515, + "eval_loss": 4.893270015716553, + "eval_runtime": 228.2153, + "eval_samples_per_second": 11.428, + "eval_steps_per_second": 11.428, + "eval_tts_loss": 7.3785387509319635, + "step": 111000 + }, + { + "epoch": 0.20723164737962999, + "grad_norm": 1.0662178993225098, + "learning_rate": 0.0001996539164669839, + "loss": 4.5831, + "step": 111050 + }, + { + "epoch": 0.2073249529390085, + "grad_norm": 1.0260896682739258, + "learning_rate": 0.00019965356717433971, + "loss": 4.6671, + "step": 111100 + }, + { + "epoch": 0.207418258498387, + "grad_norm": 0.8488427400588989, + "learning_rate": 0.00019965321770582455, + "loss": 4.8138, + "step": 111150 + }, + { + "epoch": 0.20751156405776547, + "grad_norm": 0.9575690627098083, + "learning_rate": 0.00019965286806143898, + "loss": 4.5885, + "step": 111200 + }, + { + "epoch": 0.20760486961714397, + "grad_norm": 0.9709866046905518, + "learning_rate": 0.00019965251824118361, + "loss": 4.721, + "step": 111250 + }, + { + "epoch": 0.20769817517652245, + "grad_norm": 1.058888554573059, + "learning_rate": 0.00019965216824505908, + "loss": 4.681, + "step": 111300 + }, + { + "epoch": 0.20779148073590095, + "grad_norm": 0.9510217905044556, + "learning_rate": 0.00019965181807306604, + "loss": 4.84, + "step": 111350 + }, + { + "epoch": 0.20788478629527943, + "grad_norm": 1.249549388885498, + "learning_rate": 0.00019965146772520505, + "loss": 4.5905, + "step": 111400 + }, + { + "epoch": 0.20797809185465793, + "grad_norm": 0.9404703378677368, + "learning_rate": 0.00019965111720147673, + "loss": 4.7374, + "step": 111450 + }, + { + "epoch": 0.20807139741403644, + "grad_norm": 0.9950361251831055, + "learning_rate": 0.00019965076650188177, + "loss": 4.7764, + "step": 111500 + }, + { + "epoch": 0.2081647029734149, + "grad_norm": 1.015062689781189, + "learning_rate": 0.0001996504156264207, + "loss": 4.7205, + "step": 111550 + }, + { + "epoch": 0.20825800853279341, + "grad_norm": 1.0537135601043701, + "learning_rate": 0.00019965006457509422, + "loss": 4.7283, + "step": 111600 + }, + { + "epoch": 0.2083513140921719, + "grad_norm": 0.9111753106117249, + "learning_rate": 0.00019964971334790287, + "loss": 4.5574, + "step": 111650 + }, + { + "epoch": 0.2084446196515504, + "grad_norm": 1.2378709316253662, + "learning_rate": 0.00019964936194484737, + "loss": 4.5554, + "step": 111700 + }, + { + "epoch": 0.20853792521092887, + "grad_norm": 0.8984060883522034, + "learning_rate": 0.00019964901036592823, + "loss": 4.6334, + "step": 111750 + }, + { + "epoch": 0.20863123077030737, + "grad_norm": 0.6208471655845642, + "learning_rate": 0.0001996486586111461, + "loss": 4.5307, + "step": 111800 + }, + { + "epoch": 0.20872453632968585, + "grad_norm": 1.0021463632583618, + "learning_rate": 0.00019964830668050168, + "loss": 4.8079, + "step": 111850 + }, + { + "epoch": 0.20881784188906435, + "grad_norm": 0.6217372417449951, + "learning_rate": 0.00019964795457399549, + "loss": 4.5041, + "step": 111900 + }, + { + "epoch": 0.20891114744844286, + "grad_norm": 1.1446421146392822, + "learning_rate": 0.00019964760229162823, + "loss": 4.656, + "step": 111950 + }, + { + "epoch": 0.20900445300782133, + "grad_norm": 0.9120539426803589, + "learning_rate": 0.00019964724983340045, + "loss": 4.6973, + "step": 112000 + }, + { + "epoch": 0.20909775856719984, + "grad_norm": 0.6660919189453125, + "learning_rate": 0.0001996468971993128, + "loss": 4.584, + "step": 112050 + }, + { + "epoch": 0.2091910641265783, + "grad_norm": 1.0424081087112427, + "learning_rate": 0.00019964654438936594, + "loss": 4.7912, + "step": 112100 + }, + { + "epoch": 0.20928436968595682, + "grad_norm": 0.8505585789680481, + "learning_rate": 0.00019964619140356047, + "loss": 4.6274, + "step": 112150 + }, + { + "epoch": 0.2093776752453353, + "grad_norm": 1.0397758483886719, + "learning_rate": 0.00019964583824189699, + "loss": 4.4355, + "step": 112200 + }, + { + "epoch": 0.2094709808047138, + "grad_norm": 1.0277235507965088, + "learning_rate": 0.00019964548490437612, + "loss": 4.7227, + "step": 112250 + }, + { + "epoch": 0.2095642863640923, + "grad_norm": 1.0164071321487427, + "learning_rate": 0.0001996451313909985, + "loss": 4.6742, + "step": 112300 + }, + { + "epoch": 0.20965759192347078, + "grad_norm": 1.1814101934432983, + "learning_rate": 0.0001996447777017648, + "loss": 4.5611, + "step": 112350 + }, + { + "epoch": 0.20975089748284928, + "grad_norm": 0.9306598901748657, + "learning_rate": 0.00019964442383667557, + "loss": 4.749, + "step": 112400 + }, + { + "epoch": 0.20984420304222776, + "grad_norm": 0.7609649896621704, + "learning_rate": 0.0001996440697957315, + "loss": 4.8032, + "step": 112450 + }, + { + "epoch": 0.20993750860160626, + "grad_norm": 0.7726313471794128, + "learning_rate": 0.00019964371557893313, + "loss": 4.6712, + "step": 112500 + }, + { + "epoch": 0.21003081416098474, + "grad_norm": 0.718001127243042, + "learning_rate": 0.00019964336118628116, + "loss": 4.3786, + "step": 112550 + }, + { + "epoch": 0.21012411972036324, + "grad_norm": 1.221449375152588, + "learning_rate": 0.00019964300661777618, + "loss": 4.6134, + "step": 112600 + }, + { + "epoch": 0.21021742527974174, + "grad_norm": 0.9702039361000061, + "learning_rate": 0.0001996426518734188, + "loss": 4.4521, + "step": 112650 + }, + { + "epoch": 0.21031073083912022, + "grad_norm": 1.0901730060577393, + "learning_rate": 0.00019964229695320973, + "loss": 4.7553, + "step": 112700 + }, + { + "epoch": 0.21040403639849872, + "grad_norm": 1.4701701402664185, + "learning_rate": 0.00019964194185714953, + "loss": 4.4546, + "step": 112750 + }, + { + "epoch": 0.2104973419578772, + "grad_norm": 1.0534309148788452, + "learning_rate": 0.0001996415865852388, + "loss": 4.618, + "step": 112800 + }, + { + "epoch": 0.2105906475172557, + "grad_norm": 0.875619649887085, + "learning_rate": 0.00019964123113747824, + "loss": 4.7289, + "step": 112850 + }, + { + "epoch": 0.21068395307663418, + "grad_norm": 1.1053240299224854, + "learning_rate": 0.00019964087551386841, + "loss": 4.6377, + "step": 112900 + }, + { + "epoch": 0.21077725863601268, + "grad_norm": 1.1777266263961792, + "learning_rate": 0.00019964051971440997, + "loss": 4.9208, + "step": 112950 + }, + { + "epoch": 0.21087056419539116, + "grad_norm": 0.9478060007095337, + "learning_rate": 0.00019964016373910358, + "loss": 4.5935, + "step": 113000 + }, + { + "epoch": 0.21096386975476966, + "grad_norm": 1.104618787765503, + "learning_rate": 0.0001996398075879498, + "loss": 4.7408, + "step": 113050 + }, + { + "epoch": 0.21105717531414817, + "grad_norm": 0.9139821529388428, + "learning_rate": 0.00019963945126094928, + "loss": 4.8311, + "step": 113100 + }, + { + "epoch": 0.21115048087352664, + "grad_norm": 1.1723382472991943, + "learning_rate": 0.00019963909475810267, + "loss": 4.7919, + "step": 113150 + }, + { + "epoch": 0.21124378643290515, + "grad_norm": 1.1692183017730713, + "learning_rate": 0.0001996387380794106, + "loss": 4.8049, + "step": 113200 + }, + { + "epoch": 0.21133709199228362, + "grad_norm": 0.8972973823547363, + "learning_rate": 0.0001996383812248737, + "loss": 4.6703, + "step": 113250 + }, + { + "epoch": 0.21143039755166212, + "grad_norm": 0.8258083462715149, + "learning_rate": 0.00019963802419449258, + "loss": 4.6485, + "step": 113300 + }, + { + "epoch": 0.2115237031110406, + "grad_norm": 1.101173758506775, + "learning_rate": 0.00019963766698826785, + "loss": 4.5944, + "step": 113350 + }, + { + "epoch": 0.2116170086704191, + "grad_norm": 0.9222469925880432, + "learning_rate": 0.00019963730960620023, + "loss": 4.6801, + "step": 113400 + }, + { + "epoch": 0.2117103142297976, + "grad_norm": 0.8024877309799194, + "learning_rate": 0.00019963695204829022, + "loss": 4.6408, + "step": 113450 + }, + { + "epoch": 0.21180361978917608, + "grad_norm": 1.1959446668624878, + "learning_rate": 0.00019963659431453858, + "loss": 4.7532, + "step": 113500 + }, + { + "epoch": 0.2118969253485546, + "grad_norm": 1.03960382938385, + "learning_rate": 0.00019963623640494586, + "loss": 4.531, + "step": 113550 + }, + { + "epoch": 0.21199023090793306, + "grad_norm": 0.9562681317329407, + "learning_rate": 0.0001996358783195127, + "loss": 4.525, + "step": 113600 + }, + { + "epoch": 0.21208353646731157, + "grad_norm": 0.9814481735229492, + "learning_rate": 0.00019963552005823975, + "loss": 4.5483, + "step": 113650 + }, + { + "epoch": 0.21217684202669004, + "grad_norm": 0.948824942111969, + "learning_rate": 0.00019963516162112765, + "loss": 4.8581, + "step": 113700 + }, + { + "epoch": 0.21227014758606855, + "grad_norm": 1.0382654666900635, + "learning_rate": 0.00019963480300817704, + "loss": 4.7724, + "step": 113750 + }, + { + "epoch": 0.21236345314544705, + "grad_norm": 1.0841362476348877, + "learning_rate": 0.00019963444421938848, + "loss": 4.8569, + "step": 113800 + }, + { + "epoch": 0.21245675870482553, + "grad_norm": 1.138662338256836, + "learning_rate": 0.00019963408525476272, + "loss": 4.828, + "step": 113850 + }, + { + "epoch": 0.21255006426420403, + "grad_norm": 0.9034907817840576, + "learning_rate": 0.00019963372611430028, + "loss": 4.7168, + "step": 113900 + }, + { + "epoch": 0.2126433698235825, + "grad_norm": 0.9675794243812561, + "learning_rate": 0.0001996333667980019, + "loss": 4.9587, + "step": 113950 + }, + { + "epoch": 0.212736675382961, + "grad_norm": 0.9928723573684692, + "learning_rate": 0.0001996330073058681, + "loss": 4.6761, + "step": 114000 + }, + { + "epoch": 0.212736675382961, + "eval_loss": 4.887726783752441, + "eval_runtime": 228.3892, + "eval_samples_per_second": 11.419, + "eval_steps_per_second": 11.419, + "eval_tts_loss": 7.461525186266396, + "step": 114000 + }, + { + "epoch": 0.2128299809423395, + "grad_norm": 0.9984357953071594, + "learning_rate": 0.0001996326476378996, + "loss": 4.7151, + "step": 114050 + }, + { + "epoch": 0.212923286501718, + "grad_norm": 0.9879376888275146, + "learning_rate": 0.000199632287794097, + "loss": 4.5944, + "step": 114100 + }, + { + "epoch": 0.2130165920610965, + "grad_norm": 0.9942005276679993, + "learning_rate": 0.00019963192777446094, + "loss": 4.676, + "step": 114150 + }, + { + "epoch": 0.21310989762047497, + "grad_norm": 1.000113606452942, + "learning_rate": 0.00019963156757899205, + "loss": 4.5756, + "step": 114200 + }, + { + "epoch": 0.21320320317985347, + "grad_norm": 1.1187829971313477, + "learning_rate": 0.000199631207207691, + "loss": 4.5309, + "step": 114250 + }, + { + "epoch": 0.21329650873923195, + "grad_norm": 1.0895246267318726, + "learning_rate": 0.0001996308466605584, + "loss": 4.4908, + "step": 114300 + }, + { + "epoch": 0.21338981429861045, + "grad_norm": 0.7784030437469482, + "learning_rate": 0.00019963048593759486, + "loss": 4.4745, + "step": 114350 + }, + { + "epoch": 0.21348311985798893, + "grad_norm": 1.0729563236236572, + "learning_rate": 0.00019963012503880107, + "loss": 4.5451, + "step": 114400 + }, + { + "epoch": 0.21357642541736743, + "grad_norm": 1.1839163303375244, + "learning_rate": 0.00019962976396417764, + "loss": 4.7851, + "step": 114450 + }, + { + "epoch": 0.2136697309767459, + "grad_norm": 1.1860408782958984, + "learning_rate": 0.00019962940271372518, + "loss": 4.6238, + "step": 114500 + }, + { + "epoch": 0.2137630365361244, + "grad_norm": 0.9122569561004639, + "learning_rate": 0.00019962904128744438, + "loss": 4.6063, + "step": 114550 + }, + { + "epoch": 0.21385634209550292, + "grad_norm": 0.8689755797386169, + "learning_rate": 0.00019962867968533585, + "loss": 4.5032, + "step": 114600 + }, + { + "epoch": 0.2139496476548814, + "grad_norm": 0.8875397443771362, + "learning_rate": 0.0001996283179074002, + "loss": 4.7033, + "step": 114650 + }, + { + "epoch": 0.2140429532142599, + "grad_norm": 0.7156913876533508, + "learning_rate": 0.00019962795595363813, + "loss": 4.8056, + "step": 114700 + }, + { + "epoch": 0.21413625877363837, + "grad_norm": 1.1040927171707153, + "learning_rate": 0.00019962759382405025, + "loss": 4.6738, + "step": 114750 + }, + { + "epoch": 0.21422956433301688, + "grad_norm": 0.9362927675247192, + "learning_rate": 0.00019962723151863718, + "loss": 4.8287, + "step": 114800 + }, + { + "epoch": 0.21432286989239535, + "grad_norm": 0.8515648245811462, + "learning_rate": 0.00019962686903739958, + "loss": 4.4636, + "step": 114850 + }, + { + "epoch": 0.21441617545177385, + "grad_norm": 1.0025912523269653, + "learning_rate": 0.0001996265063803381, + "loss": 4.7396, + "step": 114900 + }, + { + "epoch": 0.21450948101115236, + "grad_norm": 0.7344082593917847, + "learning_rate": 0.00019962614354745336, + "loss": 4.5112, + "step": 114950 + }, + { + "epoch": 0.21460278657053083, + "grad_norm": 1.0541155338287354, + "learning_rate": 0.00019962578053874598, + "loss": 4.6548, + "step": 115000 + }, + { + "epoch": 0.21469609212990934, + "grad_norm": 0.9491979479789734, + "learning_rate": 0.00019962541735421666, + "loss": 4.7796, + "step": 115050 + }, + { + "epoch": 0.21478939768928781, + "grad_norm": 0.8249531388282776, + "learning_rate": 0.000199625053993866, + "loss": 4.7001, + "step": 115100 + }, + { + "epoch": 0.21488270324866632, + "grad_norm": 0.862076461315155, + "learning_rate": 0.00019962469045769465, + "loss": 4.4974, + "step": 115150 + }, + { + "epoch": 0.2149760088080448, + "grad_norm": 1.1294341087341309, + "learning_rate": 0.00019962432674570325, + "loss": 4.6363, + "step": 115200 + }, + { + "epoch": 0.2150693143674233, + "grad_norm": 0.7202677130699158, + "learning_rate": 0.0001996239628578924, + "loss": 4.6216, + "step": 115250 + }, + { + "epoch": 0.2151626199268018, + "grad_norm": 0.8325316905975342, + "learning_rate": 0.00019962359879426285, + "loss": 4.6454, + "step": 115300 + }, + { + "epoch": 0.21525592548618028, + "grad_norm": 1.1335790157318115, + "learning_rate": 0.00019962323455481514, + "loss": 4.6516, + "step": 115350 + }, + { + "epoch": 0.21534923104555878, + "grad_norm": 0.8559627532958984, + "learning_rate": 0.00019962287013954998, + "loss": 4.7461, + "step": 115400 + }, + { + "epoch": 0.21544253660493726, + "grad_norm": 1.0715285539627075, + "learning_rate": 0.00019962250554846796, + "loss": 4.7282, + "step": 115450 + }, + { + "epoch": 0.21553584216431576, + "grad_norm": 1.0772089958190918, + "learning_rate": 0.00019962214078156975, + "loss": 4.808, + "step": 115500 + }, + { + "epoch": 0.21562914772369424, + "grad_norm": 0.8694863319396973, + "learning_rate": 0.000199621775838856, + "loss": 4.6563, + "step": 115550 + }, + { + "epoch": 0.21572245328307274, + "grad_norm": 0.7618568539619446, + "learning_rate": 0.00019962141072032733, + "loss": 4.5163, + "step": 115600 + }, + { + "epoch": 0.21581575884245124, + "grad_norm": 0.7407212257385254, + "learning_rate": 0.0001996210454259844, + "loss": 4.7637, + "step": 115650 + }, + { + "epoch": 0.21590906440182972, + "grad_norm": 0.86847984790802, + "learning_rate": 0.00019962067995582788, + "loss": 4.6896, + "step": 115700 + }, + { + "epoch": 0.21600236996120822, + "grad_norm": 1.2252628803253174, + "learning_rate": 0.00019962031430985838, + "loss": 4.5515, + "step": 115750 + }, + { + "epoch": 0.2160956755205867, + "grad_norm": 0.8714708089828491, + "learning_rate": 0.00019961994848807652, + "loss": 4.699, + "step": 115800 + }, + { + "epoch": 0.2161889810799652, + "grad_norm": 0.9541438221931458, + "learning_rate": 0.000199619582490483, + "loss": 4.7368, + "step": 115850 + }, + { + "epoch": 0.21628228663934368, + "grad_norm": 1.139961838722229, + "learning_rate": 0.00019961921631707845, + "loss": 4.7307, + "step": 115900 + }, + { + "epoch": 0.21637559219872218, + "grad_norm": 1.154788613319397, + "learning_rate": 0.00019961884996786352, + "loss": 4.7677, + "step": 115950 + }, + { + "epoch": 0.21646889775810066, + "grad_norm": 0.9261074662208557, + "learning_rate": 0.00019961848344283883, + "loss": 4.5874, + "step": 116000 + }, + { + "epoch": 0.21656220331747916, + "grad_norm": 1.3883506059646606, + "learning_rate": 0.00019961811674200505, + "loss": 4.6135, + "step": 116050 + }, + { + "epoch": 0.21665550887685767, + "grad_norm": 1.012757658958435, + "learning_rate": 0.0001996177498653628, + "loss": 4.7373, + "step": 116100 + }, + { + "epoch": 0.21674881443623614, + "grad_norm": 1.0129855871200562, + "learning_rate": 0.0001996173828129128, + "loss": 4.7325, + "step": 116150 + }, + { + "epoch": 0.21684211999561465, + "grad_norm": 1.0152562856674194, + "learning_rate": 0.00019961701558465563, + "loss": 4.5226, + "step": 116200 + }, + { + "epoch": 0.21693542555499312, + "grad_norm": 1.094165563583374, + "learning_rate": 0.00019961664818059194, + "loss": 4.7992, + "step": 116250 + }, + { + "epoch": 0.21702873111437163, + "grad_norm": 1.0749988555908203, + "learning_rate": 0.0001996162806007224, + "loss": 4.7296, + "step": 116300 + }, + { + "epoch": 0.2171220366737501, + "grad_norm": 0.8733508586883545, + "learning_rate": 0.00019961591284504767, + "loss": 4.8671, + "step": 116350 + }, + { + "epoch": 0.2172153422331286, + "grad_norm": 0.7462573647499084, + "learning_rate": 0.00019961554491356836, + "loss": 4.8757, + "step": 116400 + }, + { + "epoch": 0.2173086477925071, + "grad_norm": 0.8342740535736084, + "learning_rate": 0.00019961517680628515, + "loss": 4.5841, + "step": 116450 + }, + { + "epoch": 0.21740195335188559, + "grad_norm": 1.1010075807571411, + "learning_rate": 0.00019961480852319867, + "loss": 4.5726, + "step": 116500 + }, + { + "epoch": 0.2174952589112641, + "grad_norm": 0.8100841045379639, + "learning_rate": 0.0001996144400643096, + "loss": 4.6927, + "step": 116550 + }, + { + "epoch": 0.21758856447064256, + "grad_norm": 1.0698412656784058, + "learning_rate": 0.00019961407142961854, + "loss": 4.8108, + "step": 116600 + }, + { + "epoch": 0.21768187003002107, + "grad_norm": 0.9264890551567078, + "learning_rate": 0.00019961370261912623, + "loss": 4.555, + "step": 116650 + }, + { + "epoch": 0.21777517558939954, + "grad_norm": 0.8985602855682373, + "learning_rate": 0.00019961333363283318, + "loss": 4.7173, + "step": 116700 + }, + { + "epoch": 0.21786848114877805, + "grad_norm": 0.9962376356124878, + "learning_rate": 0.0001996129644707402, + "loss": 4.5897, + "step": 116750 + }, + { + "epoch": 0.21796178670815655, + "grad_norm": 1.108918309211731, + "learning_rate": 0.00019961259513284781, + "loss": 4.7698, + "step": 116800 + }, + { + "epoch": 0.21805509226753503, + "grad_norm": 1.3311587572097778, + "learning_rate": 0.00019961222561915677, + "loss": 4.6247, + "step": 116850 + }, + { + "epoch": 0.21814839782691353, + "grad_norm": 0.602816104888916, + "learning_rate": 0.00019961185592966762, + "loss": 4.9767, + "step": 116900 + }, + { + "epoch": 0.218241703386292, + "grad_norm": 1.00581693649292, + "learning_rate": 0.0001996114860643811, + "loss": 4.5662, + "step": 116950 + }, + { + "epoch": 0.2183350089456705, + "grad_norm": 0.9467529654502869, + "learning_rate": 0.00019961111602329786, + "loss": 4.6823, + "step": 117000 + }, + { + "epoch": 0.2183350089456705, + "eval_loss": 4.885964393615723, + "eval_runtime": 228.2045, + "eval_samples_per_second": 11.428, + "eval_steps_per_second": 11.428, + "eval_tts_loss": 7.423409835071205, + "step": 117000 + }, + { + "epoch": 0.218428314505049, + "grad_norm": 0.9018577933311462, + "learning_rate": 0.00019961074580641846, + "loss": 4.7069, + "step": 117050 + }, + { + "epoch": 0.2185216200644275, + "grad_norm": 1.015038251876831, + "learning_rate": 0.0001996103754137437, + "loss": 4.772, + "step": 117100 + }, + { + "epoch": 0.21861492562380597, + "grad_norm": 1.082078456878662, + "learning_rate": 0.00019961000484527413, + "loss": 4.5817, + "step": 117150 + }, + { + "epoch": 0.21870823118318447, + "grad_norm": 1.0951112508773804, + "learning_rate": 0.0001996096341010104, + "loss": 4.76, + "step": 117200 + }, + { + "epoch": 0.21880153674256297, + "grad_norm": 1.023581862449646, + "learning_rate": 0.00019960926318095322, + "loss": 4.547, + "step": 117250 + }, + { + "epoch": 0.21889484230194145, + "grad_norm": 0.8084960579872131, + "learning_rate": 0.00019960889208510318, + "loss": 4.8218, + "step": 117300 + }, + { + "epoch": 0.21898814786131995, + "grad_norm": 1.2182049751281738, + "learning_rate": 0.000199608520813461, + "loss": 4.6656, + "step": 117350 + }, + { + "epoch": 0.21908145342069843, + "grad_norm": 0.9085681438446045, + "learning_rate": 0.00019960814936602732, + "loss": 4.8121, + "step": 117400 + }, + { + "epoch": 0.21917475898007693, + "grad_norm": 0.9307880997657776, + "learning_rate": 0.00019960777774280278, + "loss": 4.6564, + "step": 117450 + }, + { + "epoch": 0.2192680645394554, + "grad_norm": 0.8649754524230957, + "learning_rate": 0.00019960740594378803, + "loss": 4.7026, + "step": 117500 + }, + { + "epoch": 0.2193613700988339, + "grad_norm": 0.7863887548446655, + "learning_rate": 0.00019960703396898371, + "loss": 4.5972, + "step": 117550 + }, + { + "epoch": 0.21945467565821242, + "grad_norm": 0.9783292412757874, + "learning_rate": 0.00019960666181839055, + "loss": 4.6181, + "step": 117600 + }, + { + "epoch": 0.2195479812175909, + "grad_norm": 1.023344874382019, + "learning_rate": 0.00019960628949200913, + "loss": 4.5983, + "step": 117650 + }, + { + "epoch": 0.2196412867769694, + "grad_norm": 0.9733097553253174, + "learning_rate": 0.0001996059169898401, + "loss": 4.8129, + "step": 117700 + }, + { + "epoch": 0.21973459233634787, + "grad_norm": 0.9498526453971863, + "learning_rate": 0.00019960554431188422, + "loss": 4.5947, + "step": 117750 + }, + { + "epoch": 0.21982789789572638, + "grad_norm": 1.1250981092453003, + "learning_rate": 0.00019960517145814203, + "loss": 4.4665, + "step": 117800 + }, + { + "epoch": 0.21992120345510485, + "grad_norm": 0.7923647165298462, + "learning_rate": 0.00019960479842861428, + "loss": 4.5954, + "step": 117850 + }, + { + "epoch": 0.22001450901448336, + "grad_norm": 1.0662282705307007, + "learning_rate": 0.00019960442522330154, + "loss": 4.755, + "step": 117900 + }, + { + "epoch": 0.22010781457386186, + "grad_norm": 1.0039044618606567, + "learning_rate": 0.00019960405184220456, + "loss": 4.7219, + "step": 117950 + }, + { + "epoch": 0.22020112013324034, + "grad_norm": 1.210990309715271, + "learning_rate": 0.00019960367828532392, + "loss": 4.6545, + "step": 118000 + }, + { + "epoch": 0.22029442569261884, + "grad_norm": 0.9507876634597778, + "learning_rate": 0.00019960330455266035, + "loss": 4.7404, + "step": 118050 + }, + { + "epoch": 0.22038773125199732, + "grad_norm": 1.1549371480941772, + "learning_rate": 0.00019960293064421445, + "loss": 4.8518, + "step": 118100 + }, + { + "epoch": 0.22048103681137582, + "grad_norm": 1.0183074474334717, + "learning_rate": 0.0001996025565599869, + "loss": 4.7184, + "step": 118150 + }, + { + "epoch": 0.2205743423707543, + "grad_norm": 1.1634994745254517, + "learning_rate": 0.00019960218229997837, + "loss": 4.6093, + "step": 118200 + }, + { + "epoch": 0.2206676479301328, + "grad_norm": 0.969548225402832, + "learning_rate": 0.00019960180786418951, + "loss": 4.8916, + "step": 118250 + }, + { + "epoch": 0.2207609534895113, + "grad_norm": 1.2633224725723267, + "learning_rate": 0.000199601433252621, + "loss": 4.6768, + "step": 118300 + }, + { + "epoch": 0.22085425904888978, + "grad_norm": 0.9347729682922363, + "learning_rate": 0.00019960105846527346, + "loss": 4.6532, + "step": 118350 + }, + { + "epoch": 0.22094756460826828, + "grad_norm": 1.0598022937774658, + "learning_rate": 0.0001996006835021476, + "loss": 4.6961, + "step": 118400 + }, + { + "epoch": 0.22104087016764676, + "grad_norm": 1.2343388795852661, + "learning_rate": 0.00019960030836324404, + "loss": 4.6699, + "step": 118450 + }, + { + "epoch": 0.22113417572702526, + "grad_norm": 0.871600866317749, + "learning_rate": 0.00019959993304856347, + "loss": 4.6907, + "step": 118500 + }, + { + "epoch": 0.22122748128640374, + "grad_norm": 0.9813231825828552, + "learning_rate": 0.00019959955755810652, + "loss": 4.6616, + "step": 118550 + }, + { + "epoch": 0.22132078684578224, + "grad_norm": 1.049269199371338, + "learning_rate": 0.00019959918189187392, + "loss": 4.6005, + "step": 118600 + }, + { + "epoch": 0.22141409240516072, + "grad_norm": 0.7890821695327759, + "learning_rate": 0.00019959880604986628, + "loss": 4.685, + "step": 118650 + }, + { + "epoch": 0.22150739796453922, + "grad_norm": 0.8772531151771545, + "learning_rate": 0.00019959843003208424, + "loss": 4.7188, + "step": 118700 + }, + { + "epoch": 0.22160070352391772, + "grad_norm": 1.0186238288879395, + "learning_rate": 0.00019959805383852852, + "loss": 4.8515, + "step": 118750 + }, + { + "epoch": 0.2216940090832962, + "grad_norm": 1.081235647201538, + "learning_rate": 0.00019959767746919975, + "loss": 4.7645, + "step": 118800 + }, + { + "epoch": 0.2217873146426747, + "grad_norm": 0.9641054272651672, + "learning_rate": 0.0001995973009240986, + "loss": 4.6056, + "step": 118850 + }, + { + "epoch": 0.22188062020205318, + "grad_norm": 0.8466959595680237, + "learning_rate": 0.00019959692420322577, + "loss": 4.7974, + "step": 118900 + }, + { + "epoch": 0.22197392576143168, + "grad_norm": 1.1167409420013428, + "learning_rate": 0.00019959654730658185, + "loss": 4.8381, + "step": 118950 + }, + { + "epoch": 0.22206723132081016, + "grad_norm": 0.8874496221542358, + "learning_rate": 0.0001995961702341676, + "loss": 4.5208, + "step": 119000 + }, + { + "epoch": 0.22216053688018866, + "grad_norm": 1.0184682607650757, + "learning_rate": 0.0001995957929859836, + "loss": 4.766, + "step": 119050 + }, + { + "epoch": 0.22225384243956717, + "grad_norm": 0.8084706664085388, + "learning_rate": 0.00019959541556203054, + "loss": 4.6748, + "step": 119100 + }, + { + "epoch": 0.22234714799894564, + "grad_norm": 0.9491915106773376, + "learning_rate": 0.0001995950379623091, + "loss": 4.7225, + "step": 119150 + }, + { + "epoch": 0.22244045355832415, + "grad_norm": 0.9508479833602905, + "learning_rate": 0.00019959466018682, + "loss": 4.785, + "step": 119200 + }, + { + "epoch": 0.22253375911770262, + "grad_norm": 0.8194633722305298, + "learning_rate": 0.0001995942822355638, + "loss": 4.7103, + "step": 119250 + }, + { + "epoch": 0.22262706467708113, + "grad_norm": 0.9935794472694397, + "learning_rate": 0.0001995939041085412, + "loss": 4.7287, + "step": 119300 + }, + { + "epoch": 0.2227203702364596, + "grad_norm": 0.7997922897338867, + "learning_rate": 0.0001995935258057529, + "loss": 4.621, + "step": 119350 + }, + { + "epoch": 0.2228136757958381, + "grad_norm": 0.9721159338951111, + "learning_rate": 0.00019959314732719956, + "loss": 4.7279, + "step": 119400 + }, + { + "epoch": 0.2229069813552166, + "grad_norm": 1.006213665008545, + "learning_rate": 0.0001995927686728818, + "loss": 4.8168, + "step": 119450 + }, + { + "epoch": 0.22300028691459509, + "grad_norm": 0.976470947265625, + "learning_rate": 0.00019959238984280038, + "loss": 4.6645, + "step": 119500 + }, + { + "epoch": 0.2230935924739736, + "grad_norm": 0.9182271957397461, + "learning_rate": 0.00019959201083695588, + "loss": 4.6798, + "step": 119550 + }, + { + "epoch": 0.22318689803335207, + "grad_norm": 0.873684287071228, + "learning_rate": 0.00019959163165534903, + "loss": 4.4998, + "step": 119600 + }, + { + "epoch": 0.22328020359273057, + "grad_norm": 1.1776008605957031, + "learning_rate": 0.00019959125229798046, + "loss": 4.7635, + "step": 119650 + }, + { + "epoch": 0.22337350915210905, + "grad_norm": 1.159077763557434, + "learning_rate": 0.00019959087276485087, + "loss": 4.6824, + "step": 119700 + }, + { + "epoch": 0.22346681471148755, + "grad_norm": 1.2135952711105347, + "learning_rate": 0.0001995904930559609, + "loss": 4.7218, + "step": 119750 + }, + { + "epoch": 0.22356012027086603, + "grad_norm": 0.8527218699455261, + "learning_rate": 0.0001995901131713112, + "loss": 4.7581, + "step": 119800 + }, + { + "epoch": 0.22365342583024453, + "grad_norm": 1.1286325454711914, + "learning_rate": 0.0001995897331109025, + "loss": 4.6019, + "step": 119850 + }, + { + "epoch": 0.22374673138962303, + "grad_norm": 0.7209294438362122, + "learning_rate": 0.00019958935287473547, + "loss": 4.4614, + "step": 119900 + }, + { + "epoch": 0.2238400369490015, + "grad_norm": 1.012035608291626, + "learning_rate": 0.00019958897246281074, + "loss": 4.743, + "step": 119950 + }, + { + "epoch": 0.22393334250838, + "grad_norm": 0.9427865147590637, + "learning_rate": 0.000199588591875129, + "loss": 4.5508, + "step": 120000 + }, + { + "epoch": 0.22393334250838, + "eval_loss": 4.883612632751465, + "eval_runtime": 229.5565, + "eval_samples_per_second": 11.361, + "eval_steps_per_second": 11.361, + "eval_tts_loss": 7.450549049201519, + "step": 120000 + }, + { + "epoch": 0.2240266480677585, + "grad_norm": 1.0907127857208252, + "learning_rate": 0.0001995882111116909, + "loss": 4.6219, + "step": 120050 + }, + { + "epoch": 0.224119953627137, + "grad_norm": 1.2542510032653809, + "learning_rate": 0.00019958783017249714, + "loss": 4.5626, + "step": 120100 + }, + { + "epoch": 0.22421325918651547, + "grad_norm": 0.9580556750297546, + "learning_rate": 0.00019958744905754837, + "loss": 4.7883, + "step": 120150 + }, + { + "epoch": 0.22430656474589397, + "grad_norm": 1.1819099187850952, + "learning_rate": 0.00019958706776684532, + "loss": 4.7903, + "step": 120200 + }, + { + "epoch": 0.22439987030527248, + "grad_norm": 0.9004278182983398, + "learning_rate": 0.00019958668630038856, + "loss": 4.6342, + "step": 120250 + }, + { + "epoch": 0.22449317586465095, + "grad_norm": 0.898798942565918, + "learning_rate": 0.00019958630465817886, + "loss": 4.9032, + "step": 120300 + }, + { + "epoch": 0.22458648142402945, + "grad_norm": 1.324995994567871, + "learning_rate": 0.00019958592284021685, + "loss": 4.7004, + "step": 120350 + }, + { + "epoch": 0.22467978698340793, + "grad_norm": 0.9683493971824646, + "learning_rate": 0.00019958554084650317, + "loss": 4.7933, + "step": 120400 + }, + { + "epoch": 0.22477309254278643, + "grad_norm": 0.9748978614807129, + "learning_rate": 0.00019958515867703858, + "loss": 4.8761, + "step": 120450 + }, + { + "epoch": 0.2248663981021649, + "grad_norm": 1.192099690437317, + "learning_rate": 0.00019958477633182372, + "loss": 4.6939, + "step": 120500 + }, + { + "epoch": 0.22495970366154341, + "grad_norm": 1.2291393280029297, + "learning_rate": 0.0001995843938108592, + "loss": 4.743, + "step": 120550 + }, + { + "epoch": 0.22505300922092192, + "grad_norm": 1.2293897867202759, + "learning_rate": 0.00019958401111414578, + "loss": 4.8312, + "step": 120600 + }, + { + "epoch": 0.2251463147803004, + "grad_norm": 1.0013765096664429, + "learning_rate": 0.0001995836282416841, + "loss": 4.71, + "step": 120650 + }, + { + "epoch": 0.2252396203396789, + "grad_norm": 1.1996088027954102, + "learning_rate": 0.00019958324519347485, + "loss": 4.8127, + "step": 120700 + }, + { + "epoch": 0.22533292589905737, + "grad_norm": 0.9375223517417908, + "learning_rate": 0.00019958286196951867, + "loss": 4.6523, + "step": 120750 + }, + { + "epoch": 0.22542623145843588, + "grad_norm": 0.7946412563323975, + "learning_rate": 0.00019958247856981628, + "loss": 4.5345, + "step": 120800 + }, + { + "epoch": 0.22551953701781435, + "grad_norm": 0.9169266223907471, + "learning_rate": 0.00019958209499436832, + "loss": 4.5954, + "step": 120850 + }, + { + "epoch": 0.22561284257719286, + "grad_norm": 1.0124138593673706, + "learning_rate": 0.0001995817112431755, + "loss": 4.7285, + "step": 120900 + }, + { + "epoch": 0.22570614813657136, + "grad_norm": 0.8469855189323425, + "learning_rate": 0.00019958132731623843, + "loss": 4.5862, + "step": 120950 + }, + { + "epoch": 0.22579945369594984, + "grad_norm": 0.9922230839729309, + "learning_rate": 0.00019958094321355791, + "loss": 4.7852, + "step": 121000 + }, + { + "epoch": 0.22589275925532834, + "grad_norm": 1.1002434492111206, + "learning_rate": 0.00019958055893513451, + "loss": 4.7454, + "step": 121050 + }, + { + "epoch": 0.22598606481470682, + "grad_norm": 1.2257192134857178, + "learning_rate": 0.00019958017448096896, + "loss": 4.7457, + "step": 121100 + }, + { + "epoch": 0.22607937037408532, + "grad_norm": 1.063035249710083, + "learning_rate": 0.0001995797898510619, + "loss": 4.5511, + "step": 121150 + }, + { + "epoch": 0.2261726759334638, + "grad_norm": 0.769403874874115, + "learning_rate": 0.00019957940504541406, + "loss": 4.6497, + "step": 121200 + }, + { + "epoch": 0.2262659814928423, + "grad_norm": 0.9175217151641846, + "learning_rate": 0.0001995790200640261, + "loss": 4.6908, + "step": 121250 + }, + { + "epoch": 0.22635928705222078, + "grad_norm": 1.0887384414672852, + "learning_rate": 0.00019957863490689865, + "loss": 4.9673, + "step": 121300 + }, + { + "epoch": 0.22645259261159928, + "grad_norm": 1.0084506273269653, + "learning_rate": 0.00019957824957403245, + "loss": 4.6811, + "step": 121350 + }, + { + "epoch": 0.22654589817097778, + "grad_norm": 1.035234808921814, + "learning_rate": 0.00019957786406542817, + "loss": 4.5367, + "step": 121400 + }, + { + "epoch": 0.22663920373035626, + "grad_norm": 1.082025408744812, + "learning_rate": 0.0001995774783810865, + "loss": 4.8276, + "step": 121450 + }, + { + "epoch": 0.22673250928973476, + "grad_norm": 0.8627831339836121, + "learning_rate": 0.00019957709252100807, + "loss": 4.5622, + "step": 121500 + }, + { + "epoch": 0.22682581484911324, + "grad_norm": 0.6274713277816772, + "learning_rate": 0.0001995767064851936, + "loss": 4.6064, + "step": 121550 + }, + { + "epoch": 0.22691912040849174, + "grad_norm": 0.6322067975997925, + "learning_rate": 0.00019957632027364376, + "loss": 4.6496, + "step": 121600 + }, + { + "epoch": 0.22701242596787022, + "grad_norm": 0.7151324152946472, + "learning_rate": 0.00019957593388635923, + "loss": 4.4901, + "step": 121650 + }, + { + "epoch": 0.22710573152724872, + "grad_norm": 0.8204741477966309, + "learning_rate": 0.00019957554732334071, + "loss": 4.8227, + "step": 121700 + }, + { + "epoch": 0.22719903708662723, + "grad_norm": 1.0234726667404175, + "learning_rate": 0.0001995751605845889, + "loss": 4.6125, + "step": 121750 + }, + { + "epoch": 0.2272923426460057, + "grad_norm": 1.1819202899932861, + "learning_rate": 0.0001995747736701044, + "loss": 4.7185, + "step": 121800 + }, + { + "epoch": 0.2273856482053842, + "grad_norm": 0.9932510256767273, + "learning_rate": 0.00019957438657988797, + "loss": 4.9552, + "step": 121850 + }, + { + "epoch": 0.22747895376476268, + "grad_norm": 1.0149807929992676, + "learning_rate": 0.00019957399931394028, + "loss": 4.7491, + "step": 121900 + }, + { + "epoch": 0.22757225932414119, + "grad_norm": 0.8650890588760376, + "learning_rate": 0.00019957361187226199, + "loss": 4.7261, + "step": 121950 + }, + { + "epoch": 0.22766556488351966, + "grad_norm": 0.8572404980659485, + "learning_rate": 0.00019957322425485376, + "loss": 4.6074, + "step": 122000 + }, + { + "epoch": 0.22775887044289816, + "grad_norm": 0.8673201203346252, + "learning_rate": 0.00019957283646171634, + "loss": 4.5425, + "step": 122050 + }, + { + "epoch": 0.22785217600227667, + "grad_norm": 1.1532055139541626, + "learning_rate": 0.0001995724484928504, + "loss": 4.6619, + "step": 122100 + }, + { + "epoch": 0.22794548156165514, + "grad_norm": 1.0270742177963257, + "learning_rate": 0.00019957206034825658, + "loss": 4.7036, + "step": 122150 + }, + { + "epoch": 0.22803878712103365, + "grad_norm": 1.0229623317718506, + "learning_rate": 0.0001995716720279356, + "loss": 4.5638, + "step": 122200 + }, + { + "epoch": 0.22813209268041212, + "grad_norm": 1.0269454717636108, + "learning_rate": 0.00019957128353188816, + "loss": 4.4661, + "step": 122250 + }, + { + "epoch": 0.22822539823979063, + "grad_norm": 1.051167368888855, + "learning_rate": 0.0001995708948601149, + "loss": 4.5614, + "step": 122300 + }, + { + "epoch": 0.2283187037991691, + "grad_norm": 0.9413858652114868, + "learning_rate": 0.00019957050601261656, + "loss": 4.5145, + "step": 122350 + }, + { + "epoch": 0.2284120093585476, + "grad_norm": 0.7322555184364319, + "learning_rate": 0.00019957011698939377, + "loss": 4.7114, + "step": 122400 + }, + { + "epoch": 0.2285053149179261, + "grad_norm": 0.9706824421882629, + "learning_rate": 0.00019956972779044727, + "loss": 4.5615, + "step": 122450 + }, + { + "epoch": 0.2285986204773046, + "grad_norm": 1.0096715688705444, + "learning_rate": 0.0001995693384157777, + "loss": 4.7756, + "step": 122500 + }, + { + "epoch": 0.2286919260366831, + "grad_norm": 0.901025652885437, + "learning_rate": 0.00019956894886538574, + "loss": 4.7176, + "step": 122550 + }, + { + "epoch": 0.22878523159606157, + "grad_norm": 0.9558797478675842, + "learning_rate": 0.00019956855913927214, + "loss": 4.8155, + "step": 122600 + }, + { + "epoch": 0.22887853715544007, + "grad_norm": 0.6272359490394592, + "learning_rate": 0.00019956816923743755, + "loss": 4.7247, + "step": 122650 + }, + { + "epoch": 0.22897184271481855, + "grad_norm": 1.197523832321167, + "learning_rate": 0.00019956777915988269, + "loss": 4.6914, + "step": 122700 + }, + { + "epoch": 0.22906514827419705, + "grad_norm": 0.9652473330497742, + "learning_rate": 0.00019956738890660817, + "loss": 4.6038, + "step": 122750 + }, + { + "epoch": 0.22915845383357553, + "grad_norm": 1.0007468461990356, + "learning_rate": 0.00019956699847761472, + "loss": 4.6983, + "step": 122800 + }, + { + "epoch": 0.22925175939295403, + "grad_norm": 0.9335245490074158, + "learning_rate": 0.0001995666078729031, + "loss": 4.631, + "step": 122850 + }, + { + "epoch": 0.22934506495233253, + "grad_norm": 0.9081475734710693, + "learning_rate": 0.00019956621709247386, + "loss": 4.8258, + "step": 122900 + }, + { + "epoch": 0.229438370511711, + "grad_norm": 0.7586884498596191, + "learning_rate": 0.00019956582613632785, + "loss": 4.5945, + "step": 122950 + }, + { + "epoch": 0.2295316760710895, + "grad_norm": 0.909521222114563, + "learning_rate": 0.0001995654350044656, + "loss": 4.6523, + "step": 123000 + }, + { + "epoch": 0.2295316760710895, + "eval_loss": 4.8781867027282715, + "eval_runtime": 230.72, + "eval_samples_per_second": 11.304, + "eval_steps_per_second": 11.304, + "eval_tts_loss": 7.44082480463045, + "step": 123000 + }, + { + "epoch": 0.229624981630468, + "grad_norm": 1.208822250366211, + "learning_rate": 0.00019956504369688792, + "loss": 4.9531, + "step": 123050 + }, + { + "epoch": 0.2297182871898465, + "grad_norm": 1.0203379392623901, + "learning_rate": 0.00019956465221359544, + "loss": 4.6531, + "step": 123100 + }, + { + "epoch": 0.22981159274922497, + "grad_norm": 0.9764498472213745, + "learning_rate": 0.00019956426055458887, + "loss": 4.5436, + "step": 123150 + }, + { + "epoch": 0.22990489830860347, + "grad_norm": 1.1145259141921997, + "learning_rate": 0.00019956386871986892, + "loss": 4.5113, + "step": 123200 + }, + { + "epoch": 0.22999820386798198, + "grad_norm": 0.9552769064903259, + "learning_rate": 0.00019956347670943624, + "loss": 4.7847, + "step": 123250 + }, + { + "epoch": 0.23009150942736045, + "grad_norm": 1.0565723180770874, + "learning_rate": 0.00019956308452329154, + "loss": 4.8575, + "step": 123300 + }, + { + "epoch": 0.23018481498673896, + "grad_norm": 1.123518705368042, + "learning_rate": 0.0001995626921614355, + "loss": 4.6688, + "step": 123350 + }, + { + "epoch": 0.23027812054611743, + "grad_norm": 0.911044716835022, + "learning_rate": 0.00019956229962386885, + "loss": 4.7472, + "step": 123400 + }, + { + "epoch": 0.23037142610549594, + "grad_norm": 1.0663899183273315, + "learning_rate": 0.00019956190691059223, + "loss": 4.7779, + "step": 123450 + }, + { + "epoch": 0.2304647316648744, + "grad_norm": 0.9321340918540955, + "learning_rate": 0.0001995615140216064, + "loss": 4.6964, + "step": 123500 + }, + { + "epoch": 0.23055803722425292, + "grad_norm": 0.8135243058204651, + "learning_rate": 0.00019956112095691203, + "loss": 4.6287, + "step": 123550 + }, + { + "epoch": 0.23065134278363142, + "grad_norm": 1.0988374948501587, + "learning_rate": 0.00019956072771650977, + "loss": 4.8658, + "step": 123600 + }, + { + "epoch": 0.2307446483430099, + "grad_norm": 0.8417083024978638, + "learning_rate": 0.00019956033430040034, + "loss": 4.7001, + "step": 123650 + }, + { + "epoch": 0.2308379539023884, + "grad_norm": 1.0932660102844238, + "learning_rate": 0.00019955994070858447, + "loss": 4.5109, + "step": 123700 + }, + { + "epoch": 0.23093125946176687, + "grad_norm": 1.016459584236145, + "learning_rate": 0.00019955954694106278, + "loss": 4.7203, + "step": 123750 + }, + { + "epoch": 0.23102456502114538, + "grad_norm": 1.1212247610092163, + "learning_rate": 0.000199559152997836, + "loss": 4.5223, + "step": 123800 + }, + { + "epoch": 0.23111787058052385, + "grad_norm": 0.849577784538269, + "learning_rate": 0.00019955875887890488, + "loss": 4.6992, + "step": 123850 + }, + { + "epoch": 0.23121117613990236, + "grad_norm": 0.7211328744888306, + "learning_rate": 0.00019955836458427007, + "loss": 4.554, + "step": 123900 + }, + { + "epoch": 0.23130448169928083, + "grad_norm": 0.7936710715293884, + "learning_rate": 0.00019955797011393222, + "loss": 4.7927, + "step": 123950 + }, + { + "epoch": 0.23139778725865934, + "grad_norm": 0.9986937642097473, + "learning_rate": 0.0001995575754678921, + "loss": 4.4808, + "step": 124000 + }, + { + "epoch": 0.23149109281803784, + "grad_norm": 0.8931700587272644, + "learning_rate": 0.00019955718064615036, + "loss": 4.7492, + "step": 124050 + }, + { + "epoch": 0.23158439837741632, + "grad_norm": 1.1811097860336304, + "learning_rate": 0.00019955678564870772, + "loss": 4.5177, + "step": 124100 + }, + { + "epoch": 0.23167770393679482, + "grad_norm": 0.7191671133041382, + "learning_rate": 0.0001995563904755649, + "loss": 4.8179, + "step": 124150 + }, + { + "epoch": 0.2317710094961733, + "grad_norm": 1.1839138269424438, + "learning_rate": 0.00019955599512672253, + "loss": 4.6699, + "step": 124200 + }, + { + "epoch": 0.2318643150555518, + "grad_norm": 0.9712040424346924, + "learning_rate": 0.00019955559960218135, + "loss": 4.6827, + "step": 124250 + }, + { + "epoch": 0.23195762061493028, + "grad_norm": 1.1487324237823486, + "learning_rate": 0.00019955520390194207, + "loss": 4.5614, + "step": 124300 + }, + { + "epoch": 0.23205092617430878, + "grad_norm": 1.104325532913208, + "learning_rate": 0.00019955480802600538, + "loss": 4.6895, + "step": 124350 + }, + { + "epoch": 0.23214423173368728, + "grad_norm": 1.0275545120239258, + "learning_rate": 0.00019955441197437191, + "loss": 4.6102, + "step": 124400 + }, + { + "epoch": 0.23223753729306576, + "grad_norm": 1.1445039510726929, + "learning_rate": 0.00019955401574704248, + "loss": 4.6854, + "step": 124450 + }, + { + "epoch": 0.23233084285244426, + "grad_norm": 0.9036419987678528, + "learning_rate": 0.00019955361934401774, + "loss": 4.6985, + "step": 124500 + }, + { + "epoch": 0.23242414841182274, + "grad_norm": 0.825995683670044, + "learning_rate": 0.00019955322276529836, + "loss": 4.8375, + "step": 124550 + }, + { + "epoch": 0.23251745397120124, + "grad_norm": 1.3338888883590698, + "learning_rate": 0.00019955282601088502, + "loss": 4.851, + "step": 124600 + }, + { + "epoch": 0.23261075953057972, + "grad_norm": 1.4017682075500488, + "learning_rate": 0.0001995524290807785, + "loss": 4.7548, + "step": 124650 + }, + { + "epoch": 0.23270406508995822, + "grad_norm": 0.6777539849281311, + "learning_rate": 0.00019955203197497947, + "loss": 4.6612, + "step": 124700 + }, + { + "epoch": 0.23279737064933673, + "grad_norm": 0.94838947057724, + "learning_rate": 0.0001995516346934886, + "loss": 4.8645, + "step": 124750 + }, + { + "epoch": 0.2328906762087152, + "grad_norm": 1.0013059377670288, + "learning_rate": 0.0001995512372363066, + "loss": 4.6178, + "step": 124800 + }, + { + "epoch": 0.2329839817680937, + "grad_norm": 0.8147088885307312, + "learning_rate": 0.0001995508396034342, + "loss": 4.4805, + "step": 124850 + }, + { + "epoch": 0.23307728732747218, + "grad_norm": 0.6364634037017822, + "learning_rate": 0.0001995504417948721, + "loss": 4.614, + "step": 124900 + }, + { + "epoch": 0.23317059288685069, + "grad_norm": 1.0096015930175781, + "learning_rate": 0.00019955004381062096, + "loss": 4.8126, + "step": 124950 + }, + { + "epoch": 0.23326389844622916, + "grad_norm": 1.0549802780151367, + "learning_rate": 0.00019954964565068147, + "loss": 4.5155, + "step": 125000 + }, + { + "epoch": 0.23335720400560767, + "grad_norm": 1.0715384483337402, + "learning_rate": 0.00019954924731505443, + "loss": 4.8361, + "step": 125050 + }, + { + "epoch": 0.23345050956498617, + "grad_norm": 1.089919090270996, + "learning_rate": 0.00019954884880374048, + "loss": 4.4423, + "step": 125100 + }, + { + "epoch": 0.23354381512436465, + "grad_norm": 1.1438995599746704, + "learning_rate": 0.00019954845011674032, + "loss": 4.5797, + "step": 125150 + }, + { + "epoch": 0.23363712068374315, + "grad_norm": 0.8875765800476074, + "learning_rate": 0.00019954805125405468, + "loss": 4.7633, + "step": 125200 + }, + { + "epoch": 0.23373042624312163, + "grad_norm": 1.0435429811477661, + "learning_rate": 0.00019954765221568424, + "loss": 4.5587, + "step": 125250 + }, + { + "epoch": 0.23382373180250013, + "grad_norm": 0.8705243468284607, + "learning_rate": 0.00019954725300162966, + "loss": 4.7671, + "step": 125300 + }, + { + "epoch": 0.2339170373618786, + "grad_norm": 1.100628137588501, + "learning_rate": 0.00019954685361189177, + "loss": 4.5598, + "step": 125350 + }, + { + "epoch": 0.2340103429212571, + "grad_norm": 0.7905378341674805, + "learning_rate": 0.00019954645404647115, + "loss": 4.6633, + "step": 125400 + }, + { + "epoch": 0.23410364848063558, + "grad_norm": 1.0382624864578247, + "learning_rate": 0.00019954605430536857, + "loss": 4.8594, + "step": 125450 + }, + { + "epoch": 0.2341969540400141, + "grad_norm": 1.0554344654083252, + "learning_rate": 0.0001995456543885847, + "loss": 4.8501, + "step": 125500 + }, + { + "epoch": 0.2342902595993926, + "grad_norm": 0.6847901344299316, + "learning_rate": 0.0001995452542961203, + "loss": 4.686, + "step": 125550 + }, + { + "epoch": 0.23438356515877107, + "grad_norm": 0.9976589679718018, + "learning_rate": 0.000199544854027976, + "loss": 4.7711, + "step": 125600 + }, + { + "epoch": 0.23447687071814957, + "grad_norm": 1.0457299947738647, + "learning_rate": 0.0001995444535841526, + "loss": 4.5277, + "step": 125650 + }, + { + "epoch": 0.23457017627752805, + "grad_norm": 0.7479730844497681, + "learning_rate": 0.00019954405296465072, + "loss": 4.7013, + "step": 125700 + }, + { + "epoch": 0.23466348183690655, + "grad_norm": 1.141799807548523, + "learning_rate": 0.0001995436521694711, + "loss": 4.8921, + "step": 125750 + }, + { + "epoch": 0.23475678739628503, + "grad_norm": 0.865399181842804, + "learning_rate": 0.00019954325119861442, + "loss": 4.6746, + "step": 125800 + }, + { + "epoch": 0.23485009295566353, + "grad_norm": 1.0194836854934692, + "learning_rate": 0.00019954285005208147, + "loss": 4.8391, + "step": 125850 + }, + { + "epoch": 0.23494339851504203, + "grad_norm": 0.7085402011871338, + "learning_rate": 0.00019954244872987287, + "loss": 4.7155, + "step": 125900 + }, + { + "epoch": 0.2350367040744205, + "grad_norm": 0.9328494071960449, + "learning_rate": 0.0001995420472319894, + "loss": 4.7267, + "step": 125950 + }, + { + "epoch": 0.23513000963379901, + "grad_norm": 0.9679514169692993, + "learning_rate": 0.0001995416455584317, + "loss": 4.6913, + "step": 126000 + }, + { + "epoch": 0.23513000963379901, + "eval_loss": 4.869121551513672, + "eval_runtime": 230.4744, + "eval_samples_per_second": 11.316, + "eval_steps_per_second": 11.316, + "eval_tts_loss": 7.37138597899048, + "step": 126000 + }, + { + "epoch": 0.2352233151931775, + "grad_norm": 1.0236223936080933, + "learning_rate": 0.00019954124370920054, + "loss": 4.6602, + "step": 126050 + }, + { + "epoch": 0.235316620752556, + "grad_norm": 0.9021424651145935, + "learning_rate": 0.0001995408416842966, + "loss": 4.3545, + "step": 126100 + }, + { + "epoch": 0.23540992631193447, + "grad_norm": 1.059244155883789, + "learning_rate": 0.00019954043948372055, + "loss": 4.7123, + "step": 126150 + }, + { + "epoch": 0.23550323187131297, + "grad_norm": 0.8630461096763611, + "learning_rate": 0.00019954003710747316, + "loss": 4.7759, + "step": 126200 + }, + { + "epoch": 0.23559653743069148, + "grad_norm": 1.192002534866333, + "learning_rate": 0.00019953963455555514, + "loss": 4.7526, + "step": 126250 + }, + { + "epoch": 0.23568984299006995, + "grad_norm": 1.1485189199447632, + "learning_rate": 0.00019953923182796713, + "loss": 4.7111, + "step": 126300 + }, + { + "epoch": 0.23578314854944846, + "grad_norm": 0.9921421408653259, + "learning_rate": 0.00019953882892470993, + "loss": 4.6672, + "step": 126350 + }, + { + "epoch": 0.23587645410882693, + "grad_norm": 1.0147913694381714, + "learning_rate": 0.0001995384258457842, + "loss": 4.8141, + "step": 126400 + }, + { + "epoch": 0.23596975966820544, + "grad_norm": 1.1433029174804688, + "learning_rate": 0.00019953802259119066, + "loss": 4.6498, + "step": 126450 + }, + { + "epoch": 0.2360630652275839, + "grad_norm": 0.8153554201126099, + "learning_rate": 0.00019953761916093003, + "loss": 4.501, + "step": 126500 + }, + { + "epoch": 0.23615637078696242, + "grad_norm": 0.8511623740196228, + "learning_rate": 0.00019953721555500303, + "loss": 4.6241, + "step": 126550 + }, + { + "epoch": 0.2362496763463409, + "grad_norm": 0.9093624353408813, + "learning_rate": 0.00019953681177341034, + "loss": 4.8207, + "step": 126600 + }, + { + "epoch": 0.2363429819057194, + "grad_norm": 1.1618552207946777, + "learning_rate": 0.0001995364078161527, + "loss": 4.7207, + "step": 126650 + }, + { + "epoch": 0.2364362874650979, + "grad_norm": 1.0167028903961182, + "learning_rate": 0.0001995360036832308, + "loss": 4.7409, + "step": 126700 + }, + { + "epoch": 0.23652959302447638, + "grad_norm": 1.2850828170776367, + "learning_rate": 0.00019953559937464535, + "loss": 4.8472, + "step": 126750 + }, + { + "epoch": 0.23662289858385488, + "grad_norm": 0.9665905237197876, + "learning_rate": 0.00019953519489039715, + "loss": 4.658, + "step": 126800 + }, + { + "epoch": 0.23671620414323336, + "grad_norm": 0.9830486178398132, + "learning_rate": 0.0001995347902304868, + "loss": 4.6645, + "step": 126850 + }, + { + "epoch": 0.23680950970261186, + "grad_norm": 1.1448428630828857, + "learning_rate": 0.00019953438539491505, + "loss": 4.7673, + "step": 126900 + }, + { + "epoch": 0.23690281526199033, + "grad_norm": 1.1206557750701904, + "learning_rate": 0.00019953398038368266, + "loss": 4.8454, + "step": 126950 + }, + { + "epoch": 0.23699612082136884, + "grad_norm": 0.9602530002593994, + "learning_rate": 0.00019953357519679026, + "loss": 4.588, + "step": 127000 + }, + { + "epoch": 0.23708942638074734, + "grad_norm": 0.816632091999054, + "learning_rate": 0.00019953316983423863, + "loss": 4.6631, + "step": 127050 + }, + { + "epoch": 0.23718273194012582, + "grad_norm": 0.996062159538269, + "learning_rate": 0.00019953276429602847, + "loss": 4.6397, + "step": 127100 + }, + { + "epoch": 0.23727603749950432, + "grad_norm": 1.116538405418396, + "learning_rate": 0.0001995323585821605, + "loss": 4.7699, + "step": 127150 + }, + { + "epoch": 0.2373693430588828, + "grad_norm": 1.0206782817840576, + "learning_rate": 0.00019953195269263544, + "loss": 4.568, + "step": 127200 + }, + { + "epoch": 0.2374626486182613, + "grad_norm": 0.8355110287666321, + "learning_rate": 0.00019953154662745397, + "loss": 4.5723, + "step": 127250 + }, + { + "epoch": 0.23755595417763978, + "grad_norm": 0.7962259650230408, + "learning_rate": 0.00019953114038661685, + "loss": 4.6924, + "step": 127300 + }, + { + "epoch": 0.23764925973701828, + "grad_norm": 1.0387051105499268, + "learning_rate": 0.0001995307339701248, + "loss": 4.6205, + "step": 127350 + }, + { + "epoch": 0.23774256529639679, + "grad_norm": 0.9013943672180176, + "learning_rate": 0.0001995303273779785, + "loss": 4.705, + "step": 127400 + }, + { + "epoch": 0.23783587085577526, + "grad_norm": 0.9052708148956299, + "learning_rate": 0.00019952992061017866, + "loss": 4.6817, + "step": 127450 + }, + { + "epoch": 0.23792917641515376, + "grad_norm": 1.073463797569275, + "learning_rate": 0.00019952951366672606, + "loss": 4.8822, + "step": 127500 + }, + { + "epoch": 0.23802248197453224, + "grad_norm": 0.8236823081970215, + "learning_rate": 0.00019952910654762136, + "loss": 4.7687, + "step": 127550 + }, + { + "epoch": 0.23811578753391074, + "grad_norm": 1.148167371749878, + "learning_rate": 0.0001995286992528653, + "loss": 4.6313, + "step": 127600 + }, + { + "epoch": 0.23820909309328922, + "grad_norm": 0.5891329050064087, + "learning_rate": 0.00019952829178245863, + "loss": 4.6894, + "step": 127650 + }, + { + "epoch": 0.23830239865266772, + "grad_norm": 0.9606289267539978, + "learning_rate": 0.00019952788413640196, + "loss": 4.7802, + "step": 127700 + }, + { + "epoch": 0.23839570421204623, + "grad_norm": 0.8213685750961304, + "learning_rate": 0.00019952747631469617, + "loss": 4.6354, + "step": 127750 + }, + { + "epoch": 0.2384890097714247, + "grad_norm": 0.9802691340446472, + "learning_rate": 0.00019952706831734187, + "loss": 4.5251, + "step": 127800 + }, + { + "epoch": 0.2385823153308032, + "grad_norm": 0.8711889386177063, + "learning_rate": 0.00019952666014433977, + "loss": 4.6781, + "step": 127850 + }, + { + "epoch": 0.23867562089018168, + "grad_norm": 1.1614457368850708, + "learning_rate": 0.0001995262517956907, + "loss": 4.5533, + "step": 127900 + }, + { + "epoch": 0.2387689264495602, + "grad_norm": 0.9102240204811096, + "learning_rate": 0.00019952584327139524, + "loss": 4.6598, + "step": 127950 + }, + { + "epoch": 0.23886223200893866, + "grad_norm": 0.8396139144897461, + "learning_rate": 0.00019952543457145422, + "loss": 4.4948, + "step": 128000 + }, + { + "epoch": 0.23895553756831717, + "grad_norm": 0.927024781703949, + "learning_rate": 0.0001995250256958683, + "loss": 4.5239, + "step": 128050 + }, + { + "epoch": 0.23904884312769564, + "grad_norm": 1.0152112245559692, + "learning_rate": 0.00019952461664463823, + "loss": 4.6425, + "step": 128100 + }, + { + "epoch": 0.23914214868707415, + "grad_norm": 1.4888454675674438, + "learning_rate": 0.0001995242074177647, + "loss": 4.7832, + "step": 128150 + }, + { + "epoch": 0.23923545424645265, + "grad_norm": 1.1071109771728516, + "learning_rate": 0.0001995237980152485, + "loss": 4.6113, + "step": 128200 + }, + { + "epoch": 0.23932875980583113, + "grad_norm": 0.9352720379829407, + "learning_rate": 0.00019952338843709028, + "loss": 4.6275, + "step": 128250 + }, + { + "epoch": 0.23942206536520963, + "grad_norm": 0.9158457517623901, + "learning_rate": 0.00019952297868329079, + "loss": 4.6196, + "step": 128300 + }, + { + "epoch": 0.2395153709245881, + "grad_norm": 0.46742984652519226, + "learning_rate": 0.00019952256875385076, + "loss": 4.6612, + "step": 128350 + }, + { + "epoch": 0.2396086764839666, + "grad_norm": 1.0727510452270508, + "learning_rate": 0.0001995221586487709, + "loss": 4.7063, + "step": 128400 + }, + { + "epoch": 0.23970198204334509, + "grad_norm": 0.9847069382667542, + "learning_rate": 0.00019952174836805197, + "loss": 4.9514, + "step": 128450 + }, + { + "epoch": 0.2397952876027236, + "grad_norm": 1.0134080648422241, + "learning_rate": 0.00019952133791169464, + "loss": 4.4549, + "step": 128500 + }, + { + "epoch": 0.2398885931621021, + "grad_norm": 0.7838501334190369, + "learning_rate": 0.00019952092727969965, + "loss": 4.348, + "step": 128550 + }, + { + "epoch": 0.23998189872148057, + "grad_norm": 0.9687092900276184, + "learning_rate": 0.00019952051647206776, + "loss": 4.5199, + "step": 128600 + }, + { + "epoch": 0.24007520428085907, + "grad_norm": 0.833861231803894, + "learning_rate": 0.00019952010548879968, + "loss": 4.812, + "step": 128650 + }, + { + "epoch": 0.24016850984023755, + "grad_norm": 1.0054713487625122, + "learning_rate": 0.00019951969432989607, + "loss": 4.6566, + "step": 128700 + }, + { + "epoch": 0.24026181539961605, + "grad_norm": 0.8803058862686157, + "learning_rate": 0.00019951928299535775, + "loss": 4.4073, + "step": 128750 + }, + { + "epoch": 0.24035512095899453, + "grad_norm": 1.0025216341018677, + "learning_rate": 0.0001995188714851854, + "loss": 4.6065, + "step": 128800 + }, + { + "epoch": 0.24044842651837303, + "grad_norm": 0.9606624245643616, + "learning_rate": 0.0001995184597993798, + "loss": 4.6934, + "step": 128850 + }, + { + "epoch": 0.24054173207775154, + "grad_norm": 0.8765124678611755, + "learning_rate": 0.00019951804793794155, + "loss": 4.6806, + "step": 128900 + }, + { + "epoch": 0.24063503763713, + "grad_norm": 0.9700292944908142, + "learning_rate": 0.0001995176359008715, + "loss": 4.7322, + "step": 128950 + }, + { + "epoch": 0.24072834319650852, + "grad_norm": 0.7968749403953552, + "learning_rate": 0.0001995172236881703, + "loss": 4.648, + "step": 129000 + }, + { + "epoch": 0.24072834319650852, + "eval_loss": 4.864802837371826, + "eval_runtime": 229.1704, + "eval_samples_per_second": 11.38, + "eval_steps_per_second": 11.38, + "eval_tts_loss": 7.457818870576944, + "step": 129000 + }, + { + "epoch": 0.240821648755887, + "grad_norm": 0.8393458724021912, + "learning_rate": 0.00019951681129983876, + "loss": 4.6002, + "step": 129050 + }, + { + "epoch": 0.2409149543152655, + "grad_norm": 1.1570461988449097, + "learning_rate": 0.00019951639873587752, + "loss": 4.5618, + "step": 129100 + }, + { + "epoch": 0.24100825987464397, + "grad_norm": 1.0129081010818481, + "learning_rate": 0.00019951598599628735, + "loss": 4.4886, + "step": 129150 + }, + { + "epoch": 0.24110156543402247, + "grad_norm": 0.7932485342025757, + "learning_rate": 0.00019951557308106897, + "loss": 4.6398, + "step": 129200 + }, + { + "epoch": 0.24119487099340095, + "grad_norm": 0.9829528331756592, + "learning_rate": 0.00019951515999022315, + "loss": 4.6797, + "step": 129250 + }, + { + "epoch": 0.24128817655277945, + "grad_norm": 0.748111367225647, + "learning_rate": 0.00019951474672375054, + "loss": 4.5282, + "step": 129300 + }, + { + "epoch": 0.24138148211215796, + "grad_norm": 0.9890366792678833, + "learning_rate": 0.00019951433328165195, + "loss": 4.7415, + "step": 129350 + }, + { + "epoch": 0.24147478767153643, + "grad_norm": 0.8397785425186157, + "learning_rate": 0.00019951391966392806, + "loss": 4.704, + "step": 129400 + }, + { + "epoch": 0.24156809323091494, + "grad_norm": 0.9623748064041138, + "learning_rate": 0.0001995135058705796, + "loss": 4.6239, + "step": 129450 + }, + { + "epoch": 0.2416613987902934, + "grad_norm": 0.6656234860420227, + "learning_rate": 0.00019951309190160732, + "loss": 4.5185, + "step": 129500 + }, + { + "epoch": 0.24175470434967192, + "grad_norm": 1.2473630905151367, + "learning_rate": 0.00019951267775701194, + "loss": 4.6633, + "step": 129550 + }, + { + "epoch": 0.2418480099090504, + "grad_norm": 1.0328094959259033, + "learning_rate": 0.00019951226343679419, + "loss": 4.6325, + "step": 129600 + }, + { + "epoch": 0.2419413154684289, + "grad_norm": 0.8722690343856812, + "learning_rate": 0.00019951184894095481, + "loss": 4.7876, + "step": 129650 + }, + { + "epoch": 0.2420346210278074, + "grad_norm": 0.8723517060279846, + "learning_rate": 0.00019951143426949453, + "loss": 4.6523, + "step": 129700 + }, + { + "epoch": 0.24212792658718588, + "grad_norm": 0.9116722941398621, + "learning_rate": 0.00019951101942241407, + "loss": 4.6544, + "step": 129750 + }, + { + "epoch": 0.24222123214656438, + "grad_norm": 0.9696924686431885, + "learning_rate": 0.0001995106043997142, + "loss": 4.5401, + "step": 129800 + }, + { + "epoch": 0.24231453770594286, + "grad_norm": 0.9570209383964539, + "learning_rate": 0.00019951018920139556, + "loss": 4.6208, + "step": 129850 + }, + { + "epoch": 0.24240784326532136, + "grad_norm": 1.2670087814331055, + "learning_rate": 0.00019950977382745898, + "loss": 4.6398, + "step": 129900 + }, + { + "epoch": 0.24250114882469984, + "grad_norm": 0.9587291479110718, + "learning_rate": 0.00019950935827790515, + "loss": 4.6175, + "step": 129950 + }, + { + "epoch": 0.24259445438407834, + "grad_norm": 0.772126317024231, + "learning_rate": 0.00019950894255273485, + "loss": 4.4946, + "step": 130000 + }, + { + "epoch": 0.24268775994345684, + "grad_norm": 1.0134305953979492, + "learning_rate": 0.00019950852665194874, + "loss": 4.7511, + "step": 130050 + }, + { + "epoch": 0.24278106550283532, + "grad_norm": 0.9733072519302368, + "learning_rate": 0.0001995081105755476, + "loss": 4.7236, + "step": 130100 + }, + { + "epoch": 0.24287437106221382, + "grad_norm": 0.7325246334075928, + "learning_rate": 0.00019950769432353215, + "loss": 4.783, + "step": 130150 + }, + { + "epoch": 0.2429676766215923, + "grad_norm": 1.1216601133346558, + "learning_rate": 0.00019950727789590312, + "loss": 4.8321, + "step": 130200 + }, + { + "epoch": 0.2430609821809708, + "grad_norm": 1.0440672636032104, + "learning_rate": 0.00019950686129266128, + "loss": 4.7706, + "step": 130250 + }, + { + "epoch": 0.24315428774034928, + "grad_norm": 0.9024654030799866, + "learning_rate": 0.0001995064445138073, + "loss": 4.7164, + "step": 130300 + }, + { + "epoch": 0.24324759329972778, + "grad_norm": 0.8911654949188232, + "learning_rate": 0.00019950602755934197, + "loss": 4.5, + "step": 130350 + }, + { + "epoch": 0.24334089885910629, + "grad_norm": 0.9491771459579468, + "learning_rate": 0.00019950561042926602, + "loss": 4.6981, + "step": 130400 + }, + { + "epoch": 0.24343420441848476, + "grad_norm": 0.9986402988433838, + "learning_rate": 0.0001995051931235802, + "loss": 4.8321, + "step": 130450 + }, + { + "epoch": 0.24352750997786327, + "grad_norm": 0.9581927061080933, + "learning_rate": 0.0001995047756422852, + "loss": 4.7905, + "step": 130500 + }, + { + "epoch": 0.24362081553724174, + "grad_norm": 0.8729482293128967, + "learning_rate": 0.00019950435798538174, + "loss": 4.5298, + "step": 130550 + }, + { + "epoch": 0.24371412109662025, + "grad_norm": 1.103448510169983, + "learning_rate": 0.00019950394015287064, + "loss": 4.5703, + "step": 130600 + }, + { + "epoch": 0.24380742665599872, + "grad_norm": 1.0000076293945312, + "learning_rate": 0.00019950352214475258, + "loss": 4.4457, + "step": 130650 + }, + { + "epoch": 0.24390073221537723, + "grad_norm": 0.767560601234436, + "learning_rate": 0.0001995031039610283, + "loss": 5.0027, + "step": 130700 + }, + { + "epoch": 0.2439940377747557, + "grad_norm": 0.7368218898773193, + "learning_rate": 0.00019950268560169857, + "loss": 4.6973, + "step": 130750 + }, + { + "epoch": 0.2440873433341342, + "grad_norm": 0.9467607140541077, + "learning_rate": 0.0001995022670667641, + "loss": 4.4799, + "step": 130800 + }, + { + "epoch": 0.2441806488935127, + "grad_norm": 0.8919101357460022, + "learning_rate": 0.00019950184835622564, + "loss": 4.563, + "step": 130850 + }, + { + "epoch": 0.24427395445289118, + "grad_norm": 0.9824000000953674, + "learning_rate": 0.00019950142947008394, + "loss": 4.3836, + "step": 130900 + }, + { + "epoch": 0.2443672600122697, + "grad_norm": 0.9101470112800598, + "learning_rate": 0.00019950101040833973, + "loss": 4.5129, + "step": 130950 + }, + { + "epoch": 0.24446056557164816, + "grad_norm": 0.9340775609016418, + "learning_rate": 0.00019950059117099372, + "loss": 4.5774, + "step": 131000 + }, + { + "epoch": 0.24455387113102667, + "grad_norm": 0.7431853413581848, + "learning_rate": 0.00019950017175804664, + "loss": 4.6977, + "step": 131050 + }, + { + "epoch": 0.24464717669040514, + "grad_norm": 1.0146548748016357, + "learning_rate": 0.00019949975216949933, + "loss": 4.6435, + "step": 131100 + }, + { + "epoch": 0.24474048224978365, + "grad_norm": 1.0537569522857666, + "learning_rate": 0.00019949933240535244, + "loss": 4.6641, + "step": 131150 + }, + { + "epoch": 0.24483378780916215, + "grad_norm": 1.1558393239974976, + "learning_rate": 0.00019949891246560673, + "loss": 4.7927, + "step": 131200 + }, + { + "epoch": 0.24492709336854063, + "grad_norm": 0.8795566558837891, + "learning_rate": 0.00019949849235026296, + "loss": 4.7991, + "step": 131250 + }, + { + "epoch": 0.24502039892791913, + "grad_norm": 1.0229332447052002, + "learning_rate": 0.00019949807205932185, + "loss": 4.6722, + "step": 131300 + }, + { + "epoch": 0.2451137044872976, + "grad_norm": 0.985419750213623, + "learning_rate": 0.00019949765159278413, + "loss": 4.6676, + "step": 131350 + }, + { + "epoch": 0.2452070100466761, + "grad_norm": 0.8589661121368408, + "learning_rate": 0.00019949723095065062, + "loss": 4.5871, + "step": 131400 + }, + { + "epoch": 0.2453003156060546, + "grad_norm": 1.0366382598876953, + "learning_rate": 0.00019949681013292194, + "loss": 4.7374, + "step": 131450 + }, + { + "epoch": 0.2453936211654331, + "grad_norm": 1.379646897315979, + "learning_rate": 0.00019949638913959892, + "loss": 4.7077, + "step": 131500 + }, + { + "epoch": 0.2454869267248116, + "grad_norm": 1.0151523351669312, + "learning_rate": 0.0001994959679706823, + "loss": 4.6403, + "step": 131550 + }, + { + "epoch": 0.24558023228419007, + "grad_norm": 0.8096948266029358, + "learning_rate": 0.00019949554662617277, + "loss": 4.6464, + "step": 131600 + }, + { + "epoch": 0.24567353784356857, + "grad_norm": 1.1682924032211304, + "learning_rate": 0.00019949512510607115, + "loss": 4.6433, + "step": 131650 + }, + { + "epoch": 0.24576684340294705, + "grad_norm": 1.0436276197433472, + "learning_rate": 0.00019949470341037808, + "loss": 4.6682, + "step": 131700 + }, + { + "epoch": 0.24586014896232555, + "grad_norm": 1.0496689081192017, + "learning_rate": 0.00019949428153909443, + "loss": 4.7054, + "step": 131750 + }, + { + "epoch": 0.24595345452170403, + "grad_norm": 1.0121614933013916, + "learning_rate": 0.00019949385949222083, + "loss": 4.656, + "step": 131800 + }, + { + "epoch": 0.24604676008108253, + "grad_norm": 0.8388519287109375, + "learning_rate": 0.00019949343726975808, + "loss": 4.5378, + "step": 131850 + }, + { + "epoch": 0.24614006564046104, + "grad_norm": 0.9778211116790771, + "learning_rate": 0.00019949301487170694, + "loss": 4.6026, + "step": 131900 + }, + { + "epoch": 0.2462333711998395, + "grad_norm": 1.1076475381851196, + "learning_rate": 0.0001994925922980681, + "loss": 4.7711, + "step": 131950 + }, + { + "epoch": 0.24632667675921802, + "grad_norm": 1.092414379119873, + "learning_rate": 0.0001994921695488424, + "loss": 4.6508, + "step": 132000 + }, + { + "epoch": 0.24632667675921802, + "eval_loss": 4.861928939819336, + "eval_runtime": 233.537, + "eval_samples_per_second": 11.167, + "eval_steps_per_second": 11.167, + "eval_tts_loss": 7.461830320030999, + "step": 132000 + }, + { + "epoch": 0.2464199823185965, + "grad_norm": 1.5649731159210205, + "learning_rate": 0.0001994917466240305, + "loss": 4.7985, + "step": 132050 + }, + { + "epoch": 0.246513287877975, + "grad_norm": 0.7559811472892761, + "learning_rate": 0.00019949132352363317, + "loss": 4.545, + "step": 132100 + }, + { + "epoch": 0.24660659343735347, + "grad_norm": 1.1505687236785889, + "learning_rate": 0.00019949090024765114, + "loss": 4.7491, + "step": 132150 + }, + { + "epoch": 0.24669989899673198, + "grad_norm": 0.856060802936554, + "learning_rate": 0.0001994904767960852, + "loss": 4.6556, + "step": 132200 + }, + { + "epoch": 0.24679320455611045, + "grad_norm": 1.4347225427627563, + "learning_rate": 0.00019949005316893608, + "loss": 4.6111, + "step": 132250 + }, + { + "epoch": 0.24688651011548896, + "grad_norm": 1.2799773216247559, + "learning_rate": 0.0001994896293662045, + "loss": 4.8259, + "step": 132300 + }, + { + "epoch": 0.24697981567486746, + "grad_norm": 0.7314300537109375, + "learning_rate": 0.00019948920538789125, + "loss": 4.5923, + "step": 132350 + }, + { + "epoch": 0.24707312123424593, + "grad_norm": 1.4587212800979614, + "learning_rate": 0.00019948878123399703, + "loss": 4.6689, + "step": 132400 + }, + { + "epoch": 0.24716642679362444, + "grad_norm": 0.7531588077545166, + "learning_rate": 0.00019948835690452264, + "loss": 4.6059, + "step": 132450 + }, + { + "epoch": 0.24725973235300291, + "grad_norm": 1.1337380409240723, + "learning_rate": 0.00019948793239946881, + "loss": 4.6628, + "step": 132500 + }, + { + "epoch": 0.24735303791238142, + "grad_norm": 0.6850484609603882, + "learning_rate": 0.00019948750771883628, + "loss": 4.7061, + "step": 132550 + }, + { + "epoch": 0.2474463434717599, + "grad_norm": 1.0581914186477661, + "learning_rate": 0.00019948708286262581, + "loss": 4.7761, + "step": 132600 + }, + { + "epoch": 0.2475396490311384, + "grad_norm": 0.8422567844390869, + "learning_rate": 0.00019948665783083814, + "loss": 4.8732, + "step": 132650 + }, + { + "epoch": 0.2476329545905169, + "grad_norm": 1.0814651250839233, + "learning_rate": 0.000199486232623474, + "loss": 4.8206, + "step": 132700 + }, + { + "epoch": 0.24772626014989538, + "grad_norm": 1.1069611310958862, + "learning_rate": 0.00019948580724053422, + "loss": 4.8655, + "step": 132750 + }, + { + "epoch": 0.24781956570927388, + "grad_norm": 1.0033888816833496, + "learning_rate": 0.00019948538168201944, + "loss": 4.6903, + "step": 132800 + }, + { + "epoch": 0.24791287126865236, + "grad_norm": 0.9167322516441345, + "learning_rate": 0.00019948495594793048, + "loss": 4.4695, + "step": 132850 + }, + { + "epoch": 0.24800617682803086, + "grad_norm": 0.970410943031311, + "learning_rate": 0.00019948453003826807, + "loss": 4.647, + "step": 132900 + }, + { + "epoch": 0.24809948238740934, + "grad_norm": 1.0341490507125854, + "learning_rate": 0.00019948410395303302, + "loss": 4.6685, + "step": 132950 + }, + { + "epoch": 0.24819278794678784, + "grad_norm": 0.9720880389213562, + "learning_rate": 0.000199483677692226, + "loss": 4.6414, + "step": 133000 + }, + { + "epoch": 0.24828609350616634, + "grad_norm": 0.8272822499275208, + "learning_rate": 0.00019948325125584778, + "loss": 4.8563, + "step": 133050 + }, + { + "epoch": 0.24837939906554482, + "grad_norm": 1.2242487668991089, + "learning_rate": 0.0001994828246438991, + "loss": 4.6999, + "step": 133100 + }, + { + "epoch": 0.24847270462492332, + "grad_norm": 0.9121090769767761, + "learning_rate": 0.0001994823978563808, + "loss": 4.752, + "step": 133150 + }, + { + "epoch": 0.2485660101843018, + "grad_norm": 0.7485866546630859, + "learning_rate": 0.00019948197089329352, + "loss": 4.6905, + "step": 133200 + }, + { + "epoch": 0.2486593157436803, + "grad_norm": 0.6626511812210083, + "learning_rate": 0.0001994815437546381, + "loss": 4.5935, + "step": 133250 + }, + { + "epoch": 0.24875262130305878, + "grad_norm": 1.1593636274337769, + "learning_rate": 0.00019948111644041525, + "loss": 4.9399, + "step": 133300 + }, + { + "epoch": 0.24884592686243728, + "grad_norm": 1.0495115518569946, + "learning_rate": 0.00019948068895062575, + "loss": 4.6613, + "step": 133350 + }, + { + "epoch": 0.24893923242181576, + "grad_norm": 1.1008858680725098, + "learning_rate": 0.00019948026128527034, + "loss": 4.9013, + "step": 133400 + }, + { + "epoch": 0.24903253798119426, + "grad_norm": 0.784603476524353, + "learning_rate": 0.00019947983344434974, + "loss": 4.6166, + "step": 133450 + }, + { + "epoch": 0.24912584354057277, + "grad_norm": 0.9832925796508789, + "learning_rate": 0.00019947940542786477, + "loss": 4.7428, + "step": 133500 + }, + { + "epoch": 0.24921914909995124, + "grad_norm": 0.9270418286323547, + "learning_rate": 0.0001994789772358161, + "loss": 4.4206, + "step": 133550 + }, + { + "epoch": 0.24931245465932975, + "grad_norm": 1.2622601985931396, + "learning_rate": 0.00019947854886820458, + "loss": 4.6319, + "step": 133600 + }, + { + "epoch": 0.24940576021870822, + "grad_norm": 0.7396588325500488, + "learning_rate": 0.00019947812032503092, + "loss": 4.786, + "step": 133650 + }, + { + "epoch": 0.24949906577808673, + "grad_norm": 0.9550930261611938, + "learning_rate": 0.00019947769160629587, + "loss": 4.7091, + "step": 133700 + }, + { + "epoch": 0.2495923713374652, + "grad_norm": 1.0118082761764526, + "learning_rate": 0.00019947726271200025, + "loss": 4.7579, + "step": 133750 + }, + { + "epoch": 0.2496856768968437, + "grad_norm": 0.838410496711731, + "learning_rate": 0.0001994768336421447, + "loss": 4.7016, + "step": 133800 + }, + { + "epoch": 0.2497789824562222, + "grad_norm": 0.8363553881645203, + "learning_rate": 0.00019947640439673008, + "loss": 4.6482, + "step": 133850 + }, + { + "epoch": 0.24987228801560069, + "grad_norm": 1.0128004550933838, + "learning_rate": 0.0001994759749757571, + "loss": 4.8391, + "step": 133900 + }, + { + "epoch": 0.2499655935749792, + "grad_norm": 0.8684887290000916, + "learning_rate": 0.00019947554537922652, + "loss": 4.5435, + "step": 133950 + }, + { + "epoch": 0.2500588991343577, + "grad_norm": 0.8165358901023865, + "learning_rate": 0.0001994751156071391, + "loss": 4.6167, + "step": 134000 + }, + { + "epoch": 0.25015220469373617, + "grad_norm": 0.9833562970161438, + "learning_rate": 0.00019947468565949562, + "loss": 4.7929, + "step": 134050 + }, + { + "epoch": 0.25024551025311464, + "grad_norm": 0.8654858469963074, + "learning_rate": 0.00019947425553629682, + "loss": 4.8782, + "step": 134100 + }, + { + "epoch": 0.2503388158124931, + "grad_norm": 0.8595501184463501, + "learning_rate": 0.00019947382523754348, + "loss": 4.6882, + "step": 134150 + }, + { + "epoch": 0.25043212137187165, + "grad_norm": 0.9764409065246582, + "learning_rate": 0.0001994733947632363, + "loss": 4.723, + "step": 134200 + }, + { + "epoch": 0.25052542693125013, + "grad_norm": 1.361682415008545, + "learning_rate": 0.0001994729641133761, + "loss": 4.9449, + "step": 134250 + }, + { + "epoch": 0.2506187324906286, + "grad_norm": 1.0243170261383057, + "learning_rate": 0.00019947253328796364, + "loss": 4.8382, + "step": 134300 + }, + { + "epoch": 0.25071203805000714, + "grad_norm": 0.9603681564331055, + "learning_rate": 0.00019947210228699963, + "loss": 4.7538, + "step": 134350 + }, + { + "epoch": 0.2508053436093856, + "grad_norm": 0.8127009272575378, + "learning_rate": 0.0001994716711104849, + "loss": 4.74, + "step": 134400 + }, + { + "epoch": 0.2508986491687641, + "grad_norm": 1.0043110847473145, + "learning_rate": 0.00019947123975842012, + "loss": 4.6537, + "step": 134450 + }, + { + "epoch": 0.25099195472814256, + "grad_norm": 0.88910973072052, + "learning_rate": 0.00019947080823080616, + "loss": 4.6681, + "step": 134500 + }, + { + "epoch": 0.2510852602875211, + "grad_norm": 1.072106957435608, + "learning_rate": 0.00019947037652764369, + "loss": 4.6476, + "step": 134550 + }, + { + "epoch": 0.25117856584689957, + "grad_norm": 1.160860538482666, + "learning_rate": 0.0001994699446489335, + "loss": 4.8706, + "step": 134600 + }, + { + "epoch": 0.25127187140627805, + "grad_norm": 1.0427870750427246, + "learning_rate": 0.0001994695125946764, + "loss": 4.7866, + "step": 134650 + }, + { + "epoch": 0.2513651769656566, + "grad_norm": 0.8088847398757935, + "learning_rate": 0.00019946908036487307, + "loss": 4.5036, + "step": 134700 + }, + { + "epoch": 0.25145848252503505, + "grad_norm": 0.8402940034866333, + "learning_rate": 0.00019946864795952434, + "loss": 4.4938, + "step": 134750 + }, + { + "epoch": 0.25155178808441353, + "grad_norm": 0.9996683597564697, + "learning_rate": 0.00019946821537863095, + "loss": 4.6048, + "step": 134800 + }, + { + "epoch": 0.251645093643792, + "grad_norm": 0.9219832420349121, + "learning_rate": 0.00019946778262219366, + "loss": 4.6674, + "step": 134850 + }, + { + "epoch": 0.25173839920317054, + "grad_norm": 0.9935362935066223, + "learning_rate": 0.00019946734969021322, + "loss": 4.8539, + "step": 134900 + }, + { + "epoch": 0.251831704762549, + "grad_norm": 1.0310291051864624, + "learning_rate": 0.00019946691658269042, + "loss": 4.5868, + "step": 134950 + }, + { + "epoch": 0.2519250103219275, + "grad_norm": 0.9530249238014221, + "learning_rate": 0.00019946648329962602, + "loss": 4.6397, + "step": 135000 + }, + { + "epoch": 0.2519250103219275, + "eval_loss": 4.854243755340576, + "eval_runtime": 229.3514, + "eval_samples_per_second": 11.371, + "eval_steps_per_second": 11.371, + "eval_tts_loss": 7.440514219263454, + "step": 135000 + }, + { + "epoch": 0.252018315881306, + "grad_norm": 0.7310658097267151, + "learning_rate": 0.00019946604984102077, + "loss": 4.6717, + "step": 135050 + }, + { + "epoch": 0.2521116214406845, + "grad_norm": 0.7537361979484558, + "learning_rate": 0.00019946561620687544, + "loss": 4.6167, + "step": 135100 + }, + { + "epoch": 0.252204927000063, + "grad_norm": 0.9830604195594788, + "learning_rate": 0.00019946518239719082, + "loss": 4.4883, + "step": 135150 + }, + { + "epoch": 0.25229823255944145, + "grad_norm": 0.8440564870834351, + "learning_rate": 0.00019946474841196765, + "loss": 4.7197, + "step": 135200 + }, + { + "epoch": 0.25239153811882, + "grad_norm": 1.2416318655014038, + "learning_rate": 0.00019946431425120665, + "loss": 4.5972, + "step": 135250 + }, + { + "epoch": 0.25248484367819846, + "grad_norm": 1.1739615201950073, + "learning_rate": 0.00019946387991490871, + "loss": 4.4314, + "step": 135300 + }, + { + "epoch": 0.25257814923757693, + "grad_norm": 1.079363226890564, + "learning_rate": 0.0001994634454030745, + "loss": 4.7442, + "step": 135350 + }, + { + "epoch": 0.25267145479695546, + "grad_norm": 1.0851373672485352, + "learning_rate": 0.00019946301071570478, + "loss": 4.7188, + "step": 135400 + }, + { + "epoch": 0.25276476035633394, + "grad_norm": 0.8519002199172974, + "learning_rate": 0.00019946257585280036, + "loss": 4.5734, + "step": 135450 + }, + { + "epoch": 0.2528580659157124, + "grad_norm": 1.175642728805542, + "learning_rate": 0.00019946214081436204, + "loss": 4.8021, + "step": 135500 + }, + { + "epoch": 0.2529513714750909, + "grad_norm": 0.984249472618103, + "learning_rate": 0.0001994617056003905, + "loss": 4.7219, + "step": 135550 + }, + { + "epoch": 0.2530446770344694, + "grad_norm": 1.4463765621185303, + "learning_rate": 0.00019946127021088655, + "loss": 4.5299, + "step": 135600 + }, + { + "epoch": 0.2531379825938479, + "grad_norm": 1.2375439405441284, + "learning_rate": 0.00019946083464585096, + "loss": 4.6073, + "step": 135650 + }, + { + "epoch": 0.2532312881532264, + "grad_norm": 1.073440432548523, + "learning_rate": 0.0001994603989052845, + "loss": 4.5566, + "step": 135700 + }, + { + "epoch": 0.2533245937126049, + "grad_norm": 1.0174309015274048, + "learning_rate": 0.00019945996298918793, + "loss": 4.6937, + "step": 135750 + }, + { + "epoch": 0.2534178992719834, + "grad_norm": 0.9919219613075256, + "learning_rate": 0.00019945952689756204, + "loss": 4.665, + "step": 135800 + }, + { + "epoch": 0.25351120483136186, + "grad_norm": 0.8121524453163147, + "learning_rate": 0.0001994590906304076, + "loss": 4.882, + "step": 135850 + }, + { + "epoch": 0.25360451039074033, + "grad_norm": 0.5900788903236389, + "learning_rate": 0.00019945865418772533, + "loss": 4.7844, + "step": 135900 + }, + { + "epoch": 0.25369781595011887, + "grad_norm": 1.1510095596313477, + "learning_rate": 0.00019945821756951605, + "loss": 4.729, + "step": 135950 + }, + { + "epoch": 0.25379112150949734, + "grad_norm": 0.9832910895347595, + "learning_rate": 0.0001994577807757805, + "loss": 4.7004, + "step": 136000 + }, + { + "epoch": 0.2538844270688758, + "grad_norm": 0.8559871315956116, + "learning_rate": 0.0001994573438065195, + "loss": 4.7026, + "step": 136050 + }, + { + "epoch": 0.25397773262825435, + "grad_norm": 0.9402391910552979, + "learning_rate": 0.00019945690666173376, + "loss": 4.5792, + "step": 136100 + }, + { + "epoch": 0.2540710381876328, + "grad_norm": 0.9144654870033264, + "learning_rate": 0.00019945646934142408, + "loss": 4.6403, + "step": 136150 + }, + { + "epoch": 0.2541643437470113, + "grad_norm": 1.0262720584869385, + "learning_rate": 0.00019945603184559125, + "loss": 4.766, + "step": 136200 + }, + { + "epoch": 0.2542576493063898, + "grad_norm": 0.9860953092575073, + "learning_rate": 0.00019945559417423604, + "loss": 4.7118, + "step": 136250 + }, + { + "epoch": 0.2543509548657683, + "grad_norm": 0.9816266298294067, + "learning_rate": 0.00019945515632735917, + "loss": 4.6694, + "step": 136300 + }, + { + "epoch": 0.2544442604251468, + "grad_norm": 1.0225788354873657, + "learning_rate": 0.00019945471830496144, + "loss": 4.6733, + "step": 136350 + }, + { + "epoch": 0.25453756598452526, + "grad_norm": 0.8615061640739441, + "learning_rate": 0.00019945428010704368, + "loss": 4.4516, + "step": 136400 + }, + { + "epoch": 0.2546308715439038, + "grad_norm": 0.9485636353492737, + "learning_rate": 0.00019945384173360658, + "loss": 4.7265, + "step": 136450 + }, + { + "epoch": 0.25472417710328227, + "grad_norm": 0.9879016876220703, + "learning_rate": 0.00019945340318465097, + "loss": 4.6516, + "step": 136500 + }, + { + "epoch": 0.25481748266266074, + "grad_norm": 0.886114776134491, + "learning_rate": 0.00019945296446017755, + "loss": 4.7834, + "step": 136550 + }, + { + "epoch": 0.2549107882220392, + "grad_norm": 0.901164174079895, + "learning_rate": 0.00019945252556018722, + "loss": 4.7377, + "step": 136600 + }, + { + "epoch": 0.25500409378141775, + "grad_norm": 1.0700454711914062, + "learning_rate": 0.00019945208648468064, + "loss": 4.629, + "step": 136650 + }, + { + "epoch": 0.2550973993407962, + "grad_norm": 0.761396586894989, + "learning_rate": 0.00019945164723365863, + "loss": 4.5535, + "step": 136700 + }, + { + "epoch": 0.2551907049001747, + "grad_norm": 0.9955791234970093, + "learning_rate": 0.00019945120780712197, + "loss": 4.8143, + "step": 136750 + }, + { + "epoch": 0.2552840104595532, + "grad_norm": 0.7975375652313232, + "learning_rate": 0.00019945076820507144, + "loss": 4.6698, + "step": 136800 + }, + { + "epoch": 0.2553773160189317, + "grad_norm": 0.8464979529380798, + "learning_rate": 0.00019945032842750776, + "loss": 4.6989, + "step": 136850 + }, + { + "epoch": 0.2554706215783102, + "grad_norm": 0.9093438386917114, + "learning_rate": 0.0001994498884744318, + "loss": 4.7043, + "step": 136900 + }, + { + "epoch": 0.25556392713768866, + "grad_norm": 0.7675898671150208, + "learning_rate": 0.00019944944834584426, + "loss": 4.8434, + "step": 136950 + }, + { + "epoch": 0.2556572326970672, + "grad_norm": 1.0677566528320312, + "learning_rate": 0.00019944900804174593, + "loss": 4.7249, + "step": 137000 + }, + { + "epoch": 0.25575053825644567, + "grad_norm": 0.7189279794692993, + "learning_rate": 0.00019944856756213763, + "loss": 4.5614, + "step": 137050 + }, + { + "epoch": 0.25584384381582415, + "grad_norm": 1.0350892543792725, + "learning_rate": 0.0001994481269070201, + "loss": 4.963, + "step": 137100 + }, + { + "epoch": 0.2559371493752026, + "grad_norm": 0.7921010255813599, + "learning_rate": 0.0001994476860763941, + "loss": 4.5003, + "step": 137150 + }, + { + "epoch": 0.25603045493458115, + "grad_norm": 1.237960934638977, + "learning_rate": 0.00019944724507026045, + "loss": 4.6863, + "step": 137200 + }, + { + "epoch": 0.25612376049395963, + "grad_norm": 0.9462816119194031, + "learning_rate": 0.0001994468038886199, + "loss": 4.6557, + "step": 137250 + }, + { + "epoch": 0.2562170660533381, + "grad_norm": 1.1308284997940063, + "learning_rate": 0.00019944636253147325, + "loss": 4.5256, + "step": 137300 + }, + { + "epoch": 0.25631037161271664, + "grad_norm": 0.8221685886383057, + "learning_rate": 0.00019944592099882126, + "loss": 4.4556, + "step": 137350 + }, + { + "epoch": 0.2564036771720951, + "grad_norm": 0.8622978925704956, + "learning_rate": 0.00019944547929066472, + "loss": 4.5635, + "step": 137400 + }, + { + "epoch": 0.2564969827314736, + "grad_norm": 1.0377954244613647, + "learning_rate": 0.0001994450374070044, + "loss": 4.7431, + "step": 137450 + }, + { + "epoch": 0.25659028829085206, + "grad_norm": 0.9822484254837036, + "learning_rate": 0.00019944459534784113, + "loss": 4.5507, + "step": 137500 + }, + { + "epoch": 0.2566835938502306, + "grad_norm": 1.063193917274475, + "learning_rate": 0.0001994441531131756, + "loss": 4.6944, + "step": 137550 + }, + { + "epoch": 0.25677689940960907, + "grad_norm": 0.9671196937561035, + "learning_rate": 0.00019944371070300865, + "loss": 4.7503, + "step": 137600 + }, + { + "epoch": 0.25687020496898755, + "grad_norm": 1.0232065916061401, + "learning_rate": 0.00019944326811734104, + "loss": 4.4999, + "step": 137650 + }, + { + "epoch": 0.2569635105283661, + "grad_norm": 0.7935104966163635, + "learning_rate": 0.00019944282535617358, + "loss": 4.7047, + "step": 137700 + }, + { + "epoch": 0.25705681608774456, + "grad_norm": 1.06540846824646, + "learning_rate": 0.00019944238241950704, + "loss": 4.8058, + "step": 137750 + }, + { + "epoch": 0.25715012164712303, + "grad_norm": 1.0669400691986084, + "learning_rate": 0.00019944193930734216, + "loss": 4.6499, + "step": 137800 + }, + { + "epoch": 0.2572434272065015, + "grad_norm": 0.9878630042076111, + "learning_rate": 0.00019944149601967977, + "loss": 4.5647, + "step": 137850 + }, + { + "epoch": 0.25733673276588004, + "grad_norm": 1.0921598672866821, + "learning_rate": 0.00019944105255652065, + "loss": 4.6141, + "step": 137900 + }, + { + "epoch": 0.2574300383252585, + "grad_norm": 0.7695943117141724, + "learning_rate": 0.00019944060891786554, + "loss": 4.5943, + "step": 137950 + }, + { + "epoch": 0.257523343884637, + "grad_norm": 1.0480443239212036, + "learning_rate": 0.00019944016510371526, + "loss": 4.7454, + "step": 138000 + }, + { + "epoch": 0.257523343884637, + "eval_loss": 4.861461162567139, + "eval_runtime": 230.715, + "eval_samples_per_second": 11.304, + "eval_steps_per_second": 11.304, + "eval_tts_loss": 7.436345217680913, + "step": 138000 + }, + { + "epoch": 0.2576166494440155, + "grad_norm": 1.0323460102081299, + "learning_rate": 0.0001994397211140706, + "loss": 4.7979, + "step": 138050 + }, + { + "epoch": 0.257709955003394, + "grad_norm": 1.2108174562454224, + "learning_rate": 0.0001994392769489323, + "loss": 4.5913, + "step": 138100 + }, + { + "epoch": 0.2578032605627725, + "grad_norm": 0.9902898073196411, + "learning_rate": 0.0001994388326083012, + "loss": 4.4259, + "step": 138150 + }, + { + "epoch": 0.25789656612215095, + "grad_norm": 1.0889242887496948, + "learning_rate": 0.00019943838809217805, + "loss": 4.6503, + "step": 138200 + }, + { + "epoch": 0.2579898716815295, + "grad_norm": 0.991397500038147, + "learning_rate": 0.00019943794340056362, + "loss": 4.6364, + "step": 138250 + }, + { + "epoch": 0.25808317724090796, + "grad_norm": 1.141694188117981, + "learning_rate": 0.00019943749853345875, + "loss": 4.7025, + "step": 138300 + }, + { + "epoch": 0.25817648280028643, + "grad_norm": 0.8368604183197021, + "learning_rate": 0.0001994370534908642, + "loss": 4.6701, + "step": 138350 + }, + { + "epoch": 0.25826978835966496, + "grad_norm": 1.0592689514160156, + "learning_rate": 0.00019943660827278072, + "loss": 4.569, + "step": 138400 + }, + { + "epoch": 0.25836309391904344, + "grad_norm": 0.7820909023284912, + "learning_rate": 0.00019943616287920914, + "loss": 4.7223, + "step": 138450 + }, + { + "epoch": 0.2584563994784219, + "grad_norm": 1.0248945951461792, + "learning_rate": 0.0001994357173101502, + "loss": 4.6945, + "step": 138500 + }, + { + "epoch": 0.2585497050378004, + "grad_norm": 0.9100630283355713, + "learning_rate": 0.00019943527156560475, + "loss": 4.5899, + "step": 138550 + }, + { + "epoch": 0.2586430105971789, + "grad_norm": 1.1249892711639404, + "learning_rate": 0.00019943482564557352, + "loss": 4.6453, + "step": 138600 + }, + { + "epoch": 0.2587363161565574, + "grad_norm": 1.033903956413269, + "learning_rate": 0.00019943437955005733, + "loss": 4.7613, + "step": 138650 + }, + { + "epoch": 0.2588296217159359, + "grad_norm": 1.0414884090423584, + "learning_rate": 0.00019943393327905695, + "loss": 4.6118, + "step": 138700 + }, + { + "epoch": 0.2589229272753144, + "grad_norm": 0.7779209017753601, + "learning_rate": 0.00019943348683257317, + "loss": 4.7519, + "step": 138750 + }, + { + "epoch": 0.2590162328346929, + "grad_norm": 1.0751841068267822, + "learning_rate": 0.0001994330402106068, + "loss": 4.5788, + "step": 138800 + }, + { + "epoch": 0.25910953839407136, + "grad_norm": 0.7722923159599304, + "learning_rate": 0.00019943259341315857, + "loss": 4.4937, + "step": 138850 + }, + { + "epoch": 0.25920284395344984, + "grad_norm": 0.8846758008003235, + "learning_rate": 0.00019943214644022934, + "loss": 4.7343, + "step": 138900 + }, + { + "epoch": 0.25929614951282837, + "grad_norm": 1.157002329826355, + "learning_rate": 0.00019943169929181987, + "loss": 4.8432, + "step": 138950 + }, + { + "epoch": 0.25938945507220684, + "grad_norm": 0.8861170411109924, + "learning_rate": 0.00019943125196793094, + "loss": 4.7967, + "step": 139000 + }, + { + "epoch": 0.2594827606315853, + "grad_norm": 0.9354442954063416, + "learning_rate": 0.00019943080446856333, + "loss": 4.6719, + "step": 139050 + }, + { + "epoch": 0.25957606619096385, + "grad_norm": 1.0083680152893066, + "learning_rate": 0.00019943035679371787, + "loss": 4.6639, + "step": 139100 + }, + { + "epoch": 0.2596693717503423, + "grad_norm": 1.0523858070373535, + "learning_rate": 0.0001994299089433953, + "loss": 4.6234, + "step": 139150 + }, + { + "epoch": 0.2597626773097208, + "grad_norm": 0.9569562077522278, + "learning_rate": 0.00019942946091759648, + "loss": 4.4507, + "step": 139200 + }, + { + "epoch": 0.2598559828690993, + "grad_norm": 1.1821283102035522, + "learning_rate": 0.00019942901271632213, + "loss": 4.7459, + "step": 139250 + }, + { + "epoch": 0.2599492884284778, + "grad_norm": 0.9408050179481506, + "learning_rate": 0.00019942856433957304, + "loss": 4.6761, + "step": 139300 + }, + { + "epoch": 0.2600425939878563, + "grad_norm": 0.939137876033783, + "learning_rate": 0.00019942811578735006, + "loss": 4.6466, + "step": 139350 + }, + { + "epoch": 0.26013589954723476, + "grad_norm": 0.8856296539306641, + "learning_rate": 0.00019942766705965395, + "loss": 4.9821, + "step": 139400 + }, + { + "epoch": 0.26022920510661324, + "grad_norm": 0.6106601357460022, + "learning_rate": 0.00019942721815648548, + "loss": 4.5543, + "step": 139450 + }, + { + "epoch": 0.26032251066599177, + "grad_norm": 0.8569189310073853, + "learning_rate": 0.0001994267690778455, + "loss": 4.6647, + "step": 139500 + }, + { + "epoch": 0.26041581622537024, + "grad_norm": 1.102795958518982, + "learning_rate": 0.00019942631982373474, + "loss": 4.4512, + "step": 139550 + }, + { + "epoch": 0.2605091217847487, + "grad_norm": 0.9730409979820251, + "learning_rate": 0.00019942587039415403, + "loss": 4.9565, + "step": 139600 + }, + { + "epoch": 0.26060242734412725, + "grad_norm": 0.7721739411354065, + "learning_rate": 0.00019942542078910415, + "loss": 4.7068, + "step": 139650 + }, + { + "epoch": 0.26069573290350573, + "grad_norm": 0.70539790391922, + "learning_rate": 0.0001994249710085859, + "loss": 4.5592, + "step": 139700 + }, + { + "epoch": 0.2607890384628842, + "grad_norm": 1.0219758749008179, + "learning_rate": 0.00019942452105260007, + "loss": 4.6835, + "step": 139750 + }, + { + "epoch": 0.2608823440222627, + "grad_norm": 1.2075634002685547, + "learning_rate": 0.00019942407092114745, + "loss": 4.5386, + "step": 139800 + }, + { + "epoch": 0.2609756495816412, + "grad_norm": 0.8083966970443726, + "learning_rate": 0.00019942362061422885, + "loss": 4.6374, + "step": 139850 + }, + { + "epoch": 0.2610689551410197, + "grad_norm": 0.7901577353477478, + "learning_rate": 0.00019942317013184506, + "loss": 4.7071, + "step": 139900 + }, + { + "epoch": 0.26116226070039816, + "grad_norm": 1.2326618432998657, + "learning_rate": 0.00019942271947399683, + "loss": 4.6927, + "step": 139950 + }, + { + "epoch": 0.2612555662597767, + "grad_norm": 0.7666860818862915, + "learning_rate": 0.00019942226864068502, + "loss": 4.8306, + "step": 140000 + }, + { + "epoch": 0.26134887181915517, + "grad_norm": 0.9988517761230469, + "learning_rate": 0.00019942181763191042, + "loss": 4.6557, + "step": 140050 + }, + { + "epoch": 0.26144217737853365, + "grad_norm": 0.7240771651268005, + "learning_rate": 0.0001994213664476738, + "loss": 4.4553, + "step": 140100 + }, + { + "epoch": 0.2615354829379121, + "grad_norm": 1.13454270362854, + "learning_rate": 0.00019942091508797595, + "loss": 4.5737, + "step": 140150 + }, + { + "epoch": 0.26162878849729065, + "grad_norm": 0.8832876682281494, + "learning_rate": 0.00019942046355281767, + "loss": 4.7567, + "step": 140200 + }, + { + "epoch": 0.26172209405666913, + "grad_norm": 1.089231014251709, + "learning_rate": 0.00019942001184219976, + "loss": 4.5035, + "step": 140250 + }, + { + "epoch": 0.2618153996160476, + "grad_norm": 1.0818380117416382, + "learning_rate": 0.00019941955995612305, + "loss": 4.8077, + "step": 140300 + }, + { + "epoch": 0.26190870517542614, + "grad_norm": 1.0916963815689087, + "learning_rate": 0.00019941910789458825, + "loss": 4.7335, + "step": 140350 + }, + { + "epoch": 0.2620020107348046, + "grad_norm": 1.029809594154358, + "learning_rate": 0.00019941865565759628, + "loss": 4.7261, + "step": 140400 + }, + { + "epoch": 0.2620953162941831, + "grad_norm": 0.786016047000885, + "learning_rate": 0.00019941820324514785, + "loss": 4.6044, + "step": 140450 + }, + { + "epoch": 0.26218862185356157, + "grad_norm": 0.9927809834480286, + "learning_rate": 0.0001994177506572438, + "loss": 4.5903, + "step": 140500 + }, + { + "epoch": 0.2622819274129401, + "grad_norm": 1.1766144037246704, + "learning_rate": 0.0001994172978938849, + "loss": 4.4297, + "step": 140550 + }, + { + "epoch": 0.2623752329723186, + "grad_norm": 0.8965726494789124, + "learning_rate": 0.00019941684495507198, + "loss": 4.7621, + "step": 140600 + }, + { + "epoch": 0.26246853853169705, + "grad_norm": 0.9904941916465759, + "learning_rate": 0.00019941639184080582, + "loss": 4.5519, + "step": 140650 + }, + { + "epoch": 0.2625618440910756, + "grad_norm": 0.9716530442237854, + "learning_rate": 0.0001994159385510872, + "loss": 4.6065, + "step": 140700 + }, + { + "epoch": 0.26265514965045406, + "grad_norm": 0.7707858085632324, + "learning_rate": 0.00019941548508591695, + "loss": 4.6938, + "step": 140750 + }, + { + "epoch": 0.26274845520983253, + "grad_norm": 1.0510951280593872, + "learning_rate": 0.00019941503144529587, + "loss": 4.8028, + "step": 140800 + }, + { + "epoch": 0.262841760769211, + "grad_norm": 0.8451001048088074, + "learning_rate": 0.00019941457762922475, + "loss": 4.4955, + "step": 140850 + }, + { + "epoch": 0.26293506632858954, + "grad_norm": 0.8992056250572205, + "learning_rate": 0.00019941412363770438, + "loss": 4.6839, + "step": 140900 + }, + { + "epoch": 0.263028371887968, + "grad_norm": 0.8834390640258789, + "learning_rate": 0.00019941366947073559, + "loss": 4.6869, + "step": 140950 + }, + { + "epoch": 0.2631216774473465, + "grad_norm": 1.1744734048843384, + "learning_rate": 0.00019941321512831914, + "loss": 4.6612, + "step": 141000 + }, + { + "epoch": 0.2631216774473465, + "eval_loss": 4.854221820831299, + "eval_runtime": 230.4865, + "eval_samples_per_second": 11.315, + "eval_steps_per_second": 11.315, + "eval_tts_loss": 7.429972851733202, + "step": 141000 + }, + { + "epoch": 0.263214983006725, + "grad_norm": 1.01349675655365, + "learning_rate": 0.00019941276061045588, + "loss": 4.7109, + "step": 141050 + }, + { + "epoch": 0.2633082885661035, + "grad_norm": 0.7250033617019653, + "learning_rate": 0.0001994123059171466, + "loss": 4.6136, + "step": 141100 + }, + { + "epoch": 0.263401594125482, + "grad_norm": 0.8900704979896545, + "learning_rate": 0.00019941185104839208, + "loss": 4.4201, + "step": 141150 + }, + { + "epoch": 0.26349489968486045, + "grad_norm": 0.9894741773605347, + "learning_rate": 0.00019941139600419315, + "loss": 4.5358, + "step": 141200 + }, + { + "epoch": 0.263588205244239, + "grad_norm": 0.728403627872467, + "learning_rate": 0.00019941094078455057, + "loss": 4.4723, + "step": 141250 + }, + { + "epoch": 0.26368151080361746, + "grad_norm": 0.9740440249443054, + "learning_rate": 0.00019941048538946516, + "loss": 4.9644, + "step": 141300 + }, + { + "epoch": 0.26377481636299593, + "grad_norm": 0.9161767959594727, + "learning_rate": 0.00019941002981893778, + "loss": 4.7106, + "step": 141350 + }, + { + "epoch": 0.26386812192237447, + "grad_norm": 1.2737964391708374, + "learning_rate": 0.00019940957407296918, + "loss": 4.569, + "step": 141400 + }, + { + "epoch": 0.26396142748175294, + "grad_norm": 0.902451753616333, + "learning_rate": 0.00019940911815156012, + "loss": 4.6479, + "step": 141450 + }, + { + "epoch": 0.2640547330411314, + "grad_norm": 0.8375920653343201, + "learning_rate": 0.00019940866205471152, + "loss": 4.7673, + "step": 141500 + }, + { + "epoch": 0.2641480386005099, + "grad_norm": 0.6975705027580261, + "learning_rate": 0.0001994082057824241, + "loss": 4.638, + "step": 141550 + }, + { + "epoch": 0.2642413441598884, + "grad_norm": 1.0885804891586304, + "learning_rate": 0.00019940774933469867, + "loss": 4.7292, + "step": 141600 + }, + { + "epoch": 0.2643346497192669, + "grad_norm": 1.1714972257614136, + "learning_rate": 0.00019940729271153608, + "loss": 4.7405, + "step": 141650 + }, + { + "epoch": 0.2644279552786454, + "grad_norm": 1.126559853553772, + "learning_rate": 0.00019940683591293713, + "loss": 4.58, + "step": 141700 + }, + { + "epoch": 0.2645212608380239, + "grad_norm": 1.1203880310058594, + "learning_rate": 0.00019940637893890256, + "loss": 4.6255, + "step": 141750 + }, + { + "epoch": 0.2646145663974024, + "grad_norm": 0.9070851802825928, + "learning_rate": 0.00019940592178943324, + "loss": 4.8198, + "step": 141800 + }, + { + "epoch": 0.26470787195678086, + "grad_norm": 0.8276423215866089, + "learning_rate": 0.00019940546446452998, + "loss": 4.6199, + "step": 141850 + }, + { + "epoch": 0.26480117751615934, + "grad_norm": 1.2383900880813599, + "learning_rate": 0.0001994050069641935, + "loss": 4.6207, + "step": 141900 + }, + { + "epoch": 0.26489448307553787, + "grad_norm": 0.9433544278144836, + "learning_rate": 0.00019940454928842475, + "loss": 4.7679, + "step": 141950 + }, + { + "epoch": 0.26498778863491634, + "grad_norm": 1.12177574634552, + "learning_rate": 0.00019940409143722444, + "loss": 4.8803, + "step": 142000 + }, + { + "epoch": 0.2650810941942948, + "grad_norm": 0.8541791439056396, + "learning_rate": 0.00019940363341059336, + "loss": 4.7773, + "step": 142050 + }, + { + "epoch": 0.2651743997536733, + "grad_norm": 1.2318682670593262, + "learning_rate": 0.0001994031752085324, + "loss": 4.7115, + "step": 142100 + }, + { + "epoch": 0.2652677053130518, + "grad_norm": 1.1669272184371948, + "learning_rate": 0.0001994027168310423, + "loss": 4.6872, + "step": 142150 + }, + { + "epoch": 0.2653610108724303, + "grad_norm": 0.7266700267791748, + "learning_rate": 0.0001994022582781239, + "loss": 4.4811, + "step": 142200 + }, + { + "epoch": 0.2654543164318088, + "grad_norm": 0.6903916001319885, + "learning_rate": 0.000199401799549778, + "loss": 4.7914, + "step": 142250 + }, + { + "epoch": 0.2655476219911873, + "grad_norm": 1.0394556522369385, + "learning_rate": 0.00019940134064600547, + "loss": 4.686, + "step": 142300 + }, + { + "epoch": 0.2656409275505658, + "grad_norm": 1.02165687084198, + "learning_rate": 0.00019940088156680702, + "loss": 4.5818, + "step": 142350 + }, + { + "epoch": 0.26573423310994426, + "grad_norm": 0.8454614281654358, + "learning_rate": 0.0001994004223121835, + "loss": 4.6726, + "step": 142400 + }, + { + "epoch": 0.26582753866932274, + "grad_norm": 1.206411600112915, + "learning_rate": 0.00019939996288213574, + "loss": 4.6144, + "step": 142450 + }, + { + "epoch": 0.26592084422870127, + "grad_norm": 0.9744541049003601, + "learning_rate": 0.00019939950327666452, + "loss": 4.902, + "step": 142500 + }, + { + "epoch": 0.26601414978807975, + "grad_norm": 0.7370103001594543, + "learning_rate": 0.00019939904349577069, + "loss": 4.5981, + "step": 142550 + }, + { + "epoch": 0.2661074553474582, + "grad_norm": 0.9200401902198792, + "learning_rate": 0.000199398583539455, + "loss": 4.5697, + "step": 142600 + }, + { + "epoch": 0.26620076090683675, + "grad_norm": 1.0769352912902832, + "learning_rate": 0.00019939812340771833, + "loss": 4.7748, + "step": 142650 + }, + { + "epoch": 0.26629406646621523, + "grad_norm": 1.1400402784347534, + "learning_rate": 0.00019939766310056144, + "loss": 4.7504, + "step": 142700 + }, + { + "epoch": 0.2663873720255937, + "grad_norm": 0.8837327361106873, + "learning_rate": 0.0001993972026179852, + "loss": 4.7623, + "step": 142750 + }, + { + "epoch": 0.2664806775849722, + "grad_norm": 0.9852148294448853, + "learning_rate": 0.00019939674195999034, + "loss": 4.7179, + "step": 142800 + }, + { + "epoch": 0.2665739831443507, + "grad_norm": 1.1082642078399658, + "learning_rate": 0.00019939628112657776, + "loss": 4.6306, + "step": 142850 + }, + { + "epoch": 0.2666672887037292, + "grad_norm": 1.2532259225845337, + "learning_rate": 0.0001993958201177482, + "loss": 4.7182, + "step": 142900 + }, + { + "epoch": 0.26676059426310766, + "grad_norm": 1.113945484161377, + "learning_rate": 0.00019939535893350252, + "loss": 4.7235, + "step": 142950 + }, + { + "epoch": 0.2668538998224862, + "grad_norm": 0.853884220123291, + "learning_rate": 0.00019939489757384154, + "loss": 4.7894, + "step": 143000 + }, + { + "epoch": 0.26694720538186467, + "grad_norm": 1.0670225620269775, + "learning_rate": 0.00019939443603876603, + "loss": 4.6292, + "step": 143050 + }, + { + "epoch": 0.26704051094124315, + "grad_norm": 1.090808629989624, + "learning_rate": 0.00019939397432827685, + "loss": 4.7517, + "step": 143100 + }, + { + "epoch": 0.2671338165006216, + "grad_norm": 0.8925207853317261, + "learning_rate": 0.0001993935124423748, + "loss": 4.5034, + "step": 143150 + }, + { + "epoch": 0.26722712206000016, + "grad_norm": 0.7665544748306274, + "learning_rate": 0.00019939305038106064, + "loss": 4.8442, + "step": 143200 + }, + { + "epoch": 0.26732042761937863, + "grad_norm": 1.0686653852462769, + "learning_rate": 0.00019939258814433527, + "loss": 4.5459, + "step": 143250 + }, + { + "epoch": 0.2674137331787571, + "grad_norm": 1.0229724645614624, + "learning_rate": 0.00019939212573219948, + "loss": 4.7637, + "step": 143300 + }, + { + "epoch": 0.26750703873813564, + "grad_norm": 1.1056723594665527, + "learning_rate": 0.00019939166314465406, + "loss": 4.4069, + "step": 143350 + }, + { + "epoch": 0.2676003442975141, + "grad_norm": 0.7907326221466064, + "learning_rate": 0.00019939120038169983, + "loss": 4.6325, + "step": 143400 + }, + { + "epoch": 0.2676936498568926, + "grad_norm": 1.0753734111785889, + "learning_rate": 0.00019939073744333765, + "loss": 4.6872, + "step": 143450 + }, + { + "epoch": 0.26778695541627107, + "grad_norm": 1.119105339050293, + "learning_rate": 0.00019939027432956828, + "loss": 4.64, + "step": 143500 + }, + { + "epoch": 0.2678802609756496, + "grad_norm": 0.839352011680603, + "learning_rate": 0.00019938981104039257, + "loss": 4.6258, + "step": 143550 + }, + { + "epoch": 0.2679735665350281, + "grad_norm": 1.0468686819076538, + "learning_rate": 0.00019938934757581132, + "loss": 4.6909, + "step": 143600 + }, + { + "epoch": 0.26806687209440655, + "grad_norm": 1.086549162864685, + "learning_rate": 0.0001993888839358254, + "loss": 4.6586, + "step": 143650 + }, + { + "epoch": 0.2681601776537851, + "grad_norm": 0.9070858359336853, + "learning_rate": 0.0001993884201204356, + "loss": 4.8447, + "step": 143700 + }, + { + "epoch": 0.26825348321316356, + "grad_norm": 1.0874264240264893, + "learning_rate": 0.00019938795612964267, + "loss": 4.6228, + "step": 143750 + }, + { + "epoch": 0.26834678877254203, + "grad_norm": 0.9141344428062439, + "learning_rate": 0.0001993874919634475, + "loss": 4.6042, + "step": 143800 + }, + { + "epoch": 0.2684400943319205, + "grad_norm": 1.0117084980010986, + "learning_rate": 0.0001993870276218509, + "loss": 4.5892, + "step": 143850 + }, + { + "epoch": 0.26853339989129904, + "grad_norm": 1.1900421380996704, + "learning_rate": 0.0001993865631048537, + "loss": 4.6517, + "step": 143900 + }, + { + "epoch": 0.2686267054506775, + "grad_norm": 1.108844518661499, + "learning_rate": 0.0001993860984124567, + "loss": 4.6209, + "step": 143950 + }, + { + "epoch": 0.268720011010056, + "grad_norm": 0.9162120223045349, + "learning_rate": 0.00019938563354466072, + "loss": 4.7472, + "step": 144000 + }, + { + "epoch": 0.268720011010056, + "eval_loss": 4.846428394317627, + "eval_runtime": 231.517, + "eval_samples_per_second": 11.265, + "eval_steps_per_second": 11.265, + "eval_tts_loss": 7.418669227682099, + "step": 144000 + }, + { + "epoch": 0.2688133165694345, + "grad_norm": 0.8355111479759216, + "learning_rate": 0.00019938516850146655, + "loss": 4.5845, + "step": 144050 + }, + { + "epoch": 0.268906622128813, + "grad_norm": 0.6543982625007629, + "learning_rate": 0.0001993847032828751, + "loss": 4.5369, + "step": 144100 + }, + { + "epoch": 0.2689999276881915, + "grad_norm": 0.947825014591217, + "learning_rate": 0.0001993842378888871, + "loss": 4.6122, + "step": 144150 + }, + { + "epoch": 0.26909323324756995, + "grad_norm": 1.1164684295654297, + "learning_rate": 0.0001993837723195034, + "loss": 4.6844, + "step": 144200 + }, + { + "epoch": 0.2691865388069485, + "grad_norm": 0.8201596736907959, + "learning_rate": 0.00019938330657472486, + "loss": 4.6992, + "step": 144250 + }, + { + "epoch": 0.26927984436632696, + "grad_norm": 1.0226540565490723, + "learning_rate": 0.00019938284065455225, + "loss": 4.8901, + "step": 144300 + }, + { + "epoch": 0.26937314992570544, + "grad_norm": 0.8372309803962708, + "learning_rate": 0.0001993823745589864, + "loss": 4.776, + "step": 144350 + }, + { + "epoch": 0.26946645548508397, + "grad_norm": 0.6700859069824219, + "learning_rate": 0.0001993819082880282, + "loss": 4.5808, + "step": 144400 + }, + { + "epoch": 0.26955976104446244, + "grad_norm": 0.9853873252868652, + "learning_rate": 0.00019938144184167836, + "loss": 4.7041, + "step": 144450 + }, + { + "epoch": 0.2696530666038409, + "grad_norm": 1.0174555778503418, + "learning_rate": 0.0001993809752199378, + "loss": 4.6368, + "step": 144500 + }, + { + "epoch": 0.2697463721632194, + "grad_norm": 1.0446439981460571, + "learning_rate": 0.00019938050842280728, + "loss": 4.8199, + "step": 144550 + }, + { + "epoch": 0.2698396777225979, + "grad_norm": 1.1562541723251343, + "learning_rate": 0.00019938004145028768, + "loss": 4.8258, + "step": 144600 + }, + { + "epoch": 0.2699329832819764, + "grad_norm": 1.0433145761489868, + "learning_rate": 0.00019937957430237976, + "loss": 4.5957, + "step": 144650 + }, + { + "epoch": 0.2700262888413549, + "grad_norm": 0.8568206429481506, + "learning_rate": 0.0001993791069790844, + "loss": 4.6801, + "step": 144700 + }, + { + "epoch": 0.2701195944007334, + "grad_norm": 1.0583845376968384, + "learning_rate": 0.0001993786394804024, + "loss": 4.7475, + "step": 144750 + }, + { + "epoch": 0.2702128999601119, + "grad_norm": 1.1356602907180786, + "learning_rate": 0.00019937817180633457, + "loss": 4.5852, + "step": 144800 + }, + { + "epoch": 0.27030620551949036, + "grad_norm": 1.091326117515564, + "learning_rate": 0.00019937770395688179, + "loss": 4.6252, + "step": 144850 + }, + { + "epoch": 0.27039951107886884, + "grad_norm": 1.135559320449829, + "learning_rate": 0.0001993772359320448, + "loss": 4.7506, + "step": 144900 + }, + { + "epoch": 0.27049281663824737, + "grad_norm": 0.910708487033844, + "learning_rate": 0.0001993767677318245, + "loss": 4.6904, + "step": 144950 + }, + { + "epoch": 0.27058612219762584, + "grad_norm": 1.0501621961593628, + "learning_rate": 0.0001993762993562217, + "loss": 4.8702, + "step": 145000 + }, + { + "epoch": 0.2706794277570043, + "grad_norm": 0.691247820854187, + "learning_rate": 0.0001993758308052372, + "loss": 4.666, + "step": 145050 + }, + { + "epoch": 0.2707727333163828, + "grad_norm": 0.9486430287361145, + "learning_rate": 0.00019937536207887183, + "loss": 4.7462, + "step": 145100 + }, + { + "epoch": 0.27086603887576133, + "grad_norm": 0.9459395408630371, + "learning_rate": 0.00019937489317712646, + "loss": 4.8276, + "step": 145150 + }, + { + "epoch": 0.2709593444351398, + "grad_norm": 1.2889015674591064, + "learning_rate": 0.00019937442410000185, + "loss": 4.6273, + "step": 145200 + }, + { + "epoch": 0.2710526499945183, + "grad_norm": 1.114570140838623, + "learning_rate": 0.00019937395484749892, + "loss": 4.5248, + "step": 145250 + }, + { + "epoch": 0.2711459555538968, + "grad_norm": 1.1005711555480957, + "learning_rate": 0.00019937348541961843, + "loss": 4.6502, + "step": 145300 + }, + { + "epoch": 0.2712392611132753, + "grad_norm": 0.806015133857727, + "learning_rate": 0.00019937301581636122, + "loss": 4.6222, + "step": 145350 + }, + { + "epoch": 0.27133256667265376, + "grad_norm": 0.9318636059761047, + "learning_rate": 0.00019937254603772812, + "loss": 4.5528, + "step": 145400 + }, + { + "epoch": 0.27142587223203224, + "grad_norm": 0.697272539138794, + "learning_rate": 0.00019937207608371996, + "loss": 4.7631, + "step": 145450 + }, + { + "epoch": 0.27151917779141077, + "grad_norm": 0.9297815561294556, + "learning_rate": 0.00019937160595433757, + "loss": 4.5448, + "step": 145500 + }, + { + "epoch": 0.27161248335078925, + "grad_norm": 1.144652247428894, + "learning_rate": 0.00019937113564958176, + "loss": 4.6709, + "step": 145550 + }, + { + "epoch": 0.2717057889101677, + "grad_norm": 0.9392854571342468, + "learning_rate": 0.0001993706651694534, + "loss": 4.7134, + "step": 145600 + }, + { + "epoch": 0.27179909446954625, + "grad_norm": 0.9400250315666199, + "learning_rate": 0.00019937019451395332, + "loss": 4.5493, + "step": 145650 + }, + { + "epoch": 0.27189240002892473, + "grad_norm": 1.1566085815429688, + "learning_rate": 0.0001993697236830823, + "loss": 4.939, + "step": 145700 + }, + { + "epoch": 0.2719857055883032, + "grad_norm": 0.7615442872047424, + "learning_rate": 0.00019936925267684122, + "loss": 4.7826, + "step": 145750 + }, + { + "epoch": 0.2720790111476817, + "grad_norm": 0.8968701362609863, + "learning_rate": 0.0001993687814952309, + "loss": 4.5258, + "step": 145800 + }, + { + "epoch": 0.2721723167070602, + "grad_norm": 1.0293611288070679, + "learning_rate": 0.00019936831013825214, + "loss": 4.6597, + "step": 145850 + }, + { + "epoch": 0.2722656222664387, + "grad_norm": 0.9446884989738464, + "learning_rate": 0.00019936783860590582, + "loss": 4.6033, + "step": 145900 + }, + { + "epoch": 0.27235892782581717, + "grad_norm": 1.0046056509017944, + "learning_rate": 0.00019936736689819275, + "loss": 4.5402, + "step": 145950 + }, + { + "epoch": 0.2724522333851957, + "grad_norm": 0.7611010074615479, + "learning_rate": 0.00019936689501511375, + "loss": 4.6937, + "step": 146000 + }, + { + "epoch": 0.2725455389445742, + "grad_norm": 0.9381779432296753, + "learning_rate": 0.00019936642295666966, + "loss": 4.5509, + "step": 146050 + }, + { + "epoch": 0.27263884450395265, + "grad_norm": 0.862014889717102, + "learning_rate": 0.00019936595072286134, + "loss": 4.6046, + "step": 146100 + }, + { + "epoch": 0.2727321500633311, + "grad_norm": 1.1331309080123901, + "learning_rate": 0.00019936547831368956, + "loss": 4.4339, + "step": 146150 + }, + { + "epoch": 0.27282545562270966, + "grad_norm": 0.9985886812210083, + "learning_rate": 0.00019936500572915523, + "loss": 4.7718, + "step": 146200 + }, + { + "epoch": 0.27291876118208813, + "grad_norm": 1.0175446271896362, + "learning_rate": 0.00019936453296925914, + "loss": 4.5348, + "step": 146250 + }, + { + "epoch": 0.2730120667414666, + "grad_norm": 1.1573448181152344, + "learning_rate": 0.00019936406003400215, + "loss": 4.5544, + "step": 146300 + }, + { + "epoch": 0.27310537230084514, + "grad_norm": 0.7975154519081116, + "learning_rate": 0.00019936358692338506, + "loss": 4.6486, + "step": 146350 + }, + { + "epoch": 0.2731986778602236, + "grad_norm": 0.9577890038490295, + "learning_rate": 0.0001993631136374087, + "loss": 4.661, + "step": 146400 + }, + { + "epoch": 0.2732919834196021, + "grad_norm": 1.0218697786331177, + "learning_rate": 0.00019936264017607398, + "loss": 4.6003, + "step": 146450 + }, + { + "epoch": 0.27338528897898057, + "grad_norm": 1.3306013345718384, + "learning_rate": 0.00019936216653938162, + "loss": 4.7019, + "step": 146500 + }, + { + "epoch": 0.2734785945383591, + "grad_norm": 1.0977168083190918, + "learning_rate": 0.00019936169272733256, + "loss": 4.7338, + "step": 146550 + }, + { + "epoch": 0.2735719000977376, + "grad_norm": 1.1367696523666382, + "learning_rate": 0.0001993612187399276, + "loss": 4.9661, + "step": 146600 + }, + { + "epoch": 0.27366520565711605, + "grad_norm": 1.0126125812530518, + "learning_rate": 0.00019936074457716757, + "loss": 4.7346, + "step": 146650 + }, + { + "epoch": 0.2737585112164946, + "grad_norm": 0.9203944802284241, + "learning_rate": 0.0001993602702390533, + "loss": 4.7422, + "step": 146700 + }, + { + "epoch": 0.27385181677587306, + "grad_norm": 0.8814552426338196, + "learning_rate": 0.00019935979572558563, + "loss": 4.5732, + "step": 146750 + }, + { + "epoch": 0.27394512233525153, + "grad_norm": 0.9684024453163147, + "learning_rate": 0.00019935932103676542, + "loss": 4.6788, + "step": 146800 + }, + { + "epoch": 0.27403842789463, + "grad_norm": 0.8495442271232605, + "learning_rate": 0.00019935884617259349, + "loss": 4.4585, + "step": 146850 + }, + { + "epoch": 0.27413173345400854, + "grad_norm": 0.5965443849563599, + "learning_rate": 0.00019935837113307066, + "loss": 4.4931, + "step": 146900 + }, + { + "epoch": 0.274225039013387, + "grad_norm": 0.9253427386283875, + "learning_rate": 0.00019935789591819778, + "loss": 4.7332, + "step": 146950 + }, + { + "epoch": 0.2743183445727655, + "grad_norm": 0.959456205368042, + "learning_rate": 0.0001993574205279757, + "loss": 4.6347, + "step": 147000 + }, + { + "epoch": 0.2743183445727655, + "eval_loss": 4.84540319442749, + "eval_runtime": 228.6502, + "eval_samples_per_second": 11.406, + "eval_steps_per_second": 11.406, + "eval_tts_loss": 7.421910372788395, + "step": 147000 + }, + { + "epoch": 0.274411650132144, + "grad_norm": 0.8804351687431335, + "learning_rate": 0.0001993569449624053, + "loss": 4.5913, + "step": 147050 + }, + { + "epoch": 0.2745049556915225, + "grad_norm": 1.257071852684021, + "learning_rate": 0.00019935646922148733, + "loss": 4.6649, + "step": 147100 + }, + { + "epoch": 0.274598261250901, + "grad_norm": 1.0313574075698853, + "learning_rate": 0.0001993559933052227, + "loss": 4.6562, + "step": 147150 + }, + { + "epoch": 0.27469156681027945, + "grad_norm": 0.9633307456970215, + "learning_rate": 0.0001993555172136122, + "loss": 4.6321, + "step": 147200 + }, + { + "epoch": 0.274784872369658, + "grad_norm": 0.7918700575828552, + "learning_rate": 0.0001993550409466567, + "loss": 4.7728, + "step": 147250 + }, + { + "epoch": 0.27487817792903646, + "grad_norm": 0.6969190239906311, + "learning_rate": 0.00019935456450435703, + "loss": 4.7156, + "step": 147300 + }, + { + "epoch": 0.27497148348841494, + "grad_norm": 1.4013220071792603, + "learning_rate": 0.00019935408788671405, + "loss": 4.5602, + "step": 147350 + }, + { + "epoch": 0.27506478904779347, + "grad_norm": 1.0779790878295898, + "learning_rate": 0.00019935361109372859, + "loss": 4.7721, + "step": 147400 + }, + { + "epoch": 0.27515809460717194, + "grad_norm": 1.0877084732055664, + "learning_rate": 0.00019935313412540147, + "loss": 4.7129, + "step": 147450 + }, + { + "epoch": 0.2752514001665504, + "grad_norm": 1.1705236434936523, + "learning_rate": 0.00019935265698173358, + "loss": 4.7317, + "step": 147500 + }, + { + "epoch": 0.2753447057259289, + "grad_norm": 1.0509397983551025, + "learning_rate": 0.0001993521796627257, + "loss": 4.7795, + "step": 147550 + }, + { + "epoch": 0.2754380112853074, + "grad_norm": 1.1017162799835205, + "learning_rate": 0.00019935170216837873, + "loss": 4.7745, + "step": 147600 + }, + { + "epoch": 0.2755313168446859, + "grad_norm": 0.9443593621253967, + "learning_rate": 0.0001993512244986935, + "loss": 4.6332, + "step": 147650 + }, + { + "epoch": 0.2756246224040644, + "grad_norm": 1.0674967765808105, + "learning_rate": 0.00019935074665367077, + "loss": 4.721, + "step": 147700 + }, + { + "epoch": 0.27571792796344285, + "grad_norm": 0.976577877998352, + "learning_rate": 0.00019935026863331153, + "loss": 4.5112, + "step": 147750 + }, + { + "epoch": 0.2758112335228214, + "grad_norm": 0.9510547518730164, + "learning_rate": 0.0001993497904376165, + "loss": 4.8486, + "step": 147800 + }, + { + "epoch": 0.27590453908219986, + "grad_norm": 1.0854049921035767, + "learning_rate": 0.00019934931206658658, + "loss": 4.4141, + "step": 147850 + }, + { + "epoch": 0.27599784464157834, + "grad_norm": 1.5401407480239868, + "learning_rate": 0.0001993488335202226, + "loss": 4.741, + "step": 147900 + }, + { + "epoch": 0.27609115020095687, + "grad_norm": 1.0566352605819702, + "learning_rate": 0.00019934835479852546, + "loss": 4.9034, + "step": 147950 + }, + { + "epoch": 0.27618445576033535, + "grad_norm": 1.0547597408294678, + "learning_rate": 0.0001993478759014959, + "loss": 4.6906, + "step": 148000 + }, + { + "epoch": 0.2762777613197138, + "grad_norm": 0.8163520097732544, + "learning_rate": 0.00019934739682913485, + "loss": 4.8077, + "step": 148050 + }, + { + "epoch": 0.2763710668790923, + "grad_norm": 1.1130653619766235, + "learning_rate": 0.00019934691758144308, + "loss": 4.7433, + "step": 148100 + }, + { + "epoch": 0.27646437243847083, + "grad_norm": 0.769314706325531, + "learning_rate": 0.00019934643815842153, + "loss": 4.6217, + "step": 148150 + }, + { + "epoch": 0.2765576779978493, + "grad_norm": 1.1348406076431274, + "learning_rate": 0.000199345958560071, + "loss": 4.5468, + "step": 148200 + }, + { + "epoch": 0.2766509835572278, + "grad_norm": 0.9563210010528564, + "learning_rate": 0.00019934547878639228, + "loss": 4.6116, + "step": 148250 + }, + { + "epoch": 0.2767442891166063, + "grad_norm": 0.9107294082641602, + "learning_rate": 0.00019934499883738632, + "loss": 4.8486, + "step": 148300 + }, + { + "epoch": 0.2768375946759848, + "grad_norm": 1.0701485872268677, + "learning_rate": 0.0001993445187130539, + "loss": 4.7653, + "step": 148350 + }, + { + "epoch": 0.27693090023536326, + "grad_norm": 1.1094465255737305, + "learning_rate": 0.00019934403841339587, + "loss": 4.451, + "step": 148400 + }, + { + "epoch": 0.27702420579474174, + "grad_norm": 1.283995270729065, + "learning_rate": 0.00019934355793841313, + "loss": 4.8218, + "step": 148450 + }, + { + "epoch": 0.27711751135412027, + "grad_norm": 0.9863646030426025, + "learning_rate": 0.00019934307728810646, + "loss": 4.831, + "step": 148500 + }, + { + "epoch": 0.27721081691349875, + "grad_norm": 1.0133435726165771, + "learning_rate": 0.00019934259646247674, + "loss": 4.4905, + "step": 148550 + }, + { + "epoch": 0.2773041224728772, + "grad_norm": 0.9534834623336792, + "learning_rate": 0.00019934211546152484, + "loss": 4.7916, + "step": 148600 + }, + { + "epoch": 0.27739742803225576, + "grad_norm": 1.2967839241027832, + "learning_rate": 0.00019934163428525156, + "loss": 4.7967, + "step": 148650 + }, + { + "epoch": 0.27749073359163423, + "grad_norm": 0.869767427444458, + "learning_rate": 0.00019934115293365778, + "loss": 4.7234, + "step": 148700 + }, + { + "epoch": 0.2775840391510127, + "grad_norm": 0.9812904000282288, + "learning_rate": 0.00019934067140674436, + "loss": 4.4395, + "step": 148750 + }, + { + "epoch": 0.2776773447103912, + "grad_norm": 0.9434531331062317, + "learning_rate": 0.0001993401897045121, + "loss": 4.6072, + "step": 148800 + }, + { + "epoch": 0.2777706502697697, + "grad_norm": 0.6807860732078552, + "learning_rate": 0.00019933970782696193, + "loss": 4.6445, + "step": 148850 + }, + { + "epoch": 0.2778639558291482, + "grad_norm": 0.8833329677581787, + "learning_rate": 0.00019933922577409463, + "loss": 4.4624, + "step": 148900 + }, + { + "epoch": 0.27795726138852667, + "grad_norm": 1.2914021015167236, + "learning_rate": 0.0001993387435459111, + "loss": 4.8027, + "step": 148950 + }, + { + "epoch": 0.2780505669479052, + "grad_norm": 1.0718110799789429, + "learning_rate": 0.00019933826114241213, + "loss": 4.6699, + "step": 149000 + }, + { + "epoch": 0.2781438725072837, + "grad_norm": 0.8196322321891785, + "learning_rate": 0.00019933777856359863, + "loss": 4.5286, + "step": 149050 + }, + { + "epoch": 0.27823717806666215, + "grad_norm": 1.0307577848434448, + "learning_rate": 0.00019933729580947143, + "loss": 4.6294, + "step": 149100 + }, + { + "epoch": 0.2783304836260406, + "grad_norm": 0.874259889125824, + "learning_rate": 0.00019933681288003137, + "loss": 4.6817, + "step": 149150 + }, + { + "epoch": 0.27842378918541916, + "grad_norm": 0.8164685964584351, + "learning_rate": 0.0001993363297752793, + "loss": 4.6156, + "step": 149200 + }, + { + "epoch": 0.27851709474479763, + "grad_norm": 1.4516046047210693, + "learning_rate": 0.0001993358464952161, + "loss": 4.8161, + "step": 149250 + }, + { + "epoch": 0.2786104003041761, + "grad_norm": 1.4525372982025146, + "learning_rate": 0.00019933536303984263, + "loss": 4.6891, + "step": 149300 + }, + { + "epoch": 0.27870370586355464, + "grad_norm": 1.0817906856536865, + "learning_rate": 0.0001993348794091597, + "loss": 4.9178, + "step": 149350 + }, + { + "epoch": 0.2787970114229331, + "grad_norm": 0.8734576106071472, + "learning_rate": 0.0001993343956031682, + "loss": 4.7184, + "step": 149400 + }, + { + "epoch": 0.2788903169823116, + "grad_norm": 0.8996809720993042, + "learning_rate": 0.00019933391162186897, + "loss": 4.5792, + "step": 149450 + }, + { + "epoch": 0.27898362254169007, + "grad_norm": 1.0781534910202026, + "learning_rate": 0.00019933342746526286, + "loss": 4.6787, + "step": 149500 + }, + { + "epoch": 0.2790769281010686, + "grad_norm": 0.9181458353996277, + "learning_rate": 0.00019933294313335072, + "loss": 4.6463, + "step": 149550 + }, + { + "epoch": 0.2791702336604471, + "grad_norm": 1.0423184633255005, + "learning_rate": 0.00019933245862613344, + "loss": 4.5832, + "step": 149600 + }, + { + "epoch": 0.27926353921982555, + "grad_norm": 0.9878393411636353, + "learning_rate": 0.00019933197394361181, + "loss": 4.5294, + "step": 149650 + }, + { + "epoch": 0.2793568447792041, + "grad_norm": 0.9291845560073853, + "learning_rate": 0.00019933148908578678, + "loss": 4.7486, + "step": 149700 + }, + { + "epoch": 0.27945015033858256, + "grad_norm": 1.0207691192626953, + "learning_rate": 0.0001993310040526591, + "loss": 4.7574, + "step": 149750 + }, + { + "epoch": 0.27954345589796104, + "grad_norm": 1.0741710662841797, + "learning_rate": 0.00019933051884422968, + "loss": 4.6548, + "step": 149800 + }, + { + "epoch": 0.2796367614573395, + "grad_norm": 1.0750080347061157, + "learning_rate": 0.0001993300334604994, + "loss": 4.6506, + "step": 149850 + }, + { + "epoch": 0.27973006701671804, + "grad_norm": 1.0452051162719727, + "learning_rate": 0.00019932954790146908, + "loss": 4.7486, + "step": 149900 + }, + { + "epoch": 0.2798233725760965, + "grad_norm": 0.8522201776504517, + "learning_rate": 0.0001993290621671396, + "loss": 4.6639, + "step": 149950 + }, + { + "epoch": 0.279916678135475, + "grad_norm": 0.8773214817047119, + "learning_rate": 0.00019932857625751176, + "loss": 4.7012, + "step": 150000 + }, + { + "epoch": 0.279916678135475, + "eval_loss": 4.83031702041626, + "eval_runtime": 229.2208, + "eval_samples_per_second": 11.378, + "eval_steps_per_second": 11.378, + "eval_tts_loss": 7.386615539569568, + "step": 150000 + }, + { + "epoch": 0.2800099836948535, + "grad_norm": 0.9581308364868164, + "learning_rate": 0.00019932809017258647, + "loss": 4.8594, + "step": 150050 + }, + { + "epoch": 0.280103289254232, + "grad_norm": 0.9009203314781189, + "learning_rate": 0.00019932760391236457, + "loss": 4.6758, + "step": 150100 + }, + { + "epoch": 0.2801965948136105, + "grad_norm": 0.9050132632255554, + "learning_rate": 0.00019932711747684695, + "loss": 4.5833, + "step": 150150 + }, + { + "epoch": 0.28028990037298895, + "grad_norm": 1.0961129665374756, + "learning_rate": 0.00019932663086603446, + "loss": 4.7339, + "step": 150200 + }, + { + "epoch": 0.2803832059323675, + "grad_norm": 1.4378759860992432, + "learning_rate": 0.00019932614407992792, + "loss": 4.8097, + "step": 150250 + }, + { + "epoch": 0.28047651149174596, + "grad_norm": 0.941024899482727, + "learning_rate": 0.0001993256571185282, + "loss": 4.5834, + "step": 150300 + }, + { + "epoch": 0.28056981705112444, + "grad_norm": 0.8255406618118286, + "learning_rate": 0.0001993251699818362, + "loss": 4.5948, + "step": 150350 + }, + { + "epoch": 0.2806631226105029, + "grad_norm": 0.9121527671813965, + "learning_rate": 0.00019932468266985274, + "loss": 4.8421, + "step": 150400 + }, + { + "epoch": 0.28075642816988144, + "grad_norm": 1.2012547254562378, + "learning_rate": 0.0001993241951825787, + "loss": 4.7063, + "step": 150450 + }, + { + "epoch": 0.2808497337292599, + "grad_norm": 1.0221003293991089, + "learning_rate": 0.0001993237075200149, + "loss": 4.5613, + "step": 150500 + }, + { + "epoch": 0.2809430392886384, + "grad_norm": 0.6838630437850952, + "learning_rate": 0.00019932321968216225, + "loss": 4.53, + "step": 150550 + }, + { + "epoch": 0.28103634484801693, + "grad_norm": 0.9109449982643127, + "learning_rate": 0.0001993227316690216, + "loss": 4.7778, + "step": 150600 + }, + { + "epoch": 0.2811296504073954, + "grad_norm": 1.0373647212982178, + "learning_rate": 0.00019932224348059383, + "loss": 4.7198, + "step": 150650 + }, + { + "epoch": 0.2812229559667739, + "grad_norm": 1.202540397644043, + "learning_rate": 0.00019932175511687975, + "loss": 4.5706, + "step": 150700 + }, + { + "epoch": 0.28131626152615236, + "grad_norm": 1.1028730869293213, + "learning_rate": 0.00019932126657788024, + "loss": 4.6268, + "step": 150750 + }, + { + "epoch": 0.2814095670855309, + "grad_norm": 0.88832688331604, + "learning_rate": 0.00019932077786359616, + "loss": 4.7166, + "step": 150800 + }, + { + "epoch": 0.28150287264490936, + "grad_norm": 1.0212421417236328, + "learning_rate": 0.00019932028897402844, + "loss": 4.7143, + "step": 150850 + }, + { + "epoch": 0.28159617820428784, + "grad_norm": 0.8306617736816406, + "learning_rate": 0.00019931979990917782, + "loss": 4.7177, + "step": 150900 + }, + { + "epoch": 0.28168948376366637, + "grad_norm": 1.1900157928466797, + "learning_rate": 0.00019931931066904528, + "loss": 4.6889, + "step": 150950 + }, + { + "epoch": 0.28178278932304485, + "grad_norm": 0.8949201703071594, + "learning_rate": 0.00019931882125363162, + "loss": 4.72, + "step": 151000 + }, + { + "epoch": 0.2818760948824233, + "grad_norm": 1.0268856287002563, + "learning_rate": 0.00019931833166293768, + "loss": 4.7837, + "step": 151050 + }, + { + "epoch": 0.2819694004418018, + "grad_norm": 1.0028334856033325, + "learning_rate": 0.00019931784189696438, + "loss": 4.4346, + "step": 151100 + }, + { + "epoch": 0.28206270600118033, + "grad_norm": 0.6958338618278503, + "learning_rate": 0.00019931735195571258, + "loss": 4.599, + "step": 151150 + }, + { + "epoch": 0.2821560115605588, + "grad_norm": 1.0011975765228271, + "learning_rate": 0.00019931686183918314, + "loss": 4.6019, + "step": 151200 + }, + { + "epoch": 0.2822493171199373, + "grad_norm": 1.0134965181350708, + "learning_rate": 0.00019931637154737688, + "loss": 4.5689, + "step": 151250 + }, + { + "epoch": 0.2823426226793158, + "grad_norm": 1.1758054494857788, + "learning_rate": 0.00019931588108029473, + "loss": 4.863, + "step": 151300 + }, + { + "epoch": 0.2824359282386943, + "grad_norm": 0.6513984203338623, + "learning_rate": 0.0001993153904379375, + "loss": 4.8059, + "step": 151350 + }, + { + "epoch": 0.28252923379807277, + "grad_norm": 0.9191519618034363, + "learning_rate": 0.0001993148996203061, + "loss": 4.5621, + "step": 151400 + }, + { + "epoch": 0.28262253935745124, + "grad_norm": 1.069206953048706, + "learning_rate": 0.00019931440862740136, + "loss": 4.54, + "step": 151450 + }, + { + "epoch": 0.2827158449168298, + "grad_norm": 0.8649135828018188, + "learning_rate": 0.00019931391745922416, + "loss": 4.4516, + "step": 151500 + }, + { + "epoch": 0.28280915047620825, + "grad_norm": 0.8605431914329529, + "learning_rate": 0.0001993134261157754, + "loss": 4.6004, + "step": 151550 + }, + { + "epoch": 0.2829024560355867, + "grad_norm": 1.1754326820373535, + "learning_rate": 0.00019931293459705588, + "loss": 4.7162, + "step": 151600 + }, + { + "epoch": 0.28299576159496526, + "grad_norm": 0.9741180539131165, + "learning_rate": 0.0001993124429030665, + "loss": 4.8181, + "step": 151650 + }, + { + "epoch": 0.28308906715434373, + "grad_norm": 1.0868324041366577, + "learning_rate": 0.00019931195103380819, + "loss": 4.602, + "step": 151700 + }, + { + "epoch": 0.2831823727137222, + "grad_norm": 0.9874230027198792, + "learning_rate": 0.00019931145898928173, + "loss": 4.6277, + "step": 151750 + }, + { + "epoch": 0.2832756782731007, + "grad_norm": 1.13186776638031, + "learning_rate": 0.00019931096676948802, + "loss": 4.5465, + "step": 151800 + }, + { + "epoch": 0.2833689838324792, + "grad_norm": 0.8239442110061646, + "learning_rate": 0.00019931047437442794, + "loss": 4.7412, + "step": 151850 + }, + { + "epoch": 0.2834622893918577, + "grad_norm": 1.0195013284683228, + "learning_rate": 0.00019930998180410232, + "loss": 4.4785, + "step": 151900 + }, + { + "epoch": 0.28355559495123617, + "grad_norm": 1.3029369115829468, + "learning_rate": 0.00019930948905851206, + "loss": 4.8227, + "step": 151950 + }, + { + "epoch": 0.2836489005106147, + "grad_norm": 0.8565980792045593, + "learning_rate": 0.00019930899613765805, + "loss": 4.5811, + "step": 152000 + }, + { + "epoch": 0.2837422060699932, + "grad_norm": 0.9807513356208801, + "learning_rate": 0.0001993085030415411, + "loss": 4.733, + "step": 152050 + }, + { + "epoch": 0.28383551162937165, + "grad_norm": 1.0454181432724, + "learning_rate": 0.00019930800977016215, + "loss": 4.703, + "step": 152100 + }, + { + "epoch": 0.2839288171887501, + "grad_norm": 1.0807571411132812, + "learning_rate": 0.00019930751632352202, + "loss": 4.4999, + "step": 152150 + }, + { + "epoch": 0.28402212274812866, + "grad_norm": 1.1262049674987793, + "learning_rate": 0.0001993070227016216, + "loss": 4.6597, + "step": 152200 + }, + { + "epoch": 0.28411542830750713, + "grad_norm": 0.8939054012298584, + "learning_rate": 0.00019930652890446176, + "loss": 4.6779, + "step": 152250 + }, + { + "epoch": 0.2842087338668856, + "grad_norm": 0.9293799996376038, + "learning_rate": 0.00019930603493204337, + "loss": 4.551, + "step": 152300 + }, + { + "epoch": 0.28430203942626414, + "grad_norm": 1.1074103116989136, + "learning_rate": 0.0001993055407843673, + "loss": 4.805, + "step": 152350 + }, + { + "epoch": 0.2843953449856426, + "grad_norm": 1.147536039352417, + "learning_rate": 0.0001993050464614344, + "loss": 4.7777, + "step": 152400 + }, + { + "epoch": 0.2844886505450211, + "grad_norm": 0.8999912142753601, + "learning_rate": 0.00019930455196324562, + "loss": 4.7226, + "step": 152450 + }, + { + "epoch": 0.28458195610439957, + "grad_norm": 0.8882901072502136, + "learning_rate": 0.00019930405728980176, + "loss": 4.5275, + "step": 152500 + }, + { + "epoch": 0.2846752616637781, + "grad_norm": 0.958745539188385, + "learning_rate": 0.00019930356244110369, + "loss": 4.5713, + "step": 152550 + }, + { + "epoch": 0.2847685672231566, + "grad_norm": 0.8023204803466797, + "learning_rate": 0.00019930306741715233, + "loss": 4.6426, + "step": 152600 + }, + { + "epoch": 0.28486187278253505, + "grad_norm": 0.7979447245597839, + "learning_rate": 0.0001993025722179485, + "loss": 4.81, + "step": 152650 + }, + { + "epoch": 0.2849551783419136, + "grad_norm": 0.8460389375686646, + "learning_rate": 0.00019930207684349313, + "loss": 4.7702, + "step": 152700 + }, + { + "epoch": 0.28504848390129206, + "grad_norm": 0.905491054058075, + "learning_rate": 0.00019930158129378707, + "loss": 4.6398, + "step": 152750 + }, + { + "epoch": 0.28514178946067054, + "grad_norm": 0.5452645421028137, + "learning_rate": 0.00019930108556883116, + "loss": 4.4575, + "step": 152800 + }, + { + "epoch": 0.285235095020049, + "grad_norm": 1.016485333442688, + "learning_rate": 0.00019930058966862634, + "loss": 4.6945, + "step": 152850 + }, + { + "epoch": 0.28532840057942754, + "grad_norm": 0.6815197467803955, + "learning_rate": 0.00019930009359317343, + "loss": 4.6492, + "step": 152900 + }, + { + "epoch": 0.285421706138806, + "grad_norm": 0.9308417439460754, + "learning_rate": 0.00019929959734247331, + "loss": 4.6306, + "step": 152950 + }, + { + "epoch": 0.2855150116981845, + "grad_norm": 0.9671070575714111, + "learning_rate": 0.0001992991009165269, + "loss": 4.6504, + "step": 153000 + }, + { + "epoch": 0.2855150116981845, + "eval_loss": 4.835600852966309, + "eval_runtime": 230.5993, + "eval_samples_per_second": 11.31, + "eval_steps_per_second": 11.31, + "eval_tts_loss": 7.43258083978685, + "step": 153000 + }, + { + "epoch": 0.28560831725756297, + "grad_norm": 0.8776199221611023, + "learning_rate": 0.00019929860431533504, + "loss": 4.5989, + "step": 153050 + }, + { + "epoch": 0.2857016228169415, + "grad_norm": 0.8226851224899292, + "learning_rate": 0.00019929810753889865, + "loss": 4.3491, + "step": 153100 + }, + { + "epoch": 0.28579492837632, + "grad_norm": 1.3178420066833496, + "learning_rate": 0.00019929761058721853, + "loss": 4.5217, + "step": 153150 + }, + { + "epoch": 0.28588823393569845, + "grad_norm": 1.0944714546203613, + "learning_rate": 0.0001992971134602956, + "loss": 4.7149, + "step": 153200 + }, + { + "epoch": 0.285981539495077, + "grad_norm": 0.8839026689529419, + "learning_rate": 0.00019929661615813077, + "loss": 4.5549, + "step": 153250 + }, + { + "epoch": 0.28607484505445546, + "grad_norm": 0.9436131119728088, + "learning_rate": 0.00019929611868072487, + "loss": 4.7396, + "step": 153300 + }, + { + "epoch": 0.28616815061383394, + "grad_norm": 0.8555207252502441, + "learning_rate": 0.00019929562102807878, + "loss": 4.6732, + "step": 153350 + }, + { + "epoch": 0.2862614561732124, + "grad_norm": 0.8614400625228882, + "learning_rate": 0.0001992951232001934, + "loss": 4.5358, + "step": 153400 + }, + { + "epoch": 0.28635476173259095, + "grad_norm": 0.7893537282943726, + "learning_rate": 0.00019929462519706958, + "loss": 4.7613, + "step": 153450 + }, + { + "epoch": 0.2864480672919694, + "grad_norm": 1.0133888721466064, + "learning_rate": 0.00019929412701870822, + "loss": 4.7695, + "step": 153500 + }, + { + "epoch": 0.2865413728513479, + "grad_norm": 1.0339868068695068, + "learning_rate": 0.00019929362866511022, + "loss": 4.6753, + "step": 153550 + }, + { + "epoch": 0.28663467841072643, + "grad_norm": 0.8841448426246643, + "learning_rate": 0.00019929313013627642, + "loss": 4.5938, + "step": 153600 + }, + { + "epoch": 0.2867279839701049, + "grad_norm": 0.8612989783287048, + "learning_rate": 0.00019929263143220774, + "loss": 4.4805, + "step": 153650 + }, + { + "epoch": 0.2868212895294834, + "grad_norm": 0.9694012403488159, + "learning_rate": 0.000199292132552905, + "loss": 4.8494, + "step": 153700 + }, + { + "epoch": 0.28691459508886186, + "grad_norm": 1.2090984582901, + "learning_rate": 0.00019929163349836915, + "loss": 4.7134, + "step": 153750 + }, + { + "epoch": 0.2870079006482404, + "grad_norm": 1.0495941638946533, + "learning_rate": 0.00019929113426860103, + "loss": 4.7221, + "step": 153800 + }, + { + "epoch": 0.28710120620761886, + "grad_norm": 0.8493571281433105, + "learning_rate": 0.00019929063486360152, + "loss": 4.4686, + "step": 153850 + }, + { + "epoch": 0.28719451176699734, + "grad_norm": 1.2186065912246704, + "learning_rate": 0.0001992901352833715, + "loss": 4.7176, + "step": 153900 + }, + { + "epoch": 0.28728781732637587, + "grad_norm": 1.0181117057800293, + "learning_rate": 0.00019928963552791188, + "loss": 4.484, + "step": 153950 + }, + { + "epoch": 0.28738112288575435, + "grad_norm": 1.2028471231460571, + "learning_rate": 0.0001992891355972235, + "loss": 4.8875, + "step": 154000 + }, + { + "epoch": 0.2874744284451328, + "grad_norm": 1.1655298471450806, + "learning_rate": 0.00019928863549130733, + "loss": 4.5263, + "step": 154050 + }, + { + "epoch": 0.2875677340045113, + "grad_norm": 1.0849800109863281, + "learning_rate": 0.00019928813521016413, + "loss": 4.4706, + "step": 154100 + }, + { + "epoch": 0.28766103956388983, + "grad_norm": 0.9207818508148193, + "learning_rate": 0.00019928763475379486, + "loss": 4.605, + "step": 154150 + }, + { + "epoch": 0.2877543451232683, + "grad_norm": 0.8862490653991699, + "learning_rate": 0.00019928713412220036, + "loss": 4.5169, + "step": 154200 + }, + { + "epoch": 0.2878476506826468, + "grad_norm": 1.0077502727508545, + "learning_rate": 0.00019928663331538154, + "loss": 4.7856, + "step": 154250 + }, + { + "epoch": 0.2879409562420253, + "grad_norm": 1.1006149053573608, + "learning_rate": 0.00019928613233333934, + "loss": 4.7136, + "step": 154300 + }, + { + "epoch": 0.2880342618014038, + "grad_norm": 1.0672188997268677, + "learning_rate": 0.00019928563117607454, + "loss": 4.6588, + "step": 154350 + }, + { + "epoch": 0.28812756736078227, + "grad_norm": 1.0344702005386353, + "learning_rate": 0.00019928512984358808, + "loss": 4.5211, + "step": 154400 + }, + { + "epoch": 0.28822087292016074, + "grad_norm": 0.9514538645744324, + "learning_rate": 0.00019928462833588081, + "loss": 4.586, + "step": 154450 + }, + { + "epoch": 0.2883141784795393, + "grad_norm": 0.9031883478164673, + "learning_rate": 0.00019928412665295368, + "loss": 4.6923, + "step": 154500 + }, + { + "epoch": 0.28840748403891775, + "grad_norm": 0.9801226258277893, + "learning_rate": 0.00019928362479480752, + "loss": 4.6889, + "step": 154550 + }, + { + "epoch": 0.2885007895982962, + "grad_norm": 0.9183834791183472, + "learning_rate": 0.00019928312276144326, + "loss": 4.5817, + "step": 154600 + }, + { + "epoch": 0.28859409515767476, + "grad_norm": 0.8800172209739685, + "learning_rate": 0.0001992826205528617, + "loss": 4.6275, + "step": 154650 + }, + { + "epoch": 0.28868740071705323, + "grad_norm": 1.0279831886291504, + "learning_rate": 0.00019928211816906384, + "loss": 4.6246, + "step": 154700 + }, + { + "epoch": 0.2887807062764317, + "grad_norm": 0.9106327295303345, + "learning_rate": 0.00019928161561005045, + "loss": 4.5679, + "step": 154750 + }, + { + "epoch": 0.2888740118358102, + "grad_norm": 0.9070572257041931, + "learning_rate": 0.0001992811128758225, + "loss": 4.6825, + "step": 154800 + }, + { + "epoch": 0.2889673173951887, + "grad_norm": 0.9388306140899658, + "learning_rate": 0.00019928060996638093, + "loss": 4.4394, + "step": 154850 + }, + { + "epoch": 0.2890606229545672, + "grad_norm": 0.9213298559188843, + "learning_rate": 0.00019928010688172645, + "loss": 4.6287, + "step": 154900 + }, + { + "epoch": 0.28915392851394567, + "grad_norm": 0.9049386382102966, + "learning_rate": 0.00019927960362186008, + "loss": 4.7666, + "step": 154950 + }, + { + "epoch": 0.2892472340733242, + "grad_norm": 0.9011629223823547, + "learning_rate": 0.0001992791001867827, + "loss": 4.5634, + "step": 155000 + }, + { + "epoch": 0.2893405396327027, + "grad_norm": 1.0195951461791992, + "learning_rate": 0.00019927859657649518, + "loss": 4.6655, + "step": 155050 + }, + { + "epoch": 0.28943384519208115, + "grad_norm": 0.7028415203094482, + "learning_rate": 0.00019927809279099838, + "loss": 4.4955, + "step": 155100 + }, + { + "epoch": 0.2895271507514596, + "grad_norm": 0.9944257140159607, + "learning_rate": 0.0001992775888302932, + "loss": 4.9458, + "step": 155150 + }, + { + "epoch": 0.28962045631083816, + "grad_norm": 1.0102462768554688, + "learning_rate": 0.0001992770846943806, + "loss": 4.748, + "step": 155200 + }, + { + "epoch": 0.28971376187021664, + "grad_norm": 0.8883141875267029, + "learning_rate": 0.00019927658038326138, + "loss": 4.4985, + "step": 155250 + }, + { + "epoch": 0.2898070674295951, + "grad_norm": 0.8198468089103699, + "learning_rate": 0.0001992760758969365, + "loss": 4.6353, + "step": 155300 + }, + { + "epoch": 0.28990037298897364, + "grad_norm": 0.7499061226844788, + "learning_rate": 0.00019927557123540677, + "loss": 4.5579, + "step": 155350 + }, + { + "epoch": 0.2899936785483521, + "grad_norm": 0.7764875888824463, + "learning_rate": 0.0001992750663986731, + "loss": 4.4778, + "step": 155400 + }, + { + "epoch": 0.2900869841077306, + "grad_norm": 0.8831614255905151, + "learning_rate": 0.00019927456138673646, + "loss": 4.403, + "step": 155450 + }, + { + "epoch": 0.29018028966710907, + "grad_norm": 1.0157040357589722, + "learning_rate": 0.00019927405619959767, + "loss": 4.6915, + "step": 155500 + }, + { + "epoch": 0.2902735952264876, + "grad_norm": 1.1390964984893799, + "learning_rate": 0.00019927355083725762, + "loss": 4.5186, + "step": 155550 + }, + { + "epoch": 0.2903669007858661, + "grad_norm": 1.154977560043335, + "learning_rate": 0.00019927304529971727, + "loss": 4.7172, + "step": 155600 + }, + { + "epoch": 0.29046020634524455, + "grad_norm": 1.095638632774353, + "learning_rate": 0.00019927253958697743, + "loss": 4.8124, + "step": 155650 + }, + { + "epoch": 0.29055351190462303, + "grad_norm": 0.9511516094207764, + "learning_rate": 0.00019927203369903901, + "loss": 4.4195, + "step": 155700 + }, + { + "epoch": 0.29064681746400156, + "grad_norm": 1.0165811777114868, + "learning_rate": 0.00019927152763590296, + "loss": 4.6677, + "step": 155750 + }, + { + "epoch": 0.29074012302338004, + "grad_norm": 0.8672209978103638, + "learning_rate": 0.0001992710213975701, + "loss": 4.7442, + "step": 155800 + }, + { + "epoch": 0.2908334285827585, + "grad_norm": 1.1338589191436768, + "learning_rate": 0.00019927051498404134, + "loss": 4.6876, + "step": 155850 + }, + { + "epoch": 0.29092673414213704, + "grad_norm": 1.0696067810058594, + "learning_rate": 0.00019927000839531763, + "loss": 4.5303, + "step": 155900 + }, + { + "epoch": 0.2910200397015155, + "grad_norm": 0.9519825577735901, + "learning_rate": 0.00019926950163139977, + "loss": 4.581, + "step": 155950 + }, + { + "epoch": 0.291113345260894, + "grad_norm": 1.0160168409347534, + "learning_rate": 0.00019926899469228877, + "loss": 4.589, + "step": 156000 + }, + { + "epoch": 0.291113345260894, + "eval_loss": 4.831135272979736, + "eval_runtime": 231.6301, + "eval_samples_per_second": 11.259, + "eval_steps_per_second": 11.259, + "eval_tts_loss": 7.382750091658608, + "step": 156000 + }, + { + "epoch": 0.2912066508202725, + "grad_norm": 1.5486959218978882, + "learning_rate": 0.00019926848757798542, + "loss": 4.5012, + "step": 156050 + }, + { + "epoch": 0.291299956379651, + "grad_norm": 1.007653832435608, + "learning_rate": 0.00019926798028849064, + "loss": 4.5957, + "step": 156100 + }, + { + "epoch": 0.2913932619390295, + "grad_norm": 0.7587629556655884, + "learning_rate": 0.00019926747282380538, + "loss": 4.5975, + "step": 156150 + }, + { + "epoch": 0.29148656749840796, + "grad_norm": 0.8650146126747131, + "learning_rate": 0.0001992669651839305, + "loss": 4.5823, + "step": 156200 + }, + { + "epoch": 0.2915798730577865, + "grad_norm": 0.8940017819404602, + "learning_rate": 0.00019926645736886687, + "loss": 4.5223, + "step": 156250 + }, + { + "epoch": 0.29167317861716496, + "grad_norm": 1.1759480237960815, + "learning_rate": 0.0001992659493786154, + "loss": 4.5113, + "step": 156300 + }, + { + "epoch": 0.29176648417654344, + "grad_norm": 0.9425550699234009, + "learning_rate": 0.000199265441213177, + "loss": 4.62, + "step": 156350 + }, + { + "epoch": 0.2918597897359219, + "grad_norm": 1.2574522495269775, + "learning_rate": 0.00019926493287255258, + "loss": 4.7219, + "step": 156400 + }, + { + "epoch": 0.29195309529530045, + "grad_norm": 0.6988387703895569, + "learning_rate": 0.00019926442435674302, + "loss": 4.5019, + "step": 156450 + }, + { + "epoch": 0.2920464008546789, + "grad_norm": 1.215088963508606, + "learning_rate": 0.0001992639156657492, + "loss": 4.7895, + "step": 156500 + }, + { + "epoch": 0.2921397064140574, + "grad_norm": 1.1339821815490723, + "learning_rate": 0.00019926340679957206, + "loss": 4.7092, + "step": 156550 + }, + { + "epoch": 0.29223301197343593, + "grad_norm": 1.184877872467041, + "learning_rate": 0.00019926289775821246, + "loss": 4.848, + "step": 156600 + }, + { + "epoch": 0.2923263175328144, + "grad_norm": 1.0538442134857178, + "learning_rate": 0.0001992623885416713, + "loss": 4.7515, + "step": 156650 + }, + { + "epoch": 0.2924196230921929, + "grad_norm": 0.875169038772583, + "learning_rate": 0.0001992618791499495, + "loss": 4.5665, + "step": 156700 + }, + { + "epoch": 0.29251292865157136, + "grad_norm": 0.8852382898330688, + "learning_rate": 0.00019926136958304795, + "loss": 4.4925, + "step": 156750 + }, + { + "epoch": 0.2926062342109499, + "grad_norm": 0.683463454246521, + "learning_rate": 0.00019926085984096754, + "loss": 4.5527, + "step": 156800 + }, + { + "epoch": 0.29269953977032837, + "grad_norm": 0.848481297492981, + "learning_rate": 0.00019926034992370922, + "loss": 4.5793, + "step": 156850 + }, + { + "epoch": 0.29279284532970684, + "grad_norm": 0.9897000193595886, + "learning_rate": 0.0001992598398312738, + "loss": 4.6143, + "step": 156900 + }, + { + "epoch": 0.2928861508890854, + "grad_norm": 0.9709441065788269, + "learning_rate": 0.00019925932956366222, + "loss": 4.5603, + "step": 156950 + }, + { + "epoch": 0.29297945644846385, + "grad_norm": 0.9135065674781799, + "learning_rate": 0.00019925881912087544, + "loss": 4.7722, + "step": 157000 + }, + { + "epoch": 0.2930727620078423, + "grad_norm": 0.9769490361213684, + "learning_rate": 0.00019925830850291428, + "loss": 4.534, + "step": 157050 + }, + { + "epoch": 0.2931660675672208, + "grad_norm": 1.041882872581482, + "learning_rate": 0.00019925779770977963, + "loss": 4.6919, + "step": 157100 + }, + { + "epoch": 0.29325937312659933, + "grad_norm": 0.6851228475570679, + "learning_rate": 0.0001992572867414725, + "loss": 4.5882, + "step": 157150 + }, + { + "epoch": 0.2933526786859778, + "grad_norm": 1.007688283920288, + "learning_rate": 0.00019925677559799368, + "loss": 4.5231, + "step": 157200 + }, + { + "epoch": 0.2934459842453563, + "grad_norm": 1.0649223327636719, + "learning_rate": 0.00019925626427934413, + "loss": 4.7773, + "step": 157250 + }, + { + "epoch": 0.2935392898047348, + "grad_norm": 0.9082051515579224, + "learning_rate": 0.00019925575278552474, + "loss": 4.5285, + "step": 157300 + }, + { + "epoch": 0.2936325953641133, + "grad_norm": 1.0188795328140259, + "learning_rate": 0.00019925524111653642, + "loss": 4.7059, + "step": 157350 + }, + { + "epoch": 0.29372590092349177, + "grad_norm": 0.9674282670021057, + "learning_rate": 0.00019925472927238006, + "loss": 4.5048, + "step": 157400 + }, + { + "epoch": 0.29381920648287024, + "grad_norm": 0.9318031072616577, + "learning_rate": 0.00019925421725305656, + "loss": 4.5211, + "step": 157450 + }, + { + "epoch": 0.2939125120422488, + "grad_norm": 1.3290448188781738, + "learning_rate": 0.00019925370505856683, + "loss": 4.6717, + "step": 157500 + }, + { + "epoch": 0.29400581760162725, + "grad_norm": 0.9480525851249695, + "learning_rate": 0.00019925319268891177, + "loss": 4.4553, + "step": 157550 + }, + { + "epoch": 0.2940991231610057, + "grad_norm": 0.9672073721885681, + "learning_rate": 0.00019925268014409227, + "loss": 4.8206, + "step": 157600 + }, + { + "epoch": 0.29419242872038426, + "grad_norm": 0.8860331177711487, + "learning_rate": 0.0001992521674241093, + "loss": 4.6907, + "step": 157650 + }, + { + "epoch": 0.29428573427976273, + "grad_norm": 0.9548688530921936, + "learning_rate": 0.0001992516545289637, + "loss": 4.6107, + "step": 157700 + }, + { + "epoch": 0.2943790398391412, + "grad_norm": 0.9516497850418091, + "learning_rate": 0.0001992511414586564, + "loss": 4.5693, + "step": 157750 + }, + { + "epoch": 0.2944723453985197, + "grad_norm": 1.0647341012954712, + "learning_rate": 0.0001992506282131883, + "loss": 4.5256, + "step": 157800 + }, + { + "epoch": 0.2945656509578982, + "grad_norm": 0.9625911712646484, + "learning_rate": 0.0001992501147925603, + "loss": 4.592, + "step": 157850 + }, + { + "epoch": 0.2946589565172767, + "grad_norm": 1.109753131866455, + "learning_rate": 0.00019924960119677328, + "loss": 4.3946, + "step": 157900 + }, + { + "epoch": 0.29475226207665517, + "grad_norm": 0.7586527466773987, + "learning_rate": 0.0001992490874258282, + "loss": 4.6777, + "step": 157950 + }, + { + "epoch": 0.2948455676360337, + "grad_norm": 1.0405488014221191, + "learning_rate": 0.00019924857347972593, + "loss": 4.6913, + "step": 158000 + }, + { + "epoch": 0.2949388731954122, + "grad_norm": 0.7991945743560791, + "learning_rate": 0.00019924805935846746, + "loss": 4.6677, + "step": 158050 + }, + { + "epoch": 0.29503217875479065, + "grad_norm": 1.0138425827026367, + "learning_rate": 0.00019924754506205354, + "loss": 4.7778, + "step": 158100 + }, + { + "epoch": 0.29512548431416913, + "grad_norm": 0.9910553693771362, + "learning_rate": 0.00019924703059048522, + "loss": 4.5255, + "step": 158150 + }, + { + "epoch": 0.29521878987354766, + "grad_norm": 1.0533186197280884, + "learning_rate": 0.00019924651594376334, + "loss": 4.779, + "step": 158200 + }, + { + "epoch": 0.29531209543292614, + "grad_norm": 0.8881279230117798, + "learning_rate": 0.0001992460011218888, + "loss": 4.8608, + "step": 158250 + }, + { + "epoch": 0.2954054009923046, + "grad_norm": 0.9535214304924011, + "learning_rate": 0.00019924548612486258, + "loss": 4.6377, + "step": 158300 + }, + { + "epoch": 0.2954987065516831, + "grad_norm": 1.084489107131958, + "learning_rate": 0.00019924497095268547, + "loss": 4.6459, + "step": 158350 + }, + { + "epoch": 0.2955920121110616, + "grad_norm": 1.0378689765930176, + "learning_rate": 0.0001992444556053585, + "loss": 4.7632, + "step": 158400 + }, + { + "epoch": 0.2956853176704401, + "grad_norm": 0.878700315952301, + "learning_rate": 0.00019924394008288254, + "loss": 4.4921, + "step": 158450 + }, + { + "epoch": 0.29577862322981857, + "grad_norm": 1.0042437314987183, + "learning_rate": 0.00019924342438525848, + "loss": 4.7155, + "step": 158500 + }, + { + "epoch": 0.2958719287891971, + "grad_norm": 1.3455873727798462, + "learning_rate": 0.00019924290851248722, + "loss": 4.6473, + "step": 158550 + }, + { + "epoch": 0.2959652343485756, + "grad_norm": 0.9573052525520325, + "learning_rate": 0.0001992423924645697, + "loss": 4.8455, + "step": 158600 + }, + { + "epoch": 0.29605853990795405, + "grad_norm": 1.201541781425476, + "learning_rate": 0.00019924187624150677, + "loss": 4.6125, + "step": 158650 + }, + { + "epoch": 0.29615184546733253, + "grad_norm": 1.1214284896850586, + "learning_rate": 0.00019924135984329946, + "loss": 4.5251, + "step": 158700 + }, + { + "epoch": 0.29624515102671106, + "grad_norm": 0.8579356670379639, + "learning_rate": 0.00019924084326994858, + "loss": 4.6285, + "step": 158750 + }, + { + "epoch": 0.29633845658608954, + "grad_norm": 1.1231144666671753, + "learning_rate": 0.00019924032652145508, + "loss": 4.5755, + "step": 158800 + }, + { + "epoch": 0.296431762145468, + "grad_norm": 0.8243206739425659, + "learning_rate": 0.00019923980959781984, + "loss": 4.5558, + "step": 158850 + }, + { + "epoch": 0.29652506770484655, + "grad_norm": 1.0935394763946533, + "learning_rate": 0.00019923929249904385, + "loss": 4.913, + "step": 158900 + }, + { + "epoch": 0.296618373264225, + "grad_norm": 0.9777284264564514, + "learning_rate": 0.00019923877522512795, + "loss": 4.6809, + "step": 158950 + }, + { + "epoch": 0.2967116788236035, + "grad_norm": 0.9716470241546631, + "learning_rate": 0.00019923825777607305, + "loss": 4.4853, + "step": 159000 + }, + { + "epoch": 0.2967116788236035, + "eval_loss": 4.834835529327393, + "eval_runtime": 229.1261, + "eval_samples_per_second": 11.382, + "eval_steps_per_second": 11.382, + "eval_tts_loss": 7.493346262103278, + "step": 159000 + }, + { + "epoch": 0.296804984382982, + "grad_norm": 1.0158995389938354, + "learning_rate": 0.00019923774015188014, + "loss": 4.6832, + "step": 159050 + }, + { + "epoch": 0.2968982899423605, + "grad_norm": 1.0746111869812012, + "learning_rate": 0.00019923722235255, + "loss": 4.9126, + "step": 159100 + }, + { + "epoch": 0.296991595501739, + "grad_norm": 0.8816997408866882, + "learning_rate": 0.00019923670437808368, + "loss": 4.6653, + "step": 159150 + }, + { + "epoch": 0.29708490106111746, + "grad_norm": 0.8043944835662842, + "learning_rate": 0.00019923618622848203, + "loss": 4.6432, + "step": 159200 + }, + { + "epoch": 0.297178206620496, + "grad_norm": 1.2232578992843628, + "learning_rate": 0.00019923566790374598, + "loss": 4.7212, + "step": 159250 + }, + { + "epoch": 0.29727151217987446, + "grad_norm": 0.991773784160614, + "learning_rate": 0.0001992351494038764, + "loss": 4.7941, + "step": 159300 + }, + { + "epoch": 0.29736481773925294, + "grad_norm": 0.9849087595939636, + "learning_rate": 0.00019923463072887428, + "loss": 4.6429, + "step": 159350 + }, + { + "epoch": 0.2974581232986314, + "grad_norm": 1.1423635482788086, + "learning_rate": 0.0001992341118787405, + "loss": 4.875, + "step": 159400 + }, + { + "epoch": 0.29755142885800995, + "grad_norm": 0.6247135996818542, + "learning_rate": 0.00019923359285347594, + "loss": 4.4517, + "step": 159450 + }, + { + "epoch": 0.2976447344173884, + "grad_norm": 0.9761686325073242, + "learning_rate": 0.00019923307365308154, + "loss": 4.5767, + "step": 159500 + }, + { + "epoch": 0.2977380399767669, + "grad_norm": 0.8199406266212463, + "learning_rate": 0.00019923255427755826, + "loss": 4.7449, + "step": 159550 + }, + { + "epoch": 0.29783134553614543, + "grad_norm": 1.3415900468826294, + "learning_rate": 0.00019923203472690698, + "loss": 4.5032, + "step": 159600 + }, + { + "epoch": 0.2979246510955239, + "grad_norm": 0.806204617023468, + "learning_rate": 0.00019923151500112862, + "loss": 4.6618, + "step": 159650 + }, + { + "epoch": 0.2980179566549024, + "grad_norm": 1.2007147073745728, + "learning_rate": 0.0001992309951002241, + "loss": 4.8032, + "step": 159700 + }, + { + "epoch": 0.29811126221428086, + "grad_norm": 0.580830991268158, + "learning_rate": 0.00019923047502419432, + "loss": 4.5456, + "step": 159750 + }, + { + "epoch": 0.2982045677736594, + "grad_norm": 1.0669281482696533, + "learning_rate": 0.0001992299547730402, + "loss": 4.4379, + "step": 159800 + }, + { + "epoch": 0.29829787333303787, + "grad_norm": 0.7621221542358398, + "learning_rate": 0.0001992294343467627, + "loss": 4.698, + "step": 159850 + }, + { + "epoch": 0.29839117889241634, + "grad_norm": 1.0081877708435059, + "learning_rate": 0.00019922891374536268, + "loss": 4.4803, + "step": 159900 + }, + { + "epoch": 0.2984844844517949, + "grad_norm": 1.1483149528503418, + "learning_rate": 0.00019922839296884114, + "loss": 4.7129, + "step": 159950 + }, + { + "epoch": 0.29857779001117335, + "grad_norm": 1.0426714420318604, + "learning_rate": 0.00019922787201719888, + "loss": 4.5651, + "step": 160000 + }, + { + "epoch": 0.2986710955705518, + "grad_norm": 0.8986678123474121, + "learning_rate": 0.00019922735089043691, + "loss": 4.779, + "step": 160050 + }, + { + "epoch": 0.2987644011299303, + "grad_norm": 0.9667002558708191, + "learning_rate": 0.00019922682958855613, + "loss": 4.6677, + "step": 160100 + }, + { + "epoch": 0.29885770668930883, + "grad_norm": 0.9710828065872192, + "learning_rate": 0.00019922630811155747, + "loss": 4.7131, + "step": 160150 + }, + { + "epoch": 0.2989510122486873, + "grad_norm": 1.078286051750183, + "learning_rate": 0.00019922578645944182, + "loss": 4.6679, + "step": 160200 + }, + { + "epoch": 0.2990443178080658, + "grad_norm": 0.694128692150116, + "learning_rate": 0.00019922526463221008, + "loss": 4.5504, + "step": 160250 + }, + { + "epoch": 0.2991376233674443, + "grad_norm": 1.0354713201522827, + "learning_rate": 0.00019922474262986325, + "loss": 4.4172, + "step": 160300 + }, + { + "epoch": 0.2992309289268228, + "grad_norm": 0.9620189666748047, + "learning_rate": 0.0001992242204524022, + "loss": 4.6703, + "step": 160350 + }, + { + "epoch": 0.29932423448620127, + "grad_norm": 0.9035593271255493, + "learning_rate": 0.0001992236980998279, + "loss": 4.6122, + "step": 160400 + }, + { + "epoch": 0.29941754004557974, + "grad_norm": 0.9520270824432373, + "learning_rate": 0.00019922317557214116, + "loss": 4.7732, + "step": 160450 + }, + { + "epoch": 0.2995108456049583, + "grad_norm": 0.8644099831581116, + "learning_rate": 0.00019922265286934302, + "loss": 4.5148, + "step": 160500 + }, + { + "epoch": 0.29960415116433675, + "grad_norm": 0.7828960418701172, + "learning_rate": 0.00019922212999143435, + "loss": 4.7247, + "step": 160550 + }, + { + "epoch": 0.2996974567237152, + "grad_norm": 0.9005388617515564, + "learning_rate": 0.00019922160693841608, + "loss": 4.6693, + "step": 160600 + }, + { + "epoch": 0.29979076228309376, + "grad_norm": 0.6794205904006958, + "learning_rate": 0.0001992210837102891, + "loss": 4.5782, + "step": 160650 + }, + { + "epoch": 0.29988406784247224, + "grad_norm": 1.0133854150772095, + "learning_rate": 0.0001992205603070544, + "loss": 4.562, + "step": 160700 + }, + { + "epoch": 0.2999773734018507, + "grad_norm": 0.9719075560569763, + "learning_rate": 0.00019922003672871285, + "loss": 4.7801, + "step": 160750 + }, + { + "epoch": 0.3000706789612292, + "grad_norm": 0.9305253028869629, + "learning_rate": 0.00019921951297526542, + "loss": 4.4637, + "step": 160800 + }, + { + "epoch": 0.3001639845206077, + "grad_norm": 0.8835601210594177, + "learning_rate": 0.00019921898904671297, + "loss": 4.6572, + "step": 160850 + }, + { + "epoch": 0.3002572900799862, + "grad_norm": 1.0033226013183594, + "learning_rate": 0.0001992184649430565, + "loss": 4.4474, + "step": 160900 + }, + { + "epoch": 0.30035059563936467, + "grad_norm": 1.0143111944198608, + "learning_rate": 0.00019921794066429688, + "loss": 4.6021, + "step": 160950 + }, + { + "epoch": 0.3004439011987432, + "grad_norm": 1.0978728532791138, + "learning_rate": 0.00019921741621043503, + "loss": 4.7851, + "step": 161000 + }, + { + "epoch": 0.3005372067581217, + "grad_norm": 0.6397398710250854, + "learning_rate": 0.00019921689158147195, + "loss": 4.7239, + "step": 161050 + }, + { + "epoch": 0.30063051231750015, + "grad_norm": 1.0616174936294556, + "learning_rate": 0.00019921636677740846, + "loss": 4.7843, + "step": 161100 + }, + { + "epoch": 0.30072381787687863, + "grad_norm": 1.0118709802627563, + "learning_rate": 0.00019921584179824557, + "loss": 4.2376, + "step": 161150 + }, + { + "epoch": 0.30081712343625716, + "grad_norm": 1.0035194158554077, + "learning_rate": 0.00019921531664398418, + "loss": 4.3126, + "step": 161200 + }, + { + "epoch": 0.30091042899563564, + "grad_norm": 1.0038213729858398, + "learning_rate": 0.0001992147913146252, + "loss": 4.673, + "step": 161250 + }, + { + "epoch": 0.3010037345550141, + "grad_norm": 1.1764312982559204, + "learning_rate": 0.00019921426581016958, + "loss": 4.6918, + "step": 161300 + }, + { + "epoch": 0.3010970401143926, + "grad_norm": 0.8991124629974365, + "learning_rate": 0.00019921374013061823, + "loss": 4.4333, + "step": 161350 + }, + { + "epoch": 0.3011903456737711, + "grad_norm": 0.8967214226722717, + "learning_rate": 0.0001992132142759721, + "loss": 4.5326, + "step": 161400 + }, + { + "epoch": 0.3012836512331496, + "grad_norm": 1.1561260223388672, + "learning_rate": 0.0001992126882462321, + "loss": 4.8165, + "step": 161450 + }, + { + "epoch": 0.3013769567925281, + "grad_norm": 1.163358211517334, + "learning_rate": 0.00019921216204139914, + "loss": 4.48, + "step": 161500 + }, + { + "epoch": 0.3014702623519066, + "grad_norm": 0.9330498576164246, + "learning_rate": 0.0001992116356614742, + "loss": 4.5813, + "step": 161550 + }, + { + "epoch": 0.3015635679112851, + "grad_norm": 0.8502509593963623, + "learning_rate": 0.00019921110910645816, + "loss": 4.8287, + "step": 161600 + }, + { + "epoch": 0.30165687347066356, + "grad_norm": 0.6995953917503357, + "learning_rate": 0.000199210582376352, + "loss": 4.683, + "step": 161650 + }, + { + "epoch": 0.30175017903004203, + "grad_norm": 1.1503678560256958, + "learning_rate": 0.0001992100554711566, + "loss": 4.7569, + "step": 161700 + }, + { + "epoch": 0.30184348458942056, + "grad_norm": 0.8800851702690125, + "learning_rate": 0.0001992095283908729, + "loss": 4.6177, + "step": 161750 + }, + { + "epoch": 0.30193679014879904, + "grad_norm": 0.9621112942695618, + "learning_rate": 0.00019920900113550185, + "loss": 4.6983, + "step": 161800 + }, + { + "epoch": 0.3020300957081775, + "grad_norm": 1.1952325105667114, + "learning_rate": 0.00019920847370504435, + "loss": 4.7141, + "step": 161850 + }, + { + "epoch": 0.30212340126755605, + "grad_norm": 0.8587202429771423, + "learning_rate": 0.0001992079460995014, + "loss": 4.7418, + "step": 161900 + }, + { + "epoch": 0.3022167068269345, + "grad_norm": 0.8307145237922668, + "learning_rate": 0.0001992074183188738, + "loss": 4.7172, + "step": 161950 + }, + { + "epoch": 0.302310012386313, + "grad_norm": 0.9355490207672119, + "learning_rate": 0.00019920689036316262, + "loss": 4.7346, + "step": 162000 + }, + { + "epoch": 0.302310012386313, + "eval_loss": 4.827625274658203, + "eval_runtime": 232.9861, + "eval_samples_per_second": 11.194, + "eval_steps_per_second": 11.194, + "eval_tts_loss": 7.458007696548911, + "step": 162000 + }, + { + "epoch": 0.3024033179456915, + "grad_norm": 1.3112372159957886, + "learning_rate": 0.00019920636223236872, + "loss": 4.6245, + "step": 162050 + }, + { + "epoch": 0.30249662350507, + "grad_norm": 0.8583440780639648, + "learning_rate": 0.00019920583392649306, + "loss": 4.6192, + "step": 162100 + }, + { + "epoch": 0.3025899290644485, + "grad_norm": 0.9936934113502502, + "learning_rate": 0.00019920530544553653, + "loss": 4.7136, + "step": 162150 + }, + { + "epoch": 0.30268323462382696, + "grad_norm": 0.9335197806358337, + "learning_rate": 0.00019920477678950013, + "loss": 4.6699, + "step": 162200 + }, + { + "epoch": 0.3027765401832055, + "grad_norm": 0.9630725383758545, + "learning_rate": 0.0001992042479583847, + "loss": 4.6637, + "step": 162250 + }, + { + "epoch": 0.30286984574258397, + "grad_norm": 0.7474913597106934, + "learning_rate": 0.00019920371895219128, + "loss": 4.7166, + "step": 162300 + }, + { + "epoch": 0.30296315130196244, + "grad_norm": 0.7774944305419922, + "learning_rate": 0.00019920318977092075, + "loss": 4.8446, + "step": 162350 + }, + { + "epoch": 0.3030564568613409, + "grad_norm": 1.0125433206558228, + "learning_rate": 0.00019920266041457401, + "loss": 4.6259, + "step": 162400 + }, + { + "epoch": 0.30314976242071945, + "grad_norm": 1.1934956312179565, + "learning_rate": 0.00019920213088315202, + "loss": 4.6795, + "step": 162450 + }, + { + "epoch": 0.3032430679800979, + "grad_norm": 0.9211152195930481, + "learning_rate": 0.00019920160117665577, + "loss": 4.6288, + "step": 162500 + }, + { + "epoch": 0.3033363735394764, + "grad_norm": 0.9939976930618286, + "learning_rate": 0.00019920107129508613, + "loss": 4.7214, + "step": 162550 + }, + { + "epoch": 0.30342967909885493, + "grad_norm": 0.8724084496498108, + "learning_rate": 0.00019920054123844408, + "loss": 4.4638, + "step": 162600 + }, + { + "epoch": 0.3035229846582334, + "grad_norm": 0.994696855545044, + "learning_rate": 0.00019920001100673046, + "loss": 4.8186, + "step": 162650 + }, + { + "epoch": 0.3036162902176119, + "grad_norm": 0.9215883612632751, + "learning_rate": 0.00019919948059994634, + "loss": 4.5902, + "step": 162700 + }, + { + "epoch": 0.30370959577699036, + "grad_norm": 0.9584205746650696, + "learning_rate": 0.00019919895001809258, + "loss": 4.6455, + "step": 162750 + }, + { + "epoch": 0.3038029013363689, + "grad_norm": 0.8970978856086731, + "learning_rate": 0.00019919841926117012, + "loss": 4.5378, + "step": 162800 + }, + { + "epoch": 0.30389620689574737, + "grad_norm": 0.7979338765144348, + "learning_rate": 0.0001991978883291799, + "loss": 4.6594, + "step": 162850 + }, + { + "epoch": 0.30398951245512584, + "grad_norm": 0.7578598856925964, + "learning_rate": 0.00019919735722212284, + "loss": 4.832, + "step": 162900 + }, + { + "epoch": 0.3040828180145044, + "grad_norm": 1.0004535913467407, + "learning_rate": 0.00019919682593999994, + "loss": 4.695, + "step": 162950 + }, + { + "epoch": 0.30417612357388285, + "grad_norm": 0.9248453974723816, + "learning_rate": 0.00019919629448281209, + "loss": 4.3721, + "step": 163000 + }, + { + "epoch": 0.3042694291332613, + "grad_norm": 1.3004566431045532, + "learning_rate": 0.00019919576285056022, + "loss": 4.7349, + "step": 163050 + }, + { + "epoch": 0.3043627346926398, + "grad_norm": 0.8813239336013794, + "learning_rate": 0.0001991952310432453, + "loss": 4.6711, + "step": 163100 + }, + { + "epoch": 0.30445604025201833, + "grad_norm": 1.0755641460418701, + "learning_rate": 0.00019919469906086822, + "loss": 4.8154, + "step": 163150 + }, + { + "epoch": 0.3045493458113968, + "grad_norm": 0.9873396158218384, + "learning_rate": 0.00019919416690343, + "loss": 4.3667, + "step": 163200 + }, + { + "epoch": 0.3046426513707753, + "grad_norm": 0.7341350317001343, + "learning_rate": 0.0001991936345709315, + "loss": 4.5203, + "step": 163250 + }, + { + "epoch": 0.3047359569301538, + "grad_norm": 1.094083547592163, + "learning_rate": 0.00019919310206337366, + "loss": 4.6661, + "step": 163300 + }, + { + "epoch": 0.3048292624895323, + "grad_norm": 1.1205472946166992, + "learning_rate": 0.00019919256938075747, + "loss": 4.6381, + "step": 163350 + }, + { + "epoch": 0.30492256804891077, + "grad_norm": 0.9988998174667358, + "learning_rate": 0.00019919203652308385, + "loss": 4.7326, + "step": 163400 + }, + { + "epoch": 0.30501587360828925, + "grad_norm": 0.9494795203208923, + "learning_rate": 0.00019919150349035375, + "loss": 4.4937, + "step": 163450 + }, + { + "epoch": 0.3051091791676678, + "grad_norm": 0.9424142241477966, + "learning_rate": 0.0001991909702825681, + "loss": 4.3626, + "step": 163500 + }, + { + "epoch": 0.30520248472704625, + "grad_norm": 1.1510084867477417, + "learning_rate": 0.00019919043689972782, + "loss": 4.6443, + "step": 163550 + }, + { + "epoch": 0.30529579028642473, + "grad_norm": 1.1867023706436157, + "learning_rate": 0.0001991899033418339, + "loss": 4.6603, + "step": 163600 + }, + { + "epoch": 0.30538909584580326, + "grad_norm": 1.285799503326416, + "learning_rate": 0.00019918936960888723, + "loss": 4.7631, + "step": 163650 + }, + { + "epoch": 0.30548240140518174, + "grad_norm": 1.011475920677185, + "learning_rate": 0.00019918883570088877, + "loss": 4.4944, + "step": 163700 + }, + { + "epoch": 0.3055757069645602, + "grad_norm": 0.7682061791419983, + "learning_rate": 0.0001991883016178395, + "loss": 4.5854, + "step": 163750 + }, + { + "epoch": 0.3056690125239387, + "grad_norm": 1.2138035297393799, + "learning_rate": 0.0001991877673597403, + "loss": 4.5669, + "step": 163800 + }, + { + "epoch": 0.3057623180833172, + "grad_norm": 1.1199895143508911, + "learning_rate": 0.00019918723292659216, + "loss": 4.8251, + "step": 163850 + }, + { + "epoch": 0.3058556236426957, + "grad_norm": 0.9371145367622375, + "learning_rate": 0.00019918669831839601, + "loss": 4.6153, + "step": 163900 + }, + { + "epoch": 0.30594892920207417, + "grad_norm": 0.8127995729446411, + "learning_rate": 0.00019918616353515277, + "loss": 4.4168, + "step": 163950 + }, + { + "epoch": 0.30604223476145265, + "grad_norm": 0.9031199216842651, + "learning_rate": 0.00019918562857686344, + "loss": 4.707, + "step": 164000 + }, + { + "epoch": 0.3061355403208312, + "grad_norm": 1.1019059419631958, + "learning_rate": 0.00019918509344352887, + "loss": 4.577, + "step": 164050 + }, + { + "epoch": 0.30622884588020965, + "grad_norm": 0.9607084393501282, + "learning_rate": 0.0001991845581351501, + "loss": 4.6266, + "step": 164100 + }, + { + "epoch": 0.30632215143958813, + "grad_norm": 0.6045005917549133, + "learning_rate": 0.00019918402265172805, + "loss": 4.743, + "step": 164150 + }, + { + "epoch": 0.30641545699896666, + "grad_norm": 0.9607996940612793, + "learning_rate": 0.0001991834869932636, + "loss": 4.637, + "step": 164200 + }, + { + "epoch": 0.30650876255834514, + "grad_norm": 1.1145765781402588, + "learning_rate": 0.0001991829511597578, + "loss": 4.6641, + "step": 164250 + }, + { + "epoch": 0.3066020681177236, + "grad_norm": 1.0428991317749023, + "learning_rate": 0.0001991824151512115, + "loss": 4.6888, + "step": 164300 + }, + { + "epoch": 0.3066953736771021, + "grad_norm": 0.8830058574676514, + "learning_rate": 0.00019918187896762575, + "loss": 4.6689, + "step": 164350 + }, + { + "epoch": 0.3067886792364806, + "grad_norm": 0.9978854656219482, + "learning_rate": 0.0001991813426090014, + "loss": 4.7586, + "step": 164400 + }, + { + "epoch": 0.3068819847958591, + "grad_norm": 0.9433168768882751, + "learning_rate": 0.00019918080607533943, + "loss": 4.813, + "step": 164450 + }, + { + "epoch": 0.3069752903552376, + "grad_norm": 0.64937824010849, + "learning_rate": 0.00019918026936664078, + "loss": 4.3087, + "step": 164500 + }, + { + "epoch": 0.3070685959146161, + "grad_norm": 0.9438527822494507, + "learning_rate": 0.00019917973248290643, + "loss": 4.4295, + "step": 164550 + }, + { + "epoch": 0.3071619014739946, + "grad_norm": 0.8068161606788635, + "learning_rate": 0.00019917919542413728, + "loss": 4.4977, + "step": 164600 + }, + { + "epoch": 0.30725520703337306, + "grad_norm": 0.896666407585144, + "learning_rate": 0.00019917865819033432, + "loss": 4.6681, + "step": 164650 + }, + { + "epoch": 0.30734851259275153, + "grad_norm": 1.1718946695327759, + "learning_rate": 0.00019917812078149843, + "loss": 4.565, + "step": 164700 + }, + { + "epoch": 0.30744181815213006, + "grad_norm": 1.3140202760696411, + "learning_rate": 0.00019917758319763066, + "loss": 4.4831, + "step": 164750 + }, + { + "epoch": 0.30753512371150854, + "grad_norm": 1.0430924892425537, + "learning_rate": 0.0001991770454387319, + "loss": 4.4573, + "step": 164800 + }, + { + "epoch": 0.307628429270887, + "grad_norm": 0.9410809278488159, + "learning_rate": 0.0001991765075048031, + "loss": 4.5014, + "step": 164850 + }, + { + "epoch": 0.30772173483026555, + "grad_norm": 0.7173283696174622, + "learning_rate": 0.00019917596939584523, + "loss": 4.7214, + "step": 164900 + }, + { + "epoch": 0.307815040389644, + "grad_norm": 1.0757492780685425, + "learning_rate": 0.00019917543111185921, + "loss": 4.5144, + "step": 164950 + }, + { + "epoch": 0.3079083459490225, + "grad_norm": 0.9228636026382446, + "learning_rate": 0.000199174892652846, + "loss": 4.6586, + "step": 165000 + }, + { + "epoch": 0.3079083459490225, + "eval_loss": 4.838285446166992, + "eval_runtime": 229.3082, + "eval_samples_per_second": 11.373, + "eval_steps_per_second": 11.373, + "eval_tts_loss": 7.4116918211060066, + "step": 165000 + }, + { + "epoch": 0.308001651508401, + "grad_norm": 1.0502113103866577, + "learning_rate": 0.00019917435401880655, + "loss": 4.6569, + "step": 165050 + }, + { + "epoch": 0.3080949570677795, + "grad_norm": 0.9943327903747559, + "learning_rate": 0.0001991738152097418, + "loss": 4.5486, + "step": 165100 + }, + { + "epoch": 0.308188262627158, + "grad_norm": 1.2838720083236694, + "learning_rate": 0.00019917327622565276, + "loss": 4.6833, + "step": 165150 + }, + { + "epoch": 0.30828156818653646, + "grad_norm": 0.9620805978775024, + "learning_rate": 0.00019917273706654033, + "loss": 4.6248, + "step": 165200 + }, + { + "epoch": 0.308374873745915, + "grad_norm": 1.1069529056549072, + "learning_rate": 0.00019917219773240544, + "loss": 4.4957, + "step": 165250 + }, + { + "epoch": 0.30846817930529347, + "grad_norm": 0.6974848508834839, + "learning_rate": 0.0001991716582232491, + "loss": 4.7632, + "step": 165300 + }, + { + "epoch": 0.30856148486467194, + "grad_norm": 1.0856902599334717, + "learning_rate": 0.0001991711185390722, + "loss": 4.6712, + "step": 165350 + }, + { + "epoch": 0.3086547904240504, + "grad_norm": 1.2169914245605469, + "learning_rate": 0.00019917057867987575, + "loss": 4.5188, + "step": 165400 + }, + { + "epoch": 0.30874809598342895, + "grad_norm": 0.9107069969177246, + "learning_rate": 0.00019917003864566065, + "loss": 4.7021, + "step": 165450 + }, + { + "epoch": 0.3088414015428074, + "grad_norm": 0.910029947757721, + "learning_rate": 0.00019916949843642794, + "loss": 4.788, + "step": 165500 + }, + { + "epoch": 0.3089347071021859, + "grad_norm": 1.238258957862854, + "learning_rate": 0.00019916895805217844, + "loss": 4.7447, + "step": 165550 + }, + { + "epoch": 0.30902801266156443, + "grad_norm": 1.087679386138916, + "learning_rate": 0.00019916841749291323, + "loss": 4.6314, + "step": 165600 + }, + { + "epoch": 0.3091213182209429, + "grad_norm": 0.9426969885826111, + "learning_rate": 0.0001991678767586332, + "loss": 4.4512, + "step": 165650 + }, + { + "epoch": 0.3092146237803214, + "grad_norm": 0.944223940372467, + "learning_rate": 0.00019916733584933932, + "loss": 4.7139, + "step": 165700 + }, + { + "epoch": 0.30930792933969986, + "grad_norm": 1.0137444734573364, + "learning_rate": 0.00019916679476503254, + "loss": 4.6459, + "step": 165750 + }, + { + "epoch": 0.3094012348990784, + "grad_norm": 1.2591444253921509, + "learning_rate": 0.00019916625350571384, + "loss": 4.6593, + "step": 165800 + }, + { + "epoch": 0.30949454045845687, + "grad_norm": 0.8761984705924988, + "learning_rate": 0.00019916571207138412, + "loss": 4.4277, + "step": 165850 + }, + { + "epoch": 0.30958784601783534, + "grad_norm": 0.8550477027893066, + "learning_rate": 0.00019916517046204434, + "loss": 4.5806, + "step": 165900 + }, + { + "epoch": 0.3096811515772139, + "grad_norm": 0.9725227355957031, + "learning_rate": 0.00019916462867769553, + "loss": 4.6497, + "step": 165950 + }, + { + "epoch": 0.30977445713659235, + "grad_norm": 1.0279942750930786, + "learning_rate": 0.00019916408671833858, + "loss": 4.6852, + "step": 166000 + }, + { + "epoch": 0.3098677626959708, + "grad_norm": 0.8512559533119202, + "learning_rate": 0.0001991635445839745, + "loss": 4.6959, + "step": 166050 + }, + { + "epoch": 0.3099610682553493, + "grad_norm": 0.7771254181861877, + "learning_rate": 0.00019916300227460417, + "loss": 4.7392, + "step": 166100 + }, + { + "epoch": 0.31005437381472783, + "grad_norm": 1.103044867515564, + "learning_rate": 0.00019916245979022858, + "loss": 4.3448, + "step": 166150 + }, + { + "epoch": 0.3101476793741063, + "grad_norm": 1.1170908212661743, + "learning_rate": 0.00019916191713084872, + "loss": 4.4304, + "step": 166200 + }, + { + "epoch": 0.3102409849334848, + "grad_norm": 1.0523208379745483, + "learning_rate": 0.00019916137429646554, + "loss": 4.7645, + "step": 166250 + }, + { + "epoch": 0.3103342904928633, + "grad_norm": 1.361086368560791, + "learning_rate": 0.00019916083128707995, + "loss": 4.4946, + "step": 166300 + }, + { + "epoch": 0.3104275960522418, + "grad_norm": 0.9509695172309875, + "learning_rate": 0.00019916028810269297, + "loss": 4.7445, + "step": 166350 + }, + { + "epoch": 0.31052090161162027, + "grad_norm": 1.121620774269104, + "learning_rate": 0.0001991597447433055, + "loss": 4.8506, + "step": 166400 + }, + { + "epoch": 0.31061420717099875, + "grad_norm": 1.0928542613983154, + "learning_rate": 0.00019915920120891855, + "loss": 4.6162, + "step": 166450 + }, + { + "epoch": 0.3107075127303773, + "grad_norm": 1.339594841003418, + "learning_rate": 0.00019915865749953304, + "loss": 4.6252, + "step": 166500 + }, + { + "epoch": 0.31080081828975575, + "grad_norm": 0.9323852062225342, + "learning_rate": 0.00019915811361514996, + "loss": 4.7945, + "step": 166550 + }, + { + "epoch": 0.31089412384913423, + "grad_norm": 1.001569390296936, + "learning_rate": 0.00019915756955577027, + "loss": 4.5336, + "step": 166600 + }, + { + "epoch": 0.3109874294085127, + "grad_norm": 0.9864224195480347, + "learning_rate": 0.00019915702532139487, + "loss": 4.5236, + "step": 166650 + }, + { + "epoch": 0.31108073496789124, + "grad_norm": 0.9965605735778809, + "learning_rate": 0.0001991564809120248, + "loss": 4.6922, + "step": 166700 + }, + { + "epoch": 0.3111740405272697, + "grad_norm": 1.0161752700805664, + "learning_rate": 0.000199155936327661, + "loss": 4.7956, + "step": 166750 + }, + { + "epoch": 0.3112673460866482, + "grad_norm": 1.1669565439224243, + "learning_rate": 0.00019915539156830443, + "loss": 4.8049, + "step": 166800 + }, + { + "epoch": 0.3113606516460267, + "grad_norm": 1.1081312894821167, + "learning_rate": 0.000199154846633956, + "loss": 4.6178, + "step": 166850 + }, + { + "epoch": 0.3114539572054052, + "grad_norm": 0.9586905837059021, + "learning_rate": 0.00019915430152461674, + "loss": 4.8232, + "step": 166900 + }, + { + "epoch": 0.3115472627647837, + "grad_norm": 1.1186403036117554, + "learning_rate": 0.00019915375624028755, + "loss": 4.6214, + "step": 166950 + }, + { + "epoch": 0.31164056832416215, + "grad_norm": 1.1981947422027588, + "learning_rate": 0.00019915321078096946, + "loss": 4.7213, + "step": 167000 + }, + { + "epoch": 0.3117338738835407, + "grad_norm": 0.7325296401977539, + "learning_rate": 0.0001991526651466634, + "loss": 4.6811, + "step": 167050 + }, + { + "epoch": 0.31182717944291916, + "grad_norm": 1.0409541130065918, + "learning_rate": 0.00019915211933737033, + "loss": 4.5533, + "step": 167100 + }, + { + "epoch": 0.31192048500229763, + "grad_norm": 0.8175338506698608, + "learning_rate": 0.0001991515733530912, + "loss": 4.5909, + "step": 167150 + }, + { + "epoch": 0.31201379056167616, + "grad_norm": 1.0503971576690674, + "learning_rate": 0.000199151027193827, + "loss": 4.4858, + "step": 167200 + }, + { + "epoch": 0.31210709612105464, + "grad_norm": 1.094265103340149, + "learning_rate": 0.00019915048085957868, + "loss": 4.5226, + "step": 167250 + }, + { + "epoch": 0.3122004016804331, + "grad_norm": 0.8987294435501099, + "learning_rate": 0.0001991499343503472, + "loss": 4.7354, + "step": 167300 + }, + { + "epoch": 0.3122937072398116, + "grad_norm": 1.0248451232910156, + "learning_rate": 0.00019914938766613354, + "loss": 4.5532, + "step": 167350 + }, + { + "epoch": 0.3123870127991901, + "grad_norm": 0.8451243042945862, + "learning_rate": 0.00019914884080693868, + "loss": 4.5967, + "step": 167400 + }, + { + "epoch": 0.3124803183585686, + "grad_norm": 0.8834465742111206, + "learning_rate": 0.0001991482937727635, + "loss": 4.2495, + "step": 167450 + }, + { + "epoch": 0.3125736239179471, + "grad_norm": 0.8053299784660339, + "learning_rate": 0.00019914774656360908, + "loss": 4.9166, + "step": 167500 + }, + { + "epoch": 0.3126669294773256, + "grad_norm": 1.0274240970611572, + "learning_rate": 0.00019914719917947632, + "loss": 4.7979, + "step": 167550 + }, + { + "epoch": 0.3127602350367041, + "grad_norm": 0.8318864107131958, + "learning_rate": 0.0001991466516203662, + "loss": 4.5883, + "step": 167600 + }, + { + "epoch": 0.31285354059608256, + "grad_norm": 1.0550457239151, + "learning_rate": 0.00019914610388627969, + "loss": 4.4031, + "step": 167650 + }, + { + "epoch": 0.31294684615546103, + "grad_norm": 1.0894427299499512, + "learning_rate": 0.00019914555597721773, + "loss": 4.87, + "step": 167700 + }, + { + "epoch": 0.31304015171483957, + "grad_norm": 0.8881887197494507, + "learning_rate": 0.00019914500789318133, + "loss": 4.8212, + "step": 167750 + }, + { + "epoch": 0.31313345727421804, + "grad_norm": 0.9769049286842346, + "learning_rate": 0.00019914445963417144, + "loss": 4.5287, + "step": 167800 + }, + { + "epoch": 0.3132267628335965, + "grad_norm": 1.0178450345993042, + "learning_rate": 0.00019914391120018902, + "loss": 4.5699, + "step": 167850 + }, + { + "epoch": 0.31332006839297505, + "grad_norm": 0.9004485011100769, + "learning_rate": 0.00019914336259123502, + "loss": 4.6477, + "step": 167900 + }, + { + "epoch": 0.3134133739523535, + "grad_norm": 1.098066806793213, + "learning_rate": 0.00019914281380731047, + "loss": 4.4141, + "step": 167950 + }, + { + "epoch": 0.313506679511732, + "grad_norm": 0.9474552273750305, + "learning_rate": 0.00019914226484841625, + "loss": 4.4733, + "step": 168000 + }, + { + "epoch": 0.313506679511732, + "eval_loss": 4.8274312019348145, + "eval_runtime": 228.3749, + "eval_samples_per_second": 11.42, + "eval_steps_per_second": 11.42, + "eval_tts_loss": 7.468050231518612, + "step": 168000 + }, + { + "epoch": 0.3135999850711105, + "grad_norm": 1.2087820768356323, + "learning_rate": 0.00019914171571455342, + "loss": 4.6384, + "step": 168050 + }, + { + "epoch": 0.313693290630489, + "grad_norm": 1.0665926933288574, + "learning_rate": 0.00019914116640572287, + "loss": 4.6831, + "step": 168100 + }, + { + "epoch": 0.3137865961898675, + "grad_norm": 0.769088625907898, + "learning_rate": 0.00019914061692192562, + "loss": 4.7609, + "step": 168150 + }, + { + "epoch": 0.31387990174924596, + "grad_norm": 1.0530633926391602, + "learning_rate": 0.00019914006726316264, + "loss": 4.9207, + "step": 168200 + }, + { + "epoch": 0.3139732073086245, + "grad_norm": 0.705093502998352, + "learning_rate": 0.00019913951742943486, + "loss": 4.642, + "step": 168250 + }, + { + "epoch": 0.31406651286800297, + "grad_norm": 0.9531348347663879, + "learning_rate": 0.0001991389674207433, + "loss": 4.9607, + "step": 168300 + }, + { + "epoch": 0.31415981842738144, + "grad_norm": 0.8925808668136597, + "learning_rate": 0.0001991384172370889, + "loss": 4.4425, + "step": 168350 + }, + { + "epoch": 0.3142531239867599, + "grad_norm": 0.8512563705444336, + "learning_rate": 0.00019913786687847265, + "loss": 4.5021, + "step": 168400 + }, + { + "epoch": 0.31434642954613845, + "grad_norm": 1.233641266822815, + "learning_rate": 0.00019913731634489548, + "loss": 4.7805, + "step": 168450 + }, + { + "epoch": 0.3144397351055169, + "grad_norm": 0.7689288854598999, + "learning_rate": 0.0001991367656363584, + "loss": 4.5333, + "step": 168500 + }, + { + "epoch": 0.3145330406648954, + "grad_norm": 1.1268550157546997, + "learning_rate": 0.00019913621475286237, + "loss": 4.7591, + "step": 168550 + }, + { + "epoch": 0.31462634622427393, + "grad_norm": 1.1939990520477295, + "learning_rate": 0.00019913566369440837, + "loss": 4.6831, + "step": 168600 + }, + { + "epoch": 0.3147196517836524, + "grad_norm": 0.9455341100692749, + "learning_rate": 0.00019913511246099735, + "loss": 4.7392, + "step": 168650 + }, + { + "epoch": 0.3148129573430309, + "grad_norm": 0.9590946435928345, + "learning_rate": 0.00019913456105263033, + "loss": 4.4734, + "step": 168700 + }, + { + "epoch": 0.31490626290240936, + "grad_norm": 1.0091745853424072, + "learning_rate": 0.00019913400946930824, + "loss": 4.6294, + "step": 168750 + }, + { + "epoch": 0.3149995684617879, + "grad_norm": 0.9442651867866516, + "learning_rate": 0.00019913345771103207, + "loss": 4.5777, + "step": 168800 + }, + { + "epoch": 0.31509287402116637, + "grad_norm": 0.7429641485214233, + "learning_rate": 0.00019913290577780278, + "loss": 4.722, + "step": 168850 + }, + { + "epoch": 0.31518617958054485, + "grad_norm": 1.2271195650100708, + "learning_rate": 0.00019913235366962137, + "loss": 4.8243, + "step": 168900 + }, + { + "epoch": 0.3152794851399234, + "grad_norm": 1.0357447862625122, + "learning_rate": 0.00019913180138648876, + "loss": 4.5255, + "step": 168950 + }, + { + "epoch": 0.31537279069930185, + "grad_norm": 1.144982933998108, + "learning_rate": 0.00019913124892840603, + "loss": 4.5941, + "step": 169000 + }, + { + "epoch": 0.31546609625868033, + "grad_norm": 0.9120845794677734, + "learning_rate": 0.00019913069629537403, + "loss": 4.6719, + "step": 169050 + }, + { + "epoch": 0.3155594018180588, + "grad_norm": 1.1090995073318481, + "learning_rate": 0.00019913014348739384, + "loss": 4.7637, + "step": 169100 + }, + { + "epoch": 0.31565270737743734, + "grad_norm": 1.0764634609222412, + "learning_rate": 0.00019912959050446633, + "loss": 4.5816, + "step": 169150 + }, + { + "epoch": 0.3157460129368158, + "grad_norm": 0.9311011433601379, + "learning_rate": 0.00019912903734659257, + "loss": 4.5876, + "step": 169200 + }, + { + "epoch": 0.3158393184961943, + "grad_norm": 0.9119871854782104, + "learning_rate": 0.00019912848401377353, + "loss": 4.428, + "step": 169250 + }, + { + "epoch": 0.31593262405557276, + "grad_norm": 0.9250431060791016, + "learning_rate": 0.00019912793050601014, + "loss": 4.528, + "step": 169300 + }, + { + "epoch": 0.3160259296149513, + "grad_norm": 1.0978806018829346, + "learning_rate": 0.00019912737682330336, + "loss": 4.8015, + "step": 169350 + }, + { + "epoch": 0.31611923517432977, + "grad_norm": 0.9182941913604736, + "learning_rate": 0.00019912682296565422, + "loss": 4.6908, + "step": 169400 + }, + { + "epoch": 0.31621254073370825, + "grad_norm": 0.8654223084449768, + "learning_rate": 0.0001991262689330637, + "loss": 4.5184, + "step": 169450 + }, + { + "epoch": 0.3163058462930868, + "grad_norm": 0.9357742071151733, + "learning_rate": 0.00019912571472553272, + "loss": 4.7359, + "step": 169500 + }, + { + "epoch": 0.31639915185246525, + "grad_norm": 0.9018232226371765, + "learning_rate": 0.00019912516034306232, + "loss": 4.497, + "step": 169550 + }, + { + "epoch": 0.31649245741184373, + "grad_norm": 1.170499324798584, + "learning_rate": 0.00019912460578565345, + "loss": 4.6351, + "step": 169600 + }, + { + "epoch": 0.3165857629712222, + "grad_norm": 1.0474013090133667, + "learning_rate": 0.0001991240510533071, + "loss": 4.6865, + "step": 169650 + }, + { + "epoch": 0.31667906853060074, + "grad_norm": 0.7851083874702454, + "learning_rate": 0.00019912349614602425, + "loss": 4.6992, + "step": 169700 + }, + { + "epoch": 0.3167723740899792, + "grad_norm": 1.0943204164505005, + "learning_rate": 0.00019912294106380586, + "loss": 4.683, + "step": 169750 + }, + { + "epoch": 0.3168656796493577, + "grad_norm": 1.037070870399475, + "learning_rate": 0.00019912238580665288, + "loss": 4.6626, + "step": 169800 + }, + { + "epoch": 0.3169589852087362, + "grad_norm": 0.8870090842247009, + "learning_rate": 0.00019912183037456638, + "loss": 4.6193, + "step": 169850 + }, + { + "epoch": 0.3170522907681147, + "grad_norm": 0.9513359069824219, + "learning_rate": 0.00019912127476754728, + "loss": 4.694, + "step": 169900 + }, + { + "epoch": 0.3171455963274932, + "grad_norm": 1.1024489402770996, + "learning_rate": 0.00019912071898559654, + "loss": 4.6326, + "step": 169950 + }, + { + "epoch": 0.31723890188687165, + "grad_norm": 1.0496639013290405, + "learning_rate": 0.0001991201630287152, + "loss": 4.4681, + "step": 170000 + }, + { + "epoch": 0.3173322074462502, + "grad_norm": 0.8167015910148621, + "learning_rate": 0.00019911960689690422, + "loss": 4.6452, + "step": 170050 + }, + { + "epoch": 0.31742551300562866, + "grad_norm": 0.811915397644043, + "learning_rate": 0.00019911905059016456, + "loss": 4.8109, + "step": 170100 + }, + { + "epoch": 0.31751881856500713, + "grad_norm": 1.01187264919281, + "learning_rate": 0.00019911849410849723, + "loss": 4.6726, + "step": 170150 + }, + { + "epoch": 0.31761212412438566, + "grad_norm": 1.0084149837493896, + "learning_rate": 0.0001991179374519032, + "loss": 4.5055, + "step": 170200 + }, + { + "epoch": 0.31770542968376414, + "grad_norm": 0.9547125101089478, + "learning_rate": 0.00019911738062038342, + "loss": 4.5173, + "step": 170250 + }, + { + "epoch": 0.3177987352431426, + "grad_norm": 0.9173873066902161, + "learning_rate": 0.00019911682361393895, + "loss": 4.8112, + "step": 170300 + }, + { + "epoch": 0.3178920408025211, + "grad_norm": 1.2184867858886719, + "learning_rate": 0.0001991162664325707, + "loss": 4.4778, + "step": 170350 + }, + { + "epoch": 0.3179853463618996, + "grad_norm": 1.0674309730529785, + "learning_rate": 0.0001991157090762797, + "loss": 4.5748, + "step": 170400 + }, + { + "epoch": 0.3180786519212781, + "grad_norm": 0.7090270519256592, + "learning_rate": 0.00019911515154506687, + "loss": 4.4085, + "step": 170450 + }, + { + "epoch": 0.3181719574806566, + "grad_norm": 0.9503317475318909, + "learning_rate": 0.00019911459383893326, + "loss": 4.5279, + "step": 170500 + }, + { + "epoch": 0.3182652630400351, + "grad_norm": 0.6449097990989685, + "learning_rate": 0.00019911403595787984, + "loss": 4.5365, + "step": 170550 + }, + { + "epoch": 0.3183585685994136, + "grad_norm": 1.0800449848175049, + "learning_rate": 0.00019911347790190758, + "loss": 4.5558, + "step": 170600 + }, + { + "epoch": 0.31845187415879206, + "grad_norm": 0.8874872326850891, + "learning_rate": 0.00019911291967101747, + "loss": 4.6167, + "step": 170650 + }, + { + "epoch": 0.31854517971817053, + "grad_norm": 0.8549138903617859, + "learning_rate": 0.00019911236126521047, + "loss": 4.6317, + "step": 170700 + }, + { + "epoch": 0.31863848527754907, + "grad_norm": 1.0165430307388306, + "learning_rate": 0.00019911180268448764, + "loss": 4.6746, + "step": 170750 + }, + { + "epoch": 0.31873179083692754, + "grad_norm": 1.049067497253418, + "learning_rate": 0.00019911124392884988, + "loss": 4.7359, + "step": 170800 + }, + { + "epoch": 0.318825096396306, + "grad_norm": 1.071564793586731, + "learning_rate": 0.00019911068499829825, + "loss": 4.5707, + "step": 170850 + }, + { + "epoch": 0.31891840195568455, + "grad_norm": 0.9487912654876709, + "learning_rate": 0.00019911012589283366, + "loss": 4.8717, + "step": 170900 + }, + { + "epoch": 0.319011707515063, + "grad_norm": 1.0813241004943848, + "learning_rate": 0.00019910956661245717, + "loss": 4.6888, + "step": 170950 + }, + { + "epoch": 0.3191050130744415, + "grad_norm": 1.02992844581604, + "learning_rate": 0.0001991090071571697, + "loss": 4.6039, + "step": 171000 + }, + { + "epoch": 0.3191050130744415, + "eval_loss": 4.821337699890137, + "eval_runtime": 229.1039, + "eval_samples_per_second": 11.383, + "eval_steps_per_second": 11.383, + "eval_tts_loss": 7.466205600627692, + "step": 171000 + }, + { + "epoch": 0.31919831863382, + "grad_norm": 1.2654902935028076, + "learning_rate": 0.0001991084475269723, + "loss": 4.5334, + "step": 171050 + }, + { + "epoch": 0.3192916241931985, + "grad_norm": 0.9937511086463928, + "learning_rate": 0.00019910788772186592, + "loss": 4.6948, + "step": 171100 + }, + { + "epoch": 0.319384929752577, + "grad_norm": 1.1610994338989258, + "learning_rate": 0.00019910732774185155, + "loss": 4.3105, + "step": 171150 + }, + { + "epoch": 0.31947823531195546, + "grad_norm": 0.9847679734230042, + "learning_rate": 0.00019910676758693022, + "loss": 4.7063, + "step": 171200 + }, + { + "epoch": 0.319571540871334, + "grad_norm": 1.15132474899292, + "learning_rate": 0.00019910620725710284, + "loss": 4.4479, + "step": 171250 + }, + { + "epoch": 0.31966484643071247, + "grad_norm": 1.0646486282348633, + "learning_rate": 0.00019910564675237048, + "loss": 4.6407, + "step": 171300 + }, + { + "epoch": 0.31975815199009094, + "grad_norm": 0.982887864112854, + "learning_rate": 0.00019910508607273405, + "loss": 4.4841, + "step": 171350 + }, + { + "epoch": 0.3198514575494694, + "grad_norm": 1.1266651153564453, + "learning_rate": 0.0001991045252181946, + "loss": 4.8636, + "step": 171400 + }, + { + "epoch": 0.31994476310884795, + "grad_norm": 0.9004781246185303, + "learning_rate": 0.0001991039641887531, + "loss": 4.7329, + "step": 171450 + }, + { + "epoch": 0.3200380686682264, + "grad_norm": 0.904808759689331, + "learning_rate": 0.00019910340298441055, + "loss": 4.7403, + "step": 171500 + }, + { + "epoch": 0.3201313742276049, + "grad_norm": 1.0027724504470825, + "learning_rate": 0.0001991028416051679, + "loss": 4.7172, + "step": 171550 + }, + { + "epoch": 0.32022467978698343, + "grad_norm": 0.9115274548530579, + "learning_rate": 0.00019910228005102623, + "loss": 4.7143, + "step": 171600 + }, + { + "epoch": 0.3203179853463619, + "grad_norm": 1.1834417581558228, + "learning_rate": 0.0001991017183219864, + "loss": 4.7278, + "step": 171650 + }, + { + "epoch": 0.3204112909057404, + "grad_norm": 1.0323907136917114, + "learning_rate": 0.00019910115641804952, + "loss": 4.7366, + "step": 171700 + }, + { + "epoch": 0.32050459646511886, + "grad_norm": 1.0845869779586792, + "learning_rate": 0.00019910059433921653, + "loss": 4.7036, + "step": 171750 + }, + { + "epoch": 0.3205979020244974, + "grad_norm": 1.2340019941329956, + "learning_rate": 0.00019910003208548844, + "loss": 4.3835, + "step": 171800 + }, + { + "epoch": 0.32069120758387587, + "grad_norm": 1.1320146322250366, + "learning_rate": 0.0001990994696568662, + "loss": 4.5122, + "step": 171850 + }, + { + "epoch": 0.32078451314325435, + "grad_norm": 1.0517185926437378, + "learning_rate": 0.00019909890705335087, + "loss": 4.4093, + "step": 171900 + }, + { + "epoch": 0.3208778187026328, + "grad_norm": 0.9432281255722046, + "learning_rate": 0.00019909834427494342, + "loss": 4.7337, + "step": 171950 + }, + { + "epoch": 0.32097112426201135, + "grad_norm": 1.0861693620681763, + "learning_rate": 0.00019909778132164475, + "loss": 4.5552, + "step": 172000 + }, + { + "epoch": 0.32106442982138983, + "grad_norm": 1.0468862056732178, + "learning_rate": 0.00019909721819345603, + "loss": 4.8132, + "step": 172050 + }, + { + "epoch": 0.3211577353807683, + "grad_norm": 0.941402018070221, + "learning_rate": 0.00019909665489037807, + "loss": 4.7838, + "step": 172100 + }, + { + "epoch": 0.32125104094014684, + "grad_norm": 0.92326420545578, + "learning_rate": 0.000199096091412412, + "loss": 4.476, + "step": 172150 + }, + { + "epoch": 0.3213443464995253, + "grad_norm": 0.8617150187492371, + "learning_rate": 0.00019909552775955876, + "loss": 4.5015, + "step": 172200 + }, + { + "epoch": 0.3214376520589038, + "grad_norm": 0.7417050004005432, + "learning_rate": 0.00019909496393181932, + "loss": 4.8644, + "step": 172250 + }, + { + "epoch": 0.32153095761828226, + "grad_norm": 1.1346899271011353, + "learning_rate": 0.00019909439992919474, + "loss": 4.8367, + "step": 172300 + }, + { + "epoch": 0.3216242631776608, + "grad_norm": 0.9893891215324402, + "learning_rate": 0.00019909383575168597, + "loss": 4.7965, + "step": 172350 + }, + { + "epoch": 0.3217175687370393, + "grad_norm": 0.8581725358963013, + "learning_rate": 0.000199093271399294, + "loss": 4.6347, + "step": 172400 + }, + { + "epoch": 0.32181087429641775, + "grad_norm": 0.9298039078712463, + "learning_rate": 0.00019909270687201987, + "loss": 4.7319, + "step": 172450 + }, + { + "epoch": 0.3219041798557963, + "grad_norm": 1.0204685926437378, + "learning_rate": 0.00019909214216986453, + "loss": 4.6937, + "step": 172500 + }, + { + "epoch": 0.32199748541517476, + "grad_norm": 1.0665239095687866, + "learning_rate": 0.000199091577292829, + "loss": 4.7268, + "step": 172550 + }, + { + "epoch": 0.32209079097455323, + "grad_norm": 1.0132877826690674, + "learning_rate": 0.0001990910122409143, + "loss": 4.509, + "step": 172600 + }, + { + "epoch": 0.3221840965339317, + "grad_norm": 1.0414512157440186, + "learning_rate": 0.00019909044701412135, + "loss": 4.4304, + "step": 172650 + }, + { + "epoch": 0.32227740209331024, + "grad_norm": 0.763314962387085, + "learning_rate": 0.0001990898816124512, + "loss": 4.5164, + "step": 172700 + }, + { + "epoch": 0.3223707076526887, + "grad_norm": 0.8184088468551636, + "learning_rate": 0.00019908931603590486, + "loss": 4.7437, + "step": 172750 + }, + { + "epoch": 0.3224640132120672, + "grad_norm": 0.8922542333602905, + "learning_rate": 0.00019908875028448332, + "loss": 4.4946, + "step": 172800 + }, + { + "epoch": 0.3225573187714457, + "grad_norm": 0.7964805364608765, + "learning_rate": 0.00019908818435818756, + "loss": 4.6092, + "step": 172850 + }, + { + "epoch": 0.3226506243308242, + "grad_norm": 0.9622323513031006, + "learning_rate": 0.0001990876182570186, + "loss": 4.7243, + "step": 172900 + }, + { + "epoch": 0.3227439298902027, + "grad_norm": 1.1066397428512573, + "learning_rate": 0.0001990870519809774, + "loss": 4.6053, + "step": 172950 + }, + { + "epoch": 0.32283723544958115, + "grad_norm": 1.3279519081115723, + "learning_rate": 0.00019908648553006503, + "loss": 4.5265, + "step": 173000 + }, + { + "epoch": 0.3229305410089597, + "grad_norm": 0.8593128323554993, + "learning_rate": 0.00019908591890428244, + "loss": 4.8224, + "step": 173050 + }, + { + "epoch": 0.32302384656833816, + "grad_norm": 0.9954770803451538, + "learning_rate": 0.00019908535210363062, + "loss": 4.7258, + "step": 173100 + }, + { + "epoch": 0.32311715212771663, + "grad_norm": 0.6582159996032715, + "learning_rate": 0.0001990847851281106, + "loss": 4.6627, + "step": 173150 + }, + { + "epoch": 0.32321045768709517, + "grad_norm": 1.021619439125061, + "learning_rate": 0.00019908421797772336, + "loss": 4.7628, + "step": 173200 + }, + { + "epoch": 0.32330376324647364, + "grad_norm": 1.0340412855148315, + "learning_rate": 0.0001990836506524699, + "loss": 4.4405, + "step": 173250 + }, + { + "epoch": 0.3233970688058521, + "grad_norm": 1.0030783414840698, + "learning_rate": 0.00019908308315235126, + "loss": 4.7434, + "step": 173300 + }, + { + "epoch": 0.3234903743652306, + "grad_norm": 1.1510215997695923, + "learning_rate": 0.0001990825154773684, + "loss": 4.6975, + "step": 173350 + }, + { + "epoch": 0.3235836799246091, + "grad_norm": 0.7062892317771912, + "learning_rate": 0.00019908194762752234, + "loss": 4.6726, + "step": 173400 + }, + { + "epoch": 0.3236769854839876, + "grad_norm": 1.1423907279968262, + "learning_rate": 0.00019908137960281407, + "loss": 4.524, + "step": 173450 + }, + { + "epoch": 0.3237702910433661, + "grad_norm": 0.9802839159965515, + "learning_rate": 0.00019908081140324456, + "loss": 4.5331, + "step": 173500 + }, + { + "epoch": 0.3238635966027446, + "grad_norm": 0.7058094143867493, + "learning_rate": 0.00019908024302881493, + "loss": 4.4531, + "step": 173550 + }, + { + "epoch": 0.3239569021621231, + "grad_norm": 0.9642864465713501, + "learning_rate": 0.00019907967447952603, + "loss": 4.5526, + "step": 173600 + }, + { + "epoch": 0.32405020772150156, + "grad_norm": 0.8382852077484131, + "learning_rate": 0.000199079105755379, + "loss": 4.6682, + "step": 173650 + }, + { + "epoch": 0.32414351328088004, + "grad_norm": 1.2792258262634277, + "learning_rate": 0.00019907853685637475, + "loss": 4.6946, + "step": 173700 + }, + { + "epoch": 0.32423681884025857, + "grad_norm": 1.1190528869628906, + "learning_rate": 0.00019907796778251432, + "loss": 4.5769, + "step": 173750 + }, + { + "epoch": 0.32433012439963704, + "grad_norm": 1.1447540521621704, + "learning_rate": 0.0001990773985337987, + "loss": 4.8288, + "step": 173800 + }, + { + "epoch": 0.3244234299590155, + "grad_norm": 1.0323656797409058, + "learning_rate": 0.00019907682911022892, + "loss": 4.6316, + "step": 173850 + }, + { + "epoch": 0.32451673551839405, + "grad_norm": 0.9703738689422607, + "learning_rate": 0.00019907625951180594, + "loss": 4.6256, + "step": 173900 + }, + { + "epoch": 0.3246100410777725, + "grad_norm": 1.1295841932296753, + "learning_rate": 0.0001990756897385308, + "loss": 4.542, + "step": 173950 + }, + { + "epoch": 0.324703346637151, + "grad_norm": 1.204323172569275, + "learning_rate": 0.00019907511979040454, + "loss": 4.655, + "step": 174000 + }, + { + "epoch": 0.324703346637151, + "eval_loss": 4.816260814666748, + "eval_runtime": 230.4264, + "eval_samples_per_second": 11.318, + "eval_steps_per_second": 11.318, + "eval_tts_loss": 7.435452688951045, + "step": 174000 + }, + { + "epoch": 0.3247966521965295, + "grad_norm": 1.0280636548995972, + "learning_rate": 0.00019907454966742812, + "loss": 4.6904, + "step": 174050 + }, + { + "epoch": 0.324889957755908, + "grad_norm": 1.1565958261489868, + "learning_rate": 0.00019907397936960253, + "loss": 4.3498, + "step": 174100 + }, + { + "epoch": 0.3249832633152865, + "grad_norm": 1.0304701328277588, + "learning_rate": 0.0001990734088969288, + "loss": 4.5535, + "step": 174150 + }, + { + "epoch": 0.32507656887466496, + "grad_norm": 0.8157714605331421, + "learning_rate": 0.00019907283824940794, + "loss": 4.5031, + "step": 174200 + }, + { + "epoch": 0.3251698744340435, + "grad_norm": 0.6631931662559509, + "learning_rate": 0.00019907226742704097, + "loss": 4.8029, + "step": 174250 + }, + { + "epoch": 0.32526317999342197, + "grad_norm": 1.1516517400741577, + "learning_rate": 0.00019907169642982886, + "loss": 4.7112, + "step": 174300 + }, + { + "epoch": 0.32535648555280045, + "grad_norm": 0.7090115547180176, + "learning_rate": 0.00019907112525777265, + "loss": 4.8284, + "step": 174350 + }, + { + "epoch": 0.3254497911121789, + "grad_norm": 1.1411478519439697, + "learning_rate": 0.00019907055391087334, + "loss": 4.7277, + "step": 174400 + }, + { + "epoch": 0.32554309667155745, + "grad_norm": 0.763681948184967, + "learning_rate": 0.00019906998238913192, + "loss": 4.8038, + "step": 174450 + }, + { + "epoch": 0.32563640223093593, + "grad_norm": 1.0577315092086792, + "learning_rate": 0.0001990694106925494, + "loss": 4.6134, + "step": 174500 + }, + { + "epoch": 0.3257297077903144, + "grad_norm": 0.9963567852973938, + "learning_rate": 0.00019906883882112682, + "loss": 4.472, + "step": 174550 + }, + { + "epoch": 0.3258230133496929, + "grad_norm": 0.8183549046516418, + "learning_rate": 0.00019906826677486516, + "loss": 4.4465, + "step": 174600 + }, + { + "epoch": 0.3259163189090714, + "grad_norm": 0.908412516117096, + "learning_rate": 0.00019906769455376545, + "loss": 4.5436, + "step": 174650 + }, + { + "epoch": 0.3260096244684499, + "grad_norm": 1.11734139919281, + "learning_rate": 0.0001990671221578287, + "loss": 4.8297, + "step": 174700 + }, + { + "epoch": 0.32610293002782836, + "grad_norm": 1.123195767402649, + "learning_rate": 0.0001990665495870559, + "loss": 4.5935, + "step": 174750 + }, + { + "epoch": 0.3261962355872069, + "grad_norm": 0.9942184090614319, + "learning_rate": 0.00019906597684144807, + "loss": 4.6195, + "step": 174800 + }, + { + "epoch": 0.32628954114658537, + "grad_norm": 1.0256868600845337, + "learning_rate": 0.00019906540392100624, + "loss": 4.5741, + "step": 174850 + }, + { + "epoch": 0.32638284670596385, + "grad_norm": 1.351872205734253, + "learning_rate": 0.00019906483082573136, + "loss": 4.5077, + "step": 174900 + }, + { + "epoch": 0.3264761522653423, + "grad_norm": 0.7622151374816895, + "learning_rate": 0.00019906425755562452, + "loss": 4.5419, + "step": 174950 + }, + { + "epoch": 0.32656945782472085, + "grad_norm": 0.8031182885169983, + "learning_rate": 0.00019906368411068672, + "loss": 4.5477, + "step": 175000 + }, + { + "epoch": 0.32666276338409933, + "grad_norm": 1.1167811155319214, + "learning_rate": 0.0001990631104909189, + "loss": 4.7045, + "step": 175050 + }, + { + "epoch": 0.3267560689434778, + "grad_norm": 1.2244101762771606, + "learning_rate": 0.00019906253669632214, + "loss": 4.7501, + "step": 175100 + }, + { + "epoch": 0.32684937450285634, + "grad_norm": 0.8345790505409241, + "learning_rate": 0.00019906196272689744, + "loss": 4.5942, + "step": 175150 + }, + { + "epoch": 0.3269426800622348, + "grad_norm": 0.927778422832489, + "learning_rate": 0.00019906138858264577, + "loss": 4.7552, + "step": 175200 + }, + { + "epoch": 0.3270359856216133, + "grad_norm": 1.1129297018051147, + "learning_rate": 0.00019906081426356823, + "loss": 4.5184, + "step": 175250 + }, + { + "epoch": 0.32712929118099177, + "grad_norm": 0.8951523900032043, + "learning_rate": 0.00019906023976966577, + "loss": 4.7909, + "step": 175300 + }, + { + "epoch": 0.3272225967403703, + "grad_norm": 1.09775710105896, + "learning_rate": 0.0001990596651009394, + "loss": 4.61, + "step": 175350 + }, + { + "epoch": 0.3273159022997488, + "grad_norm": 1.0176907777786255, + "learning_rate": 0.00019905909025739015, + "loss": 4.7255, + "step": 175400 + }, + { + "epoch": 0.32740920785912725, + "grad_norm": 1.1389837265014648, + "learning_rate": 0.00019905851523901907, + "loss": 4.4602, + "step": 175450 + }, + { + "epoch": 0.3275025134185058, + "grad_norm": 0.8852382302284241, + "learning_rate": 0.00019905794004582708, + "loss": 4.629, + "step": 175500 + }, + { + "epoch": 0.32759581897788426, + "grad_norm": 1.0725276470184326, + "learning_rate": 0.00019905736467781533, + "loss": 4.5922, + "step": 175550 + }, + { + "epoch": 0.32768912453726273, + "grad_norm": 0.8179895281791687, + "learning_rate": 0.00019905678913498468, + "loss": 4.6167, + "step": 175600 + }, + { + "epoch": 0.3277824300966412, + "grad_norm": 0.7662099599838257, + "learning_rate": 0.0001990562134173363, + "loss": 4.46, + "step": 175650 + }, + { + "epoch": 0.32787573565601974, + "grad_norm": 1.104725956916809, + "learning_rate": 0.00019905563752487112, + "loss": 4.6232, + "step": 175700 + }, + { + "epoch": 0.3279690412153982, + "grad_norm": 1.0175747871398926, + "learning_rate": 0.0001990550614575901, + "loss": 4.5933, + "step": 175750 + }, + { + "epoch": 0.3280623467747767, + "grad_norm": 1.1158710718154907, + "learning_rate": 0.00019905448521549438, + "loss": 4.6522, + "step": 175800 + }, + { + "epoch": 0.3281556523341552, + "grad_norm": 1.6744157075881958, + "learning_rate": 0.00019905390879858495, + "loss": 4.7029, + "step": 175850 + }, + { + "epoch": 0.3282489578935337, + "grad_norm": 0.9802636504173279, + "learning_rate": 0.00019905333220686275, + "loss": 4.5862, + "step": 175900 + }, + { + "epoch": 0.3283422634529122, + "grad_norm": 0.7022011280059814, + "learning_rate": 0.00019905275544032888, + "loss": 4.5594, + "step": 175950 + }, + { + "epoch": 0.32843556901229065, + "grad_norm": 0.8897641897201538, + "learning_rate": 0.00019905217849898433, + "loss": 4.6417, + "step": 176000 + }, + { + "epoch": 0.3285288745716692, + "grad_norm": 1.1504569053649902, + "learning_rate": 0.00019905160138283007, + "loss": 4.6595, + "step": 176050 + }, + { + "epoch": 0.32862218013104766, + "grad_norm": 1.0295069217681885, + "learning_rate": 0.0001990510240918672, + "loss": 4.5644, + "step": 176100 + }, + { + "epoch": 0.32871548569042613, + "grad_norm": 0.9090021848678589, + "learning_rate": 0.0001990504466260967, + "loss": 4.4895, + "step": 176150 + }, + { + "epoch": 0.32880879124980467, + "grad_norm": 0.9149088263511658, + "learning_rate": 0.00019904986898551955, + "loss": 4.6901, + "step": 176200 + }, + { + "epoch": 0.32890209680918314, + "grad_norm": 0.8863251209259033, + "learning_rate": 0.00019904929117013685, + "loss": 4.6697, + "step": 176250 + }, + { + "epoch": 0.3289954023685616, + "grad_norm": 1.07883882522583, + "learning_rate": 0.00019904871317994957, + "loss": 4.598, + "step": 176300 + }, + { + "epoch": 0.3290887079279401, + "grad_norm": 0.9367828369140625, + "learning_rate": 0.00019904813501495875, + "loss": 4.6257, + "step": 176350 + }, + { + "epoch": 0.3291820134873186, + "grad_norm": 1.0381193161010742, + "learning_rate": 0.00019904755667516538, + "loss": 4.6984, + "step": 176400 + }, + { + "epoch": 0.3292753190466971, + "grad_norm": 0.847889244556427, + "learning_rate": 0.00019904697816057053, + "loss": 4.4849, + "step": 176450 + }, + { + "epoch": 0.3293686246060756, + "grad_norm": 1.0124871730804443, + "learning_rate": 0.00019904639947117514, + "loss": 4.7115, + "step": 176500 + }, + { + "epoch": 0.3294619301654541, + "grad_norm": 0.8684128522872925, + "learning_rate": 0.0001990458206069803, + "loss": 4.5261, + "step": 176550 + }, + { + "epoch": 0.3295552357248326, + "grad_norm": 0.7277292609214783, + "learning_rate": 0.00019904524156798704, + "loss": 4.4937, + "step": 176600 + }, + { + "epoch": 0.32964854128421106, + "grad_norm": 1.0759245157241821, + "learning_rate": 0.00019904466235419634, + "loss": 4.6008, + "step": 176650 + }, + { + "epoch": 0.32974184684358954, + "grad_norm": 0.9541880488395691, + "learning_rate": 0.00019904408296560924, + "loss": 4.542, + "step": 176700 + }, + { + "epoch": 0.32983515240296807, + "grad_norm": 0.8675011992454529, + "learning_rate": 0.00019904350340222676, + "loss": 4.5248, + "step": 176750 + }, + { + "epoch": 0.32992845796234654, + "grad_norm": 0.9355060458183289, + "learning_rate": 0.00019904292366404995, + "loss": 4.5585, + "step": 176800 + }, + { + "epoch": 0.330021763521725, + "grad_norm": 1.1286976337432861, + "learning_rate": 0.00019904234375107976, + "loss": 4.6191, + "step": 176850 + }, + { + "epoch": 0.33011506908110355, + "grad_norm": 0.9177277684211731, + "learning_rate": 0.00019904176366331727, + "loss": 4.698, + "step": 176900 + }, + { + "epoch": 0.330208374640482, + "grad_norm": 0.9858579039573669, + "learning_rate": 0.00019904118340076355, + "loss": 4.6723, + "step": 176950 + }, + { + "epoch": 0.3303016801998605, + "grad_norm": 1.172903299331665, + "learning_rate": 0.00019904060296341947, + "loss": 4.5899, + "step": 177000 + }, + { + "epoch": 0.3303016801998605, + "eval_loss": 4.8154120445251465, + "eval_runtime": 228.9954, + "eval_samples_per_second": 11.389, + "eval_steps_per_second": 11.389, + "eval_tts_loss": 7.542438022716634, + "step": 177000 + }, + { + "epoch": 0.330394985759239, + "grad_norm": 1.2343531847000122, + "learning_rate": 0.00019904002235128623, + "loss": 4.5554, + "step": 177050 + }, + { + "epoch": 0.3304882913186175, + "grad_norm": 1.2554324865341187, + "learning_rate": 0.00019903944156436477, + "loss": 4.6312, + "step": 177100 + }, + { + "epoch": 0.330581596877996, + "grad_norm": 1.062771201133728, + "learning_rate": 0.0001990388606026561, + "loss": 4.7273, + "step": 177150 + }, + { + "epoch": 0.33067490243737446, + "grad_norm": 0.921607494354248, + "learning_rate": 0.0001990382794661613, + "loss": 4.5602, + "step": 177200 + }, + { + "epoch": 0.330768207996753, + "grad_norm": 1.0047394037246704, + "learning_rate": 0.00019903769815488132, + "loss": 4.4348, + "step": 177250 + }, + { + "epoch": 0.33086151355613147, + "grad_norm": 0.6674817204475403, + "learning_rate": 0.00019903711666881724, + "loss": 4.6014, + "step": 177300 + }, + { + "epoch": 0.33095481911550995, + "grad_norm": 1.1274831295013428, + "learning_rate": 0.0001990365350079701, + "loss": 4.6538, + "step": 177350 + }, + { + "epoch": 0.3310481246748884, + "grad_norm": 0.9511878490447998, + "learning_rate": 0.00019903595317234088, + "loss": 4.8159, + "step": 177400 + }, + { + "epoch": 0.33114143023426695, + "grad_norm": 1.1247509717941284, + "learning_rate": 0.0001990353711619306, + "loss": 4.6459, + "step": 177450 + }, + { + "epoch": 0.33123473579364543, + "grad_norm": 1.111100673675537, + "learning_rate": 0.00019903478897674036, + "loss": 4.6574, + "step": 177500 + }, + { + "epoch": 0.3313280413530239, + "grad_norm": 0.9724700450897217, + "learning_rate": 0.00019903420661677114, + "loss": 4.6917, + "step": 177550 + }, + { + "epoch": 0.3314213469124024, + "grad_norm": 1.2161903381347656, + "learning_rate": 0.00019903362408202395, + "loss": 4.6725, + "step": 177600 + }, + { + "epoch": 0.3315146524717809, + "grad_norm": 0.881798267364502, + "learning_rate": 0.00019903304137249984, + "loss": 4.4201, + "step": 177650 + }, + { + "epoch": 0.3316079580311594, + "grad_norm": 1.228102684020996, + "learning_rate": 0.00019903245848819988, + "loss": 4.6403, + "step": 177700 + }, + { + "epoch": 0.33170126359053786, + "grad_norm": 0.9526916146278381, + "learning_rate": 0.00019903187542912504, + "loss": 4.5728, + "step": 177750 + }, + { + "epoch": 0.3317945691499164, + "grad_norm": 0.8187074661254883, + "learning_rate": 0.00019903129219527632, + "loss": 4.6696, + "step": 177800 + }, + { + "epoch": 0.33188787470929487, + "grad_norm": 0.9834941029548645, + "learning_rate": 0.00019903070878665485, + "loss": 4.4247, + "step": 177850 + }, + { + "epoch": 0.33198118026867335, + "grad_norm": 1.224478006362915, + "learning_rate": 0.00019903012520326155, + "loss": 4.6612, + "step": 177900 + }, + { + "epoch": 0.3320744858280518, + "grad_norm": 0.7598810791969299, + "learning_rate": 0.00019902954144509753, + "loss": 4.563, + "step": 177950 + }, + { + "epoch": 0.33216779138743036, + "grad_norm": 1.0683156251907349, + "learning_rate": 0.0001990289575121638, + "loss": 4.6277, + "step": 178000 + }, + { + "epoch": 0.33226109694680883, + "grad_norm": 1.1003293991088867, + "learning_rate": 0.0001990283734044614, + "loss": 4.7756, + "step": 178050 + }, + { + "epoch": 0.3323544025061873, + "grad_norm": 0.9530490040779114, + "learning_rate": 0.00019902778912199128, + "loss": 4.5517, + "step": 178100 + }, + { + "epoch": 0.33244770806556584, + "grad_norm": 0.7573721408843994, + "learning_rate": 0.0001990272046647546, + "loss": 4.4958, + "step": 178150 + }, + { + "epoch": 0.3325410136249443, + "grad_norm": 1.020322561264038, + "learning_rate": 0.00019902662003275232, + "loss": 4.5234, + "step": 178200 + }, + { + "epoch": 0.3326343191843228, + "grad_norm": 0.9242296814918518, + "learning_rate": 0.00019902603522598547, + "loss": 4.6549, + "step": 178250 + }, + { + "epoch": 0.33272762474370127, + "grad_norm": 1.179311752319336, + "learning_rate": 0.00019902545024445508, + "loss": 4.5256, + "step": 178300 + }, + { + "epoch": 0.3328209303030798, + "grad_norm": 1.0598257780075073, + "learning_rate": 0.00019902486508816222, + "loss": 4.7532, + "step": 178350 + }, + { + "epoch": 0.3329142358624583, + "grad_norm": 0.7825391292572021, + "learning_rate": 0.0001990242797571079, + "loss": 4.6937, + "step": 178400 + }, + { + "epoch": 0.33300754142183675, + "grad_norm": 1.1020742654800415, + "learning_rate": 0.00019902369425129311, + "loss": 4.6548, + "step": 178450 + }, + { + "epoch": 0.3331008469812153, + "grad_norm": 0.9558370113372803, + "learning_rate": 0.00019902310857071895, + "loss": 4.8421, + "step": 178500 + }, + { + "epoch": 0.33319415254059376, + "grad_norm": 0.8280293941497803, + "learning_rate": 0.00019902252271538644, + "loss": 4.6858, + "step": 178550 + }, + { + "epoch": 0.33328745809997223, + "grad_norm": 0.8872970342636108, + "learning_rate": 0.00019902193668529656, + "loss": 4.4993, + "step": 178600 + }, + { + "epoch": 0.3333807636593507, + "grad_norm": 1.1399357318878174, + "learning_rate": 0.00019902135048045042, + "loss": 4.7029, + "step": 178650 + }, + { + "epoch": 0.33347406921872924, + "grad_norm": 1.1143829822540283, + "learning_rate": 0.00019902076410084903, + "loss": 4.6988, + "step": 178700 + }, + { + "epoch": 0.3335673747781077, + "grad_norm": 1.012130856513977, + "learning_rate": 0.00019902017754649338, + "loss": 4.6413, + "step": 178750 + }, + { + "epoch": 0.3336606803374862, + "grad_norm": 1.0381497144699097, + "learning_rate": 0.00019901959081738456, + "loss": 4.5631, + "step": 178800 + }, + { + "epoch": 0.3337539858968647, + "grad_norm": 0.788299024105072, + "learning_rate": 0.00019901900391352357, + "loss": 4.5039, + "step": 178850 + }, + { + "epoch": 0.3338472914562432, + "grad_norm": 0.8483543992042542, + "learning_rate": 0.0001990184168349115, + "loss": 4.7003, + "step": 178900 + }, + { + "epoch": 0.3339405970156217, + "grad_norm": 0.7698935866355896, + "learning_rate": 0.0001990178295815493, + "loss": 4.5215, + "step": 178950 + }, + { + "epoch": 0.33403390257500015, + "grad_norm": 1.0911551713943481, + "learning_rate": 0.00019901724215343807, + "loss": 4.5445, + "step": 179000 + }, + { + "epoch": 0.3341272081343787, + "grad_norm": 0.824788510799408, + "learning_rate": 0.00019901665455057882, + "loss": 4.5568, + "step": 179050 + }, + { + "epoch": 0.33422051369375716, + "grad_norm": 0.88285893201828, + "learning_rate": 0.00019901606677297262, + "loss": 4.5505, + "step": 179100 + }, + { + "epoch": 0.33431381925313564, + "grad_norm": 0.9610635638237, + "learning_rate": 0.00019901547882062047, + "loss": 4.6942, + "step": 179150 + }, + { + "epoch": 0.33440712481251417, + "grad_norm": 0.9467892050743103, + "learning_rate": 0.0001990148906935234, + "loss": 4.5793, + "step": 179200 + }, + { + "epoch": 0.33450043037189264, + "grad_norm": 1.0193818807601929, + "learning_rate": 0.00019901430239168254, + "loss": 4.5554, + "step": 179250 + }, + { + "epoch": 0.3345937359312711, + "grad_norm": 1.2325133085250854, + "learning_rate": 0.0001990137139150988, + "loss": 4.6663, + "step": 179300 + }, + { + "epoch": 0.3346870414906496, + "grad_norm": 0.6962212324142456, + "learning_rate": 0.00019901312526377329, + "loss": 4.5595, + "step": 179350 + }, + { + "epoch": 0.3347803470500281, + "grad_norm": 1.0818673372268677, + "learning_rate": 0.00019901253643770704, + "loss": 4.5388, + "step": 179400 + }, + { + "epoch": 0.3348736526094066, + "grad_norm": 0.9971811175346375, + "learning_rate": 0.0001990119474369011, + "loss": 4.4762, + "step": 179450 + }, + { + "epoch": 0.3349669581687851, + "grad_norm": 0.9982391595840454, + "learning_rate": 0.00019901135826135645, + "loss": 4.5818, + "step": 179500 + }, + { + "epoch": 0.3350602637281636, + "grad_norm": 2.8972480297088623, + "learning_rate": 0.0001990107689110742, + "loss": 4.5753, + "step": 179550 + }, + { + "epoch": 0.3351535692875421, + "grad_norm": 0.804068922996521, + "learning_rate": 0.00019901017938605536, + "loss": 4.8127, + "step": 179600 + }, + { + "epoch": 0.33524687484692056, + "grad_norm": 0.8897950053215027, + "learning_rate": 0.000199009589686301, + "loss": 4.6659, + "step": 179650 + }, + { + "epoch": 0.33534018040629904, + "grad_norm": 1.096867561340332, + "learning_rate": 0.00019900899981181212, + "loss": 4.6793, + "step": 179700 + }, + { + "epoch": 0.33543348596567757, + "grad_norm": 0.9849563241004944, + "learning_rate": 0.00019900840976258977, + "loss": 4.6498, + "step": 179750 + }, + { + "epoch": 0.33552679152505605, + "grad_norm": 1.0917383432388306, + "learning_rate": 0.000199007819538635, + "loss": 4.4898, + "step": 179800 + }, + { + "epoch": 0.3356200970844345, + "grad_norm": 1.0085644721984863, + "learning_rate": 0.00019900722913994884, + "loss": 4.7175, + "step": 179850 + }, + { + "epoch": 0.33571340264381305, + "grad_norm": 0.8432663679122925, + "learning_rate": 0.00019900663856653233, + "loss": 4.5698, + "step": 179900 + }, + { + "epoch": 0.33580670820319153, + "grad_norm": 1.037923812866211, + "learning_rate": 0.00019900604781838656, + "loss": 4.6207, + "step": 179950 + }, + { + "epoch": 0.33590001376257, + "grad_norm": 0.9545549154281616, + "learning_rate": 0.0001990054568955125, + "loss": 4.6257, + "step": 180000 + }, + { + "epoch": 0.33590001376257, + "eval_loss": 4.818175315856934, + "eval_runtime": 228.2553, + "eval_samples_per_second": 11.426, + "eval_steps_per_second": 11.426, + "eval_tts_loss": 7.496765146657618, + "step": 180000 + }, + { + "epoch": 0.3359933193219485, + "grad_norm": 0.8834198713302612, + "learning_rate": 0.00019900486579791128, + "loss": 4.5444, + "step": 180050 + }, + { + "epoch": 0.336086624881327, + "grad_norm": 1.05471932888031, + "learning_rate": 0.00019900427452558385, + "loss": 4.5951, + "step": 180100 + }, + { + "epoch": 0.3361799304407055, + "grad_norm": 1.0553511381149292, + "learning_rate": 0.0001990036830785313, + "loss": 4.6423, + "step": 180150 + }, + { + "epoch": 0.33627323600008396, + "grad_norm": 0.8488613963127136, + "learning_rate": 0.00019900309145675472, + "loss": 4.8316, + "step": 180200 + }, + { + "epoch": 0.33636654155946244, + "grad_norm": 0.8496327996253967, + "learning_rate": 0.00019900249966025503, + "loss": 4.5013, + "step": 180250 + }, + { + "epoch": 0.33645984711884097, + "grad_norm": 1.085153579711914, + "learning_rate": 0.0001990019076890334, + "loss": 4.5181, + "step": 180300 + }, + { + "epoch": 0.33655315267821945, + "grad_norm": 1.0941439867019653, + "learning_rate": 0.0001990013155430908, + "loss": 4.4706, + "step": 180350 + }, + { + "epoch": 0.3366464582375979, + "grad_norm": 0.9088996052742004, + "learning_rate": 0.00019900072322242827, + "loss": 4.8162, + "step": 180400 + }, + { + "epoch": 0.33673976379697645, + "grad_norm": 0.8877679705619812, + "learning_rate": 0.00019900013072704693, + "loss": 4.4462, + "step": 180450 + }, + { + "epoch": 0.33683306935635493, + "grad_norm": 1.0370676517486572, + "learning_rate": 0.0001989995380569478, + "loss": 4.6237, + "step": 180500 + }, + { + "epoch": 0.3369263749157334, + "grad_norm": 0.7951025366783142, + "learning_rate": 0.00019899894521213183, + "loss": 4.5673, + "step": 180550 + }, + { + "epoch": 0.3370196804751119, + "grad_norm": 1.1994744539260864, + "learning_rate": 0.00019899835219260018, + "loss": 4.4731, + "step": 180600 + }, + { + "epoch": 0.3371129860344904, + "grad_norm": 1.1491518020629883, + "learning_rate": 0.00019899775899835388, + "loss": 4.5261, + "step": 180650 + }, + { + "epoch": 0.3372062915938689, + "grad_norm": 0.9701890349388123, + "learning_rate": 0.0001989971656293939, + "loss": 4.8223, + "step": 180700 + }, + { + "epoch": 0.33729959715324737, + "grad_norm": 0.7716856002807617, + "learning_rate": 0.00019899657208572138, + "loss": 4.7053, + "step": 180750 + }, + { + "epoch": 0.3373929027126259, + "grad_norm": 1.2134188413619995, + "learning_rate": 0.00019899597836733733, + "loss": 4.8142, + "step": 180800 + }, + { + "epoch": 0.3374862082720044, + "grad_norm": 1.0805442333221436, + "learning_rate": 0.00019899538447424277, + "loss": 4.5745, + "step": 180850 + }, + { + "epoch": 0.33757951383138285, + "grad_norm": 0.9581602215766907, + "learning_rate": 0.0001989947904064388, + "loss": 4.7698, + "step": 180900 + }, + { + "epoch": 0.3376728193907613, + "grad_norm": 1.0755600929260254, + "learning_rate": 0.00019899419616392644, + "loss": 4.6043, + "step": 180950 + }, + { + "epoch": 0.33776612495013986, + "grad_norm": 0.9674501419067383, + "learning_rate": 0.00019899360174670673, + "loss": 4.683, + "step": 181000 + }, + { + "epoch": 0.33785943050951833, + "grad_norm": 0.8484556078910828, + "learning_rate": 0.00019899300715478077, + "loss": 4.6563, + "step": 181050 + }, + { + "epoch": 0.3379527360688968, + "grad_norm": 0.9506059885025024, + "learning_rate": 0.00019899241238814953, + "loss": 4.5925, + "step": 181100 + }, + { + "epoch": 0.33804604162827534, + "grad_norm": 1.1092076301574707, + "learning_rate": 0.0001989918174468141, + "loss": 4.689, + "step": 181150 + }, + { + "epoch": 0.3381393471876538, + "grad_norm": 0.8176827430725098, + "learning_rate": 0.00019899122233077554, + "loss": 4.5785, + "step": 181200 + }, + { + "epoch": 0.3382326527470323, + "grad_norm": 0.9832103252410889, + "learning_rate": 0.00019899062704003488, + "loss": 4.4976, + "step": 181250 + }, + { + "epoch": 0.33832595830641077, + "grad_norm": 0.6890990734100342, + "learning_rate": 0.00019899003157459322, + "loss": 4.5854, + "step": 181300 + }, + { + "epoch": 0.3384192638657893, + "grad_norm": 1.0396530628204346, + "learning_rate": 0.00019898943593445154, + "loss": 4.6799, + "step": 181350 + }, + { + "epoch": 0.3385125694251678, + "grad_norm": 1.0980576276779175, + "learning_rate": 0.00019898884011961094, + "loss": 4.5739, + "step": 181400 + }, + { + "epoch": 0.33860587498454625, + "grad_norm": 0.8679540753364563, + "learning_rate": 0.00019898824413007243, + "loss": 4.5388, + "step": 181450 + }, + { + "epoch": 0.3386991805439248, + "grad_norm": 0.857035219669342, + "learning_rate": 0.0001989876479658371, + "loss": 4.4769, + "step": 181500 + }, + { + "epoch": 0.33879248610330326, + "grad_norm": 0.9443413019180298, + "learning_rate": 0.00019898705162690598, + "loss": 4.4387, + "step": 181550 + }, + { + "epoch": 0.33888579166268173, + "grad_norm": 1.0914325714111328, + "learning_rate": 0.00019898645511328016, + "loss": 4.8009, + "step": 181600 + }, + { + "epoch": 0.3389790972220602, + "grad_norm": 0.9410198926925659, + "learning_rate": 0.00019898585842496064, + "loss": 4.7744, + "step": 181650 + }, + { + "epoch": 0.33907240278143874, + "grad_norm": 0.9771658778190613, + "learning_rate": 0.0001989852615619485, + "loss": 4.505, + "step": 181700 + }, + { + "epoch": 0.3391657083408172, + "grad_norm": 0.8809220194816589, + "learning_rate": 0.0001989846645242448, + "loss": 4.6773, + "step": 181750 + }, + { + "epoch": 0.3392590139001957, + "grad_norm": 0.9671509265899658, + "learning_rate": 0.00019898406731185057, + "loss": 4.573, + "step": 181800 + }, + { + "epoch": 0.3393523194595742, + "grad_norm": 1.0768080949783325, + "learning_rate": 0.0001989834699247669, + "loss": 4.8166, + "step": 181850 + }, + { + "epoch": 0.3394456250189527, + "grad_norm": 0.941792905330658, + "learning_rate": 0.00019898287236299478, + "loss": 4.7192, + "step": 181900 + }, + { + "epoch": 0.3395389305783312, + "grad_norm": 0.8942563533782959, + "learning_rate": 0.00019898227462653533, + "loss": 4.5277, + "step": 181950 + }, + { + "epoch": 0.33963223613770965, + "grad_norm": 0.9910199642181396, + "learning_rate": 0.00019898167671538957, + "loss": 4.6392, + "step": 182000 + }, + { + "epoch": 0.3397255416970882, + "grad_norm": 1.0308103561401367, + "learning_rate": 0.0001989810786295586, + "loss": 4.6123, + "step": 182050 + }, + { + "epoch": 0.33981884725646666, + "grad_norm": 0.8738309144973755, + "learning_rate": 0.00019898048036904342, + "loss": 4.5485, + "step": 182100 + }, + { + "epoch": 0.33991215281584514, + "grad_norm": 0.8555976152420044, + "learning_rate": 0.00019897988193384513, + "loss": 4.6508, + "step": 182150 + }, + { + "epoch": 0.34000545837522367, + "grad_norm": 0.9000989198684692, + "learning_rate": 0.00019897928332396477, + "loss": 4.7006, + "step": 182200 + }, + { + "epoch": 0.34009876393460214, + "grad_norm": 0.8779635429382324, + "learning_rate": 0.00019897868453940337, + "loss": 4.7278, + "step": 182250 + }, + { + "epoch": 0.3401920694939806, + "grad_norm": 1.2050081491470337, + "learning_rate": 0.00019897808558016198, + "loss": 4.7024, + "step": 182300 + }, + { + "epoch": 0.3402853750533591, + "grad_norm": 0.8261715173721313, + "learning_rate": 0.0001989774864462417, + "loss": 4.6134, + "step": 182350 + }, + { + "epoch": 0.3403786806127376, + "grad_norm": 1.1884026527404785, + "learning_rate": 0.0001989768871376436, + "loss": 4.5208, + "step": 182400 + }, + { + "epoch": 0.3404719861721161, + "grad_norm": 1.0583335161209106, + "learning_rate": 0.00019897628765436868, + "loss": 4.628, + "step": 182450 + }, + { + "epoch": 0.3405652917314946, + "grad_norm": 0.8147032856941223, + "learning_rate": 0.00019897568799641805, + "loss": 4.5084, + "step": 182500 + }, + { + "epoch": 0.3406585972908731, + "grad_norm": 0.9693218469619751, + "learning_rate": 0.00019897508816379273, + "loss": 4.7667, + "step": 182550 + }, + { + "epoch": 0.3407519028502516, + "grad_norm": 0.9098610877990723, + "learning_rate": 0.0001989744881564938, + "loss": 4.8388, + "step": 182600 + }, + { + "epoch": 0.34084520840963006, + "grad_norm": 1.03932785987854, + "learning_rate": 0.00019897388797452233, + "loss": 4.6463, + "step": 182650 + }, + { + "epoch": 0.34093851396900854, + "grad_norm": 0.9611286520957947, + "learning_rate": 0.00019897328761787932, + "loss": 4.7354, + "step": 182700 + }, + { + "epoch": 0.34103181952838707, + "grad_norm": 1.1163007020950317, + "learning_rate": 0.00019897268708656592, + "loss": 4.6021, + "step": 182750 + }, + { + "epoch": 0.34112512508776555, + "grad_norm": 0.9765385985374451, + "learning_rate": 0.0001989720863805831, + "loss": 4.5286, + "step": 182800 + }, + { + "epoch": 0.341218430647144, + "grad_norm": 0.8786355257034302, + "learning_rate": 0.00019897148549993202, + "loss": 4.6033, + "step": 182850 + }, + { + "epoch": 0.3413117362065225, + "grad_norm": 1.1458057165145874, + "learning_rate": 0.00019897088444461363, + "loss": 4.7663, + "step": 182900 + }, + { + "epoch": 0.34140504176590103, + "grad_norm": 0.8316869139671326, + "learning_rate": 0.00019897028321462905, + "loss": 4.5552, + "step": 182950 + }, + { + "epoch": 0.3414983473252795, + "grad_norm": 0.9072234630584717, + "learning_rate": 0.00019896968180997934, + "loss": 4.6009, + "step": 183000 + }, + { + "epoch": 0.3414983473252795, + "eval_loss": 4.803169250488281, + "eval_runtime": 229.034, + "eval_samples_per_second": 11.387, + "eval_steps_per_second": 11.387, + "eval_tts_loss": 7.459224440081708, + "step": 183000 + }, + { + "epoch": 0.341591652884658, + "grad_norm": 1.083984613418579, + "learning_rate": 0.00019896908023066557, + "loss": 4.469, + "step": 183050 + }, + { + "epoch": 0.3416849584440365, + "grad_norm": 0.8765878677368164, + "learning_rate": 0.00019896847847668877, + "loss": 4.4339, + "step": 183100 + }, + { + "epoch": 0.341778264003415, + "grad_norm": 0.9148985743522644, + "learning_rate": 0.00019896787654805005, + "loss": 4.6406, + "step": 183150 + }, + { + "epoch": 0.34187156956279346, + "grad_norm": 0.8526818156242371, + "learning_rate": 0.00019896727444475038, + "loss": 4.7993, + "step": 183200 + }, + { + "epoch": 0.34196487512217194, + "grad_norm": 0.9056457877159119, + "learning_rate": 0.00019896667216679094, + "loss": 4.4859, + "step": 183250 + }, + { + "epoch": 0.34205818068155047, + "grad_norm": 1.0208319425582886, + "learning_rate": 0.00019896606971417274, + "loss": 4.7113, + "step": 183300 + }, + { + "epoch": 0.34215148624092895, + "grad_norm": 0.9874845743179321, + "learning_rate": 0.0001989654670868968, + "loss": 4.5711, + "step": 183350 + }, + { + "epoch": 0.3422447918003074, + "grad_norm": 0.988519549369812, + "learning_rate": 0.00019896486428496425, + "loss": 4.6615, + "step": 183400 + }, + { + "epoch": 0.34233809735968596, + "grad_norm": 1.0255403518676758, + "learning_rate": 0.0001989642613083761, + "loss": 4.7423, + "step": 183450 + }, + { + "epoch": 0.34243140291906443, + "grad_norm": 0.8926201462745667, + "learning_rate": 0.0001989636581571335, + "loss": 4.5053, + "step": 183500 + }, + { + "epoch": 0.3425247084784429, + "grad_norm": 1.1894657611846924, + "learning_rate": 0.0001989630548312374, + "loss": 4.6994, + "step": 183550 + }, + { + "epoch": 0.3426180140378214, + "grad_norm": 1.066410779953003, + "learning_rate": 0.00019896245133068895, + "loss": 4.6081, + "step": 183600 + }, + { + "epoch": 0.3427113195971999, + "grad_norm": 1.3439453840255737, + "learning_rate": 0.00019896184765548917, + "loss": 4.6694, + "step": 183650 + }, + { + "epoch": 0.3428046251565784, + "grad_norm": 1.0843331813812256, + "learning_rate": 0.00019896124380563916, + "loss": 4.5804, + "step": 183700 + }, + { + "epoch": 0.34289793071595687, + "grad_norm": 1.108058214187622, + "learning_rate": 0.00019896063978113996, + "loss": 4.5375, + "step": 183750 + }, + { + "epoch": 0.3429912362753354, + "grad_norm": 1.012802004814148, + "learning_rate": 0.00019896003558199263, + "loss": 4.5888, + "step": 183800 + }, + { + "epoch": 0.3430845418347139, + "grad_norm": 0.9825184941291809, + "learning_rate": 0.0001989594312081983, + "loss": 4.6243, + "step": 183850 + }, + { + "epoch": 0.34317784739409235, + "grad_norm": 0.7795212268829346, + "learning_rate": 0.0001989588266597579, + "loss": 4.805, + "step": 183900 + }, + { + "epoch": 0.3432711529534708, + "grad_norm": 1.6016143560409546, + "learning_rate": 0.00019895822193667265, + "loss": 4.6542, + "step": 183950 + }, + { + "epoch": 0.34336445851284936, + "grad_norm": 1.0046799182891846, + "learning_rate": 0.00019895761703894355, + "loss": 4.5883, + "step": 184000 + }, + { + "epoch": 0.34345776407222783, + "grad_norm": 0.7315368056297302, + "learning_rate": 0.00019895701196657163, + "loss": 4.6325, + "step": 184050 + }, + { + "epoch": 0.3435510696316063, + "grad_norm": 0.8666321635246277, + "learning_rate": 0.00019895640671955804, + "loss": 4.5639, + "step": 184100 + }, + { + "epoch": 0.34364437519098484, + "grad_norm": 1.0640748739242554, + "learning_rate": 0.00019895580129790377, + "loss": 4.5574, + "step": 184150 + }, + { + "epoch": 0.3437376807503633, + "grad_norm": 0.8577877879142761, + "learning_rate": 0.00019895519570160996, + "loss": 4.6898, + "step": 184200 + }, + { + "epoch": 0.3438309863097418, + "grad_norm": 0.7682492733001709, + "learning_rate": 0.0001989545899306776, + "loss": 4.7193, + "step": 184250 + }, + { + "epoch": 0.34392429186912027, + "grad_norm": 1.053924322128296, + "learning_rate": 0.00019895398398510778, + "loss": 4.8604, + "step": 184300 + }, + { + "epoch": 0.3440175974284988, + "grad_norm": 0.9362823367118835, + "learning_rate": 0.00019895337786490162, + "loss": 4.3165, + "step": 184350 + }, + { + "epoch": 0.3441109029878773, + "grad_norm": 0.9843419194221497, + "learning_rate": 0.0001989527715700602, + "loss": 4.3989, + "step": 184400 + }, + { + "epoch": 0.34420420854725575, + "grad_norm": 1.27715265750885, + "learning_rate": 0.0001989521651005845, + "loss": 4.6078, + "step": 184450 + }, + { + "epoch": 0.3442975141066343, + "grad_norm": 0.9157127141952515, + "learning_rate": 0.00019895155845647567, + "loss": 4.4988, + "step": 184500 + }, + { + "epoch": 0.34439081966601276, + "grad_norm": 0.9360002875328064, + "learning_rate": 0.00019895095163773472, + "loss": 4.6132, + "step": 184550 + }, + { + "epoch": 0.34448412522539124, + "grad_norm": 1.044089674949646, + "learning_rate": 0.00019895034464436276, + "loss": 4.351, + "step": 184600 + }, + { + "epoch": 0.3445774307847697, + "grad_norm": 1.0638103485107422, + "learning_rate": 0.00019894973747636084, + "loss": 4.7351, + "step": 184650 + }, + { + "epoch": 0.34467073634414824, + "grad_norm": 1.2173103094100952, + "learning_rate": 0.00019894913013373007, + "loss": 4.4605, + "step": 184700 + }, + { + "epoch": 0.3447640419035267, + "grad_norm": 0.8456254601478577, + "learning_rate": 0.00019894852261647148, + "loss": 4.8245, + "step": 184750 + }, + { + "epoch": 0.3448573474629052, + "grad_norm": 1.3228527307510376, + "learning_rate": 0.00019894791492458617, + "loss": 4.6516, + "step": 184800 + }, + { + "epoch": 0.3449506530222837, + "grad_norm": 0.8793355226516724, + "learning_rate": 0.0001989473070580752, + "loss": 4.7263, + "step": 184850 + }, + { + "epoch": 0.3450439585816622, + "grad_norm": 0.733553409576416, + "learning_rate": 0.00019894669901693966, + "loss": 4.5974, + "step": 184900 + }, + { + "epoch": 0.3451372641410407, + "grad_norm": 0.9788616895675659, + "learning_rate": 0.00019894609080118057, + "loss": 4.4838, + "step": 184950 + }, + { + "epoch": 0.34523056970041915, + "grad_norm": 1.099247932434082, + "learning_rate": 0.00019894548241079907, + "loss": 4.5225, + "step": 185000 + }, + { + "epoch": 0.3453238752597977, + "grad_norm": 0.9463627934455872, + "learning_rate": 0.00019894487384579618, + "loss": 4.8215, + "step": 185050 + }, + { + "epoch": 0.34541718081917616, + "grad_norm": 0.9453304409980774, + "learning_rate": 0.000198944265106173, + "loss": 4.4883, + "step": 185100 + }, + { + "epoch": 0.34551048637855464, + "grad_norm": 1.1768165826797485, + "learning_rate": 0.0001989436561919306, + "loss": 4.8437, + "step": 185150 + }, + { + "epoch": 0.34560379193793317, + "grad_norm": 0.7917547821998596, + "learning_rate": 0.00019894304710307007, + "loss": 4.3636, + "step": 185200 + }, + { + "epoch": 0.34569709749731165, + "grad_norm": 0.802765429019928, + "learning_rate": 0.00019894243783959246, + "loss": 4.6204, + "step": 185250 + }, + { + "epoch": 0.3457904030566901, + "grad_norm": 0.9512039422988892, + "learning_rate": 0.00019894182840149885, + "loss": 4.4626, + "step": 185300 + }, + { + "epoch": 0.3458837086160686, + "grad_norm": 0.9868209362030029, + "learning_rate": 0.00019894121878879034, + "loss": 4.8379, + "step": 185350 + }, + { + "epoch": 0.34597701417544713, + "grad_norm": 1.1373767852783203, + "learning_rate": 0.000198940609001468, + "loss": 4.4935, + "step": 185400 + }, + { + "epoch": 0.3460703197348256, + "grad_norm": 0.8839678168296814, + "learning_rate": 0.00019893999903953288, + "loss": 4.7252, + "step": 185450 + }, + { + "epoch": 0.3461636252942041, + "grad_norm": 0.8720890283584595, + "learning_rate": 0.00019893938890298607, + "loss": 4.7323, + "step": 185500 + }, + { + "epoch": 0.34625693085358256, + "grad_norm": 0.9795693159103394, + "learning_rate": 0.00019893877859182864, + "loss": 4.8515, + "step": 185550 + }, + { + "epoch": 0.3463502364129611, + "grad_norm": 1.0126317739486694, + "learning_rate": 0.00019893816810606166, + "loss": 4.7654, + "step": 185600 + }, + { + "epoch": 0.34644354197233956, + "grad_norm": 1.0032151937484741, + "learning_rate": 0.00019893755744568626, + "loss": 4.638, + "step": 185650 + }, + { + "epoch": 0.34653684753171804, + "grad_norm": 0.7030245661735535, + "learning_rate": 0.00019893694661070349, + "loss": 4.7943, + "step": 185700 + }, + { + "epoch": 0.34663015309109657, + "grad_norm": 0.9134154915809631, + "learning_rate": 0.00019893633560111437, + "loss": 4.7419, + "step": 185750 + }, + { + "epoch": 0.34672345865047505, + "grad_norm": 0.6795088052749634, + "learning_rate": 0.00019893572441692005, + "loss": 4.593, + "step": 185800 + }, + { + "epoch": 0.3468167642098535, + "grad_norm": 0.9457302093505859, + "learning_rate": 0.00019893511305812159, + "loss": 4.6045, + "step": 185850 + }, + { + "epoch": 0.346910069769232, + "grad_norm": 1.1078051328659058, + "learning_rate": 0.00019893450152472003, + "loss": 4.7646, + "step": 185900 + }, + { + "epoch": 0.34700337532861053, + "grad_norm": 0.8041667342185974, + "learning_rate": 0.00019893388981671653, + "loss": 4.6, + "step": 185950 + }, + { + "epoch": 0.347096680887989, + "grad_norm": 1.0429846048355103, + "learning_rate": 0.00019893327793411207, + "loss": 4.6922, + "step": 186000 + }, + { + "epoch": 0.347096680887989, + "eval_loss": 4.805553913116455, + "eval_runtime": 227.9003, + "eval_samples_per_second": 11.444, + "eval_steps_per_second": 11.444, + "eval_tts_loss": 7.4744320140360205, + "step": 186000 + }, + { + "epoch": 0.3471899864473675, + "grad_norm": 1.188132643699646, + "learning_rate": 0.0001989326658769078, + "loss": 4.6133, + "step": 186050 + }, + { + "epoch": 0.347283292006746, + "grad_norm": 1.105686902999878, + "learning_rate": 0.00019893205364510482, + "loss": 4.5588, + "step": 186100 + }, + { + "epoch": 0.3473765975661245, + "grad_norm": 1.0260311365127563, + "learning_rate": 0.00019893144123870414, + "loss": 4.5501, + "step": 186150 + }, + { + "epoch": 0.34746990312550297, + "grad_norm": 1.099698781967163, + "learning_rate": 0.00019893082865770687, + "loss": 4.6763, + "step": 186200 + }, + { + "epoch": 0.34756320868488144, + "grad_norm": 1.0089246034622192, + "learning_rate": 0.0001989302159021141, + "loss": 4.6909, + "step": 186250 + }, + { + "epoch": 0.34765651424426, + "grad_norm": 1.0701543092727661, + "learning_rate": 0.00019892960297192692, + "loss": 4.6315, + "step": 186300 + }, + { + "epoch": 0.34774981980363845, + "grad_norm": 1.1907925605773926, + "learning_rate": 0.00019892898986714638, + "loss": 4.4744, + "step": 186350 + }, + { + "epoch": 0.3478431253630169, + "grad_norm": 1.0035996437072754, + "learning_rate": 0.00019892837658777362, + "loss": 4.6013, + "step": 186400 + }, + { + "epoch": 0.34793643092239546, + "grad_norm": 1.0162055492401123, + "learning_rate": 0.00019892776313380965, + "loss": 4.4274, + "step": 186450 + }, + { + "epoch": 0.34802973648177393, + "grad_norm": 1.1313321590423584, + "learning_rate": 0.0001989271495052556, + "loss": 4.7547, + "step": 186500 + }, + { + "epoch": 0.3481230420411524, + "grad_norm": 0.8484470248222351, + "learning_rate": 0.0001989265357021125, + "loss": 4.8249, + "step": 186550 + }, + { + "epoch": 0.3482163476005309, + "grad_norm": 0.9785260558128357, + "learning_rate": 0.00019892592172438152, + "loss": 4.7419, + "step": 186600 + }, + { + "epoch": 0.3483096531599094, + "grad_norm": 0.9270712733268738, + "learning_rate": 0.00019892530757206368, + "loss": 4.7485, + "step": 186650 + }, + { + "epoch": 0.3484029587192879, + "grad_norm": 1.1253376007080078, + "learning_rate": 0.00019892469324516004, + "loss": 4.6492, + "step": 186700 + }, + { + "epoch": 0.34849626427866637, + "grad_norm": 0.8123875856399536, + "learning_rate": 0.00019892407874367175, + "loss": 4.5601, + "step": 186750 + }, + { + "epoch": 0.3485895698380449, + "grad_norm": 0.9842862486839294, + "learning_rate": 0.00019892346406759988, + "loss": 4.6747, + "step": 186800 + }, + { + "epoch": 0.3486828753974234, + "grad_norm": 1.0450785160064697, + "learning_rate": 0.0001989228492169455, + "loss": 4.6732, + "step": 186850 + }, + { + "epoch": 0.34877618095680185, + "grad_norm": 1.2406466007232666, + "learning_rate": 0.00019892223419170968, + "loss": 4.5914, + "step": 186900 + }, + { + "epoch": 0.3488694865161803, + "grad_norm": 1.202197790145874, + "learning_rate": 0.0001989216189918935, + "loss": 4.4839, + "step": 186950 + }, + { + "epoch": 0.34896279207555886, + "grad_norm": 0.9749503135681152, + "learning_rate": 0.00019892100361749813, + "loss": 4.5923, + "step": 187000 + }, + { + "epoch": 0.34905609763493733, + "grad_norm": 0.7598263621330261, + "learning_rate": 0.00019892038806852455, + "loss": 4.582, + "step": 187050 + }, + { + "epoch": 0.3491494031943158, + "grad_norm": 1.0366371870040894, + "learning_rate": 0.00019891977234497393, + "loss": 4.6446, + "step": 187100 + }, + { + "epoch": 0.34924270875369434, + "grad_norm": 0.82780522108078, + "learning_rate": 0.0001989191564468473, + "loss": 4.5483, + "step": 187150 + }, + { + "epoch": 0.3493360143130728, + "grad_norm": 1.048930287361145, + "learning_rate": 0.00019891854037414573, + "loss": 4.6858, + "step": 187200 + }, + { + "epoch": 0.3494293198724513, + "grad_norm": 1.0502203702926636, + "learning_rate": 0.00019891792412687038, + "loss": 4.821, + "step": 187250 + }, + { + "epoch": 0.34952262543182977, + "grad_norm": 1.079811930656433, + "learning_rate": 0.00019891730770502228, + "loss": 4.6184, + "step": 187300 + }, + { + "epoch": 0.3496159309912083, + "grad_norm": 0.9850397109985352, + "learning_rate": 0.00019891669110860255, + "loss": 4.7636, + "step": 187350 + }, + { + "epoch": 0.3497092365505868, + "grad_norm": 1.421713948249817, + "learning_rate": 0.00019891607433761226, + "loss": 4.8562, + "step": 187400 + }, + { + "epoch": 0.34980254210996525, + "grad_norm": 0.9206252098083496, + "learning_rate": 0.00019891545739205249, + "loss": 4.47, + "step": 187450 + }, + { + "epoch": 0.3498958476693438, + "grad_norm": 0.9205539226531982, + "learning_rate": 0.00019891484027192435, + "loss": 4.5468, + "step": 187500 + }, + { + "epoch": 0.34998915322872226, + "grad_norm": 1.1044520139694214, + "learning_rate": 0.00019891422297722893, + "loss": 4.7065, + "step": 187550 + }, + { + "epoch": 0.35008245878810074, + "grad_norm": 1.2163364887237549, + "learning_rate": 0.0001989136055079673, + "loss": 4.5895, + "step": 187600 + }, + { + "epoch": 0.3501757643474792, + "grad_norm": 1.0337022542953491, + "learning_rate": 0.00019891298786414057, + "loss": 4.752, + "step": 187650 + }, + { + "epoch": 0.35026906990685774, + "grad_norm": 0.9887984395027161, + "learning_rate": 0.0001989123700457498, + "loss": 4.498, + "step": 187700 + }, + { + "epoch": 0.3503623754662362, + "grad_norm": 0.8707548975944519, + "learning_rate": 0.00019891175205279614, + "loss": 4.4742, + "step": 187750 + }, + { + "epoch": 0.3504556810256147, + "grad_norm": 0.9432954788208008, + "learning_rate": 0.00019891113388528062, + "loss": 4.7022, + "step": 187800 + }, + { + "epoch": 0.3505489865849932, + "grad_norm": 1.063997745513916, + "learning_rate": 0.00019891051554320438, + "loss": 4.4942, + "step": 187850 + }, + { + "epoch": 0.3506422921443717, + "grad_norm": 1.1391524076461792, + "learning_rate": 0.00019890989702656844, + "loss": 4.6385, + "step": 187900 + }, + { + "epoch": 0.3507355977037502, + "grad_norm": 1.218582034111023, + "learning_rate": 0.00019890927833537397, + "loss": 4.665, + "step": 187950 + }, + { + "epoch": 0.35082890326312866, + "grad_norm": 0.8306301832199097, + "learning_rate": 0.00019890865946962198, + "loss": 4.7438, + "step": 188000 + }, + { + "epoch": 0.3509222088225072, + "grad_norm": 0.9843810796737671, + "learning_rate": 0.00019890804042931362, + "loss": 4.5901, + "step": 188050 + }, + { + "epoch": 0.35101551438188566, + "grad_norm": 0.9525366425514221, + "learning_rate": 0.00019890742121445002, + "loss": 4.6783, + "step": 188100 + }, + { + "epoch": 0.35110881994126414, + "grad_norm": 1.0917189121246338, + "learning_rate": 0.00019890680182503218, + "loss": 4.6924, + "step": 188150 + }, + { + "epoch": 0.3512021255006426, + "grad_norm": 1.1915470361709595, + "learning_rate": 0.00019890618226106125, + "loss": 4.4894, + "step": 188200 + }, + { + "epoch": 0.35129543106002115, + "grad_norm": 1.0157119035720825, + "learning_rate": 0.00019890556252253832, + "loss": 4.5125, + "step": 188250 + }, + { + "epoch": 0.3513887366193996, + "grad_norm": 1.2147808074951172, + "learning_rate": 0.0001989049426094645, + "loss": 4.474, + "step": 188300 + }, + { + "epoch": 0.3514820421787781, + "grad_norm": 0.9446842074394226, + "learning_rate": 0.00019890432252184082, + "loss": 4.6241, + "step": 188350 + }, + { + "epoch": 0.35157534773815663, + "grad_norm": 0.6960589289665222, + "learning_rate": 0.00019890370225966844, + "loss": 4.5706, + "step": 188400 + }, + { + "epoch": 0.3516686532975351, + "grad_norm": 0.8260095119476318, + "learning_rate": 0.00019890308182294838, + "loss": 4.7308, + "step": 188450 + }, + { + "epoch": 0.3517619588569136, + "grad_norm": 0.9620236158370972, + "learning_rate": 0.00019890246121168184, + "loss": 4.5599, + "step": 188500 + }, + { + "epoch": 0.35185526441629206, + "grad_norm": 0.8720660209655762, + "learning_rate": 0.00019890184042586985, + "loss": 4.5758, + "step": 188550 + }, + { + "epoch": 0.3519485699756706, + "grad_norm": 1.1599175930023193, + "learning_rate": 0.00019890121946551347, + "loss": 4.7073, + "step": 188600 + }, + { + "epoch": 0.35204187553504906, + "grad_norm": 1.200167179107666, + "learning_rate": 0.00019890059833061388, + "loss": 4.6389, + "step": 188650 + }, + { + "epoch": 0.35213518109442754, + "grad_norm": 0.7152448296546936, + "learning_rate": 0.0001988999770211721, + "loss": 4.6249, + "step": 188700 + }, + { + "epoch": 0.35222848665380607, + "grad_norm": 0.6849294900894165, + "learning_rate": 0.0001988993555371893, + "loss": 4.6701, + "step": 188750 + }, + { + "epoch": 0.35232179221318455, + "grad_norm": 1.2049897909164429, + "learning_rate": 0.00019889873387866653, + "loss": 4.6354, + "step": 188800 + }, + { + "epoch": 0.352415097772563, + "grad_norm": 0.9554798603057861, + "learning_rate": 0.0001988981120456049, + "loss": 4.4661, + "step": 188850 + }, + { + "epoch": 0.3525084033319415, + "grad_norm": 1.052039623260498, + "learning_rate": 0.0001988974900380055, + "loss": 4.6176, + "step": 188900 + }, + { + "epoch": 0.35260170889132003, + "grad_norm": 1.091506838798523, + "learning_rate": 0.00019889686785586942, + "loss": 4.801, + "step": 188950 + }, + { + "epoch": 0.3526950144506985, + "grad_norm": 1.3394882678985596, + "learning_rate": 0.00019889624549919777, + "loss": 4.722, + "step": 189000 + }, + { + "epoch": 0.3526950144506985, + "eval_loss": 4.806208610534668, + "eval_runtime": 230.3394, + "eval_samples_per_second": 11.322, + "eval_steps_per_second": 11.322, + "eval_tts_loss": 7.53537485083748, + "step": 189000 + }, + { + "epoch": 0.352788320010077, + "grad_norm": 0.8463157415390015, + "learning_rate": 0.00019889562296799166, + "loss": 4.6327, + "step": 189050 + }, + { + "epoch": 0.3528816255694555, + "grad_norm": 1.0070785284042358, + "learning_rate": 0.00019889500026225217, + "loss": 4.6743, + "step": 189100 + }, + { + "epoch": 0.352974931128834, + "grad_norm": 0.8200750946998596, + "learning_rate": 0.0001988943773819804, + "loss": 4.3624, + "step": 189150 + }, + { + "epoch": 0.35306823668821247, + "grad_norm": 0.9505131840705872, + "learning_rate": 0.00019889375432717747, + "loss": 4.8476, + "step": 189200 + }, + { + "epoch": 0.35316154224759094, + "grad_norm": 0.9525319337844849, + "learning_rate": 0.00019889313109784445, + "loss": 4.511, + "step": 189250 + }, + { + "epoch": 0.3532548478069695, + "grad_norm": 0.9823152422904968, + "learning_rate": 0.00019889250769398247, + "loss": 4.6123, + "step": 189300 + }, + { + "epoch": 0.35334815336634795, + "grad_norm": 1.0408767461776733, + "learning_rate": 0.0001988918841155926, + "loss": 4.6926, + "step": 189350 + }, + { + "epoch": 0.3534414589257264, + "grad_norm": 1.0379136800765991, + "learning_rate": 0.00019889126036267595, + "loss": 4.6514, + "step": 189400 + }, + { + "epoch": 0.35353476448510496, + "grad_norm": 0.7835113406181335, + "learning_rate": 0.00019889063643523366, + "loss": 4.5634, + "step": 189450 + }, + { + "epoch": 0.35362807004448343, + "grad_norm": 1.5030099153518677, + "learning_rate": 0.00019889001233326677, + "loss": 4.5985, + "step": 189500 + }, + { + "epoch": 0.3537213756038619, + "grad_norm": 0.9151412844657898, + "learning_rate": 0.0001988893880567764, + "loss": 4.7988, + "step": 189550 + }, + { + "epoch": 0.3538146811632404, + "grad_norm": 0.9782933592796326, + "learning_rate": 0.00019888876360576367, + "loss": 4.6997, + "step": 189600 + }, + { + "epoch": 0.3539079867226189, + "grad_norm": 0.9720525145530701, + "learning_rate": 0.00019888813898022968, + "loss": 4.8163, + "step": 189650 + }, + { + "epoch": 0.3540012922819974, + "grad_norm": 1.0412356853485107, + "learning_rate": 0.00019888751418017554, + "loss": 4.7658, + "step": 189700 + }, + { + "epoch": 0.35409459784137587, + "grad_norm": 0.5875607132911682, + "learning_rate": 0.0001988868892056023, + "loss": 4.8729, + "step": 189750 + }, + { + "epoch": 0.3541879034007544, + "grad_norm": 1.0475389957427979, + "learning_rate": 0.00019888626405651112, + "loss": 4.517, + "step": 189800 + }, + { + "epoch": 0.3542812089601329, + "grad_norm": 1.3192886114120483, + "learning_rate": 0.0001988856387329031, + "loss": 4.5192, + "step": 189850 + }, + { + "epoch": 0.35437451451951135, + "grad_norm": 1.179880142211914, + "learning_rate": 0.00019888501323477928, + "loss": 4.4085, + "step": 189900 + }, + { + "epoch": 0.35446782007888983, + "grad_norm": 1.334714651107788, + "learning_rate": 0.00019888438756214086, + "loss": 4.7391, + "step": 189950 + }, + { + "epoch": 0.35456112563826836, + "grad_norm": 1.2655901908874512, + "learning_rate": 0.00019888376171498887, + "loss": 4.756, + "step": 190000 + }, + { + "epoch": 0.35465443119764684, + "grad_norm": 1.0103163719177246, + "learning_rate": 0.00019888313569332443, + "loss": 4.6622, + "step": 190050 + }, + { + "epoch": 0.3547477367570253, + "grad_norm": 0.8582555055618286, + "learning_rate": 0.00019888250949714865, + "loss": 4.4321, + "step": 190100 + }, + { + "epoch": 0.35484104231640384, + "grad_norm": 1.1723741292953491, + "learning_rate": 0.00019888188312646267, + "loss": 4.5144, + "step": 190150 + }, + { + "epoch": 0.3549343478757823, + "grad_norm": 1.05409574508667, + "learning_rate": 0.00019888125658126758, + "loss": 4.618, + "step": 190200 + }, + { + "epoch": 0.3550276534351608, + "grad_norm": 1.0219589471817017, + "learning_rate": 0.00019888062986156443, + "loss": 4.632, + "step": 190250 + }, + { + "epoch": 0.35512095899453927, + "grad_norm": 1.1344385147094727, + "learning_rate": 0.00019888000296735436, + "loss": 4.4802, + "step": 190300 + }, + { + "epoch": 0.3552142645539178, + "grad_norm": 1.0866692066192627, + "learning_rate": 0.00019887937589863853, + "loss": 4.6449, + "step": 190350 + }, + { + "epoch": 0.3553075701132963, + "grad_norm": 1.0079729557037354, + "learning_rate": 0.00019887874865541795, + "loss": 4.6405, + "step": 190400 + }, + { + "epoch": 0.35540087567267475, + "grad_norm": 1.0848729610443115, + "learning_rate": 0.00019887812123769384, + "loss": 4.6839, + "step": 190450 + }, + { + "epoch": 0.3554941812320533, + "grad_norm": 0.8781519532203674, + "learning_rate": 0.00019887749364546717, + "loss": 4.5597, + "step": 190500 + }, + { + "epoch": 0.35558748679143176, + "grad_norm": 0.9141809940338135, + "learning_rate": 0.00019887686587873916, + "loss": 4.577, + "step": 190550 + }, + { + "epoch": 0.35568079235081024, + "grad_norm": 0.9193921685218811, + "learning_rate": 0.00019887623793751088, + "loss": 4.6156, + "step": 190600 + }, + { + "epoch": 0.3557740979101887, + "grad_norm": 0.9867975115776062, + "learning_rate": 0.00019887560982178343, + "loss": 4.4942, + "step": 190650 + }, + { + "epoch": 0.35586740346956725, + "grad_norm": 0.9040959477424622, + "learning_rate": 0.00019887498153155794, + "loss": 4.8029, + "step": 190700 + }, + { + "epoch": 0.3559607090289457, + "grad_norm": 0.9523190855979919, + "learning_rate": 0.00019887435306683549, + "loss": 4.5111, + "step": 190750 + }, + { + "epoch": 0.3560540145883242, + "grad_norm": 1.135631799697876, + "learning_rate": 0.00019887372442761722, + "loss": 4.7086, + "step": 190800 + }, + { + "epoch": 0.3561473201477027, + "grad_norm": 1.1821600198745728, + "learning_rate": 0.0001988730956139042, + "loss": 4.524, + "step": 190850 + }, + { + "epoch": 0.3562406257070812, + "grad_norm": 1.0236471891403198, + "learning_rate": 0.00019887246662569758, + "loss": 4.7486, + "step": 190900 + }, + { + "epoch": 0.3563339312664597, + "grad_norm": 1.091172695159912, + "learning_rate": 0.00019887183746299846, + "loss": 4.572, + "step": 190950 + }, + { + "epoch": 0.35642723682583816, + "grad_norm": 1.0206681489944458, + "learning_rate": 0.00019887120812580792, + "loss": 4.6126, + "step": 191000 + }, + { + "epoch": 0.3565205423852167, + "grad_norm": 1.0186724662780762, + "learning_rate": 0.00019887057861412712, + "loss": 4.5049, + "step": 191050 + }, + { + "epoch": 0.35661384794459516, + "grad_norm": 0.817267656326294, + "learning_rate": 0.0001988699489279571, + "loss": 4.6261, + "step": 191100 + }, + { + "epoch": 0.35670715350397364, + "grad_norm": 1.2809371948242188, + "learning_rate": 0.0001988693190672991, + "loss": 4.7523, + "step": 191150 + }, + { + "epoch": 0.3568004590633521, + "grad_norm": 0.6853193044662476, + "learning_rate": 0.00019886868903215408, + "loss": 4.4515, + "step": 191200 + }, + { + "epoch": 0.35689376462273065, + "grad_norm": 1.1701523065567017, + "learning_rate": 0.00019886805882252324, + "loss": 4.6187, + "step": 191250 + }, + { + "epoch": 0.3569870701821091, + "grad_norm": 0.8031530976295471, + "learning_rate": 0.0001988674284384077, + "loss": 4.5504, + "step": 191300 + }, + { + "epoch": 0.3570803757414876, + "grad_norm": 0.855839192867279, + "learning_rate": 0.0001988667978798085, + "loss": 4.3099, + "step": 191350 + }, + { + "epoch": 0.35717368130086613, + "grad_norm": 1.233931541442871, + "learning_rate": 0.00019886616714672681, + "loss": 4.5372, + "step": 191400 + }, + { + "epoch": 0.3572669868602446, + "grad_norm": 1.1093002557754517, + "learning_rate": 0.00019886553623916372, + "loss": 4.8146, + "step": 191450 + }, + { + "epoch": 0.3573602924196231, + "grad_norm": 1.2051666975021362, + "learning_rate": 0.00019886490515712038, + "loss": 4.7033, + "step": 191500 + }, + { + "epoch": 0.35745359797900156, + "grad_norm": 1.0855170488357544, + "learning_rate": 0.00019886427390059788, + "loss": 4.5491, + "step": 191550 + }, + { + "epoch": 0.3575469035383801, + "grad_norm": 1.0766313076019287, + "learning_rate": 0.00019886364246959732, + "loss": 4.5545, + "step": 191600 + }, + { + "epoch": 0.35764020909775857, + "grad_norm": 0.8042473793029785, + "learning_rate": 0.00019886301086411982, + "loss": 4.8249, + "step": 191650 + }, + { + "epoch": 0.35773351465713704, + "grad_norm": 0.9476457834243774, + "learning_rate": 0.00019886237908416652, + "loss": 4.8088, + "step": 191700 + }, + { + "epoch": 0.3578268202165156, + "grad_norm": 1.0122379064559937, + "learning_rate": 0.0001988617471297385, + "loss": 4.4633, + "step": 191750 + }, + { + "epoch": 0.35792012577589405, + "grad_norm": 1.0943959951400757, + "learning_rate": 0.00019886111500083692, + "loss": 4.4537, + "step": 191800 + }, + { + "epoch": 0.3580134313352725, + "grad_norm": 0.9375251531600952, + "learning_rate": 0.00019886048269746281, + "loss": 4.7861, + "step": 191850 + }, + { + "epoch": 0.358106736894651, + "grad_norm": 1.0100510120391846, + "learning_rate": 0.00019885985021961738, + "loss": 4.6012, + "step": 191900 + }, + { + "epoch": 0.35820004245402953, + "grad_norm": 1.1179404258728027, + "learning_rate": 0.00019885921756730173, + "loss": 4.771, + "step": 191950 + }, + { + "epoch": 0.358293348013408, + "grad_norm": 1.1079944372177124, + "learning_rate": 0.00019885858474051694, + "loss": 4.5788, + "step": 192000 + }, + { + "epoch": 0.358293348013408, + "eval_loss": 4.794641971588135, + "eval_runtime": 230.5927, + "eval_samples_per_second": 11.31, + "eval_steps_per_second": 11.31, + "eval_tts_loss": 7.481033778209468, + "step": 192000 + }, + { + "epoch": 0.3583866535727865, + "grad_norm": 1.0501129627227783, + "learning_rate": 0.00019885795173926412, + "loss": 4.589, + "step": 192050 + }, + { + "epoch": 0.358479959132165, + "grad_norm": 0.9563352465629578, + "learning_rate": 0.00019885731856354442, + "loss": 4.7751, + "step": 192100 + }, + { + "epoch": 0.3585732646915435, + "grad_norm": 1.539056658744812, + "learning_rate": 0.00019885668521335898, + "loss": 4.6909, + "step": 192150 + }, + { + "epoch": 0.35866657025092197, + "grad_norm": 1.1265029907226562, + "learning_rate": 0.00019885605168870884, + "loss": 4.4262, + "step": 192200 + }, + { + "epoch": 0.35875987581030044, + "grad_norm": 0.973284900188446, + "learning_rate": 0.00019885541798959517, + "loss": 4.8141, + "step": 192250 + }, + { + "epoch": 0.358853181369679, + "grad_norm": 0.6788082718849182, + "learning_rate": 0.0001988547841160191, + "loss": 4.6917, + "step": 192300 + }, + { + "epoch": 0.35894648692905745, + "grad_norm": 0.7461669445037842, + "learning_rate": 0.0001988541500679817, + "loss": 4.7468, + "step": 192350 + }, + { + "epoch": 0.3590397924884359, + "grad_norm": 1.1224901676177979, + "learning_rate": 0.00019885351584548414, + "loss": 4.5873, + "step": 192400 + }, + { + "epoch": 0.35913309804781446, + "grad_norm": 1.1169931888580322, + "learning_rate": 0.00019885288144852754, + "loss": 4.7951, + "step": 192450 + }, + { + "epoch": 0.35922640360719293, + "grad_norm": 0.9802398085594177, + "learning_rate": 0.00019885224687711298, + "loss": 4.5911, + "step": 192500 + }, + { + "epoch": 0.3593197091665714, + "grad_norm": 0.9591051340103149, + "learning_rate": 0.00019885161213124158, + "loss": 4.4823, + "step": 192550 + }, + { + "epoch": 0.3594130147259499, + "grad_norm": 1.2416292428970337, + "learning_rate": 0.0001988509772109145, + "loss": 4.7929, + "step": 192600 + }, + { + "epoch": 0.3595063202853284, + "grad_norm": 1.196144461631775, + "learning_rate": 0.00019885034211613282, + "loss": 4.6477, + "step": 192650 + }, + { + "epoch": 0.3595996258447069, + "grad_norm": 1.0120468139648438, + "learning_rate": 0.0001988497068468977, + "loss": 4.7484, + "step": 192700 + }, + { + "epoch": 0.35969293140408537, + "grad_norm": 0.7862088680267334, + "learning_rate": 0.00019884907140321024, + "loss": 4.5306, + "step": 192750 + }, + { + "epoch": 0.3597862369634639, + "grad_norm": 1.0786339044570923, + "learning_rate": 0.00019884843578507155, + "loss": 4.861, + "step": 192800 + }, + { + "epoch": 0.3598795425228424, + "grad_norm": 1.0364389419555664, + "learning_rate": 0.00019884779999248278, + "loss": 4.6999, + "step": 192850 + }, + { + "epoch": 0.35997284808222085, + "grad_norm": 0.887571394443512, + "learning_rate": 0.00019884716402544502, + "loss": 4.6757, + "step": 192900 + }, + { + "epoch": 0.36006615364159933, + "grad_norm": 0.6657192707061768, + "learning_rate": 0.00019884652788395942, + "loss": 4.5924, + "step": 192950 + }, + { + "epoch": 0.36015945920097786, + "grad_norm": 1.246086597442627, + "learning_rate": 0.00019884589156802707, + "loss": 4.5792, + "step": 193000 + }, + { + "epoch": 0.36025276476035634, + "grad_norm": 0.8808768391609192, + "learning_rate": 0.00019884525507764912, + "loss": 4.6044, + "step": 193050 + }, + { + "epoch": 0.3603460703197348, + "grad_norm": 0.8309186697006226, + "learning_rate": 0.00019884461841282672, + "loss": 4.6616, + "step": 193100 + }, + { + "epoch": 0.36043937587911334, + "grad_norm": 0.9923182129859924, + "learning_rate": 0.0001988439815735609, + "loss": 4.6831, + "step": 193150 + }, + { + "epoch": 0.3605326814384918, + "grad_norm": 0.8415591716766357, + "learning_rate": 0.0001988433445598529, + "loss": 4.648, + "step": 193200 + }, + { + "epoch": 0.3606259869978703, + "grad_norm": 1.2085046768188477, + "learning_rate": 0.00019884270737170377, + "loss": 4.429, + "step": 193250 + }, + { + "epoch": 0.36071929255724877, + "grad_norm": 0.8806056976318359, + "learning_rate": 0.00019884207000911465, + "loss": 4.4001, + "step": 193300 + }, + { + "epoch": 0.3608125981166273, + "grad_norm": 1.0138684511184692, + "learning_rate": 0.00019884143247208665, + "loss": 4.6307, + "step": 193350 + }, + { + "epoch": 0.3609059036760058, + "grad_norm": 0.8585569858551025, + "learning_rate": 0.00019884079476062094, + "loss": 4.5959, + "step": 193400 + }, + { + "epoch": 0.36099920923538426, + "grad_norm": 0.9443112015724182, + "learning_rate": 0.00019884015687471863, + "loss": 4.7276, + "step": 193450 + }, + { + "epoch": 0.3610925147947628, + "grad_norm": 1.2051678895950317, + "learning_rate": 0.00019883951881438078, + "loss": 4.795, + "step": 193500 + }, + { + "epoch": 0.36118582035414126, + "grad_norm": 0.7949090600013733, + "learning_rate": 0.00019883888057960864, + "loss": 4.7397, + "step": 193550 + }, + { + "epoch": 0.36127912591351974, + "grad_norm": 0.9525768756866455, + "learning_rate": 0.00019883824217040323, + "loss": 4.6333, + "step": 193600 + }, + { + "epoch": 0.3613724314728982, + "grad_norm": 1.2094985246658325, + "learning_rate": 0.0001988376035867657, + "loss": 4.5322, + "step": 193650 + }, + { + "epoch": 0.36146573703227675, + "grad_norm": 1.0645558834075928, + "learning_rate": 0.00019883696482869722, + "loss": 4.7803, + "step": 193700 + }, + { + "epoch": 0.3615590425916552, + "grad_norm": 1.0554648637771606, + "learning_rate": 0.00019883632589619887, + "loss": 4.5194, + "step": 193750 + }, + { + "epoch": 0.3616523481510337, + "grad_norm": 0.8595163822174072, + "learning_rate": 0.0001988356867892718, + "loss": 4.6625, + "step": 193800 + }, + { + "epoch": 0.3617456537104122, + "grad_norm": 0.9319025874137878, + "learning_rate": 0.00019883504750791713, + "loss": 4.7127, + "step": 193850 + }, + { + "epoch": 0.3618389592697907, + "grad_norm": 1.2545781135559082, + "learning_rate": 0.000198834408052136, + "loss": 4.409, + "step": 193900 + }, + { + "epoch": 0.3619322648291692, + "grad_norm": 1.0958348512649536, + "learning_rate": 0.00019883376842192953, + "loss": 4.629, + "step": 193950 + }, + { + "epoch": 0.36202557038854766, + "grad_norm": 1.1290863752365112, + "learning_rate": 0.00019883312861729887, + "loss": 4.7757, + "step": 194000 + }, + { + "epoch": 0.3621188759479262, + "grad_norm": 1.168502926826477, + "learning_rate": 0.0001988324886382451, + "loss": 4.5306, + "step": 194050 + }, + { + "epoch": 0.36221218150730466, + "grad_norm": 1.0988664627075195, + "learning_rate": 0.00019883184848476938, + "loss": 4.5708, + "step": 194100 + }, + { + "epoch": 0.36230548706668314, + "grad_norm": 1.10366952419281, + "learning_rate": 0.00019883120815687285, + "loss": 4.7049, + "step": 194150 + }, + { + "epoch": 0.3623987926260616, + "grad_norm": 1.0056383609771729, + "learning_rate": 0.0001988305676545566, + "loss": 4.8057, + "step": 194200 + }, + { + "epoch": 0.36249209818544015, + "grad_norm": 0.930894672870636, + "learning_rate": 0.00019882992697782182, + "loss": 4.5286, + "step": 194250 + }, + { + "epoch": 0.3625854037448186, + "grad_norm": 1.0776501893997192, + "learning_rate": 0.0001988292861266696, + "loss": 4.56, + "step": 194300 + }, + { + "epoch": 0.3626787093041971, + "grad_norm": 1.0516318082809448, + "learning_rate": 0.00019882864510110105, + "loss": 4.5943, + "step": 194350 + }, + { + "epoch": 0.36277201486357563, + "grad_norm": 1.0238537788391113, + "learning_rate": 0.00019882800390111735, + "loss": 4.6839, + "step": 194400 + }, + { + "epoch": 0.3628653204229541, + "grad_norm": 1.1146060228347778, + "learning_rate": 0.00019882736252671963, + "loss": 4.6616, + "step": 194450 + }, + { + "epoch": 0.3629586259823326, + "grad_norm": 1.2036668062210083, + "learning_rate": 0.00019882672097790898, + "loss": 4.5828, + "step": 194500 + }, + { + "epoch": 0.36305193154171106, + "grad_norm": 0.9518182873725891, + "learning_rate": 0.00019882607925468656, + "loss": 4.5861, + "step": 194550 + }, + { + "epoch": 0.3631452371010896, + "grad_norm": 0.8470454812049866, + "learning_rate": 0.0001988254373570535, + "loss": 4.7907, + "step": 194600 + }, + { + "epoch": 0.36323854266046807, + "grad_norm": 0.9194369316101074, + "learning_rate": 0.00019882479528501095, + "loss": 4.4232, + "step": 194650 + }, + { + "epoch": 0.36333184821984654, + "grad_norm": 1.164222002029419, + "learning_rate": 0.00019882415303856002, + "loss": 4.4877, + "step": 194700 + }, + { + "epoch": 0.3634251537792251, + "grad_norm": 0.9167250394821167, + "learning_rate": 0.00019882351061770184, + "loss": 4.5728, + "step": 194750 + }, + { + "epoch": 0.36351845933860355, + "grad_norm": 0.8055945634841919, + "learning_rate": 0.00019882286802243754, + "loss": 4.6115, + "step": 194800 + }, + { + "epoch": 0.363611764897982, + "grad_norm": 0.8844305872917175, + "learning_rate": 0.0001988222252527683, + "loss": 4.6615, + "step": 194850 + }, + { + "epoch": 0.3637050704573605, + "grad_norm": 1.018107533454895, + "learning_rate": 0.0001988215823086952, + "loss": 4.6986, + "step": 194900 + }, + { + "epoch": 0.36379837601673903, + "grad_norm": 0.80892413854599, + "learning_rate": 0.00019882093919021938, + "loss": 4.5929, + "step": 194950 + }, + { + "epoch": 0.3638916815761175, + "grad_norm": 0.9718637466430664, + "learning_rate": 0.00019882029589734203, + "loss": 4.5956, + "step": 195000 + }, + { + "epoch": 0.3638916815761175, + "eval_loss": 4.797578811645508, + "eval_runtime": 230.7126, + "eval_samples_per_second": 11.304, + "eval_steps_per_second": 11.304, + "eval_tts_loss": 7.486753235031148, + "step": 195000 + }, + { + "epoch": 0.363984987135496, + "grad_norm": 0.9084148406982422, + "learning_rate": 0.0001988196524300642, + "loss": 4.5751, + "step": 195050 + }, + { + "epoch": 0.3640782926948745, + "grad_norm": 1.0336567163467407, + "learning_rate": 0.0001988190087883871, + "loss": 4.7999, + "step": 195100 + }, + { + "epoch": 0.364171598254253, + "grad_norm": 0.9693698287010193, + "learning_rate": 0.00019881836497231183, + "loss": 4.3249, + "step": 195150 + }, + { + "epoch": 0.36426490381363147, + "grad_norm": 1.1222256422042847, + "learning_rate": 0.00019881772098183956, + "loss": 4.587, + "step": 195200 + }, + { + "epoch": 0.36435820937300994, + "grad_norm": 1.0114097595214844, + "learning_rate": 0.00019881707681697135, + "loss": 4.6811, + "step": 195250 + }, + { + "epoch": 0.3644515149323885, + "grad_norm": 0.9249664545059204, + "learning_rate": 0.00019881643247770842, + "loss": 4.8543, + "step": 195300 + }, + { + "epoch": 0.36454482049176695, + "grad_norm": 1.0353797674179077, + "learning_rate": 0.00019881578796405186, + "loss": 4.6228, + "step": 195350 + }, + { + "epoch": 0.36463812605114543, + "grad_norm": 1.0884183645248413, + "learning_rate": 0.00019881514327600282, + "loss": 4.7143, + "step": 195400 + }, + { + "epoch": 0.36473143161052396, + "grad_norm": 1.049566388130188, + "learning_rate": 0.00019881449841356247, + "loss": 4.6686, + "step": 195450 + }, + { + "epoch": 0.36482473716990244, + "grad_norm": 0.990344226360321, + "learning_rate": 0.00019881385337673188, + "loss": 4.585, + "step": 195500 + }, + { + "epoch": 0.3649180427292809, + "grad_norm": 1.0016299486160278, + "learning_rate": 0.00019881320816551225, + "loss": 4.5803, + "step": 195550 + }, + { + "epoch": 0.3650113482886594, + "grad_norm": 0.9439250826835632, + "learning_rate": 0.0001988125627799047, + "loss": 4.5019, + "step": 195600 + }, + { + "epoch": 0.3651046538480379, + "grad_norm": 1.0324641466140747, + "learning_rate": 0.00019881191721991034, + "loss": 4.708, + "step": 195650 + }, + { + "epoch": 0.3651979594074164, + "grad_norm": 1.1070998907089233, + "learning_rate": 0.0001988112714855303, + "loss": 4.6045, + "step": 195700 + }, + { + "epoch": 0.36529126496679487, + "grad_norm": 0.7762126922607422, + "learning_rate": 0.0001988106255767658, + "loss": 4.9177, + "step": 195750 + }, + { + "epoch": 0.3653845705261734, + "grad_norm": 1.03263258934021, + "learning_rate": 0.00019880997949361793, + "loss": 4.7215, + "step": 195800 + }, + { + "epoch": 0.3654778760855519, + "grad_norm": 0.9962678551673889, + "learning_rate": 0.00019880933323608783, + "loss": 4.49, + "step": 195850 + }, + { + "epoch": 0.36557118164493035, + "grad_norm": 0.8162546157836914, + "learning_rate": 0.00019880868680417663, + "loss": 4.7263, + "step": 195900 + }, + { + "epoch": 0.36566448720430883, + "grad_norm": 0.7211110591888428, + "learning_rate": 0.0001988080401978855, + "loss": 4.7463, + "step": 195950 + }, + { + "epoch": 0.36575779276368736, + "grad_norm": 0.9785488843917847, + "learning_rate": 0.00019880739341721556, + "loss": 4.7713, + "step": 196000 + }, + { + "epoch": 0.36585109832306584, + "grad_norm": 0.8973463773727417, + "learning_rate": 0.00019880674646216795, + "loss": 4.7429, + "step": 196050 + }, + { + "epoch": 0.3659444038824443, + "grad_norm": 1.1833984851837158, + "learning_rate": 0.00019880609933274382, + "loss": 4.5631, + "step": 196100 + }, + { + "epoch": 0.36603770944182284, + "grad_norm": 1.098944067955017, + "learning_rate": 0.0001988054520289443, + "loss": 4.4881, + "step": 196150 + }, + { + "epoch": 0.3661310150012013, + "grad_norm": 0.6843777894973755, + "learning_rate": 0.00019880480455077059, + "loss": 4.525, + "step": 196200 + }, + { + "epoch": 0.3662243205605798, + "grad_norm": 0.9798048138618469, + "learning_rate": 0.00019880415689822373, + "loss": 4.5468, + "step": 196250 + }, + { + "epoch": 0.3663176261199583, + "grad_norm": 1.0058033466339111, + "learning_rate": 0.00019880350907130494, + "loss": 4.5075, + "step": 196300 + }, + { + "epoch": 0.3664109316793368, + "grad_norm": 0.8590142130851746, + "learning_rate": 0.00019880286107001535, + "loss": 4.5171, + "step": 196350 + }, + { + "epoch": 0.3665042372387153, + "grad_norm": 0.8589456081390381, + "learning_rate": 0.00019880221289435607, + "loss": 4.672, + "step": 196400 + }, + { + "epoch": 0.36659754279809376, + "grad_norm": 1.3543787002563477, + "learning_rate": 0.00019880156454432827, + "loss": 4.523, + "step": 196450 + }, + { + "epoch": 0.36669084835747223, + "grad_norm": 0.9485663771629333, + "learning_rate": 0.00019880091601993312, + "loss": 4.8311, + "step": 196500 + }, + { + "epoch": 0.36678415391685076, + "grad_norm": 0.8935502767562866, + "learning_rate": 0.00019880026732117174, + "loss": 4.604, + "step": 196550 + }, + { + "epoch": 0.36687745947622924, + "grad_norm": 0.9361104369163513, + "learning_rate": 0.00019879961844804524, + "loss": 4.6418, + "step": 196600 + }, + { + "epoch": 0.3669707650356077, + "grad_norm": 0.6516358852386475, + "learning_rate": 0.00019879896940055481, + "loss": 4.7734, + "step": 196650 + }, + { + "epoch": 0.36706407059498625, + "grad_norm": 1.2583208084106445, + "learning_rate": 0.0001987983201787016, + "loss": 4.6789, + "step": 196700 + }, + { + "epoch": 0.3671573761543647, + "grad_norm": 1.1848986148834229, + "learning_rate": 0.00019879767078248673, + "loss": 4.6755, + "step": 196750 + }, + { + "epoch": 0.3672506817137432, + "grad_norm": 0.871610164642334, + "learning_rate": 0.00019879702121191134, + "loss": 4.5271, + "step": 196800 + }, + { + "epoch": 0.3673439872731217, + "grad_norm": 1.1172187328338623, + "learning_rate": 0.00019879637146697662, + "loss": 4.6236, + "step": 196850 + }, + { + "epoch": 0.3674372928325002, + "grad_norm": 0.910994827747345, + "learning_rate": 0.00019879572154768367, + "loss": 4.5114, + "step": 196900 + }, + { + "epoch": 0.3675305983918787, + "grad_norm": 0.9143189787864685, + "learning_rate": 0.00019879507145403365, + "loss": 4.4976, + "step": 196950 + }, + { + "epoch": 0.36762390395125716, + "grad_norm": 1.1715046167373657, + "learning_rate": 0.0001987944211860277, + "loss": 4.6836, + "step": 197000 + }, + { + "epoch": 0.3677172095106357, + "grad_norm": 1.333890438079834, + "learning_rate": 0.00019879377074366703, + "loss": 4.7598, + "step": 197050 + }, + { + "epoch": 0.36781051507001417, + "grad_norm": 1.0626753568649292, + "learning_rate": 0.00019879312012695272, + "loss": 4.7344, + "step": 197100 + }, + { + "epoch": 0.36790382062939264, + "grad_norm": 0.8466847538948059, + "learning_rate": 0.0001987924693358859, + "loss": 4.6338, + "step": 197150 + }, + { + "epoch": 0.3679971261887711, + "grad_norm": 1.0398815870285034, + "learning_rate": 0.00019879181837046777, + "loss": 4.5708, + "step": 197200 + }, + { + "epoch": 0.36809043174814965, + "grad_norm": 1.2056119441986084, + "learning_rate": 0.00019879116723069948, + "loss": 4.8655, + "step": 197250 + }, + { + "epoch": 0.3681837373075281, + "grad_norm": 1.0409513711929321, + "learning_rate": 0.00019879051591658215, + "loss": 4.6327, + "step": 197300 + }, + { + "epoch": 0.3682770428669066, + "grad_norm": 1.0986347198486328, + "learning_rate": 0.00019878986442811695, + "loss": 4.6958, + "step": 197350 + }, + { + "epoch": 0.36837034842628513, + "grad_norm": 1.093003749847412, + "learning_rate": 0.00019878921276530502, + "loss": 4.6851, + "step": 197400 + }, + { + "epoch": 0.3684636539856636, + "grad_norm": 1.156713843345642, + "learning_rate": 0.0001987885609281475, + "loss": 4.7068, + "step": 197450 + }, + { + "epoch": 0.3685569595450421, + "grad_norm": 1.0253217220306396, + "learning_rate": 0.00019878790891664555, + "loss": 4.5718, + "step": 197500 + }, + { + "epoch": 0.36865026510442056, + "grad_norm": 0.8153467774391174, + "learning_rate": 0.00019878725673080038, + "loss": 4.6435, + "step": 197550 + }, + { + "epoch": 0.3687435706637991, + "grad_norm": 0.8387562036514282, + "learning_rate": 0.00019878660437061302, + "loss": 4.733, + "step": 197600 + }, + { + "epoch": 0.36883687622317757, + "grad_norm": 1.011487603187561, + "learning_rate": 0.0001987859518360847, + "loss": 4.772, + "step": 197650 + }, + { + "epoch": 0.36893018178255604, + "grad_norm": 1.247372031211853, + "learning_rate": 0.00019878529912721658, + "loss": 4.6005, + "step": 197700 + }, + { + "epoch": 0.3690234873419346, + "grad_norm": 0.8897690176963806, + "learning_rate": 0.00019878464624400978, + "loss": 4.6325, + "step": 197750 + }, + { + "epoch": 0.36911679290131305, + "grad_norm": 1.1363788843154907, + "learning_rate": 0.00019878399318646543, + "loss": 4.7405, + "step": 197800 + }, + { + "epoch": 0.3692100984606915, + "grad_norm": 0.8495743870735168, + "learning_rate": 0.00019878333995458473, + "loss": 4.4341, + "step": 197850 + }, + { + "epoch": 0.36930340402007, + "grad_norm": 0.9996325969696045, + "learning_rate": 0.00019878268654836882, + "loss": 4.6559, + "step": 197900 + }, + { + "epoch": 0.36939670957944853, + "grad_norm": 0.7758403420448303, + "learning_rate": 0.00019878203296781885, + "loss": 5.0731, + "step": 197950 + }, + { + "epoch": 0.369490015138827, + "grad_norm": 0.9682093262672424, + "learning_rate": 0.000198781379212936, + "loss": 4.4734, + "step": 198000 + }, + { + "epoch": 0.369490015138827, + "eval_loss": 4.794241905212402, + "eval_runtime": 232.0461, + "eval_samples_per_second": 11.239, + "eval_steps_per_second": 11.239, + "eval_tts_loss": 7.463459329881014, + "step": 198000 + }, + { + "epoch": 0.3695833206982055, + "grad_norm": 1.051621675491333, + "learning_rate": 0.00019878072528372136, + "loss": 4.6863, + "step": 198050 + }, + { + "epoch": 0.369676626257584, + "grad_norm": 1.0777297019958496, + "learning_rate": 0.0001987800711801761, + "loss": 4.5781, + "step": 198100 + }, + { + "epoch": 0.3697699318169625, + "grad_norm": 0.8551693558692932, + "learning_rate": 0.00019877941690230145, + "loss": 4.6632, + "step": 198150 + }, + { + "epoch": 0.36986323737634097, + "grad_norm": 0.8395803570747375, + "learning_rate": 0.00019877876245009849, + "loss": 4.6038, + "step": 198200 + }, + { + "epoch": 0.36995654293571945, + "grad_norm": 0.860999584197998, + "learning_rate": 0.0001987781078235684, + "loss": 4.7329, + "step": 198250 + }, + { + "epoch": 0.370049848495098, + "grad_norm": 1.099967360496521, + "learning_rate": 0.0001987774530227123, + "loss": 4.7567, + "step": 198300 + }, + { + "epoch": 0.37014315405447645, + "grad_norm": 0.6936938166618347, + "learning_rate": 0.00019877679804753141, + "loss": 4.673, + "step": 198350 + }, + { + "epoch": 0.37023645961385493, + "grad_norm": 0.959591805934906, + "learning_rate": 0.00019877614289802686, + "loss": 4.6583, + "step": 198400 + }, + { + "epoch": 0.37032976517323346, + "grad_norm": 0.7232271432876587, + "learning_rate": 0.00019877548757419976, + "loss": 4.5433, + "step": 198450 + }, + { + "epoch": 0.37042307073261194, + "grad_norm": 1.075395941734314, + "learning_rate": 0.00019877483207605132, + "loss": 4.5049, + "step": 198500 + }, + { + "epoch": 0.3705163762919904, + "grad_norm": 1.0577384233474731, + "learning_rate": 0.0001987741764035827, + "loss": 4.642, + "step": 198550 + }, + { + "epoch": 0.3706096818513689, + "grad_norm": 1.5923864841461182, + "learning_rate": 0.000198773520556795, + "loss": 4.7332, + "step": 198600 + }, + { + "epoch": 0.3707029874107474, + "grad_norm": 0.9335559010505676, + "learning_rate": 0.00019877286453568943, + "loss": 4.4329, + "step": 198650 + }, + { + "epoch": 0.3707962929701259, + "grad_norm": 1.0967572927474976, + "learning_rate": 0.00019877220834026718, + "loss": 4.4424, + "step": 198700 + }, + { + "epoch": 0.37088959852950437, + "grad_norm": 1.1251230239868164, + "learning_rate": 0.00019877155197052933, + "loss": 4.5098, + "step": 198750 + }, + { + "epoch": 0.3709829040888829, + "grad_norm": 1.2461490631103516, + "learning_rate": 0.00019877089542647705, + "loss": 4.636, + "step": 198800 + }, + { + "epoch": 0.3710762096482614, + "grad_norm": 0.9544318914413452, + "learning_rate": 0.00019877023870811154, + "loss": 4.6384, + "step": 198850 + }, + { + "epoch": 0.37116951520763986, + "grad_norm": 1.099353313446045, + "learning_rate": 0.00019876958181543392, + "loss": 4.7218, + "step": 198900 + }, + { + "epoch": 0.37126282076701833, + "grad_norm": 0.7116580009460449, + "learning_rate": 0.0001987689247484454, + "loss": 4.5868, + "step": 198950 + }, + { + "epoch": 0.37135612632639686, + "grad_norm": 0.9305589199066162, + "learning_rate": 0.00019876826750714707, + "loss": 4.5696, + "step": 199000 + }, + { + "epoch": 0.37144943188577534, + "grad_norm": 1.0949960947036743, + "learning_rate": 0.00019876761009154014, + "loss": 4.4849, + "step": 199050 + }, + { + "epoch": 0.3715427374451538, + "grad_norm": 0.9973856210708618, + "learning_rate": 0.00019876695250162576, + "loss": 4.6221, + "step": 199100 + }, + { + "epoch": 0.3716360430045323, + "grad_norm": 0.7113000154495239, + "learning_rate": 0.00019876629473740507, + "loss": 4.6113, + "step": 199150 + }, + { + "epoch": 0.3717293485639108, + "grad_norm": 0.8312941789627075, + "learning_rate": 0.00019876563679887928, + "loss": 4.6111, + "step": 199200 + }, + { + "epoch": 0.3718226541232893, + "grad_norm": 1.2139105796813965, + "learning_rate": 0.00019876497868604948, + "loss": 4.5554, + "step": 199250 + }, + { + "epoch": 0.3719159596826678, + "grad_norm": 1.0658022165298462, + "learning_rate": 0.0001987643203989169, + "loss": 4.6281, + "step": 199300 + }, + { + "epoch": 0.3720092652420463, + "grad_norm": 0.9115583300590515, + "learning_rate": 0.00019876366193748269, + "loss": 4.8648, + "step": 199350 + }, + { + "epoch": 0.3721025708014248, + "grad_norm": 1.0521410703659058, + "learning_rate": 0.00019876300330174798, + "loss": 4.5097, + "step": 199400 + }, + { + "epoch": 0.37219587636080326, + "grad_norm": 0.9952971935272217, + "learning_rate": 0.00019876234449171391, + "loss": 4.7379, + "step": 199450 + }, + { + "epoch": 0.37228918192018173, + "grad_norm": 0.8925333023071289, + "learning_rate": 0.00019876168550738172, + "loss": 4.5746, + "step": 199500 + }, + { + "epoch": 0.37238248747956026, + "grad_norm": 1.0621514320373535, + "learning_rate": 0.00019876102634875254, + "loss": 4.7674, + "step": 199550 + }, + { + "epoch": 0.37247579303893874, + "grad_norm": 1.0929023027420044, + "learning_rate": 0.00019876036701582748, + "loss": 4.7718, + "step": 199600 + }, + { + "epoch": 0.3725690985983172, + "grad_norm": 0.9549880027770996, + "learning_rate": 0.00019875970750860778, + "loss": 4.7339, + "step": 199650 + }, + { + "epoch": 0.37266240415769575, + "grad_norm": 0.9763375520706177, + "learning_rate": 0.0001987590478270946, + "loss": 4.6824, + "step": 199700 + }, + { + "epoch": 0.3727557097170742, + "grad_norm": 0.9068183898925781, + "learning_rate": 0.00019875838797128902, + "loss": 4.7193, + "step": 199750 + }, + { + "epoch": 0.3728490152764527, + "grad_norm": 0.9717890024185181, + "learning_rate": 0.0001987577279411923, + "loss": 4.5489, + "step": 199800 + }, + { + "epoch": 0.3729423208358312, + "grad_norm": 1.2245581150054932, + "learning_rate": 0.00019875706773680554, + "loss": 4.705, + "step": 199850 + }, + { + "epoch": 0.3730356263952097, + "grad_norm": 0.7146151065826416, + "learning_rate": 0.00019875640735812997, + "loss": 4.5574, + "step": 199900 + }, + { + "epoch": 0.3731289319545882, + "grad_norm": 0.8383366465568542, + "learning_rate": 0.00019875574680516667, + "loss": 4.5159, + "step": 199950 + }, + { + "epoch": 0.37322223751396666, + "grad_norm": 1.15836501121521, + "learning_rate": 0.0001987550860779169, + "loss": 4.8317, + "step": 200000 + }, + { + "epoch": 0.3733155430733452, + "grad_norm": 0.8754977583885193, + "learning_rate": 0.00019875442517638177, + "loss": 4.7887, + "step": 200050 + }, + { + "epoch": 0.37340884863272367, + "grad_norm": 0.8341622352600098, + "learning_rate": 0.00019875376410056244, + "loss": 4.5602, + "step": 200100 + }, + { + "epoch": 0.37350215419210214, + "grad_norm": 0.9436699151992798, + "learning_rate": 0.00019875310285046008, + "loss": 4.5878, + "step": 200150 + }, + { + "epoch": 0.3735954597514806, + "grad_norm": 1.203100562095642, + "learning_rate": 0.0001987524414260759, + "loss": 4.5758, + "step": 200200 + }, + { + "epoch": 0.37368876531085915, + "grad_norm": 0.9803832173347473, + "learning_rate": 0.00019875177982741106, + "loss": 4.5436, + "step": 200250 + }, + { + "epoch": 0.3737820708702376, + "grad_norm": 0.9167838096618652, + "learning_rate": 0.00019875111805446664, + "loss": 4.7414, + "step": 200300 + }, + { + "epoch": 0.3738753764296161, + "grad_norm": 1.015909194946289, + "learning_rate": 0.0001987504561072439, + "loss": 4.7295, + "step": 200350 + }, + { + "epoch": 0.37396868198899463, + "grad_norm": 1.1135481595993042, + "learning_rate": 0.00019874979398574398, + "loss": 4.4958, + "step": 200400 + }, + { + "epoch": 0.3740619875483731, + "grad_norm": 0.9369817972183228, + "learning_rate": 0.00019874913168996808, + "loss": 4.5166, + "step": 200450 + }, + { + "epoch": 0.3741552931077516, + "grad_norm": 1.2627172470092773, + "learning_rate": 0.0001987484692199173, + "loss": 4.661, + "step": 200500 + }, + { + "epoch": 0.37424859866713006, + "grad_norm": 1.0819026231765747, + "learning_rate": 0.00019874780657559287, + "loss": 4.4839, + "step": 200550 + }, + { + "epoch": 0.3743419042265086, + "grad_norm": 1.0878807306289673, + "learning_rate": 0.0001987471437569959, + "loss": 4.5501, + "step": 200600 + }, + { + "epoch": 0.37443520978588707, + "grad_norm": 1.0913496017456055, + "learning_rate": 0.00019874648076412762, + "loss": 4.9156, + "step": 200650 + }, + { + "epoch": 0.37452851534526554, + "grad_norm": 0.9594210386276245, + "learning_rate": 0.00019874581759698917, + "loss": 4.5145, + "step": 200700 + }, + { + "epoch": 0.3746218209046441, + "grad_norm": 0.8627813458442688, + "learning_rate": 0.00019874515425558173, + "loss": 4.5963, + "step": 200750 + }, + { + "epoch": 0.37471512646402255, + "grad_norm": 0.9256874918937683, + "learning_rate": 0.0001987444907399065, + "loss": 4.671, + "step": 200800 + }, + { + "epoch": 0.37480843202340103, + "grad_norm": 1.5067219734191895, + "learning_rate": 0.0001987438270499646, + "loss": 4.672, + "step": 200850 + }, + { + "epoch": 0.3749017375827795, + "grad_norm": 1.0874707698822021, + "learning_rate": 0.00019874316318575718, + "loss": 4.6663, + "step": 200900 + }, + { + "epoch": 0.37499504314215804, + "grad_norm": 1.2468639612197876, + "learning_rate": 0.00019874249914728548, + "loss": 4.6853, + "step": 200950 + }, + { + "epoch": 0.3750883487015365, + "grad_norm": 0.7165033221244812, + "learning_rate": 0.00019874183493455064, + "loss": 4.6766, + "step": 201000 + }, + { + "epoch": 0.3750883487015365, + "eval_loss": 4.802398681640625, + "eval_runtime": 230.2804, + "eval_samples_per_second": 11.325, + "eval_steps_per_second": 11.325, + "eval_tts_loss": 7.475072805292653, + "step": 201000 + }, + { + "epoch": 0.375181654260915, + "grad_norm": 1.1223444938659668, + "learning_rate": 0.00019874117054755386, + "loss": 4.6664, + "step": 201050 + }, + { + "epoch": 0.3752749598202935, + "grad_norm": 1.1241016387939453, + "learning_rate": 0.00019874050598629625, + "loss": 4.5235, + "step": 201100 + }, + { + "epoch": 0.375368265379672, + "grad_norm": 1.3051515817642212, + "learning_rate": 0.00019873984125077905, + "loss": 4.4631, + "step": 201150 + }, + { + "epoch": 0.37546157093905047, + "grad_norm": 1.0892348289489746, + "learning_rate": 0.0001987391763410034, + "loss": 4.4371, + "step": 201200 + }, + { + "epoch": 0.37555487649842895, + "grad_norm": 0.9838119745254517, + "learning_rate": 0.00019873851125697044, + "loss": 4.723, + "step": 201250 + }, + { + "epoch": 0.3756481820578075, + "grad_norm": 1.3931784629821777, + "learning_rate": 0.00019873784599868145, + "loss": 4.7364, + "step": 201300 + }, + { + "epoch": 0.37574148761718595, + "grad_norm": 0.9986215829849243, + "learning_rate": 0.00019873718056613747, + "loss": 4.4411, + "step": 201350 + }, + { + "epoch": 0.37583479317656443, + "grad_norm": 1.0121062994003296, + "learning_rate": 0.0001987365149593398, + "loss": 4.3812, + "step": 201400 + }, + { + "epoch": 0.37592809873594296, + "grad_norm": 1.1041994094848633, + "learning_rate": 0.0001987358491782895, + "loss": 4.4639, + "step": 201450 + }, + { + "epoch": 0.37602140429532144, + "grad_norm": 0.7851685285568237, + "learning_rate": 0.00019873518322298782, + "loss": 4.4774, + "step": 201500 + }, + { + "epoch": 0.3761147098546999, + "grad_norm": 1.1037462949752808, + "learning_rate": 0.0001987345170934359, + "loss": 4.6946, + "step": 201550 + }, + { + "epoch": 0.3762080154140784, + "grad_norm": 0.9130716323852539, + "learning_rate": 0.00019873385078963496, + "loss": 4.6075, + "step": 201600 + }, + { + "epoch": 0.3763013209734569, + "grad_norm": 1.2021543979644775, + "learning_rate": 0.00019873318431158613, + "loss": 4.5221, + "step": 201650 + }, + { + "epoch": 0.3763946265328354, + "grad_norm": 0.9670273065567017, + "learning_rate": 0.0001987325176592906, + "loss": 4.8346, + "step": 201700 + }, + { + "epoch": 0.3764879320922139, + "grad_norm": 0.8241245746612549, + "learning_rate": 0.00019873185083274956, + "loss": 4.487, + "step": 201750 + }, + { + "epoch": 0.37658123765159235, + "grad_norm": 1.071000337600708, + "learning_rate": 0.00019873118383196417, + "loss": 4.5168, + "step": 201800 + }, + { + "epoch": 0.3766745432109709, + "grad_norm": 0.627001166343689, + "learning_rate": 0.0001987305166569356, + "loss": 4.8239, + "step": 201850 + }, + { + "epoch": 0.37676784877034936, + "grad_norm": 0.781301736831665, + "learning_rate": 0.00019872984930766505, + "loss": 4.4841, + "step": 201900 + }, + { + "epoch": 0.37686115432972783, + "grad_norm": 0.7564511895179749, + "learning_rate": 0.0001987291817841537, + "loss": 4.4971, + "step": 201950 + }, + { + "epoch": 0.37695445988910636, + "grad_norm": 0.9705767035484314, + "learning_rate": 0.0001987285140864027, + "loss": 4.4673, + "step": 202000 + }, + { + "epoch": 0.37704776544848484, + "grad_norm": 0.9428151845932007, + "learning_rate": 0.00019872784621441326, + "loss": 4.8026, + "step": 202050 + }, + { + "epoch": 0.3771410710078633, + "grad_norm": 1.1641000509262085, + "learning_rate": 0.00019872717816818653, + "loss": 4.7319, + "step": 202100 + }, + { + "epoch": 0.3772343765672418, + "grad_norm": 1.1046082973480225, + "learning_rate": 0.0001987265099477237, + "loss": 4.8214, + "step": 202150 + }, + { + "epoch": 0.3773276821266203, + "grad_norm": 1.2519506216049194, + "learning_rate": 0.00019872584155302593, + "loss": 4.6536, + "step": 202200 + }, + { + "epoch": 0.3774209876859988, + "grad_norm": 1.1127455234527588, + "learning_rate": 0.0001987251729840945, + "loss": 4.5374, + "step": 202250 + }, + { + "epoch": 0.3775142932453773, + "grad_norm": 1.0181680917739868, + "learning_rate": 0.00019872450424093043, + "loss": 4.4758, + "step": 202300 + }, + { + "epoch": 0.3776075988047558, + "grad_norm": 0.7517116069793701, + "learning_rate": 0.000198723835323535, + "loss": 4.5575, + "step": 202350 + }, + { + "epoch": 0.3777009043641343, + "grad_norm": 0.9754074215888977, + "learning_rate": 0.00019872316623190938, + "loss": 4.5489, + "step": 202400 + }, + { + "epoch": 0.37779420992351276, + "grad_norm": 0.9626929759979248, + "learning_rate": 0.00019872249696605473, + "loss": 4.7142, + "step": 202450 + }, + { + "epoch": 0.37788751548289123, + "grad_norm": 0.8055295944213867, + "learning_rate": 0.0001987218275259723, + "loss": 4.591, + "step": 202500 + }, + { + "epoch": 0.37798082104226977, + "grad_norm": 0.821767270565033, + "learning_rate": 0.00019872115791166314, + "loss": 4.5601, + "step": 202550 + }, + { + "epoch": 0.37807412660164824, + "grad_norm": 1.3287692070007324, + "learning_rate": 0.00019872048812312855, + "loss": 4.6671, + "step": 202600 + }, + { + "epoch": 0.3781674321610267, + "grad_norm": 0.9818217158317566, + "learning_rate": 0.00019871981816036965, + "loss": 4.638, + "step": 202650 + }, + { + "epoch": 0.37826073772040525, + "grad_norm": 1.010910987854004, + "learning_rate": 0.00019871914802338763, + "loss": 4.742, + "step": 202700 + }, + { + "epoch": 0.3783540432797837, + "grad_norm": 1.0474064350128174, + "learning_rate": 0.00019871847771218374, + "loss": 4.7676, + "step": 202750 + }, + { + "epoch": 0.3784473488391622, + "grad_norm": 0.8638759255409241, + "learning_rate": 0.00019871780722675904, + "loss": 4.7774, + "step": 202800 + }, + { + "epoch": 0.3785406543985407, + "grad_norm": 1.0039918422698975, + "learning_rate": 0.00019871713656711482, + "loss": 4.5449, + "step": 202850 + }, + { + "epoch": 0.3786339599579192, + "grad_norm": 1.126681923866272, + "learning_rate": 0.00019871646573325222, + "loss": 4.3922, + "step": 202900 + }, + { + "epoch": 0.3787272655172977, + "grad_norm": 1.3228673934936523, + "learning_rate": 0.00019871579472517241, + "loss": 4.6475, + "step": 202950 + }, + { + "epoch": 0.37882057107667616, + "grad_norm": 0.9036591649055481, + "learning_rate": 0.00019871512354287664, + "loss": 4.7047, + "step": 203000 + }, + { + "epoch": 0.3789138766360547, + "grad_norm": 0.9258872866630554, + "learning_rate": 0.00019871445218636597, + "loss": 4.6349, + "step": 203050 + }, + { + "epoch": 0.37900718219543317, + "grad_norm": 2.290501356124878, + "learning_rate": 0.0001987137806556417, + "loss": 4.4937, + "step": 203100 + }, + { + "epoch": 0.37910048775481164, + "grad_norm": 0.8478452563285828, + "learning_rate": 0.000198713108950705, + "loss": 4.5709, + "step": 203150 + }, + { + "epoch": 0.3791937933141901, + "grad_norm": 1.1010254621505737, + "learning_rate": 0.000198712437071557, + "loss": 4.7398, + "step": 203200 + }, + { + "epoch": 0.37928709887356865, + "grad_norm": 1.4688420295715332, + "learning_rate": 0.00019871176501819892, + "loss": 4.6441, + "step": 203250 + }, + { + "epoch": 0.3793804044329471, + "grad_norm": 1.065325379371643, + "learning_rate": 0.00019871109279063196, + "loss": 4.4881, + "step": 203300 + }, + { + "epoch": 0.3794737099923256, + "grad_norm": 0.981877863407135, + "learning_rate": 0.00019871042038885727, + "loss": 4.5731, + "step": 203350 + }, + { + "epoch": 0.37956701555170413, + "grad_norm": 1.0577549934387207, + "learning_rate": 0.0001987097478128761, + "loss": 4.7339, + "step": 203400 + }, + { + "epoch": 0.3796603211110826, + "grad_norm": 0.9602108597755432, + "learning_rate": 0.00019870907506268953, + "loss": 4.5568, + "step": 203450 + }, + { + "epoch": 0.3797536266704611, + "grad_norm": 1.1359353065490723, + "learning_rate": 0.00019870840213829884, + "loss": 4.6005, + "step": 203500 + }, + { + "epoch": 0.37984693222983956, + "grad_norm": 0.9004436731338501, + "learning_rate": 0.0001987077290397052, + "loss": 4.7455, + "step": 203550 + }, + { + "epoch": 0.3799402377892181, + "grad_norm": 1.0926775932312012, + "learning_rate": 0.0001987070557669098, + "loss": 4.4615, + "step": 203600 + }, + { + "epoch": 0.38003354334859657, + "grad_norm": 0.8713423609733582, + "learning_rate": 0.00019870638231991376, + "loss": 4.4985, + "step": 203650 + }, + { + "epoch": 0.38012684890797505, + "grad_norm": 0.6343261003494263, + "learning_rate": 0.00019870570869871837, + "loss": 4.437, + "step": 203700 + }, + { + "epoch": 0.3802201544673536, + "grad_norm": 0.7554951310157776, + "learning_rate": 0.00019870503490332475, + "loss": 4.3759, + "step": 203750 + }, + { + "epoch": 0.38031346002673205, + "grad_norm": 0.9022654891014099, + "learning_rate": 0.0001987043609337341, + "loss": 4.5305, + "step": 203800 + }, + { + "epoch": 0.38040676558611053, + "grad_norm": 1.4872987270355225, + "learning_rate": 0.00019870368678994766, + "loss": 4.659, + "step": 203850 + }, + { + "epoch": 0.380500071145489, + "grad_norm": 0.8781129121780396, + "learning_rate": 0.00019870301247196655, + "loss": 4.5687, + "step": 203900 + }, + { + "epoch": 0.38059337670486754, + "grad_norm": 0.9872531294822693, + "learning_rate": 0.000198702337979792, + "loss": 4.4754, + "step": 203950 + }, + { + "epoch": 0.380686682264246, + "grad_norm": 1.1369433403015137, + "learning_rate": 0.0001987016633134252, + "loss": 4.6713, + "step": 204000 + }, + { + "epoch": 0.380686682264246, + "eval_loss": 4.801127910614014, + "eval_runtime": 232.9265, + "eval_samples_per_second": 11.197, + "eval_steps_per_second": 11.197, + "eval_tts_loss": 7.454781288711034, + "step": 204000 + }, + { + "epoch": 0.3807799878236245, + "grad_norm": 1.0344752073287964, + "learning_rate": 0.0001987009884728673, + "loss": 4.5242, + "step": 204050 + }, + { + "epoch": 0.380873293383003, + "grad_norm": 0.8404310345649719, + "learning_rate": 0.00019870031345811955, + "loss": 4.6894, + "step": 204100 + }, + { + "epoch": 0.3809665989423815, + "grad_norm": 1.081472635269165, + "learning_rate": 0.00019869963826918313, + "loss": 4.5426, + "step": 204150 + }, + { + "epoch": 0.38105990450175997, + "grad_norm": 0.9155741333961487, + "learning_rate": 0.00019869896290605917, + "loss": 4.781, + "step": 204200 + }, + { + "epoch": 0.38115321006113845, + "grad_norm": 1.0043416023254395, + "learning_rate": 0.00019869828736874894, + "loss": 4.6185, + "step": 204250 + }, + { + "epoch": 0.381246515620517, + "grad_norm": 0.6685605049133301, + "learning_rate": 0.00019869761165725356, + "loss": 4.7304, + "step": 204300 + }, + { + "epoch": 0.38133982117989546, + "grad_norm": 0.9836552143096924, + "learning_rate": 0.00019869693577157432, + "loss": 4.6497, + "step": 204350 + }, + { + "epoch": 0.38143312673927393, + "grad_norm": 1.1131340265274048, + "learning_rate": 0.0001986962597117123, + "loss": 4.5424, + "step": 204400 + }, + { + "epoch": 0.3815264322986524, + "grad_norm": 0.8396437168121338, + "learning_rate": 0.00019869558347766878, + "loss": 4.6054, + "step": 204450 + }, + { + "epoch": 0.38161973785803094, + "grad_norm": 0.8572611808776855, + "learning_rate": 0.0001986949070694449, + "loss": 4.5767, + "step": 204500 + }, + { + "epoch": 0.3817130434174094, + "grad_norm": 0.9373936653137207, + "learning_rate": 0.0001986942304870419, + "loss": 4.5014, + "step": 204550 + }, + { + "epoch": 0.3818063489767879, + "grad_norm": 1.0423073768615723, + "learning_rate": 0.00019869355373046093, + "loss": 4.3117, + "step": 204600 + }, + { + "epoch": 0.3818996545361664, + "grad_norm": 0.8610841631889343, + "learning_rate": 0.0001986928767997032, + "loss": 4.2993, + "step": 204650 + }, + { + "epoch": 0.3819929600955449, + "grad_norm": 1.0650521516799927, + "learning_rate": 0.00019869219969476993, + "loss": 4.7043, + "step": 204700 + }, + { + "epoch": 0.3820862656549234, + "grad_norm": 1.2617119550704956, + "learning_rate": 0.0001986915224156623, + "loss": 4.7328, + "step": 204750 + }, + { + "epoch": 0.38217957121430185, + "grad_norm": 1.0750634670257568, + "learning_rate": 0.00019869084496238146, + "loss": 4.6412, + "step": 204800 + }, + { + "epoch": 0.3822728767736804, + "grad_norm": 1.0128196477890015, + "learning_rate": 0.00019869016733492867, + "loss": 4.7131, + "step": 204850 + }, + { + "epoch": 0.38236618233305886, + "grad_norm": 0.9253135919570923, + "learning_rate": 0.00019868948953330508, + "loss": 4.5573, + "step": 204900 + }, + { + "epoch": 0.38245948789243733, + "grad_norm": 1.0896155834197998, + "learning_rate": 0.00019868881155751193, + "loss": 4.5266, + "step": 204950 + }, + { + "epoch": 0.38255279345181586, + "grad_norm": 1.0464351177215576, + "learning_rate": 0.00019868813340755037, + "loss": 4.5613, + "step": 205000 + }, + { + "epoch": 0.38264609901119434, + "grad_norm": 0.8733029365539551, + "learning_rate": 0.00019868745508342165, + "loss": 4.4978, + "step": 205050 + }, + { + "epoch": 0.3827394045705728, + "grad_norm": 1.1396583318710327, + "learning_rate": 0.0001986867765851269, + "loss": 4.5443, + "step": 205100 + }, + { + "epoch": 0.3828327101299513, + "grad_norm": 1.0709539651870728, + "learning_rate": 0.00019868609791266737, + "loss": 4.7764, + "step": 205150 + }, + { + "epoch": 0.3829260156893298, + "grad_norm": 0.9949414134025574, + "learning_rate": 0.00019868541906604425, + "loss": 4.5682, + "step": 205200 + }, + { + "epoch": 0.3830193212487083, + "grad_norm": 0.9429978132247925, + "learning_rate": 0.0001986847400452587, + "loss": 4.5577, + "step": 205250 + }, + { + "epoch": 0.3831126268080868, + "grad_norm": 1.0749703645706177, + "learning_rate": 0.00019868406085031198, + "loss": 4.6041, + "step": 205300 + }, + { + "epoch": 0.3832059323674653, + "grad_norm": 0.7519810795783997, + "learning_rate": 0.00019868338148120525, + "loss": 4.5119, + "step": 205350 + }, + { + "epoch": 0.3832992379268438, + "grad_norm": 1.1606097221374512, + "learning_rate": 0.00019868270193793973, + "loss": 4.6795, + "step": 205400 + }, + { + "epoch": 0.38339254348622226, + "grad_norm": 0.6909898519515991, + "learning_rate": 0.00019868202222051657, + "loss": 4.7247, + "step": 205450 + }, + { + "epoch": 0.38348584904560074, + "grad_norm": 1.001676321029663, + "learning_rate": 0.00019868134232893704, + "loss": 4.6081, + "step": 205500 + }, + { + "epoch": 0.38357915460497927, + "grad_norm": 1.1560254096984863, + "learning_rate": 0.00019868066226320227, + "loss": 4.6179, + "step": 205550 + }, + { + "epoch": 0.38367246016435774, + "grad_norm": 0.7835885882377625, + "learning_rate": 0.00019867998202331352, + "loss": 4.5546, + "step": 205600 + }, + { + "epoch": 0.3837657657237362, + "grad_norm": 0.5636594295501709, + "learning_rate": 0.00019867930160927195, + "loss": 4.508, + "step": 205650 + }, + { + "epoch": 0.38385907128311475, + "grad_norm": 1.0891164541244507, + "learning_rate": 0.00019867862102107878, + "loss": 4.6448, + "step": 205700 + }, + { + "epoch": 0.3839523768424932, + "grad_norm": 0.8994442820549011, + "learning_rate": 0.0001986779402587352, + "loss": 4.4305, + "step": 205750 + }, + { + "epoch": 0.3840456824018717, + "grad_norm": 1.0475820302963257, + "learning_rate": 0.00019867725932224244, + "loss": 4.5979, + "step": 205800 + }, + { + "epoch": 0.3841389879612502, + "grad_norm": 1.0817757844924927, + "learning_rate": 0.00019867657821160165, + "loss": 4.4495, + "step": 205850 + }, + { + "epoch": 0.3842322935206287, + "grad_norm": 1.2010945081710815, + "learning_rate": 0.0001986758969268141, + "loss": 4.5591, + "step": 205900 + }, + { + "epoch": 0.3843255990800072, + "grad_norm": 0.9652662873268127, + "learning_rate": 0.00019867521546788091, + "loss": 4.5538, + "step": 205950 + }, + { + "epoch": 0.38441890463938566, + "grad_norm": 1.1052769422531128, + "learning_rate": 0.00019867453383480338, + "loss": 4.6479, + "step": 206000 + }, + { + "epoch": 0.3845122101987642, + "grad_norm": 1.1473944187164307, + "learning_rate": 0.00019867385202758262, + "loss": 4.5248, + "step": 206050 + }, + { + "epoch": 0.38460551575814267, + "grad_norm": 1.0132917165756226, + "learning_rate": 0.00019867317004621988, + "loss": 4.6242, + "step": 206100 + }, + { + "epoch": 0.38469882131752114, + "grad_norm": 1.299413800239563, + "learning_rate": 0.00019867248789071638, + "loss": 4.6021, + "step": 206150 + }, + { + "epoch": 0.3847921268768996, + "grad_norm": 1.0853779315948486, + "learning_rate": 0.0001986718055610733, + "loss": 4.6003, + "step": 206200 + }, + { + "epoch": 0.38488543243627815, + "grad_norm": 0.9251170754432678, + "learning_rate": 0.0001986711230572918, + "loss": 4.5685, + "step": 206250 + }, + { + "epoch": 0.38497873799565663, + "grad_norm": 1.1247254610061646, + "learning_rate": 0.00019867044037937315, + "loss": 4.5821, + "step": 206300 + }, + { + "epoch": 0.3850720435550351, + "grad_norm": 1.1961967945098877, + "learning_rate": 0.00019866975752731855, + "loss": 4.7458, + "step": 206350 + }, + { + "epoch": 0.38516534911441364, + "grad_norm": 0.7294309735298157, + "learning_rate": 0.00019866907450112917, + "loss": 4.5956, + "step": 206400 + }, + { + "epoch": 0.3852586546737921, + "grad_norm": 0.7703094482421875, + "learning_rate": 0.00019866839130080623, + "loss": 4.6983, + "step": 206450 + }, + { + "epoch": 0.3853519602331706, + "grad_norm": 0.9793773293495178, + "learning_rate": 0.00019866770792635098, + "loss": 4.4386, + "step": 206500 + }, + { + "epoch": 0.38544526579254906, + "grad_norm": 0.9847137928009033, + "learning_rate": 0.00019866702437776454, + "loss": 4.6341, + "step": 206550 + }, + { + "epoch": 0.3855385713519276, + "grad_norm": 1.0143849849700928, + "learning_rate": 0.00019866634065504818, + "loss": 4.816, + "step": 206600 + }, + { + "epoch": 0.38563187691130607, + "grad_norm": 0.8812471628189087, + "learning_rate": 0.0001986656567582031, + "loss": 4.4307, + "step": 206650 + }, + { + "epoch": 0.38572518247068455, + "grad_norm": 0.9908750057220459, + "learning_rate": 0.0001986649726872305, + "loss": 4.7599, + "step": 206700 + }, + { + "epoch": 0.3858184880300631, + "grad_norm": 1.2358583211898804, + "learning_rate": 0.00019866428844213157, + "loss": 4.7192, + "step": 206750 + }, + { + "epoch": 0.38591179358944155, + "grad_norm": 0.8084837794303894, + "learning_rate": 0.00019866360402290753, + "loss": 4.6309, + "step": 206800 + }, + { + "epoch": 0.38600509914882003, + "grad_norm": 0.917667806148529, + "learning_rate": 0.0001986629194295596, + "loss": 4.5326, + "step": 206850 + }, + { + "epoch": 0.3860984047081985, + "grad_norm": 1.1653844118118286, + "learning_rate": 0.00019866223466208897, + "loss": 4.5962, + "step": 206900 + }, + { + "epoch": 0.38619171026757704, + "grad_norm": 0.9410626888275146, + "learning_rate": 0.00019866154972049687, + "loss": 4.6704, + "step": 206950 + }, + { + "epoch": 0.3862850158269555, + "grad_norm": 0.8716585040092468, + "learning_rate": 0.00019866086460478447, + "loss": 4.6156, + "step": 207000 + }, + { + "epoch": 0.3862850158269555, + "eval_loss": 4.8045244216918945, + "eval_runtime": 231.8143, + "eval_samples_per_second": 11.25, + "eval_steps_per_second": 11.25, + "eval_tts_loss": 7.503764759250659, + "step": 207000 + }, + { + "epoch": 0.386378321386334, + "grad_norm": 0.9506188631057739, + "learning_rate": 0.00019866017931495302, + "loss": 4.6642, + "step": 207050 + }, + { + "epoch": 0.38647162694571247, + "grad_norm": 1.3123427629470825, + "learning_rate": 0.00019865949385100372, + "loss": 4.6589, + "step": 207100 + }, + { + "epoch": 0.386564932505091, + "grad_norm": 1.0116405487060547, + "learning_rate": 0.00019865880821293776, + "loss": 4.6218, + "step": 207150 + }, + { + "epoch": 0.3866582380644695, + "grad_norm": 1.0257128477096558, + "learning_rate": 0.00019865812240075635, + "loss": 4.8113, + "step": 207200 + }, + { + "epoch": 0.38675154362384795, + "grad_norm": 1.1300727128982544, + "learning_rate": 0.00019865743641446075, + "loss": 4.6969, + "step": 207250 + }, + { + "epoch": 0.3868448491832265, + "grad_norm": 0.8774489164352417, + "learning_rate": 0.00019865675025405213, + "loss": 4.6738, + "step": 207300 + }, + { + "epoch": 0.38693815474260496, + "grad_norm": 1.0406553745269775, + "learning_rate": 0.00019865606391953166, + "loss": 4.7365, + "step": 207350 + }, + { + "epoch": 0.38703146030198343, + "grad_norm": 0.9450544118881226, + "learning_rate": 0.00019865537741090063, + "loss": 4.454, + "step": 207400 + }, + { + "epoch": 0.3871247658613619, + "grad_norm": 0.9951105117797852, + "learning_rate": 0.0001986546907281602, + "loss": 4.538, + "step": 207450 + }, + { + "epoch": 0.38721807142074044, + "grad_norm": 0.7700023651123047, + "learning_rate": 0.00019865400387131162, + "loss": 4.4626, + "step": 207500 + }, + { + "epoch": 0.3873113769801189, + "grad_norm": 0.8401572108268738, + "learning_rate": 0.00019865331684035609, + "loss": 4.7251, + "step": 207550 + }, + { + "epoch": 0.3874046825394974, + "grad_norm": 1.3303920030593872, + "learning_rate": 0.0001986526296352948, + "loss": 4.6399, + "step": 207600 + }, + { + "epoch": 0.3874979880988759, + "grad_norm": 0.9098189473152161, + "learning_rate": 0.00019865194225612896, + "loss": 4.6386, + "step": 207650 + }, + { + "epoch": 0.3875912936582544, + "grad_norm": 1.0674896240234375, + "learning_rate": 0.00019865125470285984, + "loss": 4.652, + "step": 207700 + }, + { + "epoch": 0.3876845992176329, + "grad_norm": 1.1178187131881714, + "learning_rate": 0.0001986505669754886, + "loss": 4.5139, + "step": 207750 + }, + { + "epoch": 0.38777790477701135, + "grad_norm": 0.8213180303573608, + "learning_rate": 0.00019864987907401645, + "loss": 4.734, + "step": 207800 + }, + { + "epoch": 0.3878712103363899, + "grad_norm": 1.1217759847640991, + "learning_rate": 0.00019864919099844461, + "loss": 4.5176, + "step": 207850 + }, + { + "epoch": 0.38796451589576836, + "grad_norm": 0.978659451007843, + "learning_rate": 0.00019864850274877433, + "loss": 4.6919, + "step": 207900 + }, + { + "epoch": 0.38805782145514683, + "grad_norm": 1.1488454341888428, + "learning_rate": 0.0001986478143250068, + "loss": 4.6732, + "step": 207950 + }, + { + "epoch": 0.38815112701452537, + "grad_norm": 0.9637132287025452, + "learning_rate": 0.00019864712572714323, + "loss": 4.6186, + "step": 208000 + }, + { + "epoch": 0.38824443257390384, + "grad_norm": 1.1298731565475464, + "learning_rate": 0.00019864643695518487, + "loss": 4.3077, + "step": 208050 + }, + { + "epoch": 0.3883377381332823, + "grad_norm": 0.889434814453125, + "learning_rate": 0.00019864574800913286, + "loss": 4.4539, + "step": 208100 + }, + { + "epoch": 0.3884310436926608, + "grad_norm": 1.0395722389221191, + "learning_rate": 0.00019864505888898847, + "loss": 4.6617, + "step": 208150 + }, + { + "epoch": 0.3885243492520393, + "grad_norm": 0.8869847059249878, + "learning_rate": 0.00019864436959475293, + "loss": 4.5674, + "step": 208200 + }, + { + "epoch": 0.3886176548114178, + "grad_norm": 1.071192979812622, + "learning_rate": 0.0001986436801264274, + "loss": 4.7536, + "step": 208250 + }, + { + "epoch": 0.3887109603707963, + "grad_norm": 1.0335016250610352, + "learning_rate": 0.00019864299048401315, + "loss": 4.5433, + "step": 208300 + }, + { + "epoch": 0.3888042659301748, + "grad_norm": 1.0146387815475464, + "learning_rate": 0.00019864230066751138, + "loss": 4.6243, + "step": 208350 + }, + { + "epoch": 0.3888975714895533, + "grad_norm": 1.2553081512451172, + "learning_rate": 0.00019864161067692333, + "loss": 4.3292, + "step": 208400 + }, + { + "epoch": 0.38899087704893176, + "grad_norm": 0.8601014018058777, + "learning_rate": 0.00019864092051225017, + "loss": 4.6767, + "step": 208450 + }, + { + "epoch": 0.38908418260831024, + "grad_norm": 0.8037707209587097, + "learning_rate": 0.00019864023017349315, + "loss": 4.5461, + "step": 208500 + }, + { + "epoch": 0.38917748816768877, + "grad_norm": 0.9061649441719055, + "learning_rate": 0.00019863953966065348, + "loss": 4.8048, + "step": 208550 + }, + { + "epoch": 0.38927079372706724, + "grad_norm": 1.0509523153305054, + "learning_rate": 0.00019863884897373236, + "loss": 4.458, + "step": 208600 + }, + { + "epoch": 0.3893640992864457, + "grad_norm": 1.026106834411621, + "learning_rate": 0.00019863815811273106, + "loss": 4.7856, + "step": 208650 + }, + { + "epoch": 0.38945740484582425, + "grad_norm": 0.9003967642784119, + "learning_rate": 0.00019863746707765074, + "loss": 4.7185, + "step": 208700 + }, + { + "epoch": 0.3895507104052027, + "grad_norm": 1.1053556203842163, + "learning_rate": 0.00019863677586849266, + "loss": 4.5309, + "step": 208750 + }, + { + "epoch": 0.3896440159645812, + "grad_norm": 0.8523988127708435, + "learning_rate": 0.00019863608448525798, + "loss": 4.7219, + "step": 208800 + }, + { + "epoch": 0.3897373215239597, + "grad_norm": 0.9140484929084778, + "learning_rate": 0.00019863539292794803, + "loss": 4.5942, + "step": 208850 + }, + { + "epoch": 0.3898306270833382, + "grad_norm": 0.9049879312515259, + "learning_rate": 0.00019863470119656394, + "loss": 4.7273, + "step": 208900 + }, + { + "epoch": 0.3899239326427167, + "grad_norm": 0.8477234840393066, + "learning_rate": 0.00019863400929110697, + "loss": 4.4955, + "step": 208950 + }, + { + "epoch": 0.39001723820209516, + "grad_norm": 1.2483469247817993, + "learning_rate": 0.00019863331721157833, + "loss": 4.8071, + "step": 209000 + }, + { + "epoch": 0.3901105437614737, + "grad_norm": 1.118116021156311, + "learning_rate": 0.0001986326249579792, + "loss": 4.6028, + "step": 209050 + }, + { + "epoch": 0.39020384932085217, + "grad_norm": 1.069517731666565, + "learning_rate": 0.0001986319325303109, + "loss": 4.761, + "step": 209100 + }, + { + "epoch": 0.39029715488023065, + "grad_norm": 1.1618387699127197, + "learning_rate": 0.00019863123992857454, + "loss": 4.5113, + "step": 209150 + }, + { + "epoch": 0.3903904604396091, + "grad_norm": 0.905892014503479, + "learning_rate": 0.00019863054715277143, + "loss": 4.6528, + "step": 209200 + }, + { + "epoch": 0.39048376599898765, + "grad_norm": 1.0331082344055176, + "learning_rate": 0.00019862985420290277, + "loss": 4.8151, + "step": 209250 + }, + { + "epoch": 0.39057707155836613, + "grad_norm": 0.9063888788223267, + "learning_rate": 0.00019862916107896971, + "loss": 4.2769, + "step": 209300 + }, + { + "epoch": 0.3906703771177446, + "grad_norm": 0.8382515907287598, + "learning_rate": 0.00019862846778097358, + "loss": 4.3706, + "step": 209350 + }, + { + "epoch": 0.39076368267712314, + "grad_norm": 0.9924123883247375, + "learning_rate": 0.00019862777430891556, + "loss": 4.7743, + "step": 209400 + }, + { + "epoch": 0.3908569882365016, + "grad_norm": 1.145404577255249, + "learning_rate": 0.00019862708066279688, + "loss": 4.5281, + "step": 209450 + }, + { + "epoch": 0.3909502937958801, + "grad_norm": 1.0358670949935913, + "learning_rate": 0.00019862638684261876, + "loss": 4.6629, + "step": 209500 + }, + { + "epoch": 0.39104359935525856, + "grad_norm": 1.2141669988632202, + "learning_rate": 0.0001986256928483824, + "loss": 4.5794, + "step": 209550 + }, + { + "epoch": 0.3911369049146371, + "grad_norm": 0.9178851246833801, + "learning_rate": 0.00019862499868008904, + "loss": 4.6268, + "step": 209600 + }, + { + "epoch": 0.39123021047401557, + "grad_norm": 1.0358548164367676, + "learning_rate": 0.00019862430433773993, + "loss": 4.7368, + "step": 209650 + }, + { + "epoch": 0.39132351603339405, + "grad_norm": 0.8665915131568909, + "learning_rate": 0.00019862360982133626, + "loss": 4.3792, + "step": 209700 + }, + { + "epoch": 0.3914168215927725, + "grad_norm": 1.0212570428848267, + "learning_rate": 0.0001986229151308793, + "loss": 4.8161, + "step": 209750 + }, + { + "epoch": 0.39151012715215106, + "grad_norm": 1.306823968887329, + "learning_rate": 0.00019862222026637022, + "loss": 4.4615, + "step": 209800 + }, + { + "epoch": 0.39160343271152953, + "grad_norm": 1.1909260749816895, + "learning_rate": 0.00019862152522781028, + "loss": 4.6705, + "step": 209850 + }, + { + "epoch": 0.391696738270908, + "grad_norm": 0.8337374925613403, + "learning_rate": 0.00019862083001520071, + "loss": 4.5671, + "step": 209900 + }, + { + "epoch": 0.39179004383028654, + "grad_norm": 1.068738341331482, + "learning_rate": 0.0001986201346285427, + "loss": 4.5786, + "step": 209950 + }, + { + "epoch": 0.391883349389665, + "grad_norm": 0.9537367820739746, + "learning_rate": 0.00019861943906783757, + "loss": 4.6939, + "step": 210000 + }, + { + "epoch": 0.391883349389665, + "eval_loss": 4.793107509613037, + "eval_runtime": 232.1494, + "eval_samples_per_second": 11.234, + "eval_steps_per_second": 11.234, + "eval_tts_loss": 7.506199054619971, + "step": 210000 + }, + { + "epoch": 0.3919766549490435, + "grad_norm": 1.085239052772522, + "learning_rate": 0.00019861874333308642, + "loss": 4.599, + "step": 210050 + }, + { + "epoch": 0.39206996050842197, + "grad_norm": 0.9580297470092773, + "learning_rate": 0.00019861804742429056, + "loss": 4.6775, + "step": 210100 + }, + { + "epoch": 0.3921632660678005, + "grad_norm": 1.1435822248458862, + "learning_rate": 0.00019861735134145118, + "loss": 4.6902, + "step": 210150 + }, + { + "epoch": 0.392256571627179, + "grad_norm": 1.1688177585601807, + "learning_rate": 0.00019861665508456958, + "loss": 4.63, + "step": 210200 + }, + { + "epoch": 0.39234987718655745, + "grad_norm": 0.7775763273239136, + "learning_rate": 0.0001986159586536469, + "loss": 4.726, + "step": 210250 + }, + { + "epoch": 0.392443182745936, + "grad_norm": 1.0630296468734741, + "learning_rate": 0.0001986152620486844, + "loss": 4.6786, + "step": 210300 + }, + { + "epoch": 0.39253648830531446, + "grad_norm": 1.1185650825500488, + "learning_rate": 0.0001986145652696833, + "loss": 4.7136, + "step": 210350 + }, + { + "epoch": 0.39262979386469293, + "grad_norm": 1.1582688093185425, + "learning_rate": 0.00019861386831664487, + "loss": 4.5912, + "step": 210400 + }, + { + "epoch": 0.3927230994240714, + "grad_norm": 1.0869005918502808, + "learning_rate": 0.00019861317118957032, + "loss": 4.5373, + "step": 210450 + }, + { + "epoch": 0.39281640498344994, + "grad_norm": 1.0537189245224, + "learning_rate": 0.00019861247388846086, + "loss": 4.7754, + "step": 210500 + }, + { + "epoch": 0.3929097105428284, + "grad_norm": 1.4728331565856934, + "learning_rate": 0.00019861177641331774, + "loss": 4.7525, + "step": 210550 + }, + { + "epoch": 0.3930030161022069, + "grad_norm": 1.0283353328704834, + "learning_rate": 0.0001986110787641422, + "loss": 4.4389, + "step": 210600 + }, + { + "epoch": 0.3930963216615854, + "grad_norm": 1.0805425643920898, + "learning_rate": 0.00019861038094093542, + "loss": 4.6202, + "step": 210650 + }, + { + "epoch": 0.3931896272209639, + "grad_norm": 0.8940476179122925, + "learning_rate": 0.0001986096829436987, + "loss": 4.6031, + "step": 210700 + }, + { + "epoch": 0.3932829327803424, + "grad_norm": 0.9329911470413208, + "learning_rate": 0.00019860898477243324, + "loss": 4.4502, + "step": 210750 + }, + { + "epoch": 0.39337623833972085, + "grad_norm": 0.9388894438743591, + "learning_rate": 0.00019860828642714027, + "loss": 4.675, + "step": 210800 + }, + { + "epoch": 0.3934695438990994, + "grad_norm": 0.6746310591697693, + "learning_rate": 0.000198607587907821, + "loss": 4.8536, + "step": 210850 + }, + { + "epoch": 0.39356284945847786, + "grad_norm": 0.9739928245544434, + "learning_rate": 0.00019860688921447673, + "loss": 4.6031, + "step": 210900 + }, + { + "epoch": 0.39365615501785634, + "grad_norm": 1.030884027481079, + "learning_rate": 0.00019860619034710864, + "loss": 4.6616, + "step": 210950 + }, + { + "epoch": 0.39374946057723487, + "grad_norm": 1.0043467283248901, + "learning_rate": 0.00019860549130571795, + "loss": 4.631, + "step": 211000 + }, + { + "epoch": 0.39384276613661334, + "grad_norm": 1.2081234455108643, + "learning_rate": 0.00019860479209030593, + "loss": 4.5703, + "step": 211050 + }, + { + "epoch": 0.3939360716959918, + "grad_norm": 0.9312835335731506, + "learning_rate": 0.0001986040927008738, + "loss": 4.6645, + "step": 211100 + }, + { + "epoch": 0.3940293772553703, + "grad_norm": 0.9531163573265076, + "learning_rate": 0.0001986033931374228, + "loss": 4.6137, + "step": 211150 + }, + { + "epoch": 0.3941226828147488, + "grad_norm": 1.0482721328735352, + "learning_rate": 0.00019860269339995416, + "loss": 4.3711, + "step": 211200 + }, + { + "epoch": 0.3942159883741273, + "grad_norm": 0.7411187887191772, + "learning_rate": 0.0001986019934884691, + "loss": 4.4535, + "step": 211250 + }, + { + "epoch": 0.3943092939335058, + "grad_norm": 1.1720820665359497, + "learning_rate": 0.0001986012934029689, + "loss": 4.6375, + "step": 211300 + }, + { + "epoch": 0.3944025994928843, + "grad_norm": 0.9703693389892578, + "learning_rate": 0.00019860059314345474, + "loss": 4.4733, + "step": 211350 + }, + { + "epoch": 0.3944959050522628, + "grad_norm": 1.1301404237747192, + "learning_rate": 0.0001985998927099279, + "loss": 4.6341, + "step": 211400 + }, + { + "epoch": 0.39458921061164126, + "grad_norm": 0.7381196618080139, + "learning_rate": 0.0001985991921023896, + "loss": 4.675, + "step": 211450 + }, + { + "epoch": 0.39468251617101974, + "grad_norm": 1.1832466125488281, + "learning_rate": 0.00019859849132084108, + "loss": 4.6554, + "step": 211500 + }, + { + "epoch": 0.39477582173039827, + "grad_norm": 1.0502104759216309, + "learning_rate": 0.00019859779036528354, + "loss": 4.663, + "step": 211550 + }, + { + "epoch": 0.39486912728977674, + "grad_norm": 1.127864122390747, + "learning_rate": 0.0001985970892357183, + "loss": 4.6673, + "step": 211600 + }, + { + "epoch": 0.3949624328491552, + "grad_norm": 1.035516619682312, + "learning_rate": 0.0001985963879321465, + "loss": 4.623, + "step": 211650 + }, + { + "epoch": 0.39505573840853375, + "grad_norm": 0.8197730779647827, + "learning_rate": 0.00019859568645456946, + "loss": 4.3858, + "step": 211700 + }, + { + "epoch": 0.39514904396791223, + "grad_norm": 0.8772891163825989, + "learning_rate": 0.00019859498480298836, + "loss": 4.6063, + "step": 211750 + }, + { + "epoch": 0.3952423495272907, + "grad_norm": 0.8859339952468872, + "learning_rate": 0.00019859428297740444, + "loss": 4.5472, + "step": 211800 + }, + { + "epoch": 0.3953356550866692, + "grad_norm": 1.1464184522628784, + "learning_rate": 0.000198593580977819, + "loss": 4.6492, + "step": 211850 + }, + { + "epoch": 0.3954289606460477, + "grad_norm": 1.1008684635162354, + "learning_rate": 0.00019859287880423317, + "loss": 4.5615, + "step": 211900 + }, + { + "epoch": 0.3955222662054262, + "grad_norm": 1.2038742303848267, + "learning_rate": 0.00019859217645664835, + "loss": 4.6712, + "step": 211950 + }, + { + "epoch": 0.39561557176480466, + "grad_norm": 1.1197429895401, + "learning_rate": 0.00019859147393506564, + "loss": 4.4475, + "step": 212000 + }, + { + "epoch": 0.3957088773241832, + "grad_norm": 0.9153450727462769, + "learning_rate": 0.00019859077123948632, + "loss": 4.4927, + "step": 212050 + }, + { + "epoch": 0.39580218288356167, + "grad_norm": 0.9062692523002625, + "learning_rate": 0.00019859006836991166, + "loss": 4.513, + "step": 212100 + }, + { + "epoch": 0.39589548844294015, + "grad_norm": 1.01179838180542, + "learning_rate": 0.00019858936532634282, + "loss": 4.4879, + "step": 212150 + }, + { + "epoch": 0.3959887940023186, + "grad_norm": 1.3619974851608276, + "learning_rate": 0.00019858866210878116, + "loss": 4.5294, + "step": 212200 + }, + { + "epoch": 0.39608209956169715, + "grad_norm": 1.009998083114624, + "learning_rate": 0.00019858795871722783, + "loss": 4.6197, + "step": 212250 + }, + { + "epoch": 0.39617540512107563, + "grad_norm": 1.0456039905548096, + "learning_rate": 0.00019858725515168414, + "loss": 4.3916, + "step": 212300 + }, + { + "epoch": 0.3962687106804541, + "grad_norm": 1.0402274131774902, + "learning_rate": 0.00019858655141215124, + "loss": 4.6354, + "step": 212350 + }, + { + "epoch": 0.39636201623983264, + "grad_norm": 1.4560786485671997, + "learning_rate": 0.00019858584749863042, + "loss": 4.6766, + "step": 212400 + }, + { + "epoch": 0.3964553217992111, + "grad_norm": 0.6697715520858765, + "learning_rate": 0.00019858514341112297, + "loss": 4.5072, + "step": 212450 + }, + { + "epoch": 0.3965486273585896, + "grad_norm": 0.9932947158813477, + "learning_rate": 0.00019858443914963004, + "loss": 4.7205, + "step": 212500 + }, + { + "epoch": 0.39664193291796807, + "grad_norm": 1.2164796590805054, + "learning_rate": 0.00019858373471415297, + "loss": 4.5836, + "step": 212550 + }, + { + "epoch": 0.3967352384773466, + "grad_norm": 1.0917052030563354, + "learning_rate": 0.00019858303010469291, + "loss": 4.5275, + "step": 212600 + }, + { + "epoch": 0.3968285440367251, + "grad_norm": 0.9048320055007935, + "learning_rate": 0.00019858232532125118, + "loss": 4.709, + "step": 212650 + }, + { + "epoch": 0.39692184959610355, + "grad_norm": 0.568315327167511, + "learning_rate": 0.000198581620363829, + "loss": 4.5221, + "step": 212700 + }, + { + "epoch": 0.397015155155482, + "grad_norm": 1.132904052734375, + "learning_rate": 0.00019858091523242756, + "loss": 4.6825, + "step": 212750 + }, + { + "epoch": 0.39710846071486056, + "grad_norm": 0.9325287342071533, + "learning_rate": 0.0001985802099270482, + "loss": 4.6598, + "step": 212800 + }, + { + "epoch": 0.39720176627423903, + "grad_norm": 1.0554152727127075, + "learning_rate": 0.00019857950444769208, + "loss": 4.7556, + "step": 212850 + }, + { + "epoch": 0.3972950718336175, + "grad_norm": 0.9634024500846863, + "learning_rate": 0.0001985787987943605, + "loss": 4.4973, + "step": 212900 + }, + { + "epoch": 0.39738837739299604, + "grad_norm": 1.161877989768982, + "learning_rate": 0.0001985780929670547, + "loss": 4.6044, + "step": 212950 + }, + { + "epoch": 0.3974816829523745, + "grad_norm": 0.954832136631012, + "learning_rate": 0.00019857738696577588, + "loss": 4.4247, + "step": 213000 + }, + { + "epoch": 0.3974816829523745, + "eval_loss": 4.804925918579102, + "eval_runtime": 229.7393, + "eval_samples_per_second": 11.352, + "eval_steps_per_second": 11.352, + "eval_tts_loss": 7.437013648446089, + "step": 213000 + }, + { + "epoch": 0.397574988511753, + "grad_norm": 0.9206513166427612, + "learning_rate": 0.00019857668079052533, + "loss": 4.6913, + "step": 213050 + }, + { + "epoch": 0.39766829407113147, + "grad_norm": 1.1719000339508057, + "learning_rate": 0.00019857597444130427, + "loss": 4.6099, + "step": 213100 + }, + { + "epoch": 0.39776159963051, + "grad_norm": 0.8833860158920288, + "learning_rate": 0.00019857526791811398, + "loss": 4.4701, + "step": 213150 + }, + { + "epoch": 0.3978549051898885, + "grad_norm": 0.8113496899604797, + "learning_rate": 0.00019857456122095567, + "loss": 4.5576, + "step": 213200 + }, + { + "epoch": 0.39794821074926695, + "grad_norm": 0.7514514327049255, + "learning_rate": 0.0001985738543498306, + "loss": 4.6348, + "step": 213250 + }, + { + "epoch": 0.3980415163086455, + "grad_norm": 0.8051292896270752, + "learning_rate": 0.00019857314730474006, + "loss": 4.6339, + "step": 213300 + }, + { + "epoch": 0.39813482186802396, + "grad_norm": 0.8697991967201233, + "learning_rate": 0.00019857244008568523, + "loss": 4.5064, + "step": 213350 + }, + { + "epoch": 0.39822812742740243, + "grad_norm": 1.3794461488723755, + "learning_rate": 0.0001985717326926674, + "loss": 4.6744, + "step": 213400 + }, + { + "epoch": 0.3983214329867809, + "grad_norm": 1.0612692832946777, + "learning_rate": 0.0001985710251256878, + "loss": 4.8218, + "step": 213450 + }, + { + "epoch": 0.39841473854615944, + "grad_norm": 0.7990536689758301, + "learning_rate": 0.00019857031738474768, + "loss": 4.8063, + "step": 213500 + }, + { + "epoch": 0.3985080441055379, + "grad_norm": 0.9997884035110474, + "learning_rate": 0.0001985696094698483, + "loss": 4.6703, + "step": 213550 + }, + { + "epoch": 0.3986013496649164, + "grad_norm": 1.0094455480575562, + "learning_rate": 0.00019856890138099088, + "loss": 4.5359, + "step": 213600 + }, + { + "epoch": 0.3986946552242949, + "grad_norm": 0.9724659323692322, + "learning_rate": 0.00019856819311817674, + "loss": 4.7684, + "step": 213650 + }, + { + "epoch": 0.3987879607836734, + "grad_norm": 0.9077069759368896, + "learning_rate": 0.00019856748468140704, + "loss": 4.4383, + "step": 213700 + }, + { + "epoch": 0.3988812663430519, + "grad_norm": 0.8644628524780273, + "learning_rate": 0.00019856677607068307, + "loss": 4.6682, + "step": 213750 + }, + { + "epoch": 0.39897457190243035, + "grad_norm": 1.038778305053711, + "learning_rate": 0.00019856606728600612, + "loss": 4.6176, + "step": 213800 + }, + { + "epoch": 0.3990678774618089, + "grad_norm": 0.8550258278846741, + "learning_rate": 0.0001985653583273774, + "loss": 4.3842, + "step": 213850 + }, + { + "epoch": 0.39916118302118736, + "grad_norm": 0.929908275604248, + "learning_rate": 0.00019856464919479813, + "loss": 4.7997, + "step": 213900 + }, + { + "epoch": 0.39925448858056584, + "grad_norm": 0.948686957359314, + "learning_rate": 0.00019856393988826963, + "loss": 4.608, + "step": 213950 + }, + { + "epoch": 0.39934779413994437, + "grad_norm": 1.081762433052063, + "learning_rate": 0.00019856323040779312, + "loss": 4.5192, + "step": 214000 + }, + { + "epoch": 0.39944109969932284, + "grad_norm": 1.0186339616775513, + "learning_rate": 0.00019856252075336984, + "loss": 4.5424, + "step": 214050 + }, + { + "epoch": 0.3995344052587013, + "grad_norm": 1.243036150932312, + "learning_rate": 0.00019856181092500104, + "loss": 4.6291, + "step": 214100 + }, + { + "epoch": 0.3996277108180798, + "grad_norm": 0.9121257066726685, + "learning_rate": 0.000198561100922688, + "loss": 4.4488, + "step": 214150 + }, + { + "epoch": 0.3997210163774583, + "grad_norm": 1.058773159980774, + "learning_rate": 0.00019856039074643195, + "loss": 4.5231, + "step": 214200 + }, + { + "epoch": 0.3998143219368368, + "grad_norm": 1.0914162397384644, + "learning_rate": 0.00019855968039623418, + "loss": 4.5719, + "step": 214250 + }, + { + "epoch": 0.3999076274962153, + "grad_norm": 1.0850412845611572, + "learning_rate": 0.0001985589698720959, + "loss": 4.6784, + "step": 214300 + }, + { + "epoch": 0.4000009330555938, + "grad_norm": 0.8191171884536743, + "learning_rate": 0.00019855825917401837, + "loss": 4.6503, + "step": 214350 + }, + { + "epoch": 0.4000942386149723, + "grad_norm": 1.133804440498352, + "learning_rate": 0.00019855754830200284, + "loss": 4.6447, + "step": 214400 + }, + { + "epoch": 0.40018754417435076, + "grad_norm": 0.927496612071991, + "learning_rate": 0.0001985568372560506, + "loss": 4.6549, + "step": 214450 + }, + { + "epoch": 0.40028084973372924, + "grad_norm": 0.9108291864395142, + "learning_rate": 0.00019855612603616288, + "loss": 4.6962, + "step": 214500 + }, + { + "epoch": 0.40037415529310777, + "grad_norm": 1.0643376111984253, + "learning_rate": 0.00019855541464234094, + "loss": 4.6508, + "step": 214550 + }, + { + "epoch": 0.40046746085248625, + "grad_norm": 1.0396971702575684, + "learning_rate": 0.00019855470307458606, + "loss": 4.6046, + "step": 214600 + }, + { + "epoch": 0.4005607664118647, + "grad_norm": 0.9104790091514587, + "learning_rate": 0.00019855399133289944, + "loss": 4.3958, + "step": 214650 + }, + { + "epoch": 0.40065407197124325, + "grad_norm": 0.8371784687042236, + "learning_rate": 0.00019855327941728238, + "loss": 4.6615, + "step": 214700 + }, + { + "epoch": 0.40074737753062173, + "grad_norm": 0.8036884665489197, + "learning_rate": 0.0001985525673277361, + "loss": 4.4121, + "step": 214750 + }, + { + "epoch": 0.4008406830900002, + "grad_norm": 1.0669821500778198, + "learning_rate": 0.0001985518550642619, + "loss": 4.4635, + "step": 214800 + }, + { + "epoch": 0.4009339886493787, + "grad_norm": 1.0389690399169922, + "learning_rate": 0.00019855114262686102, + "loss": 4.6575, + "step": 214850 + }, + { + "epoch": 0.4010272942087572, + "grad_norm": 0.661183774471283, + "learning_rate": 0.0001985504300155347, + "loss": 4.6654, + "step": 214900 + }, + { + "epoch": 0.4011205997681357, + "grad_norm": 1.4455314874649048, + "learning_rate": 0.00019854971723028424, + "loss": 4.7863, + "step": 214950 + }, + { + "epoch": 0.40121390532751416, + "grad_norm": 0.9810767769813538, + "learning_rate": 0.00019854900427111085, + "loss": 4.4652, + "step": 215000 + }, + { + "epoch": 0.4013072108868927, + "grad_norm": 0.9444175362586975, + "learning_rate": 0.00019854829113801582, + "loss": 4.6636, + "step": 215050 + }, + { + "epoch": 0.40140051644627117, + "grad_norm": 0.8920708894729614, + "learning_rate": 0.00019854757783100037, + "loss": 4.5821, + "step": 215100 + }, + { + "epoch": 0.40149382200564965, + "grad_norm": 0.9815887808799744, + "learning_rate": 0.0001985468643500658, + "loss": 4.6339, + "step": 215150 + }, + { + "epoch": 0.4015871275650281, + "grad_norm": 0.9865546226501465, + "learning_rate": 0.00019854615069521333, + "loss": 4.5329, + "step": 215200 + }, + { + "epoch": 0.40168043312440666, + "grad_norm": 1.2863136529922485, + "learning_rate": 0.00019854543686644428, + "loss": 4.7172, + "step": 215250 + }, + { + "epoch": 0.40177373868378513, + "grad_norm": 1.207463264465332, + "learning_rate": 0.00019854472286375986, + "loss": 4.4284, + "step": 215300 + }, + { + "epoch": 0.4018670442431636, + "grad_norm": 0.9688467383384705, + "learning_rate": 0.00019854400868716133, + "loss": 4.6533, + "step": 215350 + }, + { + "epoch": 0.4019603498025421, + "grad_norm": 0.767192542552948, + "learning_rate": 0.00019854329433665, + "loss": 4.4779, + "step": 215400 + }, + { + "epoch": 0.4020536553619206, + "grad_norm": 1.0222392082214355, + "learning_rate": 0.00019854257981222705, + "loss": 4.6248, + "step": 215450 + }, + { + "epoch": 0.4021469609212991, + "grad_norm": 1.1537096500396729, + "learning_rate": 0.00019854186511389382, + "loss": 4.5435, + "step": 215500 + }, + { + "epoch": 0.40224026648067757, + "grad_norm": 0.7932273745536804, + "learning_rate": 0.00019854115024165153, + "loss": 4.7153, + "step": 215550 + }, + { + "epoch": 0.4023335720400561, + "grad_norm": 0.8604770302772522, + "learning_rate": 0.00019854043519550144, + "loss": 4.7003, + "step": 215600 + }, + { + "epoch": 0.4024268775994346, + "grad_norm": 1.160075306892395, + "learning_rate": 0.00019853971997544481, + "loss": 4.4709, + "step": 215650 + }, + { + "epoch": 0.40252018315881305, + "grad_norm": 0.9051239490509033, + "learning_rate": 0.00019853900458148294, + "loss": 4.7949, + "step": 215700 + }, + { + "epoch": 0.4026134887181915, + "grad_norm": 0.8977974653244019, + "learning_rate": 0.00019853828901361705, + "loss": 4.5747, + "step": 215750 + }, + { + "epoch": 0.40270679427757006, + "grad_norm": 1.0881613492965698, + "learning_rate": 0.0001985375732718484, + "loss": 4.6564, + "step": 215800 + }, + { + "epoch": 0.40280009983694853, + "grad_norm": 1.3626446723937988, + "learning_rate": 0.00019853685735617832, + "loss": 4.613, + "step": 215850 + }, + { + "epoch": 0.402893405396327, + "grad_norm": 1.0976074934005737, + "learning_rate": 0.00019853614126660797, + "loss": 4.6058, + "step": 215900 + }, + { + "epoch": 0.40298671095570554, + "grad_norm": 1.0088049173355103, + "learning_rate": 0.0001985354250031387, + "loss": 4.5451, + "step": 215950 + }, + { + "epoch": 0.403080016515084, + "grad_norm": 0.7824239730834961, + "learning_rate": 0.00019853470856577175, + "loss": 4.5725, + "step": 216000 + }, + { + "epoch": 0.403080016515084, + "eval_loss": 4.79715633392334, + "eval_runtime": 235.0521, + "eval_samples_per_second": 11.095, + "eval_steps_per_second": 11.095, + "eval_tts_loss": 7.512439045195176, + "step": 216000 + } + ], + "logging_steps": 50, + "max_steps": 3751118, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 3000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.0520178833930322e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}