{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.999804411104847,
"eval_steps": 1000,
"global_step": 108645,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.002301045825327611,
"grad_norm": 3.5780375003814697,
"learning_rate": 4.7e-06,
"loss": 13.6899,
"step": 50
},
{
"epoch": 0.004602091650655222,
"grad_norm": 12.634012222290039,
"learning_rate": 9.7e-06,
"loss": 10.863,
"step": 100
},
{
"epoch": 0.006903137475982834,
"grad_norm": 4.505175590515137,
"learning_rate": 1.47e-05,
"loss": 8.9306,
"step": 150
},
{
"epoch": 0.009204183301310445,
"grad_norm": 4.666654109954834,
"learning_rate": 1.9600000000000002e-05,
"loss": 8.6804,
"step": 200
},
{
"epoch": 0.011505229126638056,
"grad_norm": 12.136724472045898,
"learning_rate": 2.46e-05,
"loss": 8.6245,
"step": 250
},
{
"epoch": 0.013806274951965668,
"grad_norm": 5.4161858558654785,
"learning_rate": 2.96e-05,
"loss": 8.5856,
"step": 300
},
{
"epoch": 0.01610732077729328,
"grad_norm": 7.054779529571533,
"learning_rate": 3.46e-05,
"loss": 8.4581,
"step": 350
},
{
"epoch": 0.01840836660262089,
"grad_norm": 4.1660475730896,
"learning_rate": 3.960000000000001e-05,
"loss": 8.387,
"step": 400
},
{
"epoch": 0.020709412427948503,
"grad_norm": 7.97440767288208,
"learning_rate": 4.46e-05,
"loss": 8.2992,
"step": 450
},
{
"epoch": 0.023010458253276113,
"grad_norm": 4.549796104431152,
"learning_rate": 4.96e-05,
"loss": 8.2301,
"step": 500
},
{
"epoch": 0.025311504078603726,
"grad_norm": 4.553031921386719,
"learning_rate": 5.4600000000000006e-05,
"loss": 8.1272,
"step": 550
},
{
"epoch": 0.027612549903931336,
"grad_norm": 5.876681327819824,
"learning_rate": 5.96e-05,
"loss": 8.0101,
"step": 600
},
{
"epoch": 0.02991359572925895,
"grad_norm": 7.108986854553223,
"learning_rate": 6.460000000000001e-05,
"loss": 8.009,
"step": 650
},
{
"epoch": 0.03221464155458656,
"grad_norm": 7.466887474060059,
"learning_rate": 6.96e-05,
"loss": 7.8225,
"step": 700
},
{
"epoch": 0.03451568737991417,
"grad_norm": 5.25390625,
"learning_rate": 7.46e-05,
"loss": 7.8382,
"step": 750
},
{
"epoch": 0.03681673320524178,
"grad_norm": 6.923309803009033,
"learning_rate": 7.960000000000001e-05,
"loss": 7.6769,
"step": 800
},
{
"epoch": 0.03911777903056939,
"grad_norm": 7.7578816413879395,
"learning_rate": 8.46e-05,
"loss": 7.5925,
"step": 850
},
{
"epoch": 0.041418824855897006,
"grad_norm": 7.556878566741943,
"learning_rate": 8.960000000000001e-05,
"loss": 7.5195,
"step": 900
},
{
"epoch": 0.04371987068122462,
"grad_norm": 5.801334381103516,
"learning_rate": 9.46e-05,
"loss": 7.5253,
"step": 950
},
{
"epoch": 0.046020916506552226,
"grad_norm": 7.2928948402404785,
"learning_rate": 9.960000000000001e-05,
"loss": 7.5399,
"step": 1000
},
{
"epoch": 0.046020916506552226,
"eval_loss": 7.314383029937744,
"eval_runtime": 32.585,
"eval_samples_per_second": 11.785,
"eval_steps_per_second": 5.892,
"eval_tts_loss": 7.115667695799289,
"step": 1000
},
{
"epoch": 0.04832196233187984,
"grad_norm": 5.685014247894287,
"learning_rate": 9.999995494244432e-05,
"loss": 7.4569,
"step": 1050
},
{
"epoch": 0.05062300815720745,
"grad_norm": 5.959719657897949,
"learning_rate": 9.999980375698306e-05,
"loss": 7.4072,
"step": 1100
},
{
"epoch": 0.05292405398253506,
"grad_norm": 6.67860221862793,
"learning_rate": 9.999954610323825e-05,
"loss": 7.4064,
"step": 1150
},
{
"epoch": 0.05522509980786267,
"grad_norm": 4.586386203765869,
"learning_rate": 9.999918198175851e-05,
"loss": 7.3695,
"step": 1200
},
{
"epoch": 0.057526145633190286,
"grad_norm": 3.8274621963500977,
"learning_rate": 9.999871139331921e-05,
"loss": 7.2462,
"step": 1250
},
{
"epoch": 0.0598271914585179,
"grad_norm": 5.492649078369141,
"learning_rate": 9.99981343389224e-05,
"loss": 7.2843,
"step": 1300
},
{
"epoch": 0.062128237283845505,
"grad_norm": 4.324922561645508,
"learning_rate": 9.999745081979683e-05,
"loss": 7.3252,
"step": 1350
},
{
"epoch": 0.06442928310917312,
"grad_norm": 3.14571213722229,
"learning_rate": 9.999666083739801e-05,
"loss": 7.2732,
"step": 1400
},
{
"epoch": 0.06673032893450073,
"grad_norm": 4.191070556640625,
"learning_rate": 9.999576439340806e-05,
"loss": 7.1295,
"step": 1450
},
{
"epoch": 0.06903137475982835,
"grad_norm": 3.528020143508911,
"learning_rate": 9.999476148973588e-05,
"loss": 7.1829,
"step": 1500
},
{
"epoch": 0.07133242058515596,
"grad_norm": 4.1083664894104,
"learning_rate": 9.999365212851702e-05,
"loss": 7.2347,
"step": 1550
},
{
"epoch": 0.07363346641048356,
"grad_norm": 3.2220022678375244,
"learning_rate": 9.99924363121137e-05,
"loss": 7.1745,
"step": 1600
},
{
"epoch": 0.07593451223581117,
"grad_norm": 4.157455921173096,
"learning_rate": 9.999111404311488e-05,
"loss": 7.128,
"step": 1650
},
{
"epoch": 0.07823555806113879,
"grad_norm": 2.825235605239868,
"learning_rate": 9.998968532433615e-05,
"loss": 7.1691,
"step": 1700
},
{
"epoch": 0.0805366038864664,
"grad_norm": 3.339789867401123,
"learning_rate": 9.998815015881976e-05,
"loss": 7.0882,
"step": 1750
},
{
"epoch": 0.08283764971179401,
"grad_norm": 2.8071019649505615,
"learning_rate": 9.998650854983472e-05,
"loss": 7.0621,
"step": 1800
},
{
"epoch": 0.08513869553712163,
"grad_norm": 3.1003334522247314,
"learning_rate": 9.998476050087657e-05,
"loss": 7.0141,
"step": 1850
},
{
"epoch": 0.08743974136244924,
"grad_norm": 2.8613014221191406,
"learning_rate": 9.998290601566759e-05,
"loss": 7.0619,
"step": 1900
},
{
"epoch": 0.08974078718777684,
"grad_norm": 2.567081928253174,
"learning_rate": 9.998094509815665e-05,
"loss": 7.0753,
"step": 1950
},
{
"epoch": 0.09204183301310445,
"grad_norm": 2.5587222576141357,
"learning_rate": 9.99788777525193e-05,
"loss": 7.0433,
"step": 2000
},
{
"epoch": 0.09204183301310445,
"eval_loss": 6.901029109954834,
"eval_runtime": 33.3966,
"eval_samples_per_second": 11.498,
"eval_steps_per_second": 5.749,
"eval_tts_loss": 7.299251959316763,
"step": 2000
},
{
"epoch": 0.09434287883843206,
"grad_norm": 2.903456687927246,
"learning_rate": 9.997670398315768e-05,
"loss": 7.0923,
"step": 2050
},
{
"epoch": 0.09664392466375968,
"grad_norm": 2.615713119506836,
"learning_rate": 9.997442379470056e-05,
"loss": 7.0888,
"step": 2100
},
{
"epoch": 0.09894497048908729,
"grad_norm": 2.7937371730804443,
"learning_rate": 9.997203719200331e-05,
"loss": 6.9845,
"step": 2150
},
{
"epoch": 0.1012460163144149,
"grad_norm": 2.678640365600586,
"learning_rate": 9.99695441801479e-05,
"loss": 7.0278,
"step": 2200
},
{
"epoch": 0.10354706213974252,
"grad_norm": 2.9163057804107666,
"learning_rate": 9.996694476444288e-05,
"loss": 6.9738,
"step": 2250
},
{
"epoch": 0.10584810796507012,
"grad_norm": 2.41644287109375,
"learning_rate": 9.99642389504234e-05,
"loss": 6.9378,
"step": 2300
},
{
"epoch": 0.10814915379039773,
"grad_norm": 2.7268104553222656,
"learning_rate": 9.996142674385114e-05,
"loss": 7.0378,
"step": 2350
},
{
"epoch": 0.11045019961572534,
"grad_norm": 2.242163896560669,
"learning_rate": 9.995850815071433e-05,
"loss": 6.8807,
"step": 2400
},
{
"epoch": 0.11275124544105296,
"grad_norm": 2.39416241645813,
"learning_rate": 9.995548317722775e-05,
"loss": 6.9694,
"step": 2450
},
{
"epoch": 0.11505229126638057,
"grad_norm": 2.4873769283294678,
"learning_rate": 9.99523518298327e-05,
"loss": 6.7956,
"step": 2500
},
{
"epoch": 0.11735333709170818,
"grad_norm": 2.4013776779174805,
"learning_rate": 9.994911411519699e-05,
"loss": 6.8655,
"step": 2550
},
{
"epoch": 0.1196543829170358,
"grad_norm": 3.897566080093384,
"learning_rate": 9.994577004021493e-05,
"loss": 6.822,
"step": 2600
},
{
"epoch": 0.1219554287423634,
"grad_norm": 2.2062366008758545,
"learning_rate": 9.994231961200731e-05,
"loss": 6.7814,
"step": 2650
},
{
"epoch": 0.12425647456769101,
"grad_norm": 2.6973462104797363,
"learning_rate": 9.993876283792138e-05,
"loss": 6.9139,
"step": 2700
},
{
"epoch": 0.12655752039301862,
"grad_norm": 2.6067421436309814,
"learning_rate": 9.993509972553084e-05,
"loss": 6.8624,
"step": 2750
},
{
"epoch": 0.12885856621834624,
"grad_norm": 2.6363160610198975,
"learning_rate": 9.993133028263581e-05,
"loss": 6.8297,
"step": 2800
},
{
"epoch": 0.13115961204367385,
"grad_norm": 2.585055112838745,
"learning_rate": 9.992745451726286e-05,
"loss": 6.9063,
"step": 2850
},
{
"epoch": 0.13346065786900146,
"grad_norm": 2.4903454780578613,
"learning_rate": 9.992347243766494e-05,
"loss": 6.8681,
"step": 2900
},
{
"epoch": 0.13576170369432908,
"grad_norm": 2.526623249053955,
"learning_rate": 9.99193840523214e-05,
"loss": 6.8679,
"step": 2950
},
{
"epoch": 0.1380627495196567,
"grad_norm": 2.4259722232818604,
"learning_rate": 9.991518936993794e-05,
"loss": 6.7638,
"step": 3000
},
{
"epoch": 0.1380627495196567,
"eval_loss": 6.7133917808532715,
"eval_runtime": 33.4323,
"eval_samples_per_second": 11.486,
"eval_steps_per_second": 5.743,
"eval_tts_loss": 7.4207137375880805,
"step": 3000
},
{
"epoch": 0.1403637953449843,
"grad_norm": 2.2677650451660156,
"learning_rate": 9.991088839944658e-05,
"loss": 6.8054,
"step": 3050
},
{
"epoch": 0.14266484117031192,
"grad_norm": 1.8182429075241089,
"learning_rate": 9.990648115000572e-05,
"loss": 6.7162,
"step": 3100
},
{
"epoch": 0.14496588699563953,
"grad_norm": 2.308943748474121,
"learning_rate": 9.990196763100004e-05,
"loss": 6.8139,
"step": 3150
},
{
"epoch": 0.14726693282096712,
"grad_norm": 2.179896593093872,
"learning_rate": 9.98973478520405e-05,
"loss": 6.6905,
"step": 3200
},
{
"epoch": 0.14956797864629473,
"grad_norm": 2.6917459964752197,
"learning_rate": 9.989262182296435e-05,
"loss": 6.7617,
"step": 3250
},
{
"epoch": 0.15186902447162234,
"grad_norm": 2.362337589263916,
"learning_rate": 9.988778955383506e-05,
"loss": 6.8229,
"step": 3300
},
{
"epoch": 0.15417007029694996,
"grad_norm": 2.33597731590271,
"learning_rate": 9.988285105494236e-05,
"loss": 6.7483,
"step": 3350
},
{
"epoch": 0.15647111612227757,
"grad_norm": 2.041456460952759,
"learning_rate": 9.987780633680213e-05,
"loss": 6.6595,
"step": 3400
},
{
"epoch": 0.15877216194760518,
"grad_norm": 2.1077539920806885,
"learning_rate": 9.98726554101565e-05,
"loss": 6.7904,
"step": 3450
},
{
"epoch": 0.1610732077729328,
"grad_norm": 2.350275993347168,
"learning_rate": 9.986739828597367e-05,
"loss": 6.7545,
"step": 3500
},
{
"epoch": 0.1633742535982604,
"grad_norm": 2.2597575187683105,
"learning_rate": 9.986203497544808e-05,
"loss": 6.7821,
"step": 3550
},
{
"epoch": 0.16567529942358802,
"grad_norm": 2.205111026763916,
"learning_rate": 9.985656549000018e-05,
"loss": 6.7361,
"step": 3600
},
{
"epoch": 0.16797634524891564,
"grad_norm": 2.0112202167510986,
"learning_rate": 9.985098984127657e-05,
"loss": 6.6824,
"step": 3650
},
{
"epoch": 0.17027739107424325,
"grad_norm": 2.225270986557007,
"learning_rate": 9.984530804114987e-05,
"loss": 6.7456,
"step": 3700
},
{
"epoch": 0.17257843689957086,
"grad_norm": 2.2781286239624023,
"learning_rate": 9.98395201017188e-05,
"loss": 6.7912,
"step": 3750
},
{
"epoch": 0.17487948272489848,
"grad_norm": 2.5008931159973145,
"learning_rate": 9.983362603530802e-05,
"loss": 6.7475,
"step": 3800
},
{
"epoch": 0.1771805285502261,
"grad_norm": 2.242917776107788,
"learning_rate": 9.98276258544682e-05,
"loss": 6.6998,
"step": 3850
},
{
"epoch": 0.17948157437555368,
"grad_norm": 1.6901216506958008,
"learning_rate": 9.982151957197598e-05,
"loss": 6.6986,
"step": 3900
},
{
"epoch": 0.1817826202008813,
"grad_norm": 2.2120909690856934,
"learning_rate": 9.981530720083391e-05,
"loss": 6.6784,
"step": 3950
},
{
"epoch": 0.1840836660262089,
"grad_norm": 2.279149055480957,
"learning_rate": 9.980898875427045e-05,
"loss": 6.6557,
"step": 4000
},
{
"epoch": 0.1840836660262089,
"eval_loss": 6.581401824951172,
"eval_runtime": 32.9434,
"eval_samples_per_second": 11.656,
"eval_steps_per_second": 5.828,
"eval_tts_loss": 7.501853841915528,
"step": 4000
},
{
"epoch": 0.18638471185153652,
"grad_norm": 1.847670078277588,
"learning_rate": 9.980256424573995e-05,
"loss": 6.6079,
"step": 4050
},
{
"epoch": 0.18868575767686413,
"grad_norm": 1.8807518482208252,
"learning_rate": 9.979603368892259e-05,
"loss": 6.705,
"step": 4100
},
{
"epoch": 0.19098680350219174,
"grad_norm": 2.1693525314331055,
"learning_rate": 9.978939709772435e-05,
"loss": 6.6899,
"step": 4150
},
{
"epoch": 0.19328784932751936,
"grad_norm": 1.934512734413147,
"learning_rate": 9.978265448627702e-05,
"loss": 6.6307,
"step": 4200
},
{
"epoch": 0.19558889515284697,
"grad_norm": 2.0667643547058105,
"learning_rate": 9.977580586893816e-05,
"loss": 6.6746,
"step": 4250
},
{
"epoch": 0.19788994097817458,
"grad_norm": 2.2924129962921143,
"learning_rate": 9.976885126029103e-05,
"loss": 6.6497,
"step": 4300
},
{
"epoch": 0.2001909868035022,
"grad_norm": 2.1842291355133057,
"learning_rate": 9.976179067514459e-05,
"loss": 6.5531,
"step": 4350
},
{
"epoch": 0.2024920326288298,
"grad_norm": 2.3213446140289307,
"learning_rate": 9.975462412853345e-05,
"loss": 6.6737,
"step": 4400
},
{
"epoch": 0.20479307845415742,
"grad_norm": 1.937312126159668,
"learning_rate": 9.974735163571789e-05,
"loss": 6.6511,
"step": 4450
},
{
"epoch": 0.20709412427948504,
"grad_norm": 1.7467703819274902,
"learning_rate": 9.973997321218376e-05,
"loss": 6.5941,
"step": 4500
},
{
"epoch": 0.20939517010481265,
"grad_norm": 1.9643598794937134,
"learning_rate": 9.973248887364251e-05,
"loss": 6.561,
"step": 4550
},
{
"epoch": 0.21169621593014024,
"grad_norm": 2.028480052947998,
"learning_rate": 9.972489863603102e-05,
"loss": 6.5878,
"step": 4600
},
{
"epoch": 0.21399726175546785,
"grad_norm": 1.9181138277053833,
"learning_rate": 9.971720251551184e-05,
"loss": 6.63,
"step": 4650
},
{
"epoch": 0.21629830758079546,
"grad_norm": 1.8953630924224854,
"learning_rate": 9.970940052847283e-05,
"loss": 6.6879,
"step": 4700
},
{
"epoch": 0.21859935340612308,
"grad_norm": 2.008338451385498,
"learning_rate": 9.970149269152736e-05,
"loss": 6.5496,
"step": 4750
},
{
"epoch": 0.2209003992314507,
"grad_norm": 2.563183546066284,
"learning_rate": 9.969347902151416e-05,
"loss": 6.5905,
"step": 4800
},
{
"epoch": 0.2232014450567783,
"grad_norm": 2.335991382598877,
"learning_rate": 9.968535953549734e-05,
"loss": 6.6685,
"step": 4850
},
{
"epoch": 0.22550249088210592,
"grad_norm": 1.9713267087936401,
"learning_rate": 9.967713425076631e-05,
"loss": 6.6486,
"step": 4900
},
{
"epoch": 0.22780353670743353,
"grad_norm": 1.923415184020996,
"learning_rate": 9.966880318483577e-05,
"loss": 6.6034,
"step": 4950
},
{
"epoch": 0.23010458253276114,
"grad_norm": 2.1620161533355713,
"learning_rate": 9.966036635544573e-05,
"loss": 6.5386,
"step": 5000
},
{
"epoch": 0.23010458253276114,
"eval_loss": 6.485872745513916,
"eval_runtime": 34.773,
"eval_samples_per_second": 11.043,
"eval_steps_per_second": 5.522,
"eval_tts_loss": 7.581498966822206,
"step": 5000
},
{
"epoch": 0.23240562835808876,
"grad_norm": 2.058232307434082,
"learning_rate": 9.96518237805613e-05,
"loss": 6.5541,
"step": 5050
},
{
"epoch": 0.23470667418341637,
"grad_norm": 2.401785373687744,
"learning_rate": 9.964317547837283e-05,
"loss": 6.5717,
"step": 5100
},
{
"epoch": 0.23700772000874398,
"grad_norm": 1.8926520347595215,
"learning_rate": 9.963442146729583e-05,
"loss": 6.5713,
"step": 5150
},
{
"epoch": 0.2393087658340716,
"grad_norm": 2.0552031993865967,
"learning_rate": 9.962556176597079e-05,
"loss": 6.5887,
"step": 5200
},
{
"epoch": 0.2416098116593992,
"grad_norm": 2.0118703842163086,
"learning_rate": 9.961659639326338e-05,
"loss": 6.5794,
"step": 5250
},
{
"epoch": 0.2439108574847268,
"grad_norm": 1.9087331295013428,
"learning_rate": 9.960752536826422e-05,
"loss": 6.5892,
"step": 5300
},
{
"epoch": 0.2462119033100544,
"grad_norm": 1.9817070960998535,
"learning_rate": 9.95983487102889e-05,
"loss": 6.6203,
"step": 5350
},
{
"epoch": 0.24851294913538202,
"grad_norm": 1.9609904289245605,
"learning_rate": 9.9589066438878e-05,
"loss": 6.6003,
"step": 5400
},
{
"epoch": 0.25081399496070966,
"grad_norm": 1.7898999452590942,
"learning_rate": 9.957967857379689e-05,
"loss": 6.5957,
"step": 5450
},
{
"epoch": 0.25311504078603725,
"grad_norm": 2.2270612716674805,
"learning_rate": 9.957018513503589e-05,
"loss": 6.548,
"step": 5500
},
{
"epoch": 0.2554160866113649,
"grad_norm": 2.224034547805786,
"learning_rate": 9.956058614281006e-05,
"loss": 6.5198,
"step": 5550
},
{
"epoch": 0.2577171324366925,
"grad_norm": 1.9815775156021118,
"learning_rate": 9.955088161755925e-05,
"loss": 6.5712,
"step": 5600
},
{
"epoch": 0.26001817826202006,
"grad_norm": 2.1270036697387695,
"learning_rate": 9.954107157994802e-05,
"loss": 6.5586,
"step": 5650
},
{
"epoch": 0.2623192240873477,
"grad_norm": 1.7809762954711914,
"learning_rate": 9.953115605086564e-05,
"loss": 6.5644,
"step": 5700
},
{
"epoch": 0.2646202699126753,
"grad_norm": 1.6975411176681519,
"learning_rate": 9.952113505142593e-05,
"loss": 6.5975,
"step": 5750
},
{
"epoch": 0.26692131573800293,
"grad_norm": 1.9254316091537476,
"learning_rate": 9.951100860296738e-05,
"loss": 6.5063,
"step": 5800
},
{
"epoch": 0.2692223615633305,
"grad_norm": 1.7933043241500854,
"learning_rate": 9.950077672705298e-05,
"loss": 6.4768,
"step": 5850
},
{
"epoch": 0.27152340738865816,
"grad_norm": 2.008695363998413,
"learning_rate": 9.94904394454702e-05,
"loss": 6.5563,
"step": 5900
},
{
"epoch": 0.27382445321398574,
"grad_norm": 2.1302218437194824,
"learning_rate": 9.947999678023103e-05,
"loss": 6.5424,
"step": 5950
},
{
"epoch": 0.2761254990393134,
"grad_norm": 1.78187894821167,
"learning_rate": 9.946944875357174e-05,
"loss": 6.61,
"step": 6000
},
{
"epoch": 0.2761254990393134,
"eval_loss": 6.425212860107422,
"eval_runtime": 35.036,
"eval_samples_per_second": 10.96,
"eval_steps_per_second": 5.48,
"eval_tts_loss": 7.612353266970973,
"step": 6000
},
{
"epoch": 0.27842654486464097,
"grad_norm": 1.921924352645874,
"learning_rate": 9.945879538795309e-05,
"loss": 6.5369,
"step": 6050
},
{
"epoch": 0.2807275906899686,
"grad_norm": 1.6859031915664673,
"learning_rate": 9.944803670606004e-05,
"loss": 6.5272,
"step": 6100
},
{
"epoch": 0.2830286365152962,
"grad_norm": 1.9181588888168335,
"learning_rate": 9.943717273080186e-05,
"loss": 6.527,
"step": 6150
},
{
"epoch": 0.28532968234062384,
"grad_norm": 1.921380877494812,
"learning_rate": 9.942620348531201e-05,
"loss": 6.5185,
"step": 6200
},
{
"epoch": 0.2876307281659514,
"grad_norm": 2.4337542057037354,
"learning_rate": 9.941512899294813e-05,
"loss": 6.5091,
"step": 6250
},
{
"epoch": 0.28993177399127906,
"grad_norm": 1.9452122449874878,
"learning_rate": 9.940394927729194e-05,
"loss": 6.5382,
"step": 6300
},
{
"epoch": 0.29223281981660665,
"grad_norm": 1.84445321559906,
"learning_rate": 9.939266436214925e-05,
"loss": 6.5412,
"step": 6350
},
{
"epoch": 0.29453386564193423,
"grad_norm": 2.0815675258636475,
"learning_rate": 9.938127427154986e-05,
"loss": 6.5187,
"step": 6400
},
{
"epoch": 0.2968349114672619,
"grad_norm": 1.921200156211853,
"learning_rate": 9.936977902974753e-05,
"loss": 6.5182,
"step": 6450
},
{
"epoch": 0.29913595729258946,
"grad_norm": 2.276503086090088,
"learning_rate": 9.935841169867299e-05,
"loss": 6.4457,
"step": 6500
},
{
"epoch": 0.3014370031179171,
"grad_norm": 2.267094612121582,
"learning_rate": 9.934670832991861e-05,
"loss": 6.5038,
"step": 6550
},
{
"epoch": 0.3037380489432447,
"grad_norm": 1.982542872428894,
"learning_rate": 9.93348998835651e-05,
"loss": 6.572,
"step": 6600
},
{
"epoch": 0.30603909476857233,
"grad_norm": 2.0786359310150146,
"learning_rate": 9.932298638475707e-05,
"loss": 6.54,
"step": 6650
},
{
"epoch": 0.3083401405938999,
"grad_norm": 2.178440809249878,
"learning_rate": 9.931096785886282e-05,
"loss": 6.4728,
"step": 6700
},
{
"epoch": 0.31064118641922756,
"grad_norm": 1.7472290992736816,
"learning_rate": 9.92988443314743e-05,
"loss": 6.4496,
"step": 6750
},
{
"epoch": 0.31294223224455514,
"grad_norm": 2.0170915126800537,
"learning_rate": 9.9286615828407e-05,
"loss": 6.5349,
"step": 6800
},
{
"epoch": 0.3152432780698828,
"grad_norm": 1.9643365144729614,
"learning_rate": 9.927428237569998e-05,
"loss": 6.472,
"step": 6850
},
{
"epoch": 0.31754432389521037,
"grad_norm": 1.955383539199829,
"learning_rate": 9.92618439996158e-05,
"loss": 6.5057,
"step": 6900
},
{
"epoch": 0.319845369720538,
"grad_norm": 1.6880296468734741,
"learning_rate": 9.92493007266404e-05,
"loss": 6.4687,
"step": 6950
},
{
"epoch": 0.3221464155458656,
"grad_norm": 1.9719414710998535,
"learning_rate": 9.923665258348311e-05,
"loss": 6.4366,
"step": 7000
},
{
"epoch": 0.3221464155458656,
"eval_loss": 6.379239559173584,
"eval_runtime": 33.7763,
"eval_samples_per_second": 11.369,
"eval_steps_per_second": 5.684,
"eval_tts_loss": 7.613140820339867,
"step": 7000
},
{
"epoch": 0.3244474613711932,
"grad_norm": 2.014941930770874,
"learning_rate": 9.922389959707654e-05,
"loss": 6.4835,
"step": 7050
},
{
"epoch": 0.3267485071965208,
"grad_norm": 1.9350296258926392,
"learning_rate": 9.921104179457658e-05,
"loss": 6.4587,
"step": 7100
},
{
"epoch": 0.3290495530218484,
"grad_norm": 1.7552827596664429,
"learning_rate": 9.91980792033623e-05,
"loss": 6.4796,
"step": 7150
},
{
"epoch": 0.33135059884717605,
"grad_norm": 1.9260075092315674,
"learning_rate": 9.91850118510359e-05,
"loss": 6.4548,
"step": 7200
},
{
"epoch": 0.33365164467250363,
"grad_norm": 1.632943868637085,
"learning_rate": 9.917183976542268e-05,
"loss": 6.4872,
"step": 7250
},
{
"epoch": 0.3359526904978313,
"grad_norm": 1.9595330953598022,
"learning_rate": 9.915856297457091e-05,
"loss": 6.4717,
"step": 7300
},
{
"epoch": 0.33825373632315886,
"grad_norm": 1.787189245223999,
"learning_rate": 9.914518150675185e-05,
"loss": 6.4497,
"step": 7350
},
{
"epoch": 0.3405547821484865,
"grad_norm": 1.894849419593811,
"learning_rate": 9.913169539045966e-05,
"loss": 6.4711,
"step": 7400
},
{
"epoch": 0.3428558279738141,
"grad_norm": 1.8900083303451538,
"learning_rate": 9.911810465441131e-05,
"loss": 6.4507,
"step": 7450
},
{
"epoch": 0.34515687379914173,
"grad_norm": 2.1415016651153564,
"learning_rate": 9.910440932754658e-05,
"loss": 6.4268,
"step": 7500
},
{
"epoch": 0.3474579196244693,
"grad_norm": 2.0673065185546875,
"learning_rate": 9.909060943902793e-05,
"loss": 6.3687,
"step": 7550
},
{
"epoch": 0.34975896544979695,
"grad_norm": 1.9016964435577393,
"learning_rate": 9.90767050182405e-05,
"loss": 6.4239,
"step": 7600
},
{
"epoch": 0.35206001127512454,
"grad_norm": 1.8827308416366577,
"learning_rate": 9.906269609479198e-05,
"loss": 6.4469,
"step": 7650
},
{
"epoch": 0.3543610571004522,
"grad_norm": 1.8428524732589722,
"learning_rate": 9.904858269851261e-05,
"loss": 6.5031,
"step": 7700
},
{
"epoch": 0.35666210292577977,
"grad_norm": 2.421943426132202,
"learning_rate": 9.903436485945512e-05,
"loss": 6.4074,
"step": 7750
},
{
"epoch": 0.35896314875110735,
"grad_norm": 1.8993301391601562,
"learning_rate": 9.902004260789457e-05,
"loss": 6.4325,
"step": 7800
},
{
"epoch": 0.361264194576435,
"grad_norm": 1.8504090309143066,
"learning_rate": 9.90056159743284e-05,
"loss": 6.4851,
"step": 7850
},
{
"epoch": 0.3635652404017626,
"grad_norm": 2.0040860176086426,
"learning_rate": 9.899108498947634e-05,
"loss": 6.5079,
"step": 7900
},
{
"epoch": 0.3658662862270902,
"grad_norm": 1.930400013923645,
"learning_rate": 9.897644968428025e-05,
"loss": 6.4366,
"step": 7950
},
{
"epoch": 0.3681673320524178,
"grad_norm": 2.233633279800415,
"learning_rate": 9.896171008990418e-05,
"loss": 6.3755,
"step": 8000
},
{
"epoch": 0.3681673320524178,
"eval_loss": 6.341970920562744,
"eval_runtime": 33.2129,
"eval_samples_per_second": 11.562,
"eval_steps_per_second": 5.781,
"eval_tts_loss": 7.688191944505792,
"step": 8000
},
{
"epoch": 0.37046837787774545,
"grad_norm": 2.337127447128296,
"learning_rate": 9.894686623773426e-05,
"loss": 6.4258,
"step": 8050
},
{
"epoch": 0.37276942370307303,
"grad_norm": 1.9181020259857178,
"learning_rate": 9.893191815937857e-05,
"loss": 6.4511,
"step": 8100
},
{
"epoch": 0.3750704695284007,
"grad_norm": 1.8250304460525513,
"learning_rate": 9.89168658866672e-05,
"loss": 6.471,
"step": 8150
},
{
"epoch": 0.37737151535372826,
"grad_norm": 1.7581539154052734,
"learning_rate": 9.890170945165203e-05,
"loss": 6.3565,
"step": 8200
},
{
"epoch": 0.3796725611790559,
"grad_norm": 1.6649830341339111,
"learning_rate": 9.88864488866068e-05,
"loss": 6.4983,
"step": 8250
},
{
"epoch": 0.3819736070043835,
"grad_norm": 1.8759455680847168,
"learning_rate": 9.887108422402696e-05,
"loss": 6.384,
"step": 8300
},
{
"epoch": 0.3842746528297111,
"grad_norm": 2.1728053092956543,
"learning_rate": 9.88556154966296e-05,
"loss": 6.4939,
"step": 8350
},
{
"epoch": 0.3865756986550387,
"grad_norm": 2.9008238315582275,
"learning_rate": 9.884004273735347e-05,
"loss": 6.375,
"step": 8400
},
{
"epoch": 0.38887674448036635,
"grad_norm": 1.6754823923110962,
"learning_rate": 9.882436597935877e-05,
"loss": 6.3691,
"step": 8450
},
{
"epoch": 0.39117779030569394,
"grad_norm": 1.9855177402496338,
"learning_rate": 9.880858525602719e-05,
"loss": 6.3651,
"step": 8500
},
{
"epoch": 0.3934788361310215,
"grad_norm": 1.8004143238067627,
"learning_rate": 9.879270060096178e-05,
"loss": 6.446,
"step": 8550
},
{
"epoch": 0.39577988195634917,
"grad_norm": 2.030467987060547,
"learning_rate": 9.877671204798693e-05,
"loss": 6.4362,
"step": 8600
},
{
"epoch": 0.39808092778167675,
"grad_norm": 1.5885698795318604,
"learning_rate": 9.876061963114823e-05,
"loss": 6.3598,
"step": 8650
},
{
"epoch": 0.4003819736070044,
"grad_norm": 1.9746173620224,
"learning_rate": 9.874442338471246e-05,
"loss": 6.4072,
"step": 8700
},
{
"epoch": 0.402683019432332,
"grad_norm": 1.8697595596313477,
"learning_rate": 9.872845036096635e-05,
"loss": 6.3998,
"step": 8750
},
{
"epoch": 0.4049840652576596,
"grad_norm": 1.9580832719802856,
"learning_rate": 9.871204863388751e-05,
"loss": 6.4055,
"step": 8800
},
{
"epoch": 0.4072851110829872,
"grad_norm": 2.037536859512329,
"learning_rate": 9.869554318063743e-05,
"loss": 6.3919,
"step": 8850
},
{
"epoch": 0.40958615690831485,
"grad_norm": 2.1049587726593018,
"learning_rate": 9.867893403636237e-05,
"loss": 6.437,
"step": 8900
},
{
"epoch": 0.41188720273364243,
"grad_norm": 1.8628839254379272,
"learning_rate": 9.866222123642946e-05,
"loss": 6.4381,
"step": 8950
},
{
"epoch": 0.4141882485589701,
"grad_norm": 1.5722795724868774,
"learning_rate": 9.864540481642644e-05,
"loss": 6.3435,
"step": 9000
},
{
"epoch": 0.4141882485589701,
"eval_loss": 6.309738636016846,
"eval_runtime": 33.7952,
"eval_samples_per_second": 11.363,
"eval_steps_per_second": 5.681,
"eval_tts_loss": 7.738714543004772,
"step": 9000
},
{
"epoch": 0.41648929438429766,
"grad_norm": 1.810716152191162,
"learning_rate": 9.86284848121618e-05,
"loss": 6.4626,
"step": 9050
},
{
"epoch": 0.4187903402096253,
"grad_norm": 1.6294385194778442,
"learning_rate": 9.861146125966455e-05,
"loss": 6.3863,
"step": 9100
},
{
"epoch": 0.4210913860349529,
"grad_norm": 1.7128016948699951,
"learning_rate": 9.85943341951842e-05,
"loss": 6.4224,
"step": 9150
},
{
"epoch": 0.42339243186028047,
"grad_norm": 1.9421542882919312,
"learning_rate": 9.857710365519066e-05,
"loss": 6.4133,
"step": 9200
},
{
"epoch": 0.4256934776856081,
"grad_norm": 1.792728066444397,
"learning_rate": 9.855976967637422e-05,
"loss": 6.3259,
"step": 9250
},
{
"epoch": 0.4279945235109357,
"grad_norm": 1.9310187101364136,
"learning_rate": 9.85423322956454e-05,
"loss": 6.3347,
"step": 9300
},
{
"epoch": 0.43029556933626334,
"grad_norm": 2.0094540119171143,
"learning_rate": 9.85247915501349e-05,
"loss": 6.3609,
"step": 9350
},
{
"epoch": 0.4325966151615909,
"grad_norm": 1.99038827419281,
"learning_rate": 9.85071474771935e-05,
"loss": 6.3763,
"step": 9400
},
{
"epoch": 0.43489766098691857,
"grad_norm": 1.6113660335540771,
"learning_rate": 9.848940011439207e-05,
"loss": 6.4504,
"step": 9450
},
{
"epoch": 0.43719870681224615,
"grad_norm": 1.7859371900558472,
"learning_rate": 9.847154949952138e-05,
"loss": 6.4023,
"step": 9500
},
{
"epoch": 0.4394997526375738,
"grad_norm": 2.0613961219787598,
"learning_rate": 9.845359567059205e-05,
"loss": 6.4021,
"step": 9550
},
{
"epoch": 0.4418007984629014,
"grad_norm": 1.7589733600616455,
"learning_rate": 9.84355386658345e-05,
"loss": 6.3691,
"step": 9600
},
{
"epoch": 0.444101844288229,
"grad_norm": 1.9880051612854004,
"learning_rate": 9.841737852369884e-05,
"loss": 6.3675,
"step": 9650
},
{
"epoch": 0.4464028901135566,
"grad_norm": 2.1406807899475098,
"learning_rate": 9.839911528285484e-05,
"loss": 6.4018,
"step": 9700
},
{
"epoch": 0.44870393593888425,
"grad_norm": 1.5996681451797485,
"learning_rate": 9.838074898219171e-05,
"loss": 6.3984,
"step": 9750
},
{
"epoch": 0.45100498176421183,
"grad_norm": 2.1226611137390137,
"learning_rate": 9.836227966081823e-05,
"loss": 6.407,
"step": 9800
},
{
"epoch": 0.4533060275895395,
"grad_norm": 1.7142243385314941,
"learning_rate": 9.834370735806248e-05,
"loss": 6.3776,
"step": 9850
},
{
"epoch": 0.45560707341486706,
"grad_norm": 1.8948795795440674,
"learning_rate": 9.832503211347183e-05,
"loss": 6.2953,
"step": 9900
},
{
"epoch": 0.45790811924019464,
"grad_norm": 1.875388264656067,
"learning_rate": 9.830625396681286e-05,
"loss": 6.2976,
"step": 9950
},
{
"epoch": 0.4602091650655223,
"grad_norm": 1.710487723350525,
"learning_rate": 9.828737295807126e-05,
"loss": 6.3461,
"step": 10000
},
{
"epoch": 0.4602091650655223,
"eval_loss": 6.289060115814209,
"eval_runtime": 34.3884,
"eval_samples_per_second": 11.167,
"eval_steps_per_second": 5.583,
"eval_tts_loss": 7.814920344420079,
"step": 10000
},
{
"epoch": 0.46251021089084987,
"grad_norm": 1.7555044889450073,
"learning_rate": 9.826838912745178e-05,
"loss": 6.4022,
"step": 10050
},
{
"epoch": 0.4648112567161775,
"grad_norm": 1.9916635751724243,
"learning_rate": 9.824930251537808e-05,
"loss": 6.4684,
"step": 10100
},
{
"epoch": 0.4671123025415051,
"grad_norm": 1.9527746438980103,
"learning_rate": 9.823011316249271e-05,
"loss": 6.3466,
"step": 10150
},
{
"epoch": 0.46941334836683274,
"grad_norm": 1.668678879737854,
"learning_rate": 9.821082110965696e-05,
"loss": 6.3642,
"step": 10200
},
{
"epoch": 0.4717143941921603,
"grad_norm": 1.691568374633789,
"learning_rate": 9.819142639795085e-05,
"loss": 6.3586,
"step": 10250
},
{
"epoch": 0.47401544001748797,
"grad_norm": 1.8639135360717773,
"learning_rate": 9.817192906867295e-05,
"loss": 6.314,
"step": 10300
},
{
"epoch": 0.47631648584281555,
"grad_norm": 1.7101272344589233,
"learning_rate": 9.815232916334039e-05,
"loss": 6.3314,
"step": 10350
},
{
"epoch": 0.4786175316681432,
"grad_norm": 1.6885664463043213,
"learning_rate": 9.813262672368868e-05,
"loss": 6.4618,
"step": 10400
},
{
"epoch": 0.4809185774934708,
"grad_norm": 1.7570464611053467,
"learning_rate": 9.81128217916717e-05,
"loss": 6.3258,
"step": 10450
},
{
"epoch": 0.4832196233187984,
"grad_norm": 1.5871278047561646,
"learning_rate": 9.809291440946154e-05,
"loss": 6.3315,
"step": 10500
},
{
"epoch": 0.485520669144126,
"grad_norm": 1.6280348300933838,
"learning_rate": 9.807290461944849e-05,
"loss": 6.3455,
"step": 10550
},
{
"epoch": 0.4878217149694536,
"grad_norm": 1.7376903295516968,
"learning_rate": 9.805279246424085e-05,
"loss": 6.3455,
"step": 10600
},
{
"epoch": 0.49012276079478123,
"grad_norm": 1.9705889225006104,
"learning_rate": 9.803257798666493e-05,
"loss": 6.2758,
"step": 10650
},
{
"epoch": 0.4924238066201088,
"grad_norm": 1.764738917350769,
"learning_rate": 9.801226122976492e-05,
"loss": 6.2885,
"step": 10700
},
{
"epoch": 0.49472485244543646,
"grad_norm": 1.740444302558899,
"learning_rate": 9.799184223680278e-05,
"loss": 6.3493,
"step": 10750
},
{
"epoch": 0.49702589827076404,
"grad_norm": 1.464941143989563,
"learning_rate": 9.79713210512582e-05,
"loss": 6.3239,
"step": 10800
},
{
"epoch": 0.4993269440960917,
"grad_norm": 1.7715671062469482,
"learning_rate": 9.795069771682845e-05,
"loss": 6.3002,
"step": 10850
},
{
"epoch": 0.5016279899214193,
"grad_norm": 1.8907268047332764,
"learning_rate": 9.792997227742833e-05,
"loss": 6.2998,
"step": 10900
},
{
"epoch": 0.5039290357467469,
"grad_norm": 1.9487262964248657,
"learning_rate": 9.790914477719005e-05,
"loss": 6.358,
"step": 10950
},
{
"epoch": 0.5062300815720745,
"grad_norm": 1.8834967613220215,
"learning_rate": 9.788821526046312e-05,
"loss": 6.3576,
"step": 11000
},
{
"epoch": 0.5062300815720745,
"eval_loss": 6.265079498291016,
"eval_runtime": 34.881,
"eval_samples_per_second": 11.009,
"eval_steps_per_second": 5.504,
"eval_tts_loss": 7.740538769862218,
"step": 11000
},
{
"epoch": 0.5085311273974021,
"grad_norm": 1.8796509504318237,
"learning_rate": 9.786718377181437e-05,
"loss": 6.3623,
"step": 11050
},
{
"epoch": 0.5108321732227298,
"grad_norm": 1.6013100147247314,
"learning_rate": 9.784605035602764e-05,
"loss": 6.3532,
"step": 11100
},
{
"epoch": 0.5131332190480573,
"grad_norm": 1.9460678100585938,
"learning_rate": 9.782481505810393e-05,
"loss": 6.35,
"step": 11150
},
{
"epoch": 0.515434264873385,
"grad_norm": 1.8577035665512085,
"learning_rate": 9.780347792326108e-05,
"loss": 6.4018,
"step": 11200
},
{
"epoch": 0.5177353106987126,
"grad_norm": 2.1580724716186523,
"learning_rate": 9.778203899693388e-05,
"loss": 6.3821,
"step": 11250
},
{
"epoch": 0.5200363565240401,
"grad_norm": 1.9228373765945435,
"learning_rate": 9.77604983247738e-05,
"loss": 6.3247,
"step": 11300
},
{
"epoch": 0.5223374023493678,
"grad_norm": 1.6796767711639404,
"learning_rate": 9.7738855952649e-05,
"loss": 6.3384,
"step": 11350
},
{
"epoch": 0.5246384481746954,
"grad_norm": 2.2095754146575928,
"learning_rate": 9.771711192664417e-05,
"loss": 6.3121,
"step": 11400
},
{
"epoch": 0.526939494000023,
"grad_norm": 1.9185891151428223,
"learning_rate": 9.769526629306046e-05,
"loss": 6.2757,
"step": 11450
},
{
"epoch": 0.5292405398253506,
"grad_norm": 1.7003620862960815,
"learning_rate": 9.767331909841544e-05,
"loss": 6.3201,
"step": 11500
},
{
"epoch": 0.5315415856506782,
"grad_norm": 1.6581804752349854,
"learning_rate": 9.765127038944285e-05,
"loss": 6.3326,
"step": 11550
},
{
"epoch": 0.5338426314760059,
"grad_norm": 1.8560444116592407,
"learning_rate": 9.762912021309268e-05,
"loss": 6.3576,
"step": 11600
},
{
"epoch": 0.5361436773013335,
"grad_norm": 1.8938666582107544,
"learning_rate": 9.76068686165309e-05,
"loss": 6.3133,
"step": 11650
},
{
"epoch": 0.538444723126661,
"grad_norm": 1.8633164167404175,
"learning_rate": 9.758451564713951e-05,
"loss": 6.3787,
"step": 11700
},
{
"epoch": 0.5407457689519887,
"grad_norm": 1.7775530815124512,
"learning_rate": 9.756206135251633e-05,
"loss": 6.3219,
"step": 11750
},
{
"epoch": 0.5430468147773163,
"grad_norm": 1.8082398176193237,
"learning_rate": 9.75399578841242e-05,
"loss": 6.2549,
"step": 11800
},
{
"epoch": 0.545347860602644,
"grad_norm": 1.79505455493927,
"learning_rate": 9.751730310680957e-05,
"loss": 6.3603,
"step": 11850
},
{
"epoch": 0.5476489064279715,
"grad_norm": 1.7274733781814575,
"learning_rate": 9.749454714738381e-05,
"loss": 6.3104,
"step": 11900
},
{
"epoch": 0.5499499522532991,
"grad_norm": 1.908056378364563,
"learning_rate": 9.747169005430285e-05,
"loss": 6.3052,
"step": 11950
},
{
"epoch": 0.5522509980786268,
"grad_norm": 1.8075348138809204,
"learning_rate": 9.744873187623798e-05,
"loss": 6.2878,
"step": 12000
},
{
"epoch": 0.5522509980786268,
"eval_loss": 6.244420528411865,
"eval_runtime": 33.9046,
"eval_samples_per_second": 11.326,
"eval_steps_per_second": 5.663,
"eval_tts_loss": 7.759218772150092,
"step": 12000
},
{
"epoch": 0.5545520439039543,
"grad_norm": 1.722110629081726,
"learning_rate": 9.742567266207576e-05,
"loss": 6.3152,
"step": 12050
},
{
"epoch": 0.5568530897292819,
"grad_norm": 1.9611711502075195,
"learning_rate": 9.74025124609179e-05,
"loss": 6.2803,
"step": 12100
},
{
"epoch": 0.5591541355546096,
"grad_norm": 2.127253532409668,
"learning_rate": 9.737925132208108e-05,
"loss": 6.3487,
"step": 12150
},
{
"epoch": 0.5614551813799372,
"grad_norm": 9.285630226135254,
"learning_rate": 9.735588929509701e-05,
"loss": 6.2823,
"step": 12200
},
{
"epoch": 0.5637562272052647,
"grad_norm": 2.125577688217163,
"learning_rate": 9.733242642971218e-05,
"loss": 6.2491,
"step": 12250
},
{
"epoch": 0.5660572730305924,
"grad_norm": 1.9916199445724487,
"learning_rate": 9.730886277588777e-05,
"loss": 6.3205,
"step": 12300
},
{
"epoch": 0.56835831885592,
"grad_norm": 1.5257749557495117,
"learning_rate": 9.728519838379966e-05,
"loss": 6.3426,
"step": 12350
},
{
"epoch": 0.5706593646812477,
"grad_norm": 1.864864706993103,
"learning_rate": 9.726143330383818e-05,
"loss": 6.3545,
"step": 12400
},
{
"epoch": 0.5729604105065752,
"grad_norm": 1.7556637525558472,
"learning_rate": 9.723756758660805e-05,
"loss": 6.3174,
"step": 12450
},
{
"epoch": 0.5752614563319028,
"grad_norm": 1.8196032047271729,
"learning_rate": 9.721360128292832e-05,
"loss": 6.2831,
"step": 12500
},
{
"epoch": 0.5775625021572305,
"grad_norm": 1.5368512868881226,
"learning_rate": 9.718953444383223e-05,
"loss": 6.2929,
"step": 12550
},
{
"epoch": 0.5798635479825581,
"grad_norm": 1.7474430799484253,
"learning_rate": 9.716536712056707e-05,
"loss": 6.2911,
"step": 12600
},
{
"epoch": 0.5821645938078857,
"grad_norm": 1.7803459167480469,
"learning_rate": 9.71410993645941e-05,
"loss": 6.2586,
"step": 12650
},
{
"epoch": 0.5844656396332133,
"grad_norm": 1.7328126430511475,
"learning_rate": 9.711673122758846e-05,
"loss": 6.2789,
"step": 12700
},
{
"epoch": 0.5867666854585409,
"grad_norm": 2.044720411300659,
"learning_rate": 9.709226276143903e-05,
"loss": 6.3616,
"step": 12750
},
{
"epoch": 0.5890677312838685,
"grad_norm": 1.506191372871399,
"learning_rate": 9.706769401824832e-05,
"loss": 6.3557,
"step": 12800
},
{
"epoch": 0.5913687771091961,
"grad_norm": 1.9317868947982788,
"learning_rate": 9.704302505033239e-05,
"loss": 6.2725,
"step": 12850
},
{
"epoch": 0.5936698229345237,
"grad_norm": 1.6422539949417114,
"learning_rate": 9.701825591022067e-05,
"loss": 6.2711,
"step": 12900
},
{
"epoch": 0.5959708687598514,
"grad_norm": 1.9548308849334717,
"learning_rate": 9.699338665065595e-05,
"loss": 6.2829,
"step": 12950
},
{
"epoch": 0.5982719145851789,
"grad_norm": 1.6549190282821655,
"learning_rate": 9.696841732459418e-05,
"loss": 6.3148,
"step": 13000
},
{
"epoch": 0.5982719145851789,
"eval_loss": 6.220914840698242,
"eval_runtime": 34.7579,
"eval_samples_per_second": 11.048,
"eval_steps_per_second": 5.524,
"eval_tts_loss": 7.842876913280491,
"step": 13000
},
{
"epoch": 0.6005729604105066,
"grad_norm": 1.860897421836853,
"learning_rate": 9.694334798520438e-05,
"loss": 6.3158,
"step": 13050
},
{
"epoch": 0.6028740062358342,
"grad_norm": 1.664921522140503,
"learning_rate": 9.691817868586856e-05,
"loss": 6.3374,
"step": 13100
},
{
"epoch": 0.6051750520611618,
"grad_norm": 1.67441987991333,
"learning_rate": 9.689290948018159e-05,
"loss": 6.2999,
"step": 13150
},
{
"epoch": 0.6074760978864894,
"grad_norm": 1.8464356660842896,
"learning_rate": 9.686754042195102e-05,
"loss": 6.3205,
"step": 13200
},
{
"epoch": 0.609777143711817,
"grad_norm": 1.770418405532837,
"learning_rate": 9.68420715651971e-05,
"loss": 6.3312,
"step": 13250
},
{
"epoch": 0.6120781895371447,
"grad_norm": 1.8769677877426147,
"learning_rate": 9.681650296415256e-05,
"loss": 6.3142,
"step": 13300
},
{
"epoch": 0.6143792353624723,
"grad_norm": 1.7331925630569458,
"learning_rate": 9.679083467326247e-05,
"loss": 6.2706,
"step": 13350
},
{
"epoch": 0.6166802811877998,
"grad_norm": 2.1047513484954834,
"learning_rate": 9.676506674718426e-05,
"loss": 6.2648,
"step": 13400
},
{
"epoch": 0.6189813270131275,
"grad_norm": 2.162065267562866,
"learning_rate": 9.673919924078745e-05,
"loss": 6.2775,
"step": 13450
},
{
"epoch": 0.6212823728384551,
"grad_norm": 1.890669345855713,
"learning_rate": 9.671323220915367e-05,
"loss": 6.3046,
"step": 13500
},
{
"epoch": 0.6235834186637826,
"grad_norm": 1.7662500143051147,
"learning_rate": 9.668716570757644e-05,
"loss": 6.336,
"step": 13550
},
{
"epoch": 0.6258844644891103,
"grad_norm": 1.6510889530181885,
"learning_rate": 9.666099979156106e-05,
"loss": 6.3583,
"step": 13600
},
{
"epoch": 0.6281855103144379,
"grad_norm": 1.7694858312606812,
"learning_rate": 9.663473451682457e-05,
"loss": 6.2774,
"step": 13650
},
{
"epoch": 0.6304865561397656,
"grad_norm": 1.576418161392212,
"learning_rate": 9.660836993929557e-05,
"loss": 6.2338,
"step": 13700
},
{
"epoch": 0.6327876019650931,
"grad_norm": 2.029823064804077,
"learning_rate": 9.658190611511411e-05,
"loss": 6.3032,
"step": 13750
},
{
"epoch": 0.6350886477904207,
"grad_norm": 1.8009357452392578,
"learning_rate": 9.655534310063155e-05,
"loss": 6.2281,
"step": 13800
},
{
"epoch": 0.6373896936157484,
"grad_norm": 1.6074466705322266,
"learning_rate": 9.65286809524105e-05,
"loss": 6.3638,
"step": 13850
},
{
"epoch": 0.639690739441076,
"grad_norm": 1.4809355735778809,
"learning_rate": 9.650191972722464e-05,
"loss": 6.3167,
"step": 13900
},
{
"epoch": 0.6419917852664035,
"grad_norm": 1.7859705686569214,
"learning_rate": 9.647505948205864e-05,
"loss": 6.1846,
"step": 13950
},
{
"epoch": 0.6442928310917312,
"grad_norm": 1.7351763248443604,
"learning_rate": 9.644810027410798e-05,
"loss": 6.2017,
"step": 14000
},
{
"epoch": 0.6442928310917312,
"eval_loss": 6.187459468841553,
"eval_runtime": 35.2876,
"eval_samples_per_second": 10.882,
"eval_steps_per_second": 5.441,
"eval_tts_loss": 7.89450947834087,
"step": 14000
},
{
"epoch": 0.6465938769170588,
"grad_norm": 1.6009070873260498,
"learning_rate": 9.642104216077894e-05,
"loss": 6.27,
"step": 14050
},
{
"epoch": 0.6488949227423864,
"grad_norm": 1.9434318542480469,
"learning_rate": 9.639388519968831e-05,
"loss": 6.246,
"step": 14100
},
{
"epoch": 0.651195968567714,
"grad_norm": 1.9468326568603516,
"learning_rate": 9.636662944866346e-05,
"loss": 6.277,
"step": 14150
},
{
"epoch": 0.6534970143930416,
"grad_norm": 1.8342995643615723,
"learning_rate": 9.633927496574207e-05,
"loss": 6.2374,
"step": 14200
},
{
"epoch": 0.6557980602183693,
"grad_norm": 1.743561029434204,
"learning_rate": 9.631182180917204e-05,
"loss": 6.2423,
"step": 14250
},
{
"epoch": 0.6580991060436968,
"grad_norm": 1.6310720443725586,
"learning_rate": 9.628427003741145e-05,
"loss": 6.2321,
"step": 14300
},
{
"epoch": 0.6604001518690245,
"grad_norm": 1.880783200263977,
"learning_rate": 9.625661970912829e-05,
"loss": 6.1948,
"step": 14350
},
{
"epoch": 0.6627011976943521,
"grad_norm": 1.8415277004241943,
"learning_rate": 9.622887088320049e-05,
"loss": 6.2053,
"step": 14400
},
{
"epoch": 0.6650022435196797,
"grad_norm": 2.1539406776428223,
"learning_rate": 9.620102361871564e-05,
"loss": 6.2076,
"step": 14450
},
{
"epoch": 0.6673032893450073,
"grad_norm": 1.9872362613677979,
"learning_rate": 9.617307797497099e-05,
"loss": 6.2048,
"step": 14500
},
{
"epoch": 0.6696043351703349,
"grad_norm": 2.019122838973999,
"learning_rate": 9.614503401147328e-05,
"loss": 6.2539,
"step": 14550
},
{
"epoch": 0.6719053809956625,
"grad_norm": 1.6095964908599854,
"learning_rate": 9.61168917879386e-05,
"loss": 6.3102,
"step": 14600
},
{
"epoch": 0.6742064268209902,
"grad_norm": 1.7596853971481323,
"learning_rate": 9.608865136429226e-05,
"loss": 6.2507,
"step": 14650
},
{
"epoch": 0.6765074726463177,
"grad_norm": 1.6878117322921753,
"learning_rate": 9.60603128006687e-05,
"loss": 6.2378,
"step": 14700
},
{
"epoch": 0.6788085184716454,
"grad_norm": 1.8443124294281006,
"learning_rate": 9.603187615741129e-05,
"loss": 6.2655,
"step": 14750
},
{
"epoch": 0.681109564296973,
"grad_norm": 1.8682785034179688,
"learning_rate": 9.60033414950723e-05,
"loss": 6.1037,
"step": 14800
},
{
"epoch": 0.6834106101223005,
"grad_norm": 1.932824730873108,
"learning_rate": 9.597470887441271e-05,
"loss": 6.208,
"step": 14850
},
{
"epoch": 0.6857116559476282,
"grad_norm": 1.65823495388031,
"learning_rate": 9.594597835640207e-05,
"loss": 6.2669,
"step": 14900
},
{
"epoch": 0.6880127017729558,
"grad_norm": 1.7727546691894531,
"learning_rate": 9.591715000221838e-05,
"loss": 6.1912,
"step": 14950
},
{
"epoch": 0.6903137475982835,
"grad_norm": 1.9301639795303345,
"learning_rate": 9.588822387324801e-05,
"loss": 6.2252,
"step": 15000
},
{
"epoch": 0.6903137475982835,
"eval_loss": 6.159511566162109,
"eval_runtime": 32.3151,
"eval_samples_per_second": 11.883,
"eval_steps_per_second": 5.941,
"eval_tts_loss": 7.847364975218921,
"step": 15000
},
{
"epoch": 0.692614793423611,
"grad_norm": 1.5824342966079712,
"learning_rate": 9.58592000310855e-05,
"loss": 6.2213,
"step": 15050
},
{
"epoch": 0.6949158392489386,
"grad_norm": 1.6961408853530884,
"learning_rate": 9.583007853753347e-05,
"loss": 6.2481,
"step": 15100
},
{
"epoch": 0.6972168850742663,
"grad_norm": 1.7308921813964844,
"learning_rate": 9.580085945460248e-05,
"loss": 6.2166,
"step": 15150
},
{
"epoch": 0.6995179308995939,
"grad_norm": 1.796118974685669,
"learning_rate": 9.577154284451087e-05,
"loss": 6.2793,
"step": 15200
},
{
"epoch": 0.7018189767249214,
"grad_norm": 1.796897053718567,
"learning_rate": 9.574212876968467e-05,
"loss": 6.1633,
"step": 15250
},
{
"epoch": 0.7041200225502491,
"grad_norm": 1.9501514434814453,
"learning_rate": 9.571261729275745e-05,
"loss": 6.2368,
"step": 15300
},
{
"epoch": 0.7064210683755767,
"grad_norm": 2.005586862564087,
"learning_rate": 9.568300847657019e-05,
"loss": 6.1743,
"step": 15350
},
{
"epoch": 0.7087221142009044,
"grad_norm": 1.8299839496612549,
"learning_rate": 9.565330238417112e-05,
"loss": 6.1763,
"step": 15400
},
{
"epoch": 0.7110231600262319,
"grad_norm": 1.7406200170516968,
"learning_rate": 9.562349907881563e-05,
"loss": 6.2527,
"step": 15450
},
{
"epoch": 0.7133242058515595,
"grad_norm": 1.6860395669937134,
"learning_rate": 9.55935986239661e-05,
"loss": 6.2061,
"step": 15500
},
{
"epoch": 0.7156252516768872,
"grad_norm": 1.9635874032974243,
"learning_rate": 9.55636010832918e-05,
"loss": 6.1784,
"step": 15550
},
{
"epoch": 0.7179262975022147,
"grad_norm": 1.8998063802719116,
"learning_rate": 9.55335065206687e-05,
"loss": 6.2378,
"step": 15600
},
{
"epoch": 0.7202273433275423,
"grad_norm": 1.7541154623031616,
"learning_rate": 9.550331500017937e-05,
"loss": 6.238,
"step": 15650
},
{
"epoch": 0.72252838915287,
"grad_norm": 1.7203463315963745,
"learning_rate": 9.547302658611287e-05,
"loss": 6.1993,
"step": 15700
},
{
"epoch": 0.7248294349781976,
"grad_norm": 1.8435782194137573,
"learning_rate": 9.544264134296455e-05,
"loss": 6.2433,
"step": 15750
},
{
"epoch": 0.7271304808035252,
"grad_norm": 1.6835908889770508,
"learning_rate": 9.541215933543595e-05,
"loss": 6.219,
"step": 15800
},
{
"epoch": 0.7294315266288528,
"grad_norm": 1.8939025402069092,
"learning_rate": 9.538219314980872e-05,
"loss": 6.2096,
"step": 15850
},
{
"epoch": 0.7317325724541804,
"grad_norm": 1.8113733530044556,
"learning_rate": 9.535151974049603e-05,
"loss": 6.214,
"step": 15900
},
{
"epoch": 0.7340336182795081,
"grad_norm": 1.863580584526062,
"learning_rate": 9.532074976083503e-05,
"loss": 6.2043,
"step": 15950
},
{
"epoch": 0.7363346641048356,
"grad_norm": 1.762188196182251,
"learning_rate": 9.528988327634654e-05,
"loss": 6.2466,
"step": 16000
},
{
"epoch": 0.7363346641048356,
"eval_loss": 6.129319667816162,
"eval_runtime": 33.6388,
"eval_samples_per_second": 11.415,
"eval_steps_per_second": 5.708,
"eval_tts_loss": 7.892954236130724,
"step": 16000
},
{
"epoch": 0.7386357099301633,
"grad_norm": 1.8499406576156616,
"learning_rate": 9.525892035275682e-05,
"loss": 6.2367,
"step": 16050
},
{
"epoch": 0.7409367557554909,
"grad_norm": 1.8135892152786255,
"learning_rate": 9.522786105599754e-05,
"loss": 6.1914,
"step": 16100
},
{
"epoch": 0.7432378015808185,
"grad_norm": 1.6833168268203735,
"learning_rate": 9.519670545220556e-05,
"loss": 6.1262,
"step": 16150
},
{
"epoch": 0.7455388474061461,
"grad_norm": 1.6315560340881348,
"learning_rate": 9.516545360772282e-05,
"loss": 6.1558,
"step": 16200
},
{
"epoch": 0.7478398932314737,
"grad_norm": 1.9072283506393433,
"learning_rate": 9.513410558909617e-05,
"loss": 6.1518,
"step": 16250
},
{
"epoch": 0.7501409390568013,
"grad_norm": 1.840588092803955,
"learning_rate": 9.510266146307725e-05,
"loss": 6.2151,
"step": 16300
},
{
"epoch": 0.7524419848821289,
"grad_norm": 1.820553183555603,
"learning_rate": 9.507112129662242e-05,
"loss": 6.2398,
"step": 16350
},
{
"epoch": 0.7547430307074565,
"grad_norm": 1.8674460649490356,
"learning_rate": 9.503948515689243e-05,
"loss": 6.1383,
"step": 16400
},
{
"epoch": 0.7570440765327842,
"grad_norm": 2.053492784500122,
"learning_rate": 9.500775311125247e-05,
"loss": 6.1975,
"step": 16450
},
{
"epoch": 0.7593451223581118,
"grad_norm": 1.882986307144165,
"learning_rate": 9.497592522727197e-05,
"loss": 6.1723,
"step": 16500
},
{
"epoch": 0.7616461681834393,
"grad_norm": 1.6095331907272339,
"learning_rate": 9.494400157272438e-05,
"loss": 6.1988,
"step": 16550
},
{
"epoch": 0.763947214008767,
"grad_norm": 1.6419812440872192,
"learning_rate": 9.491198221558708e-05,
"loss": 6.1695,
"step": 16600
},
{
"epoch": 0.7662482598340946,
"grad_norm": 1.678825855255127,
"learning_rate": 9.487986722404127e-05,
"loss": 6.1044,
"step": 16650
},
{
"epoch": 0.7685493056594223,
"grad_norm": 1.7096123695373535,
"learning_rate": 9.48476566664718e-05,
"loss": 6.1511,
"step": 16700
},
{
"epoch": 0.7708503514847498,
"grad_norm": 2.1413791179656982,
"learning_rate": 9.481535061146698e-05,
"loss": 6.1708,
"step": 16750
},
{
"epoch": 0.7731513973100774,
"grad_norm": 1.9227194786071777,
"learning_rate": 9.478294912781847e-05,
"loss": 6.1949,
"step": 16800
},
{
"epoch": 0.7754524431354051,
"grad_norm": 1.8362767696380615,
"learning_rate": 9.475045228452115e-05,
"loss": 6.1788,
"step": 16850
},
{
"epoch": 0.7777534889607327,
"grad_norm": 1.8240971565246582,
"learning_rate": 9.471786015077298e-05,
"loss": 6.1563,
"step": 16900
},
{
"epoch": 0.7800545347860602,
"grad_norm": 2.078509569168091,
"learning_rate": 9.468517279597477e-05,
"loss": 6.1832,
"step": 16950
},
{
"epoch": 0.7823555806113879,
"grad_norm": 2.1859052181243896,
"learning_rate": 9.465239028973017e-05,
"loss": 6.2047,
"step": 17000
},
{
"epoch": 0.7823555806113879,
"eval_loss": 6.096608638763428,
"eval_runtime": 34.1394,
"eval_samples_per_second": 11.248,
"eval_steps_per_second": 5.624,
"eval_tts_loss": 7.896921396746157,
"step": 17000
},
{
"epoch": 0.7846566264367155,
"grad_norm": 1.922098159790039,
"learning_rate": 9.461951270184534e-05,
"loss": 6.1982,
"step": 17050
},
{
"epoch": 0.786957672262043,
"grad_norm": 1.8664427995681763,
"learning_rate": 9.458654010232901e-05,
"loss": 6.1964,
"step": 17100
},
{
"epoch": 0.7892587180873707,
"grad_norm": 1.889997124671936,
"learning_rate": 9.455347256139215e-05,
"loss": 6.1673,
"step": 17150
},
{
"epoch": 0.7915597639126983,
"grad_norm": 1.7203173637390137,
"learning_rate": 9.452031014944792e-05,
"loss": 6.1519,
"step": 17200
},
{
"epoch": 0.793860809738026,
"grad_norm": 2.062267303466797,
"learning_rate": 9.448705293711151e-05,
"loss": 6.1475,
"step": 17250
},
{
"epoch": 0.7961618555633535,
"grad_norm": 1.6810802221298218,
"learning_rate": 9.445370099519998e-05,
"loss": 6.1636,
"step": 17300
},
{
"epoch": 0.7984629013886811,
"grad_norm": 1.6828442811965942,
"learning_rate": 9.442025439473207e-05,
"loss": 6.1879,
"step": 17350
},
{
"epoch": 0.8007639472140088,
"grad_norm": 1.7201191186904907,
"learning_rate": 9.438671320692809e-05,
"loss": 6.2027,
"step": 17400
},
{
"epoch": 0.8030649930393364,
"grad_norm": 1.9775705337524414,
"learning_rate": 9.435307750320979e-05,
"loss": 6.1444,
"step": 17450
},
{
"epoch": 0.805366038864664,
"grad_norm": 1.6285938024520874,
"learning_rate": 9.431934735520019e-05,
"loss": 6.0895,
"step": 17500
},
{
"epoch": 0.8076670846899916,
"grad_norm": 2.1120924949645996,
"learning_rate": 9.428552283472333e-05,
"loss": 6.0707,
"step": 17550
},
{
"epoch": 0.8099681305153192,
"grad_norm": 1.9405549764633179,
"learning_rate": 9.425160401380431e-05,
"loss": 6.1812,
"step": 17600
},
{
"epoch": 0.8122691763406468,
"grad_norm": 1.8059804439544678,
"learning_rate": 9.421759096466897e-05,
"loss": 6.2241,
"step": 17650
},
{
"epoch": 0.8145702221659744,
"grad_norm": 1.6747099161148071,
"learning_rate": 9.418348375974384e-05,
"loss": 6.1178,
"step": 17700
},
{
"epoch": 0.816871267991302,
"grad_norm": 1.9652963876724243,
"learning_rate": 9.414928247165587e-05,
"loss": 6.1801,
"step": 17750
},
{
"epoch": 0.8191723138166297,
"grad_norm": 1.6845933198928833,
"learning_rate": 9.411498717323244e-05,
"loss": 6.1187,
"step": 17800
},
{
"epoch": 0.8214733596419572,
"grad_norm": 2.096360206604004,
"learning_rate": 9.408059793750103e-05,
"loss": 6.1353,
"step": 17850
},
{
"epoch": 0.8237744054672849,
"grad_norm": 1.7789738178253174,
"learning_rate": 9.404611483768922e-05,
"loss": 6.0715,
"step": 17900
},
{
"epoch": 0.8260754512926125,
"grad_norm": 2.0107250213623047,
"learning_rate": 9.401153794722441e-05,
"loss": 6.1532,
"step": 17950
},
{
"epoch": 0.8283764971179401,
"grad_norm": 1.776755452156067,
"learning_rate": 9.397686733973372e-05,
"loss": 6.1186,
"step": 18000
},
{
"epoch": 0.8283764971179401,
"eval_loss": 6.063215732574463,
"eval_runtime": 33.1049,
"eval_samples_per_second": 11.599,
"eval_steps_per_second": 5.8,
"eval_tts_loss": 7.988993335909586,
"step": 18000
},
{
"epoch": 0.8306775429432677,
"grad_norm": 1.5279197692871094,
"learning_rate": 9.394210308904386e-05,
"loss": 6.1574,
"step": 18050
},
{
"epoch": 0.8329785887685953,
"grad_norm": 1.7856347560882568,
"learning_rate": 9.390724526918091e-05,
"loss": 6.0909,
"step": 18100
},
{
"epoch": 0.835279634593923,
"grad_norm": 1.844883680343628,
"learning_rate": 9.387229395437023e-05,
"loss": 6.1327,
"step": 18150
},
{
"epoch": 0.8375806804192506,
"grad_norm": 1.9506162405014038,
"learning_rate": 9.38372492190362e-05,
"loss": 6.0869,
"step": 18200
},
{
"epoch": 0.8398817262445781,
"grad_norm": 1.6576199531555176,
"learning_rate": 9.380211113780222e-05,
"loss": 6.1195,
"step": 18250
},
{
"epoch": 0.8421827720699058,
"grad_norm": 1.8321812152862549,
"learning_rate": 9.376687978549037e-05,
"loss": 6.1679,
"step": 18300
},
{
"epoch": 0.8444838178952334,
"grad_norm": 2.062652587890625,
"learning_rate": 9.373155523712138e-05,
"loss": 6.0964,
"step": 18350
},
{
"epoch": 0.8467848637205609,
"grad_norm": 1.7374162673950195,
"learning_rate": 9.369613756791445e-05,
"loss": 6.1764,
"step": 18400
},
{
"epoch": 0.8490859095458886,
"grad_norm": 1.9153779745101929,
"learning_rate": 9.366062685328703e-05,
"loss": 6.1341,
"step": 18450
},
{
"epoch": 0.8513869553712162,
"grad_norm": 1.7411820888519287,
"learning_rate": 9.362502316885469e-05,
"loss": 6.1364,
"step": 18500
},
{
"epoch": 0.8536880011965439,
"grad_norm": 2.0500361919403076,
"learning_rate": 9.358932659043102e-05,
"loss": 6.0828,
"step": 18550
},
{
"epoch": 0.8559890470218714,
"grad_norm": 1.7861090898513794,
"learning_rate": 9.35535371940274e-05,
"loss": 6.138,
"step": 18600
},
{
"epoch": 0.858290092847199,
"grad_norm": 2.1005265712738037,
"learning_rate": 9.351837360699179e-05,
"loss": 6.1243,
"step": 18650
},
{
"epoch": 0.8605911386725267,
"grad_norm": 1.8733184337615967,
"learning_rate": 9.348240065600997e-05,
"loss": 6.0982,
"step": 18700
},
{
"epoch": 0.8628921844978543,
"grad_norm": 1.5661227703094482,
"learning_rate": 9.344633511473347e-05,
"loss": 6.0685,
"step": 18750
},
{
"epoch": 0.8651932303231818,
"grad_norm": 2.270904064178467,
"learning_rate": 9.341017705995936e-05,
"loss": 6.0827,
"step": 18800
},
{
"epoch": 0.8674942761485095,
"grad_norm": 1.945278525352478,
"learning_rate": 9.337392656868162e-05,
"loss": 6.0418,
"step": 18850
},
{
"epoch": 0.8697953219738371,
"grad_norm": 1.6341246366500854,
"learning_rate": 9.333758371809113e-05,
"loss": 6.1227,
"step": 18900
},
{
"epoch": 0.8720963677991648,
"grad_norm": 1.9579964876174927,
"learning_rate": 9.330114858557541e-05,
"loss": 6.1275,
"step": 18950
},
{
"epoch": 0.8743974136244923,
"grad_norm": 2.0281152725219727,
"learning_rate": 9.326462124871846e-05,
"loss": 6.0869,
"step": 19000
},
{
"epoch": 0.8743974136244923,
"eval_loss": 6.040208339691162,
"eval_runtime": 34.65,
"eval_samples_per_second": 11.082,
"eval_steps_per_second": 5.541,
"eval_tts_loss": 8.007550918315454,
"step": 19000
},
{
"epoch": 0.87669845944982,
"grad_norm": 1.834215521812439,
"learning_rate": 9.322800178530069e-05,
"loss": 6.1467,
"step": 19050
},
{
"epoch": 0.8789995052751476,
"grad_norm": 1.8311221599578857,
"learning_rate": 9.31912902732986e-05,
"loss": 6.0677,
"step": 19100
},
{
"epoch": 0.8813005511004751,
"grad_norm": 1.7210936546325684,
"learning_rate": 9.315448679088472e-05,
"loss": 6.0809,
"step": 19150
},
{
"epoch": 0.8836015969258028,
"grad_norm": 1.8017765283584595,
"learning_rate": 9.311759141642746e-05,
"loss": 6.0724,
"step": 19200
},
{
"epoch": 0.8859026427511304,
"grad_norm": 1.822521448135376,
"learning_rate": 9.308060422849085e-05,
"loss": 6.0447,
"step": 19250
},
{
"epoch": 0.888203688576458,
"grad_norm": 2.083303451538086,
"learning_rate": 9.304352530583447e-05,
"loss": 6.1126,
"step": 19300
},
{
"epoch": 0.8905047344017856,
"grad_norm": 1.7161856889724731,
"learning_rate": 9.30063547274132e-05,
"loss": 6.1106,
"step": 19350
},
{
"epoch": 0.8928057802271132,
"grad_norm": 2.1552722454071045,
"learning_rate": 9.296909257237707e-05,
"loss": 6.0917,
"step": 19400
},
{
"epoch": 0.8951068260524409,
"grad_norm": 1.898194432258606,
"learning_rate": 9.293173892007122e-05,
"loss": 6.1337,
"step": 19450
},
{
"epoch": 0.8974078718777685,
"grad_norm": 1.6852689981460571,
"learning_rate": 9.28942938500355e-05,
"loss": 6.1289,
"step": 19500
},
{
"epoch": 0.899708917703096,
"grad_norm": 2.3115508556365967,
"learning_rate": 9.285675744200447e-05,
"loss": 6.1953,
"step": 19550
},
{
"epoch": 0.9020099635284237,
"grad_norm": 1.9407068490982056,
"learning_rate": 9.281912977590719e-05,
"loss": 6.0774,
"step": 19600
},
{
"epoch": 0.9043110093537513,
"grad_norm": 1.9050291776657104,
"learning_rate": 9.278141093186703e-05,
"loss": 6.077,
"step": 19650
},
{
"epoch": 0.906612055179079,
"grad_norm": 1.570671796798706,
"learning_rate": 9.274360099020154e-05,
"loss": 6.0458,
"step": 19700
},
{
"epoch": 0.9089131010044065,
"grad_norm": 1.8827368021011353,
"learning_rate": 9.270570003142218e-05,
"loss": 6.0504,
"step": 19750
},
{
"epoch": 0.9112141468297341,
"grad_norm": 1.8248310089111328,
"learning_rate": 9.266770813623432e-05,
"loss": 6.0229,
"step": 19800
},
{
"epoch": 0.9135151926550618,
"grad_norm": 1.6708036661148071,
"learning_rate": 9.262962538553689e-05,
"loss": 6.0974,
"step": 19850
},
{
"epoch": 0.9158162384803893,
"grad_norm": 1.7397429943084717,
"learning_rate": 9.259145186042231e-05,
"loss": 6.0746,
"step": 19900
},
{
"epoch": 0.9181172843057169,
"grad_norm": 1.5898857116699219,
"learning_rate": 9.255318764217629e-05,
"loss": 6.0868,
"step": 19950
},
{
"epoch": 0.9204183301310446,
"grad_norm": 1.8456883430480957,
"learning_rate": 9.251483281227767e-05,
"loss": 6.0739,
"step": 20000
},
{
"epoch": 0.9204183301310446,
"eval_loss": 6.005526065826416,
"eval_runtime": 34.2528,
"eval_samples_per_second": 11.211,
"eval_steps_per_second": 5.605,
"eval_tts_loss": 7.991646247745309,
"step": 20000
},
{
"epoch": 0.9227193759563722,
"grad_norm": 1.8995615243911743,
"learning_rate": 9.247638745239822e-05,
"loss": 6.1174,
"step": 20050
},
{
"epoch": 0.9250204217816997,
"grad_norm": 1.6712597608566284,
"learning_rate": 9.243785164440251e-05,
"loss": 6.0987,
"step": 20100
},
{
"epoch": 0.9273214676070274,
"grad_norm": 1.762939691543579,
"learning_rate": 9.239922547034765e-05,
"loss": 5.9939,
"step": 20150
},
{
"epoch": 0.929622513432355,
"grad_norm": 1.6403757333755493,
"learning_rate": 9.236050901248325e-05,
"loss": 6.0611,
"step": 20200
},
{
"epoch": 0.9319235592576827,
"grad_norm": 2.0031139850616455,
"learning_rate": 9.232170235325113e-05,
"loss": 6.1203,
"step": 20250
},
{
"epoch": 0.9342246050830102,
"grad_norm": 1.9146487712860107,
"learning_rate": 9.228280557528516e-05,
"loss": 6.0571,
"step": 20300
},
{
"epoch": 0.9365256509083378,
"grad_norm": 1.7245732545852661,
"learning_rate": 9.224381876141113e-05,
"loss": 6.0659,
"step": 20350
},
{
"epoch": 0.9388266967336655,
"grad_norm": 1.967654824256897,
"learning_rate": 9.220474199464657e-05,
"loss": 6.0195,
"step": 20400
},
{
"epoch": 0.941127742558993,
"grad_norm": 1.929421067237854,
"learning_rate": 9.216557535820052e-05,
"loss": 6.0432,
"step": 20450
},
{
"epoch": 0.9434287883843206,
"grad_norm": 1.8827496767044067,
"learning_rate": 9.212631893547339e-05,
"loss": 6.0518,
"step": 20500
},
{
"epoch": 0.9457298342096483,
"grad_norm": 1.6459333896636963,
"learning_rate": 9.20869728100568e-05,
"loss": 6.0171,
"step": 20550
},
{
"epoch": 0.9480308800349759,
"grad_norm": 2.004542827606201,
"learning_rate": 9.204753706573335e-05,
"loss": 6.047,
"step": 20600
},
{
"epoch": 0.9503319258603035,
"grad_norm": 1.9624686241149902,
"learning_rate": 9.200801178647652e-05,
"loss": 6.0593,
"step": 20650
},
{
"epoch": 0.9526329716856311,
"grad_norm": 1.5215227603912354,
"learning_rate": 9.196839705645039e-05,
"loss": 6.0869,
"step": 20700
},
{
"epoch": 0.9549340175109587,
"grad_norm": 1.7919971942901611,
"learning_rate": 9.192869296000954e-05,
"loss": 5.9958,
"step": 20750
},
{
"epoch": 0.9572350633362864,
"grad_norm": 1.7097506523132324,
"learning_rate": 9.188889958169884e-05,
"loss": 6.0802,
"step": 20800
},
{
"epoch": 0.9595361091616139,
"grad_norm": 1.670233130455017,
"learning_rate": 9.184901700625328e-05,
"loss": 6.0703,
"step": 20850
},
{
"epoch": 0.9618371549869416,
"grad_norm": 2.0614235401153564,
"learning_rate": 9.18090453185978e-05,
"loss": 6.0206,
"step": 20900
},
{
"epoch": 0.9641382008122692,
"grad_norm": 1.811000108718872,
"learning_rate": 9.176898460384704e-05,
"loss": 5.9914,
"step": 20950
},
{
"epoch": 0.9664392466375968,
"grad_norm": 1.9324144124984741,
"learning_rate": 9.172883494730526e-05,
"loss": 6.0325,
"step": 21000
},
{
"epoch": 0.9664392466375968,
"eval_loss": 5.979398250579834,
"eval_runtime": 33.2361,
"eval_samples_per_second": 11.554,
"eval_steps_per_second": 5.777,
"eval_tts_loss": 8.089930703009033,
"step": 21000
},
{
"epoch": 0.9687402924629244,
"grad_norm": 1.9032566547393799,
"learning_rate": 9.16885964344661e-05,
"loss": 6.0419,
"step": 21050
},
{
"epoch": 0.971041338288252,
"grad_norm": 2.04426908493042,
"learning_rate": 9.164826915101239e-05,
"loss": 6.0501,
"step": 21100
},
{
"epoch": 0.9733423841135797,
"grad_norm": 1.8017916679382324,
"learning_rate": 9.160785318281601e-05,
"loss": 6.0282,
"step": 21150
},
{
"epoch": 0.9756434299389072,
"grad_norm": 1.8715277910232544,
"learning_rate": 9.156815957498479e-05,
"loss": 6.0933,
"step": 21200
},
{
"epoch": 0.9779444757642348,
"grad_norm": 1.7041345834732056,
"learning_rate": 9.152756826507607e-05,
"loss": 6.0776,
"step": 21250
},
{
"epoch": 0.9802455215895625,
"grad_norm": 1.8159674406051636,
"learning_rate": 9.148688852744201e-05,
"loss": 6.0045,
"step": 21300
},
{
"epoch": 0.9825465674148901,
"grad_norm": 2.1322624683380127,
"learning_rate": 9.144612044870497e-05,
"loss": 6.0547,
"step": 21350
},
{
"epoch": 0.9848476132402176,
"grad_norm": 1.899408221244812,
"learning_rate": 9.14052641156755e-05,
"loss": 6.0359,
"step": 21400
},
{
"epoch": 0.9871486590655453,
"grad_norm": 1.912842869758606,
"learning_rate": 9.136431961535193e-05,
"loss": 6.032,
"step": 21450
},
{
"epoch": 0.9894497048908729,
"grad_norm": 2.2110729217529297,
"learning_rate": 9.132328703492048e-05,
"loss": 6.0135,
"step": 21500
},
{
"epoch": 0.9917507507162006,
"grad_norm": 1.8513312339782715,
"learning_rate": 9.128216646175482e-05,
"loss": 5.9792,
"step": 21550
},
{
"epoch": 0.9940517965415281,
"grad_norm": 1.685534954071045,
"learning_rate": 9.124095798341607e-05,
"loss": 5.9946,
"step": 21600
},
{
"epoch": 0.9963528423668557,
"grad_norm": 1.8378783464431763,
"learning_rate": 9.119966168765246e-05,
"loss": 5.9931,
"step": 21650
},
{
"epoch": 0.9986538881921834,
"grad_norm": 1.9953211545944214,
"learning_rate": 9.115827766239925e-05,
"loss": 6.0143,
"step": 21700
},
{
"epoch": 1.000920418330131,
"grad_norm": 1.8958375453948975,
"learning_rate": 9.111680599577854e-05,
"loss": 5.9748,
"step": 21750
},
{
"epoch": 1.0032214641554587,
"grad_norm": 1.59896719455719,
"learning_rate": 9.1075246776099e-05,
"loss": 5.8144,
"step": 21800
},
{
"epoch": 1.0055225099807863,
"grad_norm": 1.7725744247436523,
"learning_rate": 9.103360009185574e-05,
"loss": 5.8584,
"step": 21850
},
{
"epoch": 1.007823555806114,
"grad_norm": 2.1068501472473145,
"learning_rate": 9.099186603173016e-05,
"loss": 5.9851,
"step": 21900
},
{
"epoch": 1.0101246016314416,
"grad_norm": 2.0774874687194824,
"learning_rate": 9.095004468458965e-05,
"loss": 5.8846,
"step": 21950
},
{
"epoch": 1.012425647456769,
"grad_norm": 2.4163296222686768,
"learning_rate": 9.090813613948752e-05,
"loss": 5.9117,
"step": 22000
},
{
"epoch": 1.012425647456769,
"eval_loss": 5.969619274139404,
"eval_runtime": 34.7535,
"eval_samples_per_second": 11.049,
"eval_steps_per_second": 5.525,
"eval_tts_loss": 8.110409582485643,
"step": 22000
},
{
"epoch": 1.0147266932820966,
"grad_norm": 2.0644235610961914,
"learning_rate": 9.086614048566273e-05,
"loss": 5.9255,
"step": 22050
},
{
"epoch": 1.0170277391074243,
"grad_norm": 1.7027440071105957,
"learning_rate": 9.082405781253973e-05,
"loss": 5.8677,
"step": 22100
},
{
"epoch": 1.019328784932752,
"grad_norm": 2.0452287197113037,
"learning_rate": 9.07818882097283e-05,
"loss": 5.9431,
"step": 22150
},
{
"epoch": 1.0216298307580796,
"grad_norm": 1.6214035749435425,
"learning_rate": 9.073963176702324e-05,
"loss": 5.9106,
"step": 22200
},
{
"epoch": 1.0239308765834072,
"grad_norm": 1.7072349786758423,
"learning_rate": 9.06972885744044e-05,
"loss": 5.8734,
"step": 22250
},
{
"epoch": 1.0262319224087348,
"grad_norm": 1.7980152368545532,
"learning_rate": 9.06548587220362e-05,
"loss": 5.9673,
"step": 22300
},
{
"epoch": 1.0285329682340625,
"grad_norm": 2.011014938354492,
"learning_rate": 9.061234230026771e-05,
"loss": 5.9262,
"step": 22350
},
{
"epoch": 1.03083401405939,
"grad_norm": 2.122554302215576,
"learning_rate": 9.05697393996323e-05,
"loss": 5.9159,
"step": 22400
},
{
"epoch": 1.0331350598847175,
"grad_norm": 1.9338388442993164,
"learning_rate": 9.052705011084743e-05,
"loss": 5.8578,
"step": 22450
},
{
"epoch": 1.0354361057100452,
"grad_norm": 2.0208539962768555,
"learning_rate": 9.048427452481462e-05,
"loss": 5.9803,
"step": 22500
},
{
"epoch": 1.0377371515353728,
"grad_norm": 1.9141596555709839,
"learning_rate": 9.044141273261906e-05,
"loss": 5.9241,
"step": 22550
},
{
"epoch": 1.0400381973607005,
"grad_norm": 1.8084018230438232,
"learning_rate": 9.039846482552953e-05,
"loss": 5.9547,
"step": 22600
},
{
"epoch": 1.042339243186028,
"grad_norm": 1.9551680088043213,
"learning_rate": 9.03554308949982e-05,
"loss": 5.9616,
"step": 22650
},
{
"epoch": 1.0446402890113557,
"grad_norm": 1.8424718379974365,
"learning_rate": 9.031231103266044e-05,
"loss": 5.9839,
"step": 22700
},
{
"epoch": 1.0469413348366832,
"grad_norm": 1.7704012393951416,
"learning_rate": 9.02691053303345e-05,
"loss": 5.9764,
"step": 22750
},
{
"epoch": 1.0492423806620108,
"grad_norm": 2.2767720222473145,
"learning_rate": 9.022581388002152e-05,
"loss": 5.9506,
"step": 22800
},
{
"epoch": 1.0515434264873385,
"grad_norm": 1.8481018543243408,
"learning_rate": 9.018243677390518e-05,
"loss": 5.8892,
"step": 22850
},
{
"epoch": 1.053844472312666,
"grad_norm": 1.9781994819641113,
"learning_rate": 9.013897410435157e-05,
"loss": 5.8882,
"step": 22900
},
{
"epoch": 1.0561455181379937,
"grad_norm": 2.307131767272949,
"learning_rate": 9.009542596390896e-05,
"loss": 5.8953,
"step": 22950
},
{
"epoch": 1.0584465639633214,
"grad_norm": 1.830885648727417,
"learning_rate": 9.005179244530764e-05,
"loss": 5.902,
"step": 23000
},
{
"epoch": 1.0584465639633214,
"eval_loss": 5.945216655731201,
"eval_runtime": 34.7154,
"eval_samples_per_second": 11.061,
"eval_steps_per_second": 5.531,
"eval_tts_loss": 8.16352996774057,
"step": 23000
},
{
"epoch": 1.060747609788649,
"grad_norm": 2.0037059783935547,
"learning_rate": 9.000807364145967e-05,
"loss": 5.8711,
"step": 23050
},
{
"epoch": 1.0630486556139767,
"grad_norm": 1.691949725151062,
"learning_rate": 8.996426964545876e-05,
"loss": 5.887,
"step": 23100
},
{
"epoch": 1.065349701439304,
"grad_norm": 1.8151764869689941,
"learning_rate": 8.992038055058e-05,
"loss": 5.9154,
"step": 23150
},
{
"epoch": 1.0676507472646317,
"grad_norm": 1.672204613685608,
"learning_rate": 8.987640645027966e-05,
"loss": 5.9166,
"step": 23200
},
{
"epoch": 1.0699517930899594,
"grad_norm": 1.6701843738555908,
"learning_rate": 8.983234743819508e-05,
"loss": 5.907,
"step": 23250
},
{
"epoch": 1.072252838915287,
"grad_norm": 1.888508677482605,
"learning_rate": 8.978820360814436e-05,
"loss": 5.8915,
"step": 23300
},
{
"epoch": 1.0745538847406146,
"grad_norm": 2.2695815563201904,
"learning_rate": 8.974397505412624e-05,
"loss": 5.9346,
"step": 23350
},
{
"epoch": 1.0768549305659423,
"grad_norm": 1.7576676607131958,
"learning_rate": 8.969966187031985e-05,
"loss": 5.9133,
"step": 23400
},
{
"epoch": 1.07915597639127,
"grad_norm": 1.5666314363479614,
"learning_rate": 8.965526415108449e-05,
"loss": 5.9285,
"step": 23450
},
{
"epoch": 1.0814570222165973,
"grad_norm": 1.5109493732452393,
"learning_rate": 8.961078199095957e-05,
"loss": 5.9068,
"step": 23500
},
{
"epoch": 1.083758068041925,
"grad_norm": 1.9691708087921143,
"learning_rate": 8.956621548466421e-05,
"loss": 5.9005,
"step": 23550
},
{
"epoch": 1.0860591138672526,
"grad_norm": 1.978844404220581,
"learning_rate": 8.952245856729633e-05,
"loss": 5.9172,
"step": 23600
},
{
"epoch": 1.0883601596925803,
"grad_norm": 1.8941922187805176,
"learning_rate": 8.947772533572675e-05,
"loss": 5.8864,
"step": 23650
},
{
"epoch": 1.090661205517908,
"grad_norm": 1.850315809249878,
"learning_rate": 8.943290804131414e-05,
"loss": 5.9281,
"step": 23700
},
{
"epoch": 1.0929622513432355,
"grad_norm": 1.6464835405349731,
"learning_rate": 8.938800677949127e-05,
"loss": 5.9169,
"step": 23750
},
{
"epoch": 1.0952632971685632,
"grad_norm": 1.6319020986557007,
"learning_rate": 8.934302164586972e-05,
"loss": 5.8943,
"step": 23800
},
{
"epoch": 1.0975643429938908,
"grad_norm": 1.853194236755371,
"learning_rate": 8.929795273623967e-05,
"loss": 5.9478,
"step": 23850
},
{
"epoch": 1.0998653888192182,
"grad_norm": 2.057925224304199,
"learning_rate": 8.925280014656967e-05,
"loss": 5.947,
"step": 23900
},
{
"epoch": 1.102166434644546,
"grad_norm": 1.695457935333252,
"learning_rate": 8.920756397300651e-05,
"loss": 5.8765,
"step": 23950
},
{
"epoch": 1.1044674804698735,
"grad_norm": 2.0463249683380127,
"learning_rate": 8.916224431187487e-05,
"loss": 5.9068,
"step": 24000
},
{
"epoch": 1.1044674804698735,
"eval_loss": 5.919960021972656,
"eval_runtime": 34.2903,
"eval_samples_per_second": 11.199,
"eval_steps_per_second": 5.599,
"eval_tts_loss": 8.153686654926947,
"step": 24000
},
{
"epoch": 1.1067685262952012,
"grad_norm": 1.822067379951477,
"learning_rate": 8.91168412596773e-05,
"loss": 5.9224,
"step": 24050
},
{
"epoch": 1.1090695721205288,
"grad_norm": 1.8195558786392212,
"learning_rate": 8.907135491309386e-05,
"loss": 5.8628,
"step": 24100
},
{
"epoch": 1.1113706179458565,
"grad_norm": 1.7702739238739014,
"learning_rate": 8.902578536898202e-05,
"loss": 5.843,
"step": 24150
},
{
"epoch": 1.113671663771184,
"grad_norm": 1.775231957435608,
"learning_rate": 8.898013272437635e-05,
"loss": 5.9523,
"step": 24200
},
{
"epoch": 1.1159727095965115,
"grad_norm": 1.9853113889694214,
"learning_rate": 8.893439707648843e-05,
"loss": 5.9059,
"step": 24250
},
{
"epoch": 1.1182737554218392,
"grad_norm": 1.9251091480255127,
"learning_rate": 8.888857852270655e-05,
"loss": 5.8562,
"step": 24300
},
{
"epoch": 1.1205748012471668,
"grad_norm": 1.6936697959899902,
"learning_rate": 8.884267716059556e-05,
"loss": 5.8354,
"step": 24350
},
{
"epoch": 1.1228758470724944,
"grad_norm": 1.7089874744415283,
"learning_rate": 8.87966930878966e-05,
"loss": 5.8841,
"step": 24400
},
{
"epoch": 1.125176892897822,
"grad_norm": 1.789920687675476,
"learning_rate": 8.8750626402527e-05,
"loss": 5.9337,
"step": 24450
},
{
"epoch": 1.1274779387231497,
"grad_norm": 1.7186089754104614,
"learning_rate": 8.870447720257994e-05,
"loss": 5.9068,
"step": 24500
},
{
"epoch": 1.1297789845484774,
"grad_norm": 2.3741872310638428,
"learning_rate": 8.865824558632431e-05,
"loss": 5.9303,
"step": 24550
},
{
"epoch": 1.132080030373805,
"grad_norm": 1.9989960193634033,
"learning_rate": 8.861193165220456e-05,
"loss": 5.9017,
"step": 24600
},
{
"epoch": 1.1343810761991324,
"grad_norm": 1.9211207628250122,
"learning_rate": 8.856553549884034e-05,
"loss": 5.9274,
"step": 24650
},
{
"epoch": 1.13668212202446,
"grad_norm": 2.168541193008423,
"learning_rate": 8.851905722502643e-05,
"loss": 5.8614,
"step": 24700
},
{
"epoch": 1.1389831678497877,
"grad_norm": 1.7189666032791138,
"learning_rate": 8.847249692973245e-05,
"loss": 5.8518,
"step": 24750
},
{
"epoch": 1.1412842136751153,
"grad_norm": 1.8728166818618774,
"learning_rate": 8.842585471210271e-05,
"loss": 5.924,
"step": 24800
},
{
"epoch": 1.143585259500443,
"grad_norm": 2.2806296348571777,
"learning_rate": 8.837913067145589e-05,
"loss": 5.8643,
"step": 24850
},
{
"epoch": 1.1458863053257706,
"grad_norm": 2.4408116340637207,
"learning_rate": 8.833232490728499e-05,
"loss": 5.8813,
"step": 24900
},
{
"epoch": 1.1481873511510983,
"grad_norm": 2.0429434776306152,
"learning_rate": 8.828543751925697e-05,
"loss": 5.8622,
"step": 24950
},
{
"epoch": 1.1504883969764257,
"grad_norm": 1.8052806854248047,
"learning_rate": 8.823846860721262e-05,
"loss": 5.8601,
"step": 25000
},
{
"epoch": 1.1504883969764257,
"eval_loss": 5.919439792633057,
"eval_runtime": 35.1925,
"eval_samples_per_second": 10.911,
"eval_steps_per_second": 5.456,
"eval_tts_loss": 8.2057244715313,
"step": 25000
},
{
"epoch": 1.1527894428017533,
"grad_norm": 1.8507080078125,
"learning_rate": 8.819141827116633e-05,
"loss": 5.8693,
"step": 25050
},
{
"epoch": 1.155090488627081,
"grad_norm": 1.8380216360092163,
"learning_rate": 8.814428661130584e-05,
"loss": 5.8838,
"step": 25100
},
{
"epoch": 1.1573915344524086,
"grad_norm": 1.9006010293960571,
"learning_rate": 8.80970737279921e-05,
"loss": 5.9039,
"step": 25150
},
{
"epoch": 1.1596925802777363,
"grad_norm": 2.1745150089263916,
"learning_rate": 8.804977972175901e-05,
"loss": 5.937,
"step": 25200
},
{
"epoch": 1.161993626103064,
"grad_norm": 1.970659613609314,
"learning_rate": 8.800240469331315e-05,
"loss": 5.87,
"step": 25250
},
{
"epoch": 1.1642946719283915,
"grad_norm": 1.9034065008163452,
"learning_rate": 8.795494874353373e-05,
"loss": 5.8825,
"step": 25300
},
{
"epoch": 1.1665957177537192,
"grad_norm": 1.7885934114456177,
"learning_rate": 8.79074119734722e-05,
"loss": 5.9146,
"step": 25350
},
{
"epoch": 1.1688967635790466,
"grad_norm": 1.9243812561035156,
"learning_rate": 8.785979448435213e-05,
"loss": 5.8155,
"step": 25400
},
{
"epoch": 1.1711978094043742,
"grad_norm": 1.7413113117218018,
"learning_rate": 8.781209637756894e-05,
"loss": 5.9454,
"step": 25450
},
{
"epoch": 1.1734988552297019,
"grad_norm": 1.7900454998016357,
"learning_rate": 8.776431775468974e-05,
"loss": 5.8851,
"step": 25500
},
{
"epoch": 1.1757999010550295,
"grad_norm": 2.080112934112549,
"learning_rate": 8.771645871745311e-05,
"loss": 5.8682,
"step": 25550
},
{
"epoch": 1.1781009468803572,
"grad_norm": 2.1360979080200195,
"learning_rate": 8.766851936776886e-05,
"loss": 5.8843,
"step": 25600
},
{
"epoch": 1.1804019927056848,
"grad_norm": 1.5990478992462158,
"learning_rate": 8.762049980771773e-05,
"loss": 5.8718,
"step": 25650
},
{
"epoch": 1.1827030385310124,
"grad_norm": 1.6874678134918213,
"learning_rate": 8.75724001395514e-05,
"loss": 5.8586,
"step": 25700
},
{
"epoch": 1.1850040843563399,
"grad_norm": 1.6797226667404175,
"learning_rate": 8.752422046569198e-05,
"loss": 5.8377,
"step": 25750
},
{
"epoch": 1.1873051301816675,
"grad_norm": 1.8640986680984497,
"learning_rate": 8.747596088873208e-05,
"loss": 5.9506,
"step": 25800
},
{
"epoch": 1.1896061760069951,
"grad_norm": 1.6982637643814087,
"learning_rate": 8.742762151143436e-05,
"loss": 5.9223,
"step": 25850
},
{
"epoch": 1.1919072218323228,
"grad_norm": 1.8827178478240967,
"learning_rate": 8.737920243673143e-05,
"loss": 5.8191,
"step": 25900
},
{
"epoch": 1.1942082676576504,
"grad_norm": 1.896240472793579,
"learning_rate": 8.733070376772562e-05,
"loss": 5.8389,
"step": 25950
},
{
"epoch": 1.196509313482978,
"grad_norm": 2.3180794715881348,
"learning_rate": 8.728212560768874e-05,
"loss": 5.8701,
"step": 26000
},
{
"epoch": 1.196509313482978,
"eval_loss": 5.89279317855835,
"eval_runtime": 34.4045,
"eval_samples_per_second": 11.161,
"eval_steps_per_second": 5.581,
"eval_tts_loss": 8.16692597775296,
"step": 26000
},
{
"epoch": 1.1988103593083057,
"grad_norm": 1.9639935493469238,
"learning_rate": 8.723346806006186e-05,
"loss": 5.8741,
"step": 26050
},
{
"epoch": 1.2011114051336333,
"grad_norm": 1.6462650299072266,
"learning_rate": 8.718473122845508e-05,
"loss": 5.8613,
"step": 26100
},
{
"epoch": 1.2034124509589608,
"grad_norm": 1.7803330421447754,
"learning_rate": 8.713591521664737e-05,
"loss": 5.8446,
"step": 26150
},
{
"epoch": 1.2057134967842884,
"grad_norm": 1.8175888061523438,
"learning_rate": 8.708799880462161e-05,
"loss": 5.895,
"step": 26200
},
{
"epoch": 1.208014542609616,
"grad_norm": 1.984127402305603,
"learning_rate": 8.703902632284438e-05,
"loss": 5.8236,
"step": 26250
},
{
"epoch": 1.2103155884349437,
"grad_norm": 1.7971287965774536,
"learning_rate": 8.698997497112645e-05,
"loss": 5.8432,
"step": 26300
},
{
"epoch": 1.2126166342602713,
"grad_norm": 1.832613229751587,
"learning_rate": 8.694084485391647e-05,
"loss": 5.8908,
"step": 26350
},
{
"epoch": 1.214917680085599,
"grad_norm": 1.8921012878417969,
"learning_rate": 8.689163607583087e-05,
"loss": 5.8555,
"step": 26400
},
{
"epoch": 1.2172187259109264,
"grad_norm": 1.7966539859771729,
"learning_rate": 8.684234874165354e-05,
"loss": 5.9059,
"step": 26450
},
{
"epoch": 1.219519771736254,
"grad_norm": 1.910406231880188,
"learning_rate": 8.679298295633565e-05,
"loss": 5.9553,
"step": 26500
},
{
"epoch": 1.2218208175615817,
"grad_norm": 1.7525932788848877,
"learning_rate": 8.67435388249954e-05,
"loss": 5.8169,
"step": 26550
},
{
"epoch": 1.2241218633869093,
"grad_norm": 1.8935388326644897,
"learning_rate": 8.669401645291787e-05,
"loss": 5.8138,
"step": 26600
},
{
"epoch": 1.226422909212237,
"grad_norm": 2.199080467224121,
"learning_rate": 8.664441594555468e-05,
"loss": 5.8429,
"step": 26650
},
{
"epoch": 1.2287239550375646,
"grad_norm": 1.8432965278625488,
"learning_rate": 8.659473740852388e-05,
"loss": 5.847,
"step": 26700
},
{
"epoch": 1.2310250008628922,
"grad_norm": 1.6469765901565552,
"learning_rate": 8.654498094760967e-05,
"loss": 5.8197,
"step": 26750
},
{
"epoch": 1.2333260466882199,
"grad_norm": 1.7215397357940674,
"learning_rate": 8.649514666876214e-05,
"loss": 5.8082,
"step": 26800
},
{
"epoch": 1.2356270925135475,
"grad_norm": 1.6139415502548218,
"learning_rate": 8.644523467809714e-05,
"loss": 5.8674,
"step": 26850
},
{
"epoch": 1.237928138338875,
"grad_norm": 2.062239646911621,
"learning_rate": 8.639524508189591e-05,
"loss": 5.8828,
"step": 26900
},
{
"epoch": 1.2402291841642026,
"grad_norm": 1.7671411037445068,
"learning_rate": 8.634517798660507e-05,
"loss": 5.911,
"step": 26950
},
{
"epoch": 1.2425302299895302,
"grad_norm": 1.7960079908370972,
"learning_rate": 8.629503349883614e-05,
"loss": 5.8929,
"step": 27000
},
{
"epoch": 1.2425302299895302,
"eval_loss": 5.874563694000244,
"eval_runtime": 34.9389,
"eval_samples_per_second": 10.991,
"eval_steps_per_second": 5.495,
"eval_tts_loss": 8.201237089489519,
"step": 27000
},
{
"epoch": 1.2448312758148579,
"grad_norm": 1.8907201290130615,
"learning_rate": 8.624481172536551e-05,
"loss": 5.8062,
"step": 27050
},
{
"epoch": 1.2471323216401855,
"grad_norm": 1.8463737964630127,
"learning_rate": 8.619451277313413e-05,
"loss": 5.8612,
"step": 27100
},
{
"epoch": 1.2494333674655131,
"grad_norm": 1.5661523342132568,
"learning_rate": 8.614413674924726e-05,
"loss": 5.7927,
"step": 27150
},
{
"epoch": 1.2517344132908406,
"grad_norm": 1.546129584312439,
"learning_rate": 8.60936837609743e-05,
"loss": 5.8647,
"step": 27200
},
{
"epoch": 1.2540354591161682,
"grad_norm": 1.8839119672775269,
"learning_rate": 8.604315391574856e-05,
"loss": 5.8938,
"step": 27250
},
{
"epoch": 1.2563365049414958,
"grad_norm": 1.8147374391555786,
"learning_rate": 8.599254732116694e-05,
"loss": 5.8561,
"step": 27300
},
{
"epoch": 1.2586375507668235,
"grad_norm": 1.9511573314666748,
"learning_rate": 8.594186408498984e-05,
"loss": 5.8655,
"step": 27350
},
{
"epoch": 1.2609385965921511,
"grad_norm": 1.7783595323562622,
"learning_rate": 8.58911043151408e-05,
"loss": 5.8455,
"step": 27400
},
{
"epoch": 1.2632396424174788,
"grad_norm": 1.8595982789993286,
"learning_rate": 8.584026811970637e-05,
"loss": 5.8557,
"step": 27450
},
{
"epoch": 1.2655406882428064,
"grad_norm": 1.6024532318115234,
"learning_rate": 8.578935560693585e-05,
"loss": 5.8431,
"step": 27500
},
{
"epoch": 1.267841734068134,
"grad_norm": 1.6488127708435059,
"learning_rate": 8.573836688524099e-05,
"loss": 5.8319,
"step": 27550
},
{
"epoch": 1.2701427798934617,
"grad_norm": 1.9752188920974731,
"learning_rate": 8.568730206319585e-05,
"loss": 5.8757,
"step": 27600
},
{
"epoch": 1.2724438257187891,
"grad_norm": 1.7887974977493286,
"learning_rate": 8.563616124953654e-05,
"loss": 5.8791,
"step": 27650
},
{
"epoch": 1.2747448715441168,
"grad_norm": 1.8554754257202148,
"learning_rate": 8.5584944553161e-05,
"loss": 5.9141,
"step": 27700
},
{
"epoch": 1.2770459173694444,
"grad_norm": 2.1256062984466553,
"learning_rate": 8.553365208312869e-05,
"loss": 5.8593,
"step": 27750
},
{
"epoch": 1.279346963194772,
"grad_norm": 1.798427700996399,
"learning_rate": 8.548228394866052e-05,
"loss": 5.8325,
"step": 27800
},
{
"epoch": 1.2816480090200997,
"grad_norm": 2.006601095199585,
"learning_rate": 8.54308402591384e-05,
"loss": 5.8436,
"step": 27850
},
{
"epoch": 1.2839490548454273,
"grad_norm": 1.8625519275665283,
"learning_rate": 8.537932112410522e-05,
"loss": 5.9255,
"step": 27900
},
{
"epoch": 1.2862501006707547,
"grad_norm": 1.655990481376648,
"learning_rate": 8.532772665326452e-05,
"loss": 5.821,
"step": 27950
},
{
"epoch": 1.2885511464960824,
"grad_norm": 2.096052408218384,
"learning_rate": 8.527605695648016e-05,
"loss": 5.9169,
"step": 28000
},
{
"epoch": 1.2885511464960824,
"eval_loss": 5.868961811065674,
"eval_runtime": 34.5303,
"eval_samples_per_second": 11.121,
"eval_steps_per_second": 5.56,
"eval_tts_loss": 8.222427133380569,
"step": 28000
},
{
"epoch": 1.29085219232141,
"grad_norm": 1.7661573886871338,
"learning_rate": 8.52243121437763e-05,
"loss": 5.8294,
"step": 28050
},
{
"epoch": 1.2931532381467377,
"grad_norm": 1.6404305696487427,
"learning_rate": 8.517249232533697e-05,
"loss": 5.8047,
"step": 28100
},
{
"epoch": 1.2954542839720653,
"grad_norm": 1.677655816078186,
"learning_rate": 8.512059761150597e-05,
"loss": 5.8431,
"step": 28150
},
{
"epoch": 1.297755329797393,
"grad_norm": 1.4250967502593994,
"learning_rate": 8.506862811278655e-05,
"loss": 5.8489,
"step": 28200
},
{
"epoch": 1.3000563756227206,
"grad_norm": 1.6657822132110596,
"learning_rate": 8.501658393984118e-05,
"loss": 5.8159,
"step": 28250
},
{
"epoch": 1.3023574214480482,
"grad_norm": 1.8584389686584473,
"learning_rate": 8.496446520349142e-05,
"loss": 5.8423,
"step": 28300
},
{
"epoch": 1.3046584672733759,
"grad_norm": 1.83997642993927,
"learning_rate": 8.491227201471752e-05,
"loss": 5.8523,
"step": 28350
},
{
"epoch": 1.3069595130987033,
"grad_norm": 1.6837650537490845,
"learning_rate": 8.48600044846583e-05,
"loss": 5.8619,
"step": 28400
},
{
"epoch": 1.309260558924031,
"grad_norm": 1.858853816986084,
"learning_rate": 8.480766272461091e-05,
"loss": 5.8936,
"step": 28450
},
{
"epoch": 1.3115616047493586,
"grad_norm": 1.9012060165405273,
"learning_rate": 8.47552468460305e-05,
"loss": 5.8445,
"step": 28500
},
{
"epoch": 1.3138626505746862,
"grad_norm": 1.7135539054870605,
"learning_rate": 8.47027569605301e-05,
"loss": 5.8654,
"step": 28550
},
{
"epoch": 1.3161636964000138,
"grad_norm": 1.8688374757766724,
"learning_rate": 8.465019317988029e-05,
"loss": 5.8217,
"step": 28600
},
{
"epoch": 1.3184647422253415,
"grad_norm": 1.7405012845993042,
"learning_rate": 8.459755561600906e-05,
"loss": 5.9095,
"step": 28650
},
{
"epoch": 1.320765788050669,
"grad_norm": 1.8719056844711304,
"learning_rate": 8.454484438100143e-05,
"loss": 5.8759,
"step": 28700
},
{
"epoch": 1.3230668338759966,
"grad_norm": 1.9225695133209229,
"learning_rate": 8.449205958709936e-05,
"loss": 5.8396,
"step": 28750
},
{
"epoch": 1.3253678797013242,
"grad_norm": 2.018826484680176,
"learning_rate": 8.44392013467014e-05,
"loss": 5.9031,
"step": 28800
},
{
"epoch": 1.3276689255266518,
"grad_norm": 1.7171567678451538,
"learning_rate": 8.438626977236253e-05,
"loss": 5.889,
"step": 28850
},
{
"epoch": 1.3299699713519795,
"grad_norm": 1.7654656171798706,
"learning_rate": 8.433326497679386e-05,
"loss": 5.8685,
"step": 28900
},
{
"epoch": 1.3322710171773071,
"grad_norm": 1.6935468912124634,
"learning_rate": 8.428018707286242e-05,
"loss": 5.7898,
"step": 28950
},
{
"epoch": 1.3345720630026348,
"grad_norm": 1.7551668882369995,
"learning_rate": 8.422703617359096e-05,
"loss": 5.8487,
"step": 29000
},
{
"epoch": 1.3345720630026348,
"eval_loss": 5.853118419647217,
"eval_runtime": 34.41,
"eval_samples_per_second": 11.16,
"eval_steps_per_second": 5.58,
"eval_tts_loss": 8.169616448533585,
"step": 29000
},
{
"epoch": 1.3368731088279624,
"grad_norm": 2.0664875507354736,
"learning_rate": 8.417381239215756e-05,
"loss": 5.8392,
"step": 29050
},
{
"epoch": 1.33917415465329,
"grad_norm": 1.8029478788375854,
"learning_rate": 8.41205158418956e-05,
"loss": 5.89,
"step": 29100
},
{
"epoch": 1.3414752004786175,
"grad_norm": 1.7948838472366333,
"learning_rate": 8.406714663629337e-05,
"loss": 5.8001,
"step": 29150
},
{
"epoch": 1.343776246303945,
"grad_norm": 1.8767132759094238,
"learning_rate": 8.401370488899385e-05,
"loss": 5.8363,
"step": 29200
},
{
"epoch": 1.3460772921292727,
"grad_norm": 2.1527884006500244,
"learning_rate": 8.396019071379453e-05,
"loss": 5.8177,
"step": 29250
},
{
"epoch": 1.3483783379546004,
"grad_norm": 1.8454744815826416,
"learning_rate": 8.390660422464709e-05,
"loss": 5.8218,
"step": 29300
},
{
"epoch": 1.350679383779928,
"grad_norm": 1.9853605031967163,
"learning_rate": 8.385294553565723e-05,
"loss": 5.8137,
"step": 29350
},
{
"epoch": 1.3529804296052557,
"grad_norm": 1.7211130857467651,
"learning_rate": 8.379921476108432e-05,
"loss": 5.8233,
"step": 29400
},
{
"epoch": 1.355281475430583,
"grad_norm": 1.9382165670394897,
"learning_rate": 8.374541201534132e-05,
"loss": 5.8378,
"step": 29450
},
{
"epoch": 1.3575825212559107,
"grad_norm": 1.642452597618103,
"learning_rate": 8.36915374129944e-05,
"loss": 5.819,
"step": 29500
},
{
"epoch": 1.3598835670812384,
"grad_norm": 1.6594552993774414,
"learning_rate": 8.363759106876273e-05,
"loss": 5.8647,
"step": 29550
},
{
"epoch": 1.362184612906566,
"grad_norm": 1.715219497680664,
"learning_rate": 8.358357309751823e-05,
"loss": 5.8451,
"step": 29600
},
{
"epoch": 1.3644856587318936,
"grad_norm": 2.000636100769043,
"learning_rate": 8.352948361428538e-05,
"loss": 5.8495,
"step": 29650
},
{
"epoch": 1.3667867045572213,
"grad_norm": 1.6294361352920532,
"learning_rate": 8.347532273424095e-05,
"loss": 5.8231,
"step": 29700
},
{
"epoch": 1.369087750382549,
"grad_norm": 1.901503324508667,
"learning_rate": 8.342109057271368e-05,
"loss": 5.8407,
"step": 29750
},
{
"epoch": 1.3713887962078766,
"grad_norm": 2.1467387676239014,
"learning_rate": 8.336678724518414e-05,
"loss": 5.8054,
"step": 29800
},
{
"epoch": 1.3736898420332042,
"grad_norm": 1.8240388631820679,
"learning_rate": 8.331241286728442e-05,
"loss": 5.8444,
"step": 29850
},
{
"epoch": 1.3759908878585316,
"grad_norm": 1.9498894214630127,
"learning_rate": 8.325796755479788e-05,
"loss": 5.878,
"step": 29900
},
{
"epoch": 1.3782919336838593,
"grad_norm": 1.9778785705566406,
"learning_rate": 8.3204542439554e-05,
"loss": 5.8187,
"step": 29950
},
{
"epoch": 1.380592979509187,
"grad_norm": 1.907461404800415,
"learning_rate": 8.314995701876072e-05,
"loss": 5.7968,
"step": 30000
},
{
"epoch": 1.380592979509187,
"eval_loss": 5.843836307525635,
"eval_runtime": 35.0246,
"eval_samples_per_second": 10.964,
"eval_steps_per_second": 5.482,
"eval_tts_loss": 8.287326435213084,
"step": 30000
},
{
"epoch": 1.3828940253345146,
"grad_norm": 1.7704988718032837,
"learning_rate": 8.309530100930993e-05,
"loss": 5.8421,
"step": 30050
},
{
"epoch": 1.3851950711598422,
"grad_norm": 1.8107035160064697,
"learning_rate": 8.304057452758469e-05,
"loss": 5.8385,
"step": 30100
},
{
"epoch": 1.3874961169851698,
"grad_norm": 1.9480881690979004,
"learning_rate": 8.298577769011817e-05,
"loss": 5.7843,
"step": 30150
},
{
"epoch": 1.3897971628104973,
"grad_norm": 1.8153917789459229,
"learning_rate": 8.29309106135933e-05,
"loss": 5.8488,
"step": 30200
},
{
"epoch": 1.392098208635825,
"grad_norm": 1.7765768766403198,
"learning_rate": 8.287597341484265e-05,
"loss": 5.7928,
"step": 30250
},
{
"epoch": 1.3943992544611525,
"grad_norm": 1.750103235244751,
"learning_rate": 8.282096621084802e-05,
"loss": 5.8483,
"step": 30300
},
{
"epoch": 1.3967003002864802,
"grad_norm": 1.730452299118042,
"learning_rate": 8.276588911874037e-05,
"loss": 5.7923,
"step": 30350
},
{
"epoch": 1.3990013461118078,
"grad_norm": 1.7049480676651,
"learning_rate": 8.27107422557994e-05,
"loss": 5.8009,
"step": 30400
},
{
"epoch": 1.4013023919371355,
"grad_norm": 1.7733477354049683,
"learning_rate": 8.26555257394534e-05,
"loss": 5.7999,
"step": 30450
},
{
"epoch": 1.403603437762463,
"grad_norm": 1.7672901153564453,
"learning_rate": 8.2600239687279e-05,
"loss": 5.8059,
"step": 30500
},
{
"epoch": 1.4059044835877907,
"grad_norm": 1.7486193180084229,
"learning_rate": 8.254488421700089e-05,
"loss": 5.8793,
"step": 30550
},
{
"epoch": 1.4082055294131184,
"grad_norm": 1.9197908639907837,
"learning_rate": 8.248945944649156e-05,
"loss": 5.8231,
"step": 30600
},
{
"epoch": 1.4105065752384458,
"grad_norm": 1.6453993320465088,
"learning_rate": 8.243396549377106e-05,
"loss": 5.9293,
"step": 30650
},
{
"epoch": 1.4128076210637734,
"grad_norm": 1.6542948484420776,
"learning_rate": 8.237840247700682e-05,
"loss": 5.8378,
"step": 30700
},
{
"epoch": 1.415108666889101,
"grad_norm": 1.7614498138427734,
"learning_rate": 8.232277051451323e-05,
"loss": 5.7473,
"step": 30750
},
{
"epoch": 1.4174097127144287,
"grad_norm": 1.842435598373413,
"learning_rate": 8.226706972475158e-05,
"loss": 5.8296,
"step": 30800
},
{
"epoch": 1.4197107585397564,
"grad_norm": 2.2376012802124023,
"learning_rate": 8.221130022632968e-05,
"loss": 5.9024,
"step": 30850
},
{
"epoch": 1.422011804365084,
"grad_norm": 1.8776355981826782,
"learning_rate": 8.215546213800163e-05,
"loss": 5.8814,
"step": 30900
},
{
"epoch": 1.4243128501904114,
"grad_norm": 1.7883305549621582,
"learning_rate": 8.209955557866764e-05,
"loss": 5.7814,
"step": 30950
},
{
"epoch": 1.426613896015739,
"grad_norm": 1.9317328929901123,
"learning_rate": 8.204358066737368e-05,
"loss": 5.8059,
"step": 31000
},
{
"epoch": 1.426613896015739,
"eval_loss": 5.836706638336182,
"eval_runtime": 35.3062,
"eval_samples_per_second": 10.876,
"eval_steps_per_second": 5.438,
"eval_tts_loss": 8.241923826243525,
"step": 31000
},
{
"epoch": 1.4289149418410667,
"grad_norm": 1.7335458993911743,
"learning_rate": 8.198753752331125e-05,
"loss": 5.863,
"step": 31050
},
{
"epoch": 1.4312159876663944,
"grad_norm": 2.1010098457336426,
"learning_rate": 8.193142626581717e-05,
"loss": 5.8385,
"step": 31100
},
{
"epoch": 1.433517033491722,
"grad_norm": 1.677432894706726,
"learning_rate": 8.18752470143733e-05,
"loss": 5.8509,
"step": 31150
},
{
"epoch": 1.4358180793170496,
"grad_norm": 1.8363360166549683,
"learning_rate": 8.181899988860629e-05,
"loss": 5.8043,
"step": 31200
},
{
"epoch": 1.4381191251423773,
"grad_norm": 1.850675106048584,
"learning_rate": 8.176268500828728e-05,
"loss": 5.7705,
"step": 31250
},
{
"epoch": 1.440420170967705,
"grad_norm": 1.644543170928955,
"learning_rate": 8.170630249333175e-05,
"loss": 5.773,
"step": 31300
},
{
"epoch": 1.4427212167930326,
"grad_norm": 2.056574821472168,
"learning_rate": 8.164985246379915e-05,
"loss": 5.9044,
"step": 31350
},
{
"epoch": 1.44502226261836,
"grad_norm": 1.9397711753845215,
"learning_rate": 8.159333503989268e-05,
"loss": 5.7941,
"step": 31400
},
{
"epoch": 1.4473233084436876,
"grad_norm": 1.8340749740600586,
"learning_rate": 8.153675034195911e-05,
"loss": 5.807,
"step": 31450
},
{
"epoch": 1.4496243542690153,
"grad_norm": 1.899632453918457,
"learning_rate": 8.148009849048841e-05,
"loss": 5.8034,
"step": 31500
},
{
"epoch": 1.451925400094343,
"grad_norm": 1.5245773792266846,
"learning_rate": 8.142337960611357e-05,
"loss": 5.7948,
"step": 31550
},
{
"epoch": 1.4542264459196705,
"grad_norm": 1.7319273948669434,
"learning_rate": 8.136659380961031e-05,
"loss": 5.7877,
"step": 31600
},
{
"epoch": 1.4565274917449982,
"grad_norm": 2.0220251083374023,
"learning_rate": 8.130974122189681e-05,
"loss": 5.7578,
"step": 31650
},
{
"epoch": 1.4588285375703256,
"grad_norm": 1.8379764556884766,
"learning_rate": 8.125282196403353e-05,
"loss": 5.8232,
"step": 31700
},
{
"epoch": 1.4611295833956532,
"grad_norm": 1.728026032447815,
"learning_rate": 8.119583615722279e-05,
"loss": 5.8564,
"step": 31750
},
{
"epoch": 1.4634306292209809,
"grad_norm": 1.9760767221450806,
"learning_rate": 8.113878392280877e-05,
"loss": 5.8495,
"step": 31800
},
{
"epoch": 1.4657316750463085,
"grad_norm": 1.6762609481811523,
"learning_rate": 8.108166538227698e-05,
"loss": 5.8023,
"step": 31850
},
{
"epoch": 1.4680327208716362,
"grad_norm": 1.9471601247787476,
"learning_rate": 8.102448065725414e-05,
"loss": 5.8703,
"step": 31900
},
{
"epoch": 1.4703337666969638,
"grad_norm": 1.9490435123443604,
"learning_rate": 8.096722986950795e-05,
"loss": 5.8204,
"step": 31950
},
{
"epoch": 1.4726348125222914,
"grad_norm": 1.9292274713516235,
"learning_rate": 8.090991314094671e-05,
"loss": 5.8201,
"step": 32000
},
{
"epoch": 1.4726348125222914,
"eval_loss": 5.825372219085693,
"eval_runtime": 32.5118,
"eval_samples_per_second": 11.811,
"eval_steps_per_second": 5.906,
"eval_tts_loss": 8.242703154573366,
"step": 32000
},
{
"epoch": 1.474935858347619,
"grad_norm": 1.584494948387146,
"learning_rate": 8.085253059361923e-05,
"loss": 5.8162,
"step": 32050
},
{
"epoch": 1.4772369041729467,
"grad_norm": 1.9945138692855835,
"learning_rate": 8.079508234971438e-05,
"loss": 5.8252,
"step": 32100
},
{
"epoch": 1.4795379499982741,
"grad_norm": 1.899518609046936,
"learning_rate": 8.073756853156096e-05,
"loss": 5.8475,
"step": 32150
},
{
"epoch": 1.4818389958236018,
"grad_norm": 1.690568208694458,
"learning_rate": 8.067998926162739e-05,
"loss": 5.7999,
"step": 32200
},
{
"epoch": 1.4841400416489294,
"grad_norm": 2.187692403793335,
"learning_rate": 8.062234466252149e-05,
"loss": 5.8321,
"step": 32250
},
{
"epoch": 1.486441087474257,
"grad_norm": 1.73610520362854,
"learning_rate": 8.056463485699019e-05,
"loss": 5.7885,
"step": 32300
},
{
"epoch": 1.4887421332995847,
"grad_norm": 1.7122118473052979,
"learning_rate": 8.05068599679192e-05,
"loss": 5.771,
"step": 32350
},
{
"epoch": 1.4910431791249124,
"grad_norm": 1.6952186822891235,
"learning_rate": 8.044902011833292e-05,
"loss": 5.8197,
"step": 32400
},
{
"epoch": 1.4933442249502398,
"grad_norm": 2.2066383361816406,
"learning_rate": 8.039111543139399e-05,
"loss": 5.7991,
"step": 32450
},
{
"epoch": 1.4956452707755674,
"grad_norm": 1.7441210746765137,
"learning_rate": 8.033314603040316e-05,
"loss": 5.7646,
"step": 32500
},
{
"epoch": 1.497946316600895,
"grad_norm": 1.9478139877319336,
"learning_rate": 8.027511203879896e-05,
"loss": 5.8002,
"step": 32550
},
{
"epoch": 1.5002473624262227,
"grad_norm": 1.7180629968643188,
"learning_rate": 8.02170135801575e-05,
"loss": 5.7598,
"step": 32600
},
{
"epoch": 1.5025484082515503,
"grad_norm": 1.8434754610061646,
"learning_rate": 8.015885077819206e-05,
"loss": 5.7933,
"step": 32650
},
{
"epoch": 1.504849454076878,
"grad_norm": 1.6318494081497192,
"learning_rate": 8.010062375675304e-05,
"loss": 5.8359,
"step": 32700
},
{
"epoch": 1.5071504999022056,
"grad_norm": 1.9151034355163574,
"learning_rate": 8.004233263982757e-05,
"loss": 5.7906,
"step": 32750
},
{
"epoch": 1.5094515457275333,
"grad_norm": 1.8816766738891602,
"learning_rate": 7.99839775515392e-05,
"loss": 5.758,
"step": 32800
},
{
"epoch": 1.511752591552861,
"grad_norm": 1.998426914215088,
"learning_rate": 7.992555861614776e-05,
"loss": 5.8639,
"step": 32850
},
{
"epoch": 1.5140536373781885,
"grad_norm": 1.747751235961914,
"learning_rate": 7.986707595804901e-05,
"loss": 5.82,
"step": 32900
},
{
"epoch": 1.516354683203516,
"grad_norm": 1.8607374429702759,
"learning_rate": 7.980852970177439e-05,
"loss": 5.7843,
"step": 32950
},
{
"epoch": 1.5186557290288436,
"grad_norm": 2.160118818283081,
"learning_rate": 7.97499199719908e-05,
"loss": 5.76,
"step": 33000
},
{
"epoch": 1.5186557290288436,
"eval_loss": 5.812412738800049,
"eval_runtime": 33.9931,
"eval_samples_per_second": 11.296,
"eval_steps_per_second": 5.648,
"eval_tts_loss": 8.298841324188611,
"step": 33000
},
{
"epoch": 1.5209567748541712,
"grad_norm": 1.9013315439224243,
"learning_rate": 7.969124689350027e-05,
"loss": 5.7762,
"step": 33050
},
{
"epoch": 1.5232578206794989,
"grad_norm": 2.26086163520813,
"learning_rate": 7.963251059123971e-05,
"loss": 5.7748,
"step": 33100
},
{
"epoch": 1.5255588665048263,
"grad_norm": 1.5581964254379272,
"learning_rate": 7.957371119028072e-05,
"loss": 5.785,
"step": 33150
},
{
"epoch": 1.527859912330154,
"grad_norm": 1.8346717357635498,
"learning_rate": 7.951484881582917e-05,
"loss": 5.8623,
"step": 33200
},
{
"epoch": 1.5301609581554816,
"grad_norm": 1.89267897605896,
"learning_rate": 7.945592359322508e-05,
"loss": 5.8608,
"step": 33250
},
{
"epoch": 1.5324620039808092,
"grad_norm": 1.7842216491699219,
"learning_rate": 7.939693564794233e-05,
"loss": 5.7873,
"step": 33300
},
{
"epoch": 1.5347630498061369,
"grad_norm": 1.5716899633407593,
"learning_rate": 7.933906672907357e-05,
"loss": 5.7573,
"step": 33350
},
{
"epoch": 1.5370640956314645,
"grad_norm": 2.020463466644287,
"learning_rate": 7.92799549635824e-05,
"loss": 5.7473,
"step": 33400
},
{
"epoch": 1.5393651414567922,
"grad_norm": 1.8860468864440918,
"learning_rate": 7.922078085011557e-05,
"loss": 5.7842,
"step": 33450
},
{
"epoch": 1.5416661872821198,
"grad_norm": 1.9550951719284058,
"learning_rate": 7.916154451467692e-05,
"loss": 5.7405,
"step": 33500
},
{
"epoch": 1.5439672331074474,
"grad_norm": 1.8158951997756958,
"learning_rate": 7.910224608340272e-05,
"loss": 5.7826,
"step": 33550
},
{
"epoch": 1.546268278932775,
"grad_norm": 1.9261797666549683,
"learning_rate": 7.904288568256155e-05,
"loss": 5.7983,
"step": 33600
},
{
"epoch": 1.5485693247581027,
"grad_norm": 1.7144373655319214,
"learning_rate": 7.898346343855388e-05,
"loss": 5.7314,
"step": 33650
},
{
"epoch": 1.5508703705834301,
"grad_norm": 1.9878826141357422,
"learning_rate": 7.892397947791191e-05,
"loss": 5.7692,
"step": 33700
},
{
"epoch": 1.5531714164087578,
"grad_norm": 1.7931249141693115,
"learning_rate": 7.886443392729923e-05,
"loss": 5.7466,
"step": 33750
},
{
"epoch": 1.5554724622340854,
"grad_norm": 2.2137207984924316,
"learning_rate": 7.880482691351059e-05,
"loss": 5.7228,
"step": 33800
},
{
"epoch": 1.5577735080594128,
"grad_norm": 1.9212007522583008,
"learning_rate": 7.874515856347162e-05,
"loss": 5.8418,
"step": 33850
},
{
"epoch": 1.5600745538847405,
"grad_norm": 1.9778438806533813,
"learning_rate": 7.868542900423854e-05,
"loss": 5.739,
"step": 33900
},
{
"epoch": 1.5623755997100681,
"grad_norm": 1.924148678779602,
"learning_rate": 7.862563836299796e-05,
"loss": 5.8036,
"step": 33950
},
{
"epoch": 1.5646766455353958,
"grad_norm": 1.5860093832015991,
"learning_rate": 7.856578676706649e-05,
"loss": 5.7642,
"step": 34000
},
{
"epoch": 1.5646766455353958,
"eval_loss": 5.801419734954834,
"eval_runtime": 35.559,
"eval_samples_per_second": 10.799,
"eval_steps_per_second": 5.399,
"eval_tts_loss": 8.180454099509982,
"step": 34000
},
{
"epoch": 1.5669776913607234,
"grad_norm": 1.8571122884750366,
"learning_rate": 7.850587434389056e-05,
"loss": 5.7714,
"step": 34050
},
{
"epoch": 1.569278737186051,
"grad_norm": 2.0070884227752686,
"learning_rate": 7.844590122104614e-05,
"loss": 5.778,
"step": 34100
},
{
"epoch": 1.5715797830113787,
"grad_norm": 1.6417818069458008,
"learning_rate": 7.838586752623844e-05,
"loss": 5.796,
"step": 34150
},
{
"epoch": 1.5738808288367063,
"grad_norm": 2.259556531906128,
"learning_rate": 7.83269758616054e-05,
"loss": 5.7928,
"step": 34200
},
{
"epoch": 1.576181874662034,
"grad_norm": 1.756239414215088,
"learning_rate": 7.826682261157087e-05,
"loss": 5.8136,
"step": 34250
},
{
"epoch": 1.5784829204873616,
"grad_norm": 1.6941827535629272,
"learning_rate": 7.820660917089842e-05,
"loss": 5.7911,
"step": 34300
},
{
"epoch": 1.5807839663126892,
"grad_norm": 1.8113830089569092,
"learning_rate": 7.814633566780493e-05,
"loss": 5.843,
"step": 34350
},
{
"epoch": 1.583085012138017,
"grad_norm": 1.920233130455017,
"learning_rate": 7.80860022306353e-05,
"loss": 5.8077,
"step": 34400
},
{
"epoch": 1.5853860579633443,
"grad_norm": 1.5396602153778076,
"learning_rate": 7.802560898786191e-05,
"loss": 5.8207,
"step": 34450
},
{
"epoch": 1.587687103788672,
"grad_norm": 1.6108042001724243,
"learning_rate": 7.796515606808463e-05,
"loss": 5.7094,
"step": 34500
},
{
"epoch": 1.5899881496139996,
"grad_norm": 1.8248738050460815,
"learning_rate": 7.79046436000303e-05,
"loss": 5.8258,
"step": 34550
},
{
"epoch": 1.592289195439327,
"grad_norm": 2.126722812652588,
"learning_rate": 7.784407171255261e-05,
"loss": 5.7698,
"step": 34600
},
{
"epoch": 1.5945902412646547,
"grad_norm": 1.9345743656158447,
"learning_rate": 7.778344053463177e-05,
"loss": 5.8193,
"step": 34650
},
{
"epoch": 1.5968912870899823,
"grad_norm": 1.651314377784729,
"learning_rate": 7.772275019537423e-05,
"loss": 5.7444,
"step": 34700
},
{
"epoch": 1.59919233291531,
"grad_norm": 1.9649804830551147,
"learning_rate": 7.766200082401241e-05,
"loss": 5.7637,
"step": 34750
},
{
"epoch": 1.6014933787406376,
"grad_norm": 1.9861186742782593,
"learning_rate": 7.760119254990448e-05,
"loss": 5.8492,
"step": 34800
},
{
"epoch": 1.6037944245659652,
"grad_norm": 1.8200690746307373,
"learning_rate": 7.754032550253394e-05,
"loss": 5.7746,
"step": 34850
},
{
"epoch": 1.6060954703912929,
"grad_norm": 1.9275963306427002,
"learning_rate": 7.747939981150955e-05,
"loss": 5.7782,
"step": 34900
},
{
"epoch": 1.6083965162166205,
"grad_norm": 2.0152270793914795,
"learning_rate": 7.741841560656487e-05,
"loss": 5.8126,
"step": 34950
},
{
"epoch": 1.6106975620419481,
"grad_norm": 2.15605092048645,
"learning_rate": 7.73573730175581e-05,
"loss": 5.7019,
"step": 35000
},
{
"epoch": 1.6106975620419481,
"eval_loss": 5.787054538726807,
"eval_runtime": 33.4679,
"eval_samples_per_second": 11.474,
"eval_steps_per_second": 5.737,
"eval_tts_loss": 8.289476949391128,
"step": 35000
},
{
"epoch": 1.6129986078672758,
"grad_norm": 1.8516534566879272,
"learning_rate": 7.729627217447174e-05,
"loss": 5.7534,
"step": 35050
},
{
"epoch": 1.6152996536926034,
"grad_norm": 1.9034392833709717,
"learning_rate": 7.72351132074123e-05,
"loss": 5.7853,
"step": 35100
},
{
"epoch": 1.617600699517931,
"grad_norm": 4.265834808349609,
"learning_rate": 7.717389624661014e-05,
"loss": 5.7785,
"step": 35150
},
{
"epoch": 1.6199017453432585,
"grad_norm": 1.859275460243225,
"learning_rate": 7.711262142241906e-05,
"loss": 5.7689,
"step": 35200
},
{
"epoch": 1.6222027911685861,
"grad_norm": 1.9681501388549805,
"learning_rate": 7.705128886531607e-05,
"loss": 5.7189,
"step": 35250
},
{
"epoch": 1.6245038369939138,
"grad_norm": 2.0118014812469482,
"learning_rate": 7.698989870590114e-05,
"loss": 5.8077,
"step": 35300
},
{
"epoch": 1.6268048828192412,
"grad_norm": 1.8570785522460938,
"learning_rate": 7.692845107489688e-05,
"loss": 5.7515,
"step": 35350
},
{
"epoch": 1.6291059286445688,
"grad_norm": 1.7671388387680054,
"learning_rate": 7.686694610314824e-05,
"loss": 5.7661,
"step": 35400
},
{
"epoch": 1.6314069744698965,
"grad_norm": 1.5981802940368652,
"learning_rate": 7.680538392162235e-05,
"loss": 5.7891,
"step": 35450
},
{
"epoch": 1.633708020295224,
"grad_norm": 1.7688723802566528,
"learning_rate": 7.67437646614081e-05,
"loss": 5.7618,
"step": 35500
},
{
"epoch": 1.6360090661205517,
"grad_norm": 1.6916351318359375,
"learning_rate": 7.668208845371596e-05,
"loss": 5.7752,
"step": 35550
},
{
"epoch": 1.6383101119458794,
"grad_norm": 1.6966232061386108,
"learning_rate": 7.662035542987765e-05,
"loss": 5.7361,
"step": 35600
},
{
"epoch": 1.640611157771207,
"grad_norm": 1.9174034595489502,
"learning_rate": 7.655856572134583e-05,
"loss": 5.6983,
"step": 35650
},
{
"epoch": 1.6429122035965347,
"grad_norm": 1.7266641855239868,
"learning_rate": 7.649671945969392e-05,
"loss": 5.7842,
"step": 35700
},
{
"epoch": 1.6452132494218623,
"grad_norm": 2.1633241176605225,
"learning_rate": 7.643481677661576e-05,
"loss": 5.7123,
"step": 35750
},
{
"epoch": 1.64751429524719,
"grad_norm": 1.7543927431106567,
"learning_rate": 7.63728578039253e-05,
"loss": 5.7678,
"step": 35800
},
{
"epoch": 1.6498153410725176,
"grad_norm": 1.7160446643829346,
"learning_rate": 7.631084267355637e-05,
"loss": 5.7955,
"step": 35850
},
{
"epoch": 1.6521163868978452,
"grad_norm": 1.841979742050171,
"learning_rate": 7.624877151756238e-05,
"loss": 5.7917,
"step": 35900
},
{
"epoch": 1.6544174327231727,
"grad_norm": 1.9420545101165771,
"learning_rate": 7.618664446811603e-05,
"loss": 5.7249,
"step": 35950
},
{
"epoch": 1.6567184785485003,
"grad_norm": 1.8593506813049316,
"learning_rate": 7.612446165750906e-05,
"loss": 5.8221,
"step": 36000
},
{
"epoch": 1.6567184785485003,
"eval_loss": 5.778265476226807,
"eval_runtime": 33.0355,
"eval_samples_per_second": 11.624,
"eval_steps_per_second": 5.812,
"eval_tts_loss": 8.30205424102598,
"step": 36000
},
{
"epoch": 1.659019524373828,
"grad_norm": 1.7334771156311035,
"learning_rate": 7.606222321815193e-05,
"loss": 5.7638,
"step": 36050
},
{
"epoch": 1.6613205701991554,
"grad_norm": 2.0367279052734375,
"learning_rate": 7.599992928257353e-05,
"loss": 5.7243,
"step": 36100
},
{
"epoch": 1.663621616024483,
"grad_norm": 2.257939577102661,
"learning_rate": 7.593757998342096e-05,
"loss": 5.8182,
"step": 36150
},
{
"epoch": 1.6659226618498106,
"grad_norm": 1.722194790840149,
"learning_rate": 7.587517545345923e-05,
"loss": 5.8015,
"step": 36200
},
{
"epoch": 1.6682237076751383,
"grad_norm": 1.7900599241256714,
"learning_rate": 7.58127158255709e-05,
"loss": 5.752,
"step": 36250
},
{
"epoch": 1.670524753500466,
"grad_norm": 1.823088526725769,
"learning_rate": 7.57502012327559e-05,
"loss": 5.6804,
"step": 36300
},
{
"epoch": 1.6728257993257936,
"grad_norm": 1.7726881504058838,
"learning_rate": 7.568763180813112e-05,
"loss": 5.7921,
"step": 36350
},
{
"epoch": 1.6751268451511212,
"grad_norm": 1.8228272199630737,
"learning_rate": 7.562500768493032e-05,
"loss": 5.8114,
"step": 36400
},
{
"epoch": 1.6774278909764488,
"grad_norm": 1.6336501836776733,
"learning_rate": 7.556232899650371e-05,
"loss": 5.8347,
"step": 36450
},
{
"epoch": 1.6797289368017765,
"grad_norm": 1.5864665508270264,
"learning_rate": 7.549959587631758e-05,
"loss": 5.7587,
"step": 36500
},
{
"epoch": 1.6820299826271041,
"grad_norm": 1.659406304359436,
"learning_rate": 7.543680845795427e-05,
"loss": 5.8137,
"step": 36550
},
{
"epoch": 1.6843310284524318,
"grad_norm": 1.773979902267456,
"learning_rate": 7.537396687511163e-05,
"loss": 5.8097,
"step": 36600
},
{
"epoch": 1.6866320742777594,
"grad_norm": 1.7780120372772217,
"learning_rate": 7.531107126160295e-05,
"loss": 5.7593,
"step": 36650
},
{
"epoch": 1.6889331201030868,
"grad_norm": 1.6934456825256348,
"learning_rate": 7.524812175135647e-05,
"loss": 5.7162,
"step": 36700
},
{
"epoch": 1.6912341659284145,
"grad_norm": 1.9389722347259521,
"learning_rate": 7.518511847841528e-05,
"loss": 5.7942,
"step": 36750
},
{
"epoch": 1.693535211753742,
"grad_norm": 1.7949825525283813,
"learning_rate": 7.512206157693686e-05,
"loss": 5.6908,
"step": 36800
},
{
"epoch": 1.6958362575790695,
"grad_norm": 1.7409274578094482,
"learning_rate": 7.5058951181193e-05,
"loss": 5.73,
"step": 36850
},
{
"epoch": 1.6981373034043972,
"grad_norm": 1.825876235961914,
"learning_rate": 7.49957874255693e-05,
"loss": 5.7233,
"step": 36900
},
{
"epoch": 1.7004383492297248,
"grad_norm": 1.8489861488342285,
"learning_rate": 7.493257044456502e-05,
"loss": 5.8163,
"step": 36950
},
{
"epoch": 1.7027393950550525,
"grad_norm": 1.8161964416503906,
"learning_rate": 7.486930037279278e-05,
"loss": 5.7279,
"step": 37000
},
{
"epoch": 1.7027393950550525,
"eval_loss": 5.769525051116943,
"eval_runtime": 34.5593,
"eval_samples_per_second": 11.111,
"eval_steps_per_second": 5.556,
"eval_tts_loss": 8.327279314398375,
"step": 37000
},
{
"epoch": 1.70504044088038,
"grad_norm": 1.8013944625854492,
"learning_rate": 7.480597734497821e-05,
"loss": 5.7584,
"step": 37050
},
{
"epoch": 1.7073414867057077,
"grad_norm": 2.0919711589813232,
"learning_rate": 7.474260149595973e-05,
"loss": 5.7107,
"step": 37100
},
{
"epoch": 1.7096425325310354,
"grad_norm": 1.646823763847351,
"learning_rate": 7.467917296068822e-05,
"loss": 5.7999,
"step": 37150
},
{
"epoch": 1.711943578356363,
"grad_norm": 1.934715986251831,
"learning_rate": 7.461569187422675e-05,
"loss": 5.7851,
"step": 37200
},
{
"epoch": 1.7142446241816907,
"grad_norm": 1.8576382398605347,
"learning_rate": 7.455215837175031e-05,
"loss": 5.7287,
"step": 37250
},
{
"epoch": 1.7165456700070183,
"grad_norm": 1.8678678274154663,
"learning_rate": 7.448857258854549e-05,
"loss": 5.7543,
"step": 37300
},
{
"epoch": 1.718846715832346,
"grad_norm": 2.078073501586914,
"learning_rate": 7.442493466001019e-05,
"loss": 5.7417,
"step": 37350
},
{
"epoch": 1.7211477616576734,
"grad_norm": 1.7547941207885742,
"learning_rate": 7.436124472165338e-05,
"loss": 5.7377,
"step": 37400
},
{
"epoch": 1.723448807483001,
"grad_norm": 1.694883942604065,
"learning_rate": 7.429750290909476e-05,
"loss": 5.7243,
"step": 37450
},
{
"epoch": 1.7257498533083286,
"grad_norm": 1.8052371740341187,
"learning_rate": 7.423370935806447e-05,
"loss": 5.7679,
"step": 37500
},
{
"epoch": 1.7280508991336563,
"grad_norm": 1.943966269493103,
"learning_rate": 7.416986420440288e-05,
"loss": 5.748,
"step": 37550
},
{
"epoch": 1.7303519449589837,
"grad_norm": 1.619152307510376,
"learning_rate": 7.410596758406015e-05,
"loss": 5.7693,
"step": 37600
},
{
"epoch": 1.7326529907843113,
"grad_norm": 1.9282474517822266,
"learning_rate": 7.404201963309611e-05,
"loss": 5.7253,
"step": 37650
},
{
"epoch": 1.734954036609639,
"grad_norm": 2.065870523452759,
"learning_rate": 7.397802048767987e-05,
"loss": 5.7773,
"step": 37700
},
{
"epoch": 1.7372550824349666,
"grad_norm": 2.3351128101348877,
"learning_rate": 7.391397028408956e-05,
"loss": 5.7022,
"step": 37750
},
{
"epoch": 1.7395561282602943,
"grad_norm": 1.6631519794464111,
"learning_rate": 7.384986915871199e-05,
"loss": 5.7305,
"step": 37800
},
{
"epoch": 1.741857174085622,
"grad_norm": 1.9036861658096313,
"learning_rate": 7.378571724804247e-05,
"loss": 5.7526,
"step": 37850
},
{
"epoch": 1.7441582199109495,
"grad_norm": 1.6608564853668213,
"learning_rate": 7.372151468868434e-05,
"loss": 5.7262,
"step": 37900
},
{
"epoch": 1.7464592657362772,
"grad_norm": 2.1009271144866943,
"learning_rate": 7.365726161734895e-05,
"loss": 5.7258,
"step": 37950
},
{
"epoch": 1.7487603115616048,
"grad_norm": 2.0036394596099854,
"learning_rate": 7.359295817085507e-05,
"loss": 5.7435,
"step": 38000
},
{
"epoch": 1.7487603115616048,
"eval_loss": 5.767555236816406,
"eval_runtime": 35.2579,
"eval_samples_per_second": 10.891,
"eval_steps_per_second": 5.446,
"eval_tts_loss": 8.310344913623878,
"step": 38000
},
{
"epoch": 1.7510613573869325,
"grad_norm": 1.9608486890792847,
"learning_rate": 7.35286044861288e-05,
"loss": 5.7333,
"step": 38050
},
{
"epoch": 1.7533624032122601,
"grad_norm": 1.794594168663025,
"learning_rate": 7.346420070020322e-05,
"loss": 5.7214,
"step": 38100
},
{
"epoch": 1.7556634490375875,
"grad_norm": 1.8481963872909546,
"learning_rate": 7.339974695021805e-05,
"loss": 5.707,
"step": 38150
},
{
"epoch": 1.7579644948629152,
"grad_norm": 1.9794644117355347,
"learning_rate": 7.333524337341946e-05,
"loss": 5.6877,
"step": 38200
},
{
"epoch": 1.7602655406882428,
"grad_norm": 1.830579161643982,
"learning_rate": 7.327069010715966e-05,
"loss": 5.784,
"step": 38250
},
{
"epoch": 1.7625665865135705,
"grad_norm": 2.2297370433807373,
"learning_rate": 7.320608728889674e-05,
"loss": 5.7061,
"step": 38300
},
{
"epoch": 1.7648676323388979,
"grad_norm": 1.972904920578003,
"learning_rate": 7.314143505619419e-05,
"loss": 5.7295,
"step": 38350
},
{
"epoch": 1.7671686781642255,
"grad_norm": 1.9099104404449463,
"learning_rate": 7.307673354672089e-05,
"loss": 5.7593,
"step": 38400
},
{
"epoch": 1.7694697239895532,
"grad_norm": 1.6860655546188354,
"learning_rate": 7.30119828982505e-05,
"loss": 5.798,
"step": 38450
},
{
"epoch": 1.7717707698148808,
"grad_norm": 1.81305992603302,
"learning_rate": 7.294718324866138e-05,
"loss": 5.7393,
"step": 38500
},
{
"epoch": 1.7740718156402084,
"grad_norm": 1.9312961101531982,
"learning_rate": 7.28823347359362e-05,
"loss": 5.721,
"step": 38550
},
{
"epoch": 1.776372861465536,
"grad_norm": 1.8625140190124512,
"learning_rate": 7.281743749816174e-05,
"loss": 5.7543,
"step": 38600
},
{
"epoch": 1.7786739072908637,
"grad_norm": 1.628096342086792,
"learning_rate": 7.275249167352847e-05,
"loss": 5.7525,
"step": 38650
},
{
"epoch": 1.7809749531161914,
"grad_norm": 1.6411455869674683,
"learning_rate": 7.268749740033035e-05,
"loss": 5.7497,
"step": 38700
},
{
"epoch": 1.783275998941519,
"grad_norm": 2.187689781188965,
"learning_rate": 7.262245481696446e-05,
"loss": 5.7272,
"step": 38750
},
{
"epoch": 1.7855770447668466,
"grad_norm": 1.7856398820877075,
"learning_rate": 7.255736406193083e-05,
"loss": 5.7525,
"step": 38800
},
{
"epoch": 1.7878780905921743,
"grad_norm": 1.9144530296325684,
"learning_rate": 7.249222527383201e-05,
"loss": 5.7671,
"step": 38850
},
{
"epoch": 1.7901791364175017,
"grad_norm": 1.8890655040740967,
"learning_rate": 7.242703859137286e-05,
"loss": 5.7387,
"step": 38900
},
{
"epoch": 1.7924801822428293,
"grad_norm": 1.8778235912322998,
"learning_rate": 7.236180415336017e-05,
"loss": 5.748,
"step": 38950
},
{
"epoch": 1.794781228068157,
"grad_norm": 2.0300331115722656,
"learning_rate": 7.229652209870249e-05,
"loss": 5.7996,
"step": 39000
},
{
"epoch": 1.794781228068157,
"eval_loss": 5.750644207000732,
"eval_runtime": 34.215,
"eval_samples_per_second": 11.223,
"eval_steps_per_second": 5.612,
"eval_tts_loss": 8.316885493755828,
"step": 39000
},
{
"epoch": 1.7970822738934846,
"grad_norm": 1.9163450002670288,
"learning_rate": 7.223119256640974e-05,
"loss": 5.7187,
"step": 39050
},
{
"epoch": 1.799383319718812,
"grad_norm": 1.8101003170013428,
"learning_rate": 7.216581569559288e-05,
"loss": 5.7177,
"step": 39100
},
{
"epoch": 1.8016843655441397,
"grad_norm": 1.856192946434021,
"learning_rate": 7.210039162546378e-05,
"loss": 5.7414,
"step": 39150
},
{
"epoch": 1.8039854113694673,
"grad_norm": 2.071753740310669,
"learning_rate": 7.203492049533472e-05,
"loss": 5.7616,
"step": 39200
},
{
"epoch": 1.806286457194795,
"grad_norm": 2.085846185684204,
"learning_rate": 7.196940244461824e-05,
"loss": 5.6907,
"step": 39250
},
{
"epoch": 1.8085875030201226,
"grad_norm": 1.751923680305481,
"learning_rate": 7.190383761282677e-05,
"loss": 5.7245,
"step": 39300
},
{
"epoch": 1.8108885488454503,
"grad_norm": 2.07672119140625,
"learning_rate": 7.183822613957234e-05,
"loss": 5.7792,
"step": 39350
},
{
"epoch": 1.813189594670778,
"grad_norm": 2.1125259399414062,
"learning_rate": 7.177256816456637e-05,
"loss": 5.7704,
"step": 39400
},
{
"epoch": 1.8154906404961055,
"grad_norm": 1.7339011430740356,
"learning_rate": 7.170686382761917e-05,
"loss": 5.7657,
"step": 39450
},
{
"epoch": 1.8177916863214332,
"grad_norm": 1.9891151189804077,
"learning_rate": 7.164111326863991e-05,
"loss": 5.7256,
"step": 39500
},
{
"epoch": 1.8200927321467608,
"grad_norm": 1.7742396593093872,
"learning_rate": 7.157531662763612e-05,
"loss": 5.7252,
"step": 39550
},
{
"epoch": 1.8223937779720885,
"grad_norm": 2.2981250286102295,
"learning_rate": 7.150947404471342e-05,
"loss": 5.8321,
"step": 39600
},
{
"epoch": 1.8246948237974159,
"grad_norm": 1.7247025966644287,
"learning_rate": 7.144358566007531e-05,
"loss": 5.7159,
"step": 39650
},
{
"epoch": 1.8269958696227435,
"grad_norm": 2.1468214988708496,
"learning_rate": 7.137765161402285e-05,
"loss": 5.6725,
"step": 39700
},
{
"epoch": 1.8292969154480712,
"grad_norm": 1.9942362308502197,
"learning_rate": 7.131167204695424e-05,
"loss": 5.6895,
"step": 39750
},
{
"epoch": 1.8315979612733988,
"grad_norm": 1.7421067953109741,
"learning_rate": 7.124564709936465e-05,
"loss": 5.7035,
"step": 39800
},
{
"epoch": 1.8338990070987262,
"grad_norm": 1.6837613582611084,
"learning_rate": 7.11795769118459e-05,
"loss": 5.691,
"step": 39850
},
{
"epoch": 1.8362000529240539,
"grad_norm": 1.8079731464385986,
"learning_rate": 7.111346162508615e-05,
"loss": 5.7645,
"step": 39900
},
{
"epoch": 1.8385010987493815,
"grad_norm": 1.8383841514587402,
"learning_rate": 7.104730137986956e-05,
"loss": 5.8337,
"step": 39950
},
{
"epoch": 1.8408021445747091,
"grad_norm": 1.6859551668167114,
"learning_rate": 7.098109631707604e-05,
"loss": 5.6273,
"step": 40000
},
{
"epoch": 1.8408021445747091,
"eval_loss": 5.748322010040283,
"eval_runtime": 34.6862,
"eval_samples_per_second": 11.071,
"eval_steps_per_second": 5.535,
"eval_tts_loss": 8.36463535504836,
"step": 40000
},
{
"epoch": 1.8431031904000368,
"grad_norm": 1.8230650424957275,
"learning_rate": 7.091484657768092e-05,
"loss": 5.6678,
"step": 40050
},
{
"epoch": 1.8454042362253644,
"grad_norm": 2.0616815090179443,
"learning_rate": 7.084855230275466e-05,
"loss": 5.7542,
"step": 40100
},
{
"epoch": 1.847705282050692,
"grad_norm": 1.8527641296386719,
"learning_rate": 7.078221363346263e-05,
"loss": 5.6728,
"step": 40150
},
{
"epoch": 1.8500063278760197,
"grad_norm": 1.6423956155776978,
"learning_rate": 7.071583071106462e-05,
"loss": 5.7776,
"step": 40200
},
{
"epoch": 1.8523073737013473,
"grad_norm": 2.137951374053955,
"learning_rate": 7.064940367691471e-05,
"loss": 5.7493,
"step": 40250
},
{
"epoch": 1.854608419526675,
"grad_norm": 2.6266961097717285,
"learning_rate": 7.05829326724609e-05,
"loss": 5.7793,
"step": 40300
},
{
"epoch": 1.8569094653520026,
"grad_norm": 2.0090668201446533,
"learning_rate": 7.051641783924483e-05,
"loss": 5.6379,
"step": 40350
},
{
"epoch": 1.85921051117733,
"grad_norm": 1.942234754562378,
"learning_rate": 7.044985931890144e-05,
"loss": 5.805,
"step": 40400
},
{
"epoch": 1.8615115570026577,
"grad_norm": 1.8053412437438965,
"learning_rate": 7.038325725315874e-05,
"loss": 5.7068,
"step": 40450
},
{
"epoch": 1.8638126028279853,
"grad_norm": 2.0646331310272217,
"learning_rate": 7.031661178383741e-05,
"loss": 5.6697,
"step": 40500
},
{
"epoch": 1.866113648653313,
"grad_norm": 1.813453197479248,
"learning_rate": 7.025125725051633e-05,
"loss": 5.6648,
"step": 40550
},
{
"epoch": 1.8684146944786404,
"grad_norm": 1.700060248374939,
"learning_rate": 7.018586130234475e-05,
"loss": 5.7569,
"step": 40600
},
{
"epoch": 1.870715740303968,
"grad_norm": 2.1247808933258057,
"learning_rate": 7.011908819050767e-05,
"loss": 5.6719,
"step": 40650
},
{
"epoch": 1.8730167861292957,
"grad_norm": 2.1463191509246826,
"learning_rate": 7.005227223760671e-05,
"loss": 5.761,
"step": 40700
},
{
"epoch": 1.8753178319546233,
"grad_norm": 1.92280912399292,
"learning_rate": 6.998541358591804e-05,
"loss": 5.633,
"step": 40750
},
{
"epoch": 1.877618877779951,
"grad_norm": 1.797415018081665,
"learning_rate": 6.991851237780871e-05,
"loss": 5.7209,
"step": 40800
},
{
"epoch": 1.8799199236052786,
"grad_norm": 1.9149625301361084,
"learning_rate": 6.985156875573646e-05,
"loss": 5.7149,
"step": 40850
},
{
"epoch": 1.8822209694306062,
"grad_norm": 2.1698825359344482,
"learning_rate": 6.978458286224925e-05,
"loss": 5.7347,
"step": 40900
},
{
"epoch": 1.8845220152559339,
"grad_norm": 2.2453010082244873,
"learning_rate": 6.97188958123694e-05,
"loss": 5.6696,
"step": 40950
},
{
"epoch": 1.8868230610812615,
"grad_norm": 1.8774462938308716,
"learning_rate": 6.965182664237777e-05,
"loss": 5.6944,
"step": 41000
},
{
"epoch": 1.8868230610812615,
"eval_loss": 5.7265706062316895,
"eval_runtime": 35.1085,
"eval_samples_per_second": 10.938,
"eval_steps_per_second": 5.469,
"eval_tts_loss": 8.331914464921848,
"step": 41000
},
{
"epoch": 1.8891241069065892,
"grad_norm": 2.033698797225952,
"learning_rate": 6.958471562629687e-05,
"loss": 5.7328,
"step": 41050
},
{
"epoch": 1.8914251527319168,
"grad_norm": 1.8300659656524658,
"learning_rate": 6.951756290703115e-05,
"loss": 5.7555,
"step": 41100
},
{
"epoch": 1.8937261985572442,
"grad_norm": 1.8035775423049927,
"learning_rate": 6.945036862757388e-05,
"loss": 5.7462,
"step": 41150
},
{
"epoch": 1.8960272443825719,
"grad_norm": 2.0775158405303955,
"learning_rate": 6.938313293100682e-05,
"loss": 5.6722,
"step": 41200
},
{
"epoch": 1.8983282902078995,
"grad_norm": 1.7843129634857178,
"learning_rate": 6.931585596049988e-05,
"loss": 5.6753,
"step": 41250
},
{
"epoch": 1.9006293360332271,
"grad_norm": 2.2548415660858154,
"learning_rate": 6.924853785931092e-05,
"loss": 5.7172,
"step": 41300
},
{
"epoch": 1.9029303818585546,
"grad_norm": 1.7711260318756104,
"learning_rate": 6.918117877078535e-05,
"loss": 5.75,
"step": 41350
},
{
"epoch": 1.9052314276838822,
"grad_norm": 1.7503825426101685,
"learning_rate": 6.911377883835588e-05,
"loss": 5.6779,
"step": 41400
},
{
"epoch": 1.9075324735092098,
"grad_norm": 1.8140344619750977,
"learning_rate": 6.904633820554216e-05,
"loss": 5.6786,
"step": 41450
},
{
"epoch": 1.9098335193345375,
"grad_norm": 1.9237339496612549,
"learning_rate": 6.897885701595052e-05,
"loss": 5.6965,
"step": 41500
},
{
"epoch": 1.9121345651598651,
"grad_norm": 2.2851338386535645,
"learning_rate": 6.891133541327363e-05,
"loss": 5.6899,
"step": 41550
},
{
"epoch": 1.9144356109851928,
"grad_norm": 1.6410493850708008,
"learning_rate": 6.884377354129029e-05,
"loss": 5.7291,
"step": 41600
},
{
"epoch": 1.9167366568105204,
"grad_norm": 1.809895634651184,
"learning_rate": 6.877617154386498e-05,
"loss": 5.7246,
"step": 41650
},
{
"epoch": 1.919037702635848,
"grad_norm": 2.089245557785034,
"learning_rate": 6.87085295649476e-05,
"loss": 5.7542,
"step": 41700
},
{
"epoch": 1.9213387484611757,
"grad_norm": 2.0820837020874023,
"learning_rate": 6.864084774857325e-05,
"loss": 5.7559,
"step": 41750
},
{
"epoch": 1.9236397942865033,
"grad_norm": 1.8039745092391968,
"learning_rate": 6.857312623886185e-05,
"loss": 5.7623,
"step": 41800
},
{
"epoch": 1.925940840111831,
"grad_norm": 2.192228317260742,
"learning_rate": 6.85053651800178e-05,
"loss": 5.7104,
"step": 41850
},
{
"epoch": 1.9282418859371584,
"grad_norm": 2.2128982543945312,
"learning_rate": 6.843756471632974e-05,
"loss": 5.7603,
"step": 41900
},
{
"epoch": 1.930542931762486,
"grad_norm": 1.7217659950256348,
"learning_rate": 6.836972499217023e-05,
"loss": 5.7817,
"step": 41950
},
{
"epoch": 1.9328439775878137,
"grad_norm": 1.9377886056900024,
"learning_rate": 6.83018461519954e-05,
"loss": 5.6959,
"step": 42000
},
{
"epoch": 1.9328439775878137,
"eval_loss": 5.725087642669678,
"eval_runtime": 34.6969,
"eval_samples_per_second": 11.067,
"eval_steps_per_second": 5.534,
"eval_tts_loss": 8.384733506297332,
"step": 42000
},
{
"epoch": 1.9351450234131413,
"grad_norm": 1.861262559890747,
"learning_rate": 6.82339283403447e-05,
"loss": 5.7196,
"step": 42050
},
{
"epoch": 1.9374460692384687,
"grad_norm": 1.799297571182251,
"learning_rate": 6.816597170184054e-05,
"loss": 5.6952,
"step": 42100
},
{
"epoch": 1.9397471150637964,
"grad_norm": 2.116269588470459,
"learning_rate": 6.809797638118805e-05,
"loss": 5.7379,
"step": 42150
},
{
"epoch": 1.942048160889124,
"grad_norm": 1.9920415878295898,
"learning_rate": 6.802994252317467e-05,
"loss": 5.5964,
"step": 42200
},
{
"epoch": 1.9443492067144517,
"grad_norm": 1.7967939376831055,
"learning_rate": 6.796187027266995e-05,
"loss": 5.6312,
"step": 42250
},
{
"epoch": 1.9466502525397793,
"grad_norm": 2.3710741996765137,
"learning_rate": 6.789375977462515e-05,
"loss": 5.6975,
"step": 42300
},
{
"epoch": 1.948951298365107,
"grad_norm": 2.0796024799346924,
"learning_rate": 6.782561117407303e-05,
"loss": 5.7446,
"step": 42350
},
{
"epoch": 1.9512523441904346,
"grad_norm": 2.202712297439575,
"learning_rate": 6.775742461612742e-05,
"loss": 5.7564,
"step": 42400
},
{
"epoch": 1.9535533900157622,
"grad_norm": 1.7688695192337036,
"learning_rate": 6.768920024598303e-05,
"loss": 5.7891,
"step": 42450
},
{
"epoch": 1.9558544358410899,
"grad_norm": 2.2628657817840576,
"learning_rate": 6.762093820891502e-05,
"loss": 5.6503,
"step": 42500
},
{
"epoch": 1.9581554816664175,
"grad_norm": 2.0447051525115967,
"learning_rate": 6.755263865027885e-05,
"loss": 5.6383,
"step": 42550
},
{
"epoch": 1.9604565274917451,
"grad_norm": 1.6910145282745361,
"learning_rate": 6.748430171550979e-05,
"loss": 5.6793,
"step": 42600
},
{
"epoch": 1.9627575733170726,
"grad_norm": 1.6950970888137817,
"learning_rate": 6.741592755012276e-05,
"loss": 5.7613,
"step": 42650
},
{
"epoch": 1.9650586191424002,
"grad_norm": 2.0867919921875,
"learning_rate": 6.734751629971192e-05,
"loss": 5.7127,
"step": 42700
},
{
"epoch": 1.9673596649677279,
"grad_norm": 2.344790458679199,
"learning_rate": 6.727906810995041e-05,
"loss": 5.7208,
"step": 42750
},
{
"epoch": 1.9696607107930555,
"grad_norm": 2.1740739345550537,
"learning_rate": 6.721058312659001e-05,
"loss": 5.7091,
"step": 42800
},
{
"epoch": 1.971961756618383,
"grad_norm": 2.326795816421509,
"learning_rate": 6.714206149546088e-05,
"loss": 5.7289,
"step": 42850
},
{
"epoch": 1.9742628024437106,
"grad_norm": 1.5980671644210815,
"learning_rate": 6.707350336247122e-05,
"loss": 5.7832,
"step": 42900
},
{
"epoch": 1.9765638482690382,
"grad_norm": 2.2809202671051025,
"learning_rate": 6.700490887360692e-05,
"loss": 5.7059,
"step": 42950
},
{
"epoch": 1.9788648940943658,
"grad_norm": 1.9004167318344116,
"learning_rate": 6.69362781749313e-05,
"loss": 5.6628,
"step": 43000
},
{
"epoch": 1.9788648940943658,
"eval_loss": 5.719640254974365,
"eval_runtime": 34.4151,
"eval_samples_per_second": 11.158,
"eval_steps_per_second": 5.579,
"eval_tts_loss": 8.411078965968017,
"step": 43000
},
{
"epoch": 1.9811659399196935,
"grad_norm": 1.7471081018447876,
"learning_rate": 6.68676114125848e-05,
"loss": 5.6815,
"step": 43050
},
{
"epoch": 1.9834669857450211,
"grad_norm": 1.7803364992141724,
"learning_rate": 6.679890873278463e-05,
"loss": 5.7429,
"step": 43100
},
{
"epoch": 1.9857680315703488,
"grad_norm": 1.9929248094558716,
"learning_rate": 6.67301702818245e-05,
"loss": 5.6513,
"step": 43150
},
{
"epoch": 1.9880690773956764,
"grad_norm": 2.0318520069122314,
"learning_rate": 6.666139620607426e-05,
"loss": 5.7224,
"step": 43200
},
{
"epoch": 1.990370123221004,
"grad_norm": 1.9984978437423706,
"learning_rate": 6.659258665197966e-05,
"loss": 5.6904,
"step": 43250
},
{
"epoch": 1.9926711690463317,
"grad_norm": 1.683851718902588,
"learning_rate": 6.652374176606196e-05,
"loss": 5.6555,
"step": 43300
},
{
"epoch": 1.9949722148716593,
"grad_norm": 1.9689456224441528,
"learning_rate": 6.64548616949177e-05,
"loss": 5.6529,
"step": 43350
},
{
"epoch": 1.9972732606969867,
"grad_norm": 2.1004714965820312,
"learning_rate": 6.638594658521831e-05,
"loss": 5.7275,
"step": 43400
},
{
"epoch": 1.9995743065223144,
"grad_norm": 1.9687811136245728,
"learning_rate": 6.631699658370982e-05,
"loss": 5.6693,
"step": 43450
},
{
"epoch": 2.001840836660262,
"grad_norm": 2.143148899078369,
"learning_rate": 6.624801183721259e-05,
"loss": 5.5271,
"step": 43500
},
{
"epoch": 2.0041418824855897,
"grad_norm": 2.1372528076171875,
"learning_rate": 6.617899249262091e-05,
"loss": 5.536,
"step": 43550
},
{
"epoch": 2.0064429283109173,
"grad_norm": 2.233785629272461,
"learning_rate": 6.610993869690284e-05,
"loss": 5.5525,
"step": 43600
},
{
"epoch": 2.008743974136245,
"grad_norm": 1.8795326948165894,
"learning_rate": 6.604085059709968e-05,
"loss": 5.5732,
"step": 43650
},
{
"epoch": 2.0110450199615726,
"grad_norm": 1.8018789291381836,
"learning_rate": 6.597172834032587e-05,
"loss": 5.5204,
"step": 43700
},
{
"epoch": 2.0133460657869002,
"grad_norm": 1.9285874366760254,
"learning_rate": 6.590257207376853e-05,
"loss": 5.5367,
"step": 43750
},
{
"epoch": 2.015647111612228,
"grad_norm": 1.70220148563385,
"learning_rate": 6.583338194468724e-05,
"loss": 5.5598,
"step": 43800
},
{
"epoch": 2.0179481574375555,
"grad_norm": 1.989536166191101,
"learning_rate": 6.576415810041365e-05,
"loss": 5.5385,
"step": 43850
},
{
"epoch": 2.020249203262883,
"grad_norm": 1.9194942712783813,
"learning_rate": 6.569490068835121e-05,
"loss": 5.5637,
"step": 43900
},
{
"epoch": 2.022550249088211,
"grad_norm": 2.056601047515869,
"learning_rate": 6.562560985597485e-05,
"loss": 5.4488,
"step": 43950
},
{
"epoch": 2.024851294913538,
"grad_norm": 1.9041422605514526,
"learning_rate": 6.55562857508307e-05,
"loss": 5.5098,
"step": 44000
},
{
"epoch": 2.024851294913538,
"eval_loss": 5.726797580718994,
"eval_runtime": 35.4492,
"eval_samples_per_second": 10.832,
"eval_steps_per_second": 5.416,
"eval_tts_loss": 8.615723402425886,
"step": 44000
},
{
"epoch": 2.0271523407388656,
"grad_norm": 1.8542007207870483,
"learning_rate": 6.548692852053569e-05,
"loss": 5.4843,
"step": 44050
},
{
"epoch": 2.0294533865641933,
"grad_norm": 1.984620213508606,
"learning_rate": 6.541753831277729e-05,
"loss": 5.5621,
"step": 44100
},
{
"epoch": 2.031754432389521,
"grad_norm": 1.7690690755844116,
"learning_rate": 6.534811527531323e-05,
"loss": 5.5137,
"step": 44150
},
{
"epoch": 2.0340554782148486,
"grad_norm": 1.8489230871200562,
"learning_rate": 6.527865955597113e-05,
"loss": 5.5089,
"step": 44200
},
{
"epoch": 2.036356524040176,
"grad_norm": 1.9018126726150513,
"learning_rate": 6.520917130264814e-05,
"loss": 5.5108,
"step": 44250
},
{
"epoch": 2.038657569865504,
"grad_norm": 1.6569920778274536,
"learning_rate": 6.51396506633108e-05,
"loss": 5.4855,
"step": 44300
},
{
"epoch": 2.0409586156908315,
"grad_norm": 1.9102799892425537,
"learning_rate": 6.507009778599454e-05,
"loss": 5.5066,
"step": 44350
},
{
"epoch": 2.043259661516159,
"grad_norm": 1.9131476879119873,
"learning_rate": 6.500051281880343e-05,
"loss": 5.5734,
"step": 44400
},
{
"epoch": 2.0455607073414868,
"grad_norm": 1.878501296043396,
"learning_rate": 6.493089590990993e-05,
"loss": 5.4392,
"step": 44450
},
{
"epoch": 2.0478617531668144,
"grad_norm": 1.891527771949768,
"learning_rate": 6.486124720755444e-05,
"loss": 5.5329,
"step": 44500
},
{
"epoch": 2.050162798992142,
"grad_norm": 2.036492109298706,
"learning_rate": 6.47915668600451e-05,
"loss": 5.5437,
"step": 44550
},
{
"epoch": 2.0524638448174697,
"grad_norm": 2.31026029586792,
"learning_rate": 6.472185501575746e-05,
"loss": 5.5779,
"step": 44600
},
{
"epoch": 2.0547648906427973,
"grad_norm": 2.0821564197540283,
"learning_rate": 6.465211182313409e-05,
"loss": 5.5374,
"step": 44650
},
{
"epoch": 2.057065936468125,
"grad_norm": 1.7920081615447998,
"learning_rate": 6.458233743068436e-05,
"loss": 5.5222,
"step": 44700
},
{
"epoch": 2.059366982293452,
"grad_norm": 1.7931830883026123,
"learning_rate": 6.4512531986984e-05,
"loss": 5.4864,
"step": 44750
},
{
"epoch": 2.06166802811878,
"grad_norm": 1.9625083208084106,
"learning_rate": 6.444269564067495e-05,
"loss": 5.5036,
"step": 44800
},
{
"epoch": 2.0639690739441074,
"grad_norm": 1.7811228036880493,
"learning_rate": 6.437282854046489e-05,
"loss": 5.5504,
"step": 44850
},
{
"epoch": 2.066270119769435,
"grad_norm": 1.8563337326049805,
"learning_rate": 6.430293083512702e-05,
"loss": 5.5086,
"step": 44900
},
{
"epoch": 2.0685711655947627,
"grad_norm": 1.6173336505889893,
"learning_rate": 6.423300267349969e-05,
"loss": 5.5239,
"step": 44950
},
{
"epoch": 2.0708722114200904,
"grad_norm": 2.1647262573242188,
"learning_rate": 6.416304420448613e-05,
"loss": 5.5063,
"step": 45000
},
{
"epoch": 2.0708722114200904,
"eval_loss": 5.722973346710205,
"eval_runtime": 34.5048,
"eval_samples_per_second": 11.129,
"eval_steps_per_second": 5.564,
"eval_tts_loss": 8.551408735813336,
"step": 45000
},
{
"epoch": 2.073173257245418,
"grad_norm": 1.8066037893295288,
"learning_rate": 6.409305557705408e-05,
"loss": 5.572,
"step": 45050
},
{
"epoch": 2.0754743030707457,
"grad_norm": 2.123093366622925,
"learning_rate": 6.402303694023548e-05,
"loss": 5.5142,
"step": 45100
},
{
"epoch": 2.0777753488960733,
"grad_norm": 1.8673418760299683,
"learning_rate": 6.395298844312623e-05,
"loss": 5.4898,
"step": 45150
},
{
"epoch": 2.080076394721401,
"grad_norm": 2.1663706302642822,
"learning_rate": 6.388291023488575e-05,
"loss": 5.4957,
"step": 45200
},
{
"epoch": 2.0823774405467286,
"grad_norm": 1.9892181158065796,
"learning_rate": 6.381280246473678e-05,
"loss": 5.5585,
"step": 45250
},
{
"epoch": 2.084678486372056,
"grad_norm": 2.284586191177368,
"learning_rate": 6.3742665281965e-05,
"loss": 5.5635,
"step": 45300
},
{
"epoch": 2.086979532197384,
"grad_norm": 2.3510870933532715,
"learning_rate": 6.367249883591865e-05,
"loss": 5.5342,
"step": 45350
},
{
"epoch": 2.0892805780227115,
"grad_norm": 2.212132215499878,
"learning_rate": 6.36023032760084e-05,
"loss": 5.5002,
"step": 45400
},
{
"epoch": 2.091581623848039,
"grad_norm": 1.725682020187378,
"learning_rate": 6.35320787517068e-05,
"loss": 5.5957,
"step": 45450
},
{
"epoch": 2.0938826696733663,
"grad_norm": 2.1455774307250977,
"learning_rate": 6.346182541254817e-05,
"loss": 5.4675,
"step": 45500
},
{
"epoch": 2.096183715498694,
"grad_norm": 2.027888298034668,
"learning_rate": 6.33929493281682e-05,
"loss": 5.5287,
"step": 45550
},
{
"epoch": 2.0984847613240216,
"grad_norm": 1.811853051185608,
"learning_rate": 6.332263937698848e-05,
"loss": 5.5309,
"step": 45600
},
{
"epoch": 2.1007858071493493,
"grad_norm": 1.9663208723068237,
"learning_rate": 6.325230105692648e-05,
"loss": 5.4658,
"step": 45650
},
{
"epoch": 2.103086852974677,
"grad_norm": 1.7255808115005493,
"learning_rate": 6.31819345177588e-05,
"loss": 5.5497,
"step": 45700
},
{
"epoch": 2.1053878988000045,
"grad_norm": 1.5630170106887817,
"learning_rate": 6.311153990932208e-05,
"loss": 5.4754,
"step": 45750
},
{
"epoch": 2.107688944625332,
"grad_norm": 1.9326938390731812,
"learning_rate": 6.304111738151283e-05,
"loss": 5.5607,
"step": 45800
},
{
"epoch": 2.10998999045066,
"grad_norm": 1.7057856321334839,
"learning_rate": 6.297066708428691e-05,
"loss": 5.4917,
"step": 45850
},
{
"epoch": 2.1122910362759875,
"grad_norm": 1.9797825813293457,
"learning_rate": 6.290018916765936e-05,
"loss": 5.4714,
"step": 45900
},
{
"epoch": 2.114592082101315,
"grad_norm": 2.143786907196045,
"learning_rate": 6.282968378170405e-05,
"loss": 5.4879,
"step": 45950
},
{
"epoch": 2.1168931279266427,
"grad_norm": 2.2895607948303223,
"learning_rate": 6.275915107655329e-05,
"loss": 5.5423,
"step": 46000
},
{
"epoch": 2.1168931279266427,
"eval_loss": 5.717980861663818,
"eval_runtime": 34.9486,
"eval_samples_per_second": 10.988,
"eval_steps_per_second": 5.494,
"eval_tts_loss": 8.597503433606374,
"step": 46000
},
{
"epoch": 2.1191941737519704,
"grad_norm": 1.834234356880188,
"learning_rate": 6.268859120239758e-05,
"loss": 5.4918,
"step": 46050
},
{
"epoch": 2.121495219577298,
"grad_norm": 1.891732931137085,
"learning_rate": 6.261800430948532e-05,
"loss": 5.4975,
"step": 46100
},
{
"epoch": 2.1237962654026257,
"grad_norm": 1.9583476781845093,
"learning_rate": 6.25473905481224e-05,
"loss": 5.5338,
"step": 46150
},
{
"epoch": 2.1260973112279533,
"grad_norm": 2.160682439804077,
"learning_rate": 6.24767500686719e-05,
"loss": 5.534,
"step": 46200
},
{
"epoch": 2.1283983570532805,
"grad_norm": 1.747267484664917,
"learning_rate": 6.240608302155384e-05,
"loss": 5.4919,
"step": 46250
},
{
"epoch": 2.130699402878608,
"grad_norm": 2.0734097957611084,
"learning_rate": 6.233538955724481e-05,
"loss": 5.534,
"step": 46300
},
{
"epoch": 2.133000448703936,
"grad_norm": 1.9301162958145142,
"learning_rate": 6.226466982627763e-05,
"loss": 5.49,
"step": 46350
},
{
"epoch": 2.1353014945292634,
"grad_norm": 1.7851401567459106,
"learning_rate": 6.219392397924104e-05,
"loss": 5.3952,
"step": 46400
},
{
"epoch": 2.137602540354591,
"grad_norm": 1.7382208108901978,
"learning_rate": 6.212315216677942e-05,
"loss": 5.4901,
"step": 46450
},
{
"epoch": 2.1399035861799187,
"grad_norm": 2.1053762435913086,
"learning_rate": 6.205235453959244e-05,
"loss": 5.5327,
"step": 46500
},
{
"epoch": 2.1422046320052464,
"grad_norm": 1.9295450448989868,
"learning_rate": 6.19815312484347e-05,
"loss": 5.4875,
"step": 46550
},
{
"epoch": 2.144505677830574,
"grad_norm": 2.141199827194214,
"learning_rate": 6.191068244411551e-05,
"loss": 5.5757,
"step": 46600
},
{
"epoch": 2.1468067236559016,
"grad_norm": 2.1582906246185303,
"learning_rate": 6.183980827749844e-05,
"loss": 5.5675,
"step": 46650
},
{
"epoch": 2.1491077694812293,
"grad_norm": 1.7495667934417725,
"learning_rate": 6.176890889950115e-05,
"loss": 5.5199,
"step": 46700
},
{
"epoch": 2.151408815306557,
"grad_norm": 1.7843763828277588,
"learning_rate": 6.16979844610949e-05,
"loss": 5.4777,
"step": 46750
},
{
"epoch": 2.1537098611318846,
"grad_norm": 1.7702149152755737,
"learning_rate": 6.162703511330434e-05,
"loss": 5.4738,
"step": 46800
},
{
"epoch": 2.156010906957212,
"grad_norm": 1.8765311241149902,
"learning_rate": 6.155606100720717e-05,
"loss": 5.5664,
"step": 46850
},
{
"epoch": 2.15831195278254,
"grad_norm": 1.8246150016784668,
"learning_rate": 6.148506229393383e-05,
"loss": 5.5637,
"step": 46900
},
{
"epoch": 2.160612998607867,
"grad_norm": 2.1555979251861572,
"learning_rate": 6.141403912466712e-05,
"loss": 5.5311,
"step": 46950
},
{
"epoch": 2.1629140444331947,
"grad_norm": 1.9029009342193604,
"learning_rate": 6.134299165064193e-05,
"loss": 5.4818,
"step": 47000
},
{
"epoch": 2.1629140444331947,
"eval_loss": 5.707584381103516,
"eval_runtime": 33.3657,
"eval_samples_per_second": 11.509,
"eval_steps_per_second": 5.754,
"eval_tts_loss": 8.569347690304697,
"step": 47000
},
{
"epoch": 2.1652150902585223,
"grad_norm": 1.874457597732544,
"learning_rate": 6.127192002314491e-05,
"loss": 5.4641,
"step": 47050
},
{
"epoch": 2.16751613608385,
"grad_norm": 1.8370598554611206,
"learning_rate": 6.120082439351416e-05,
"loss": 5.4618,
"step": 47100
},
{
"epoch": 2.1698171819091776,
"grad_norm": 1.8363277912139893,
"learning_rate": 6.112970491313882e-05,
"loss": 5.4306,
"step": 47150
},
{
"epoch": 2.1721182277345052,
"grad_norm": 2.3280389308929443,
"learning_rate": 6.105856173345891e-05,
"loss": 5.5492,
"step": 47200
},
{
"epoch": 2.174419273559833,
"grad_norm": 2.1096155643463135,
"learning_rate": 6.098739500596485e-05,
"loss": 5.507,
"step": 47250
},
{
"epoch": 2.1767203193851605,
"grad_norm": 1.8868945837020874,
"learning_rate": 6.0916204882197204e-05,
"loss": 5.5058,
"step": 47300
},
{
"epoch": 2.179021365210488,
"grad_norm": 2.4336724281311035,
"learning_rate": 6.084499151374641e-05,
"loss": 5.4931,
"step": 47350
},
{
"epoch": 2.181322411035816,
"grad_norm": 1.9747580289840698,
"learning_rate": 6.077375505225233e-05,
"loss": 5.482,
"step": 47400
},
{
"epoch": 2.1836234568611435,
"grad_norm": 2.0846846103668213,
"learning_rate": 6.070249564940407e-05,
"loss": 5.5427,
"step": 47450
},
{
"epoch": 2.185924502686471,
"grad_norm": 1.875829815864563,
"learning_rate": 6.063121345693952e-05,
"loss": 5.5404,
"step": 47500
},
{
"epoch": 2.1882255485117987,
"grad_norm": 1.8179715871810913,
"learning_rate": 6.055990862664514e-05,
"loss": 5.5046,
"step": 47550
},
{
"epoch": 2.1905265943371264,
"grad_norm": 2.135223150253296,
"learning_rate": 6.0488581310355586e-05,
"loss": 5.5869,
"step": 47600
},
{
"epoch": 2.192827640162454,
"grad_norm": 1.8659194707870483,
"learning_rate": 6.041723165995339e-05,
"loss": 5.46,
"step": 47650
},
{
"epoch": 2.1951286859877817,
"grad_norm": 1.751010775566101,
"learning_rate": 6.0345859827368646e-05,
"loss": 5.4801,
"step": 47700
},
{
"epoch": 2.197429731813109,
"grad_norm": 1.752781629562378,
"learning_rate": 6.027446596457869e-05,
"loss": 5.4676,
"step": 47750
},
{
"epoch": 2.1997307776384365,
"grad_norm": 1.8623894453048706,
"learning_rate": 6.0203050223607736e-05,
"loss": 5.4861,
"step": 47800
},
{
"epoch": 2.202031823463764,
"grad_norm": 2.1729423999786377,
"learning_rate": 6.0131612756526635e-05,
"loss": 5.5679,
"step": 47850
},
{
"epoch": 2.204332869289092,
"grad_norm": 1.8969271183013916,
"learning_rate": 6.006015371545246e-05,
"loss": 5.5307,
"step": 47900
},
{
"epoch": 2.2066339151144194,
"grad_norm": 1.9392940998077393,
"learning_rate": 5.998867325254822e-05,
"loss": 5.5678,
"step": 47950
},
{
"epoch": 2.208934960939747,
"grad_norm": 1.9349250793457031,
"learning_rate": 5.991717152002261e-05,
"loss": 5.5207,
"step": 48000
},
{
"epoch": 2.208934960939747,
"eval_loss": 5.705930233001709,
"eval_runtime": 33.794,
"eval_samples_per_second": 11.363,
"eval_steps_per_second": 5.681,
"eval_tts_loss": 8.537661055543742,
"step": 48000
},
{
"epoch": 2.2112360067650747,
"grad_norm": 2.1593496799468994,
"learning_rate": 5.9845648670129506e-05,
"loss": 5.5717,
"step": 48050
},
{
"epoch": 2.2135370525904023,
"grad_norm": 1.7322163581848145,
"learning_rate": 5.9774104855167837e-05,
"loss": 5.4747,
"step": 48100
},
{
"epoch": 2.21583809841573,
"grad_norm": 1.878806710243225,
"learning_rate": 5.9702540227481116e-05,
"loss": 5.5021,
"step": 48150
},
{
"epoch": 2.2181391442410576,
"grad_norm": 1.9864357709884644,
"learning_rate": 5.963095493945724e-05,
"loss": 5.5111,
"step": 48200
},
{
"epoch": 2.2204401900663853,
"grad_norm": 2.206557273864746,
"learning_rate": 5.9559349143528044e-05,
"loss": 5.5295,
"step": 48250
},
{
"epoch": 2.222741235891713,
"grad_norm": 1.8144375085830688,
"learning_rate": 5.948772299216905e-05,
"loss": 5.5521,
"step": 48300
},
{
"epoch": 2.2250422817170405,
"grad_norm": 2.5472915172576904,
"learning_rate": 5.941607663789912e-05,
"loss": 5.5341,
"step": 48350
},
{
"epoch": 2.227343327542368,
"grad_norm": 2.2753031253814697,
"learning_rate": 5.9344410233280146e-05,
"loss": 5.4789,
"step": 48400
},
{
"epoch": 2.2296443733676954,
"grad_norm": 2.0804171562194824,
"learning_rate": 5.927272393091671e-05,
"loss": 5.5196,
"step": 48450
},
{
"epoch": 2.231945419193023,
"grad_norm": 2.0795419216156006,
"learning_rate": 5.9201017883455755e-05,
"loss": 5.5605,
"step": 48500
},
{
"epoch": 2.2342464650183507,
"grad_norm": 1.8969382047653198,
"learning_rate": 5.91292922435863e-05,
"loss": 5.5587,
"step": 48550
},
{
"epoch": 2.2365475108436783,
"grad_norm": 2.2194700241088867,
"learning_rate": 5.905754716403902e-05,
"loss": 5.5229,
"step": 48600
},
{
"epoch": 2.238848556669006,
"grad_norm": 1.9214152097702026,
"learning_rate": 5.8985782797586055e-05,
"loss": 5.566,
"step": 48650
},
{
"epoch": 2.2411496024943336,
"grad_norm": 1.9062443971633911,
"learning_rate": 5.891399929704058e-05,
"loss": 5.5084,
"step": 48700
},
{
"epoch": 2.2434506483196612,
"grad_norm": 2.313995599746704,
"learning_rate": 5.88421968152565e-05,
"loss": 5.547,
"step": 48750
},
{
"epoch": 2.245751694144989,
"grad_norm": 1.8967597484588623,
"learning_rate": 5.877037550512816e-05,
"loss": 5.559,
"step": 48800
},
{
"epoch": 2.2480527399703165,
"grad_norm": 2.220062494277954,
"learning_rate": 5.869853551959e-05,
"loss": 5.5156,
"step": 48850
},
{
"epoch": 2.250353785795644,
"grad_norm": 2.281755208969116,
"learning_rate": 5.8626677011616226e-05,
"loss": 5.4842,
"step": 48900
},
{
"epoch": 2.252654831620972,
"grad_norm": 1.9974268674850464,
"learning_rate": 5.8554800134220465e-05,
"loss": 5.5098,
"step": 48950
},
{
"epoch": 2.2549558774462994,
"grad_norm": 2.0634398460388184,
"learning_rate": 5.848290504045548e-05,
"loss": 5.5407,
"step": 49000
},
{
"epoch": 2.2549558774462994,
"eval_loss": 5.699641704559326,
"eval_runtime": 33.4391,
"eval_samples_per_second": 11.484,
"eval_steps_per_second": 5.742,
"eval_tts_loss": 8.564900359093189,
"step": 49000
},
{
"epoch": 2.257256923271627,
"grad_norm": 2.1197509765625,
"learning_rate": 5.8410991883412805e-05,
"loss": 5.5355,
"step": 49050
},
{
"epoch": 2.2595579690969547,
"grad_norm": 2.0381312370300293,
"learning_rate": 5.833906081622248e-05,
"loss": 5.4852,
"step": 49100
},
{
"epoch": 2.2618590149222824,
"grad_norm": 2.316908359527588,
"learning_rate": 5.826711199205261e-05,
"loss": 5.4951,
"step": 49150
},
{
"epoch": 2.26416006074761,
"grad_norm": 2.0368828773498535,
"learning_rate": 5.819514556410919e-05,
"loss": 5.5427,
"step": 49200
},
{
"epoch": 2.2664611065729376,
"grad_norm": 1.6029542684555054,
"learning_rate": 5.812316168563561e-05,
"loss": 5.4828,
"step": 49250
},
{
"epoch": 2.268762152398265,
"grad_norm": 1.8522335290908813,
"learning_rate": 5.805116050991253e-05,
"loss": 5.418,
"step": 49300
},
{
"epoch": 2.2710631982235925,
"grad_norm": 1.811919093132019,
"learning_rate": 5.7979142190257366e-05,
"loss": 5.5058,
"step": 49350
},
{
"epoch": 2.27336424404892,
"grad_norm": 1.9893652200698853,
"learning_rate": 5.790710688002403e-05,
"loss": 5.4568,
"step": 49400
},
{
"epoch": 2.2756652898742478,
"grad_norm": 2.328045606613159,
"learning_rate": 5.7835054732602664e-05,
"loss": 5.4936,
"step": 49450
},
{
"epoch": 2.2779663356995754,
"grad_norm": 1.901153326034546,
"learning_rate": 5.7762985901419254e-05,
"loss": 5.5469,
"step": 49500
},
{
"epoch": 2.280267381524903,
"grad_norm": 2.4046385288238525,
"learning_rate": 5.769090053993528e-05,
"loss": 5.4819,
"step": 49550
},
{
"epoch": 2.2825684273502307,
"grad_norm": 2.1178832054138184,
"learning_rate": 5.7618798801647424e-05,
"loss": 5.4857,
"step": 49600
},
{
"epoch": 2.2848694731755583,
"grad_norm": 1.8963590860366821,
"learning_rate": 5.7546680840087285e-05,
"loss": 5.5016,
"step": 49650
},
{
"epoch": 2.287170519000886,
"grad_norm": 1.8767808675765991,
"learning_rate": 5.747454680882095e-05,
"loss": 5.52,
"step": 49700
},
{
"epoch": 2.2894715648262136,
"grad_norm": 2.342343330383301,
"learning_rate": 5.740239686144877e-05,
"loss": 5.505,
"step": 49750
},
{
"epoch": 2.2917726106515413,
"grad_norm": 2.1045210361480713,
"learning_rate": 5.7330231151604966e-05,
"loss": 5.5173,
"step": 49800
},
{
"epoch": 2.294073656476869,
"grad_norm": 1.9127671718597412,
"learning_rate": 5.725804983295733e-05,
"loss": 5.477,
"step": 49850
},
{
"epoch": 2.2963747023021965,
"grad_norm": 2.1847991943359375,
"learning_rate": 5.718729714514758e-05,
"loss": 5.5034,
"step": 49900
},
{
"epoch": 2.2986757481275237,
"grad_norm": 1.8255267143249512,
"learning_rate": 5.711508537454882e-05,
"loss": 5.4878,
"step": 49950
},
{
"epoch": 2.3009767939528514,
"grad_norm": 1.9621268510818481,
"learning_rate": 5.7042858453272044e-05,
"loss": 5.5685,
"step": 50000
},
{
"epoch": 2.3009767939528514,
"eval_loss": 5.693967342376709,
"eval_runtime": 33.0645,
"eval_samples_per_second": 11.614,
"eval_steps_per_second": 5.807,
"eval_tts_loss": 8.575001189177728,
"step": 50000
},
{
"epoch": 2.303277839778179,
"grad_norm": 2.2509212493896484,
"learning_rate": 5.697061653511539e-05,
"loss": 5.4954,
"step": 50050
},
{
"epoch": 2.3055788856035067,
"grad_norm": 1.7002668380737305,
"learning_rate": 5.689835977390893e-05,
"loss": 5.4782,
"step": 50100
},
{
"epoch": 2.3078799314288343,
"grad_norm": 2.2835309505462646,
"learning_rate": 5.682608832351433e-05,
"loss": 5.495,
"step": 50150
},
{
"epoch": 2.310180977254162,
"grad_norm": 1.9559133052825928,
"learning_rate": 5.6753802337824526e-05,
"loss": 5.4967,
"step": 50200
},
{
"epoch": 2.3124820230794896,
"grad_norm": 1.6724474430084229,
"learning_rate": 5.6681501970763426e-05,
"loss": 5.487,
"step": 50250
},
{
"epoch": 2.314783068904817,
"grad_norm": 1.6113662719726562,
"learning_rate": 5.660918737628557e-05,
"loss": 5.4981,
"step": 50300
},
{
"epoch": 2.317084114730145,
"grad_norm": 2.1076881885528564,
"learning_rate": 5.653685870837575e-05,
"loss": 5.5033,
"step": 50350
},
{
"epoch": 2.3193851605554725,
"grad_norm": 1.9511075019836426,
"learning_rate": 5.646451612104877e-05,
"loss": 5.5259,
"step": 50400
},
{
"epoch": 2.3216862063808,
"grad_norm": 1.9726592302322388,
"learning_rate": 5.639215976834906e-05,
"loss": 5.5563,
"step": 50450
},
{
"epoch": 2.323987252206128,
"grad_norm": 1.6485875844955444,
"learning_rate": 5.631978980435035e-05,
"loss": 5.4276,
"step": 50500
},
{
"epoch": 2.3262882980314554,
"grad_norm": 1.8284225463867188,
"learning_rate": 5.624740638315538e-05,
"loss": 5.5438,
"step": 50550
},
{
"epoch": 2.328589343856783,
"grad_norm": 1.7105023860931396,
"learning_rate": 5.6175009658895486e-05,
"loss": 5.5755,
"step": 50600
},
{
"epoch": 2.3308903896821107,
"grad_norm": 2.0239145755767822,
"learning_rate": 5.6102599785730406e-05,
"loss": 5.5332,
"step": 50650
},
{
"epoch": 2.3331914355074383,
"grad_norm": 2.122833490371704,
"learning_rate": 5.6030176917847834e-05,
"loss": 5.5843,
"step": 50700
},
{
"epoch": 2.335492481332766,
"grad_norm": 1.7509838342666626,
"learning_rate": 5.595774120946316e-05,
"loss": 5.5071,
"step": 50750
},
{
"epoch": 2.337793527158093,
"grad_norm": 1.953904628753662,
"learning_rate": 5.5885292814819054e-05,
"loss": 5.5062,
"step": 50800
},
{
"epoch": 2.340094572983421,
"grad_norm": 2.0808749198913574,
"learning_rate": 5.5812831888185305e-05,
"loss": 5.5189,
"step": 50850
},
{
"epoch": 2.3423956188087485,
"grad_norm": 1.8985790014266968,
"learning_rate": 5.574035858385829e-05,
"loss": 5.5271,
"step": 50900
},
{
"epoch": 2.344696664634076,
"grad_norm": 2.062648057937622,
"learning_rate": 5.5667873056160794e-05,
"loss": 5.5491,
"step": 50950
},
{
"epoch": 2.3469977104594038,
"grad_norm": 1.934853434562683,
"learning_rate": 5.559537545944161e-05,
"loss": 5.5204,
"step": 51000
},
{
"epoch": 2.3469977104594038,
"eval_loss": 5.6846184730529785,
"eval_runtime": 32.7588,
"eval_samples_per_second": 11.722,
"eval_steps_per_second": 5.861,
"eval_tts_loss": 8.581908826910935,
"step": 51000
},
{
"epoch": 2.3492987562847314,
"grad_norm": 1.8930600881576538,
"learning_rate": 5.552286594807523e-05,
"loss": 5.4913,
"step": 51050
},
{
"epoch": 2.351599802110059,
"grad_norm": 1.7008662223815918,
"learning_rate": 5.545034467646155e-05,
"loss": 5.5381,
"step": 51100
},
{
"epoch": 2.3539008479353867,
"grad_norm": 2.0752475261688232,
"learning_rate": 5.537781179902548e-05,
"loss": 5.4563,
"step": 51150
},
{
"epoch": 2.3562018937607143,
"grad_norm": 1.9323570728302002,
"learning_rate": 5.530526747021663e-05,
"loss": 5.513,
"step": 51200
},
{
"epoch": 2.358502939586042,
"grad_norm": 2.2523136138916016,
"learning_rate": 5.5232711844509e-05,
"loss": 5.4431,
"step": 51250
},
{
"epoch": 2.3608039854113696,
"grad_norm": 1.8251011371612549,
"learning_rate": 5.516014507640067e-05,
"loss": 5.5666,
"step": 51300
},
{
"epoch": 2.3631050312366972,
"grad_norm": 2.220978260040283,
"learning_rate": 5.5087567320413435e-05,
"loss": 5.4692,
"step": 51350
},
{
"epoch": 2.365406077062025,
"grad_norm": 2.1732263565063477,
"learning_rate": 5.501497873109248e-05,
"loss": 5.5466,
"step": 51400
},
{
"epoch": 2.367707122887352,
"grad_norm": 1.958206057548523,
"learning_rate": 5.494237946300606e-05,
"loss": 5.5023,
"step": 51450
},
{
"epoch": 2.3700081687126797,
"grad_norm": 1.8300808668136597,
"learning_rate": 5.486976967074517e-05,
"loss": 5.5164,
"step": 51500
},
{
"epoch": 2.3723092145380074,
"grad_norm": 2.154264211654663,
"learning_rate": 5.479714950892322e-05,
"loss": 5.5505,
"step": 51550
},
{
"epoch": 2.374610260363335,
"grad_norm": 2.135356903076172,
"learning_rate": 5.472451913217569e-05,
"loss": 5.5268,
"step": 51600
},
{
"epoch": 2.3769113061886626,
"grad_norm": 1.6626617908477783,
"learning_rate": 5.465187869515983e-05,
"loss": 5.4863,
"step": 51650
},
{
"epoch": 2.3792123520139903,
"grad_norm": 2.074730157852173,
"learning_rate": 5.457922835255428e-05,
"loss": 5.5061,
"step": 51700
},
{
"epoch": 2.381513397839318,
"grad_norm": 1.85642409324646,
"learning_rate": 5.450656825905881e-05,
"loss": 5.5448,
"step": 51750
},
{
"epoch": 2.3838144436646456,
"grad_norm": 2.1079165935516357,
"learning_rate": 5.4433898569393926e-05,
"loss": 5.5477,
"step": 51800
},
{
"epoch": 2.386115489489973,
"grad_norm": 2.0697200298309326,
"learning_rate": 5.43612194383006e-05,
"loss": 5.5246,
"step": 51850
},
{
"epoch": 2.388416535315301,
"grad_norm": 1.7789839506149292,
"learning_rate": 5.4288531020539856e-05,
"loss": 5.413,
"step": 51900
},
{
"epoch": 2.3907175811406285,
"grad_norm": 1.9652003049850464,
"learning_rate": 5.421583347089253e-05,
"loss": 5.5111,
"step": 51950
},
{
"epoch": 2.393018626965956,
"grad_norm": 2.0850377082824707,
"learning_rate": 5.414312694415892e-05,
"loss": 5.5198,
"step": 52000
},
{
"epoch": 2.393018626965956,
"eval_loss": 5.685731410980225,
"eval_runtime": 33.6338,
"eval_samples_per_second": 11.417,
"eval_steps_per_second": 5.709,
"eval_tts_loss": 8.644896936048733,
"step": 52000
},
{
"epoch": 2.3953196727912838,
"grad_norm": 1.8597215414047241,
"learning_rate": 5.407186598759514e-05,
"loss": 5.5228,
"step": 52050
},
{
"epoch": 2.3976207186166114,
"grad_norm": 2.0384023189544678,
"learning_rate": 5.399914214299692e-05,
"loss": 5.5392,
"step": 52100
},
{
"epoch": 2.399921764441939,
"grad_norm": 2.2333080768585205,
"learning_rate": 5.392640978272927e-05,
"loss": 5.548,
"step": 52150
},
{
"epoch": 2.4022228102672667,
"grad_norm": 1.875346064567566,
"learning_rate": 5.38536690616666e-05,
"loss": 5.5701,
"step": 52200
},
{
"epoch": 2.404523856092594,
"grad_norm": 1.9534131288528442,
"learning_rate": 5.3780920134701115e-05,
"loss": 5.5083,
"step": 52250
},
{
"epoch": 2.4068249019179215,
"grad_norm": 1.6405394077301025,
"learning_rate": 5.37081631567425e-05,
"loss": 5.5181,
"step": 52300
},
{
"epoch": 2.409125947743249,
"grad_norm": 2.1161701679229736,
"learning_rate": 5.363539828271756e-05,
"loss": 5.5107,
"step": 52350
},
{
"epoch": 2.411426993568577,
"grad_norm": 1.9055460691452026,
"learning_rate": 5.3562625667569945e-05,
"loss": 5.5516,
"step": 52400
},
{
"epoch": 2.4137280393939045,
"grad_norm": 1.7940001487731934,
"learning_rate": 5.348984546625975e-05,
"loss": 5.4008,
"step": 52450
},
{
"epoch": 2.416029085219232,
"grad_norm": 2.0796279907226562,
"learning_rate": 5.341705783376325e-05,
"loss": 5.4998,
"step": 52500
},
{
"epoch": 2.4183301310445597,
"grad_norm": 1.901845932006836,
"learning_rate": 5.334426292507255e-05,
"loss": 5.5447,
"step": 52550
},
{
"epoch": 2.4206311768698874,
"grad_norm": 2.040921926498413,
"learning_rate": 5.3271460895195224e-05,
"loss": 5.5044,
"step": 52600
},
{
"epoch": 2.422932222695215,
"grad_norm": 1.8908004760742188,
"learning_rate": 5.319865189915404e-05,
"loss": 5.4536,
"step": 52650
},
{
"epoch": 2.4252332685205427,
"grad_norm": 2.002741813659668,
"learning_rate": 5.312583609198658e-05,
"loss": 5.4478,
"step": 52700
},
{
"epoch": 2.4275343143458703,
"grad_norm": 1.7424709796905518,
"learning_rate": 5.305301362874492e-05,
"loss": 5.4934,
"step": 52750
},
{
"epoch": 2.429835360171198,
"grad_norm": 1.6303448677062988,
"learning_rate": 5.2980184664495336e-05,
"loss": 5.5592,
"step": 52800
},
{
"epoch": 2.4321364059965256,
"grad_norm": 2.0222649574279785,
"learning_rate": 5.29073493543179e-05,
"loss": 5.5131,
"step": 52850
},
{
"epoch": 2.434437451821853,
"grad_norm": 1.923041582107544,
"learning_rate": 5.283450785330629e-05,
"loss": 5.4102,
"step": 52900
},
{
"epoch": 2.4367384976471804,
"grad_norm": 1.6201173067092896,
"learning_rate": 5.276166031656727e-05,
"loss": 5.5084,
"step": 52950
},
{
"epoch": 2.439039543472508,
"grad_norm": 1.916977047920227,
"learning_rate": 5.268880689922049e-05,
"loss": 5.507,
"step": 53000
},
{
"epoch": 2.439039543472508,
"eval_loss": 5.673426151275635,
"eval_runtime": 32.5382,
"eval_samples_per_second": 11.801,
"eval_steps_per_second": 5.901,
"eval_tts_loss": 8.599758905226478,
"step": 53000
},
{
"epoch": 2.4413405892978357,
"grad_norm": 1.7312027215957642,
"learning_rate": 5.261594775639812e-05,
"loss": 5.4897,
"step": 53050
},
{
"epoch": 2.4436416351231633,
"grad_norm": 1.809523582458496,
"learning_rate": 5.2543083043244544e-05,
"loss": 5.4711,
"step": 53100
},
{
"epoch": 2.445942680948491,
"grad_norm": 1.8643138408660889,
"learning_rate": 5.2470212914915995e-05,
"loss": 5.5297,
"step": 53150
},
{
"epoch": 2.4482437267738186,
"grad_norm": 2.256096124649048,
"learning_rate": 5.2397337526580205e-05,
"loss": 5.4853,
"step": 53200
},
{
"epoch": 2.4505447725991463,
"grad_norm": 1.8998197317123413,
"learning_rate": 5.2324457033416154e-05,
"loss": 5.486,
"step": 53250
},
{
"epoch": 2.452845818424474,
"grad_norm": 2.2109556198120117,
"learning_rate": 5.225157159061366e-05,
"loss": 5.5047,
"step": 53300
},
{
"epoch": 2.4551468642498016,
"grad_norm": 2.0667428970336914,
"learning_rate": 5.2178681353373096e-05,
"loss": 5.4553,
"step": 53350
},
{
"epoch": 2.457447910075129,
"grad_norm": 1.889162540435791,
"learning_rate": 5.210578647690505e-05,
"loss": 5.4784,
"step": 53400
},
{
"epoch": 2.459748955900457,
"grad_norm": 2.207839012145996,
"learning_rate": 5.203288711642996e-05,
"loss": 5.5107,
"step": 53450
},
{
"epoch": 2.4620500017257845,
"grad_norm": 1.8210887908935547,
"learning_rate": 5.195998342717785e-05,
"loss": 5.5447,
"step": 53500
},
{
"epoch": 2.464351047551112,
"grad_norm": 1.9892346858978271,
"learning_rate": 5.188707556438793e-05,
"loss": 5.5459,
"step": 53550
},
{
"epoch": 2.4666520933764398,
"grad_norm": 2.049320697784424,
"learning_rate": 5.1814163683308324e-05,
"loss": 5.5597,
"step": 53600
},
{
"epoch": 2.4689531392017674,
"grad_norm": 2.0622878074645996,
"learning_rate": 5.174124793919568e-05,
"loss": 5.5092,
"step": 53650
},
{
"epoch": 2.471254185027095,
"grad_norm": 2.1357452869415283,
"learning_rate": 5.166832848731488e-05,
"loss": 5.5497,
"step": 53700
},
{
"epoch": 2.4735552308524222,
"grad_norm": 1.9745289087295532,
"learning_rate": 5.159540548293874e-05,
"loss": 5.4755,
"step": 53750
},
{
"epoch": 2.47585627667775,
"grad_norm": 1.9489434957504272,
"learning_rate": 5.152247908134761e-05,
"loss": 5.5102,
"step": 53800
},
{
"epoch": 2.4781573225030775,
"grad_norm": 2.2445592880249023,
"learning_rate": 5.144954943782905e-05,
"loss": 5.499,
"step": 53850
},
{
"epoch": 2.480458368328405,
"grad_norm": 2.048757791519165,
"learning_rate": 5.137661670767757e-05,
"loss": 5.516,
"step": 53900
},
{
"epoch": 2.482759414153733,
"grad_norm": 1.8567497730255127,
"learning_rate": 5.130368104619422e-05,
"loss": 5.487,
"step": 53950
},
{
"epoch": 2.4850604599790604,
"grad_norm": 2.4062752723693848,
"learning_rate": 5.123074260868631e-05,
"loss": 5.5128,
"step": 54000
},
{
"epoch": 2.4850604599790604,
"eval_loss": 5.6724090576171875,
"eval_runtime": 34.2786,
"eval_samples_per_second": 11.202,
"eval_steps_per_second": 5.601,
"eval_tts_loss": 8.662209894940489,
"step": 54000
},
{
"epoch": 2.487361505804388,
"grad_norm": 1.895520806312561,
"learning_rate": 5.1157801550467064e-05,
"loss": 5.5928,
"step": 54050
},
{
"epoch": 2.4896625516297157,
"grad_norm": 2.2730302810668945,
"learning_rate": 5.108485802685526e-05,
"loss": 5.4582,
"step": 54100
},
{
"epoch": 2.4919635974550434,
"grad_norm": 1.9927568435668945,
"learning_rate": 5.101191219317495e-05,
"loss": 5.5229,
"step": 54150
},
{
"epoch": 2.494264643280371,
"grad_norm": 1.9329229593276978,
"learning_rate": 5.0940423184635246e-05,
"loss": 5.549,
"step": 54200
},
{
"epoch": 2.4965656891056986,
"grad_norm": 1.611726999282837,
"learning_rate": 5.086747323527522e-05,
"loss": 5.5071,
"step": 54250
},
{
"epoch": 2.4988667349310263,
"grad_norm": 1.863103985786438,
"learning_rate": 5.079452143874022e-05,
"loss": 5.4997,
"step": 54300
},
{
"epoch": 2.5011677807563535,
"grad_norm": 2.2052907943725586,
"learning_rate": 5.07215679503719e-05,
"loss": 5.5179,
"step": 54350
},
{
"epoch": 2.503468826581681,
"grad_norm": 1.9319581985473633,
"learning_rate": 5.064861292551552e-05,
"loss": 5.5353,
"step": 54400
},
{
"epoch": 2.5057698724070088,
"grad_norm": 2.0617754459381104,
"learning_rate": 5.057565651951963e-05,
"loss": 5.4807,
"step": 54450
},
{
"epoch": 2.5080709182323364,
"grad_norm": 2.021653652191162,
"learning_rate": 5.05026988877357e-05,
"loss": 5.5129,
"step": 54500
},
{
"epoch": 2.510371964057664,
"grad_norm": 2.167088031768799,
"learning_rate": 5.0429740185517805e-05,
"loss": 5.5161,
"step": 54550
},
{
"epoch": 2.5126730098829917,
"grad_norm": 1.8927797079086304,
"learning_rate": 5.0356780568222326e-05,
"loss": 5.4857,
"step": 54600
},
{
"epoch": 2.5149740557083193,
"grad_norm": 1.97638738155365,
"learning_rate": 5.0283820191207574e-05,
"loss": 5.4169,
"step": 54650
},
{
"epoch": 2.517275101533647,
"grad_norm": 1.7517626285552979,
"learning_rate": 5.0210859209833485e-05,
"loss": 5.4838,
"step": 54700
},
{
"epoch": 2.5195761473589746,
"grad_norm": 1.9045029878616333,
"learning_rate": 5.013789777946125e-05,
"loss": 5.474,
"step": 54750
},
{
"epoch": 2.5218771931843023,
"grad_norm": 2.1002869606018066,
"learning_rate": 5.006493605545308e-05,
"loss": 5.4747,
"step": 54800
},
{
"epoch": 2.52417823900963,
"grad_norm": 1.861536979675293,
"learning_rate": 4.9991974193171746e-05,
"loss": 5.4659,
"step": 54850
},
{
"epoch": 2.5264792848349575,
"grad_norm": 2.3293421268463135,
"learning_rate": 4.991901234798037e-05,
"loss": 5.4849,
"step": 54900
},
{
"epoch": 2.528780330660285,
"grad_norm": 1.9434945583343506,
"learning_rate": 4.9846050675241994e-05,
"loss": 5.5403,
"step": 54950
},
{
"epoch": 2.531081376485613,
"grad_norm": 1.791527509689331,
"learning_rate": 4.977308933031931e-05,
"loss": 5.5135,
"step": 55000
},
{
"epoch": 2.531081376485613,
"eval_loss": 5.663011074066162,
"eval_runtime": 33.2046,
"eval_samples_per_second": 11.565,
"eval_steps_per_second": 5.782,
"eval_tts_loss": 8.593881697905651,
"step": 55000
},
{
"epoch": 2.5333824223109405,
"grad_norm": 2.0611414909362793,
"learning_rate": 4.970012846857432e-05,
"loss": 5.5242,
"step": 55050
},
{
"epoch": 2.535683468136268,
"grad_norm": 1.8130731582641602,
"learning_rate": 4.962716824536796e-05,
"loss": 5.4987,
"step": 55100
},
{
"epoch": 2.5379845139615957,
"grad_norm": 2.204890489578247,
"learning_rate": 4.9554208816059876e-05,
"loss": 5.5122,
"step": 55150
},
{
"epoch": 2.5402855597869234,
"grad_norm": 1.6537048816680908,
"learning_rate": 4.9481250336007955e-05,
"loss": 5.5315,
"step": 55200
},
{
"epoch": 2.542586605612251,
"grad_norm": 2.3211939334869385,
"learning_rate": 4.9408292960568096e-05,
"loss": 5.5083,
"step": 55250
},
{
"epoch": 2.5448876514375782,
"grad_norm": 2.3750381469726562,
"learning_rate": 4.933533684509383e-05,
"loss": 5.4613,
"step": 55300
},
{
"epoch": 2.547188697262906,
"grad_norm": 1.7778195142745972,
"learning_rate": 4.926238214493604e-05,
"loss": 5.5137,
"step": 55350
},
{
"epoch": 2.5494897430882335,
"grad_norm": 2.055008888244629,
"learning_rate": 4.918942901544257e-05,
"loss": 5.4564,
"step": 55400
},
{
"epoch": 2.551790788913561,
"grad_norm": 2.363105297088623,
"learning_rate": 4.9116477611957865e-05,
"loss": 5.5054,
"step": 55450
},
{
"epoch": 2.554091834738889,
"grad_norm": 2.1759395599365234,
"learning_rate": 4.904352808982281e-05,
"loss": 5.4812,
"step": 55500
},
{
"epoch": 2.5563928805642164,
"grad_norm": 1.9954317808151245,
"learning_rate": 4.897058060437419e-05,
"loss": 5.5206,
"step": 55550
},
{
"epoch": 2.558693926389544,
"grad_norm": 2.1544883251190186,
"learning_rate": 4.88976353109445e-05,
"loss": 5.5394,
"step": 55600
},
{
"epoch": 2.5609949722148717,
"grad_norm": 2.238513231277466,
"learning_rate": 4.882469236486155e-05,
"loss": 5.4833,
"step": 55650
},
{
"epoch": 2.5632960180401994,
"grad_norm": 2.2825424671173096,
"learning_rate": 4.875175192144814e-05,
"loss": 5.4567,
"step": 55700
},
{
"epoch": 2.565597063865527,
"grad_norm": 1.8230923414230347,
"learning_rate": 4.867881413602178e-05,
"loss": 5.5013,
"step": 55750
},
{
"epoch": 2.5678981096908546,
"grad_norm": 2.1068201065063477,
"learning_rate": 4.8605879163894286e-05,
"loss": 5.5096,
"step": 55800
},
{
"epoch": 2.570199155516182,
"grad_norm": 2.3456852436065674,
"learning_rate": 4.853294716037149e-05,
"loss": 5.4725,
"step": 55850
},
{
"epoch": 2.5725002013415095,
"grad_norm": 2.4963150024414062,
"learning_rate": 4.846001828075292e-05,
"loss": 5.4823,
"step": 55900
},
{
"epoch": 2.574801247166837,
"grad_norm": 1.9656461477279663,
"learning_rate": 4.838709268033141e-05,
"loss": 5.4807,
"step": 55950
},
{
"epoch": 2.5771022929921648,
"grad_norm": 2.1608853340148926,
"learning_rate": 4.8314170514392874e-05,
"loss": 5.5417,
"step": 56000
},
{
"epoch": 2.5771022929921648,
"eval_loss": 5.6606011390686035,
"eval_runtime": 32.8942,
"eval_samples_per_second": 11.674,
"eval_steps_per_second": 5.837,
"eval_tts_loss": 8.585531058636887,
"step": 56000
},
{
"epoch": 2.5794033388174924,
"grad_norm": 2.1026828289031982,
"learning_rate": 4.8241251938215855e-05,
"loss": 5.4739,
"step": 56050
},
{
"epoch": 2.58170438464282,
"grad_norm": 2.2356417179107666,
"learning_rate": 4.816833710707128e-05,
"loss": 5.4498,
"step": 56100
},
{
"epoch": 2.5840054304681477,
"grad_norm": 2.1985368728637695,
"learning_rate": 4.80954261762221e-05,
"loss": 5.4379,
"step": 56150
},
{
"epoch": 2.5863064762934753,
"grad_norm": 1.9118517637252808,
"learning_rate": 4.8022519300922944e-05,
"loss": 5.5355,
"step": 56200
},
{
"epoch": 2.588607522118803,
"grad_norm": 2.080960512161255,
"learning_rate": 4.794961663641985e-05,
"loss": 5.4729,
"step": 56250
},
{
"epoch": 2.5909085679441306,
"grad_norm": 1.9839308261871338,
"learning_rate": 4.787671833794983e-05,
"loss": 5.4719,
"step": 56300
},
{
"epoch": 2.5932096137694582,
"grad_norm": null,
"learning_rate": 4.78052823909725e-05,
"loss": 5.4361,
"step": 56350
},
{
"epoch": 2.595510659594786,
"grad_norm": 1.9188679456710815,
"learning_rate": 4.77323931951916e-05,
"loss": 5.5138,
"step": 56400
},
{
"epoch": 2.5978117054201135,
"grad_norm": 2.0594546794891357,
"learning_rate": 4.765950882799373e-05,
"loss": 5.4922,
"step": 56450
},
{
"epoch": 2.600112751245441,
"grad_norm": 2.096745729446411,
"learning_rate": 4.7586629444577e-05,
"loss": 5.5055,
"step": 56500
},
{
"epoch": 2.602413797070769,
"grad_norm": 2.441319227218628,
"learning_rate": 4.751375520012884e-05,
"loss": 5.4756,
"step": 56550
},
{
"epoch": 2.6047148428960964,
"grad_norm": 2.135594606399536,
"learning_rate": 4.744088624982577e-05,
"loss": 5.4237,
"step": 56600
},
{
"epoch": 2.607015888721424,
"grad_norm": 2.0528063774108887,
"learning_rate": 4.736802274883307e-05,
"loss": 5.4687,
"step": 56650
},
{
"epoch": 2.6093169345467517,
"grad_norm": 2.743708372116089,
"learning_rate": 4.7295164852304374e-05,
"loss": 5.5248,
"step": 56700
},
{
"epoch": 2.6116179803720794,
"grad_norm": 2.06074857711792,
"learning_rate": 4.722231271538139e-05,
"loss": 5.4548,
"step": 56750
},
{
"epoch": 2.6139190261974066,
"grad_norm": 2.297532558441162,
"learning_rate": 4.71509233586696e-05,
"loss": 5.4986,
"step": 56800
},
{
"epoch": 2.616220072022734,
"grad_norm": 2.359525203704834,
"learning_rate": 4.70780830834167e-05,
"loss": 5.5232,
"step": 56850
},
{
"epoch": 2.618521117848062,
"grad_norm": 2.092503786087036,
"learning_rate": 4.700524903001777e-05,
"loss": 5.4316,
"step": 56900
},
{
"epoch": 2.6208221636733895,
"grad_norm": 1.9667569398880005,
"learning_rate": 4.6932421353563806e-05,
"loss": 5.422,
"step": 56950
},
{
"epoch": 2.623123209498717,
"grad_norm": 2.1023378372192383,
"learning_rate": 4.685960020913213e-05,
"loss": 5.4845,
"step": 57000
},
{
"epoch": 2.623123209498717,
"eval_loss": 5.65239953994751,
"eval_runtime": 34.0261,
"eval_samples_per_second": 11.285,
"eval_steps_per_second": 5.643,
"eval_tts_loss": 8.61307597893829,
"step": 57000
},
{
"epoch": 2.6254242553240448,
"grad_norm": 2.099184036254883,
"learning_rate": 4.6786785751786216e-05,
"loss": 5.522,
"step": 57050
},
{
"epoch": 2.6277253011493724,
"grad_norm": 2.0189764499664307,
"learning_rate": 4.671397813657527e-05,
"loss": 5.4654,
"step": 57100
},
{
"epoch": 2.6300263469747,
"grad_norm": 1.772560954093933,
"learning_rate": 4.6641177518533926e-05,
"loss": 5.4866,
"step": 57150
},
{
"epoch": 2.6323273928000277,
"grad_norm": 2.440157890319824,
"learning_rate": 4.656838405268195e-05,
"loss": 5.5017,
"step": 57200
},
{
"epoch": 2.6346284386253553,
"grad_norm": 1.6734415292739868,
"learning_rate": 4.649559789402385e-05,
"loss": 5.5278,
"step": 57250
},
{
"epoch": 2.636929484450683,
"grad_norm": 1.8009145259857178,
"learning_rate": 4.6422819197548586e-05,
"loss": 5.4958,
"step": 57300
},
{
"epoch": 2.63923053027601,
"grad_norm": 2.123201370239258,
"learning_rate": 4.63500481182292e-05,
"loss": 5.5163,
"step": 57350
},
{
"epoch": 2.641531576101338,
"grad_norm": 1.80511474609375,
"learning_rate": 4.627728481102257e-05,
"loss": 5.431,
"step": 57400
},
{
"epoch": 2.6438326219266655,
"grad_norm": 2.240438222885132,
"learning_rate": 4.620452943086898e-05,
"loss": 5.4903,
"step": 57450
},
{
"epoch": 2.646133667751993,
"grad_norm": 2.5990798473358154,
"learning_rate": 4.613178213269184e-05,
"loss": 5.4867,
"step": 57500
},
{
"epoch": 2.6484347135773207,
"grad_norm": 2.257694959640503,
"learning_rate": 4.6059043071397374e-05,
"loss": 5.4922,
"step": 57550
},
{
"epoch": 2.6507357594026484,
"grad_norm": 2.099280595779419,
"learning_rate": 4.598631240187424e-05,
"loss": 5.5309,
"step": 57600
},
{
"epoch": 2.653036805227976,
"grad_norm": 2.5522916316986084,
"learning_rate": 4.591359027899323e-05,
"loss": 5.4671,
"step": 57650
},
{
"epoch": 2.6553378510533037,
"grad_norm": 1.9019410610198975,
"learning_rate": 4.5840876857606966e-05,
"loss": 5.4628,
"step": 57700
},
{
"epoch": 2.6576388968786313,
"grad_norm": 1.938440203666687,
"learning_rate": 4.576817229254947e-05,
"loss": 5.4546,
"step": 57750
},
{
"epoch": 2.659939942703959,
"grad_norm": 1.9857474565505981,
"learning_rate": 4.569547673863601e-05,
"loss": 5.4707,
"step": 57800
},
{
"epoch": 2.6622409885292866,
"grad_norm": 2.172809600830078,
"learning_rate": 4.562279035066259e-05,
"loss": 5.5027,
"step": 57850
},
{
"epoch": 2.6645420343546142,
"grad_norm": 2.026803731918335,
"learning_rate": 4.5550113283405716e-05,
"loss": 5.5109,
"step": 57900
},
{
"epoch": 2.666843080179942,
"grad_norm": 2.1904194355010986,
"learning_rate": 4.547744569162204e-05,
"loss": 5.4626,
"step": 57950
},
{
"epoch": 2.6691441260052695,
"grad_norm": 2.199883460998535,
"learning_rate": 4.540478773004804e-05,
"loss": 5.5057,
"step": 58000
},
{
"epoch": 2.6691441260052695,
"eval_loss": 5.646541118621826,
"eval_runtime": 32.9768,
"eval_samples_per_second": 11.645,
"eval_steps_per_second": 5.822,
"eval_tts_loss": 8.600281850515511,
"step": 58000
},
{
"epoch": 2.671445171830597,
"grad_norm": 1.8649810552597046,
"learning_rate": 4.533213955339972e-05,
"loss": 5.4598,
"step": 58050
},
{
"epoch": 2.673746217655925,
"grad_norm": 1.9523557424545288,
"learning_rate": 4.5259501316372205e-05,
"loss": 5.4585,
"step": 58100
},
{
"epoch": 2.6760472634812524,
"grad_norm": 2.0407845973968506,
"learning_rate": 4.518687317363947e-05,
"loss": 5.4856,
"step": 58150
},
{
"epoch": 2.67834830930658,
"grad_norm": 1.945887804031372,
"learning_rate": 4.5114255279853987e-05,
"loss": 5.4216,
"step": 58200
},
{
"epoch": 2.6806493551319077,
"grad_norm": 1.912412166595459,
"learning_rate": 4.5041647789646426e-05,
"loss": 5.4695,
"step": 58250
},
{
"epoch": 2.682950400957235,
"grad_norm": 2.170290946960449,
"learning_rate": 4.496905085762529e-05,
"loss": 5.4304,
"step": 58300
},
{
"epoch": 2.6852514467825626,
"grad_norm": 2.0460116863250732,
"learning_rate": 4.4896464638376594e-05,
"loss": 5.5117,
"step": 58350
},
{
"epoch": 2.68755249260789,
"grad_norm": 1.9400469064712524,
"learning_rate": 4.4823889286463554e-05,
"loss": 5.4373,
"step": 58400
},
{
"epoch": 2.689853538433218,
"grad_norm": 2.4364376068115234,
"learning_rate": 4.4751324956426235e-05,
"loss": 5.4845,
"step": 58450
},
{
"epoch": 2.6921545842585455,
"grad_norm": 1.7145034074783325,
"learning_rate": 4.467877180278124e-05,
"loss": 5.4182,
"step": 58500
},
{
"epoch": 2.694455630083873,
"grad_norm": 2.023035764694214,
"learning_rate": 4.460622998002136e-05,
"loss": 5.4882,
"step": 58550
},
{
"epoch": 2.6967566759092008,
"grad_norm": 2.2280147075653076,
"learning_rate": 4.4533699642615295e-05,
"loss": 5.4417,
"step": 58600
},
{
"epoch": 2.6990577217345284,
"grad_norm": 1.7972197532653809,
"learning_rate": 4.4461180945007235e-05,
"loss": 5.5224,
"step": 58650
},
{
"epoch": 2.701358767559856,
"grad_norm": 2.305210828781128,
"learning_rate": 4.438867404161663e-05,
"loss": 5.4077,
"step": 58700
},
{
"epoch": 2.7036598133851837,
"grad_norm": 2.05381178855896,
"learning_rate": 4.431617908683778e-05,
"loss": 5.5049,
"step": 58750
},
{
"epoch": 2.7059608592105113,
"grad_norm": 1.8015038967132568,
"learning_rate": 4.424369623503957e-05,
"loss": 5.4476,
"step": 58800
},
{
"epoch": 2.7082619050358385,
"grad_norm": 1.750535249710083,
"learning_rate": 4.417122564056508e-05,
"loss": 5.5655,
"step": 58850
},
{
"epoch": 2.710562950861166,
"grad_norm": 2.220609188079834,
"learning_rate": 4.409876745773134e-05,
"loss": 5.5094,
"step": 58900
},
{
"epoch": 2.712863996686494,
"grad_norm": 1.8255053758621216,
"learning_rate": 4.402632184082892e-05,
"loss": 5.5111,
"step": 58950
},
{
"epoch": 2.7151650425118214,
"grad_norm": 1.8717236518859863,
"learning_rate": 4.3953888944121625e-05,
"loss": 5.4799,
"step": 59000
},
{
"epoch": 2.7151650425118214,
"eval_loss": 5.639944553375244,
"eval_runtime": 33.1091,
"eval_samples_per_second": 11.598,
"eval_steps_per_second": 5.799,
"eval_tts_loss": 8.586594152952083,
"step": 59000
},
{
"epoch": 2.717466088337149,
"grad_norm": 1.870437741279602,
"learning_rate": 4.3881468921846186e-05,
"loss": 5.4899,
"step": 59050
},
{
"epoch": 2.7197671341624767,
"grad_norm": 2.0649781227111816,
"learning_rate": 4.38090619282119e-05,
"loss": 5.4897,
"step": 59100
},
{
"epoch": 2.7220681799878044,
"grad_norm": 2.2909843921661377,
"learning_rate": 4.373666811740038e-05,
"loss": 5.4443,
"step": 59150
},
{
"epoch": 2.724369225813132,
"grad_norm": 1.9959808588027954,
"learning_rate": 4.36642876435651e-05,
"loss": 5.421,
"step": 59200
},
{
"epoch": 2.7266702716384597,
"grad_norm": 1.8193045854568481,
"learning_rate": 4.3591920660831165e-05,
"loss": 5.5136,
"step": 59250
},
{
"epoch": 2.7289713174637873,
"grad_norm": 2.53044056892395,
"learning_rate": 4.351956732329494e-05,
"loss": 5.4864,
"step": 59300
},
{
"epoch": 2.731272363289115,
"grad_norm": 2.0809009075164795,
"learning_rate": 4.344722778502376e-05,
"loss": 5.5361,
"step": 59350
},
{
"epoch": 2.7335734091144426,
"grad_norm": 1.832938313484192,
"learning_rate": 4.3374902200055544e-05,
"loss": 5.4392,
"step": 59400
},
{
"epoch": 2.73587445493977,
"grad_norm": 2.035078763961792,
"learning_rate": 4.330259072239853e-05,
"loss": 5.5084,
"step": 59450
},
{
"epoch": 2.738175500765098,
"grad_norm": 2.0738892555236816,
"learning_rate": 4.3230293506030885e-05,
"loss": 5.5201,
"step": 59500
},
{
"epoch": 2.7404765465904255,
"grad_norm": 2.5500268936157227,
"learning_rate": 4.315801070490042e-05,
"loss": 5.4403,
"step": 59550
},
{
"epoch": 2.742777592415753,
"grad_norm": 2.205916404724121,
"learning_rate": 4.308574247292428e-05,
"loss": 5.4321,
"step": 59600
},
{
"epoch": 2.745078638241081,
"grad_norm": 1.8907051086425781,
"learning_rate": 4.3013488963988544e-05,
"loss": 5.5129,
"step": 59650
},
{
"epoch": 2.7473796840664084,
"grad_norm": 2.2338571548461914,
"learning_rate": 4.2941250331947955e-05,
"loss": 5.4867,
"step": 59700
},
{
"epoch": 2.749680729891736,
"grad_norm": 1.948060393333435,
"learning_rate": 4.2869026730625586e-05,
"loss": 5.4172,
"step": 59750
},
{
"epoch": 2.7519817757170633,
"grad_norm": 1.7842371463775635,
"learning_rate": 4.279681831381251e-05,
"loss": 5.4382,
"step": 59800
},
{
"epoch": 2.754282821542391,
"grad_norm": 1.6758701801300049,
"learning_rate": 4.272462523526743e-05,
"loss": 5.5079,
"step": 59850
},
{
"epoch": 2.7565838673677185,
"grad_norm": 2.385894775390625,
"learning_rate": 4.265244764871642e-05,
"loss": 5.4478,
"step": 59900
},
{
"epoch": 2.758884913193046,
"grad_norm": 1.8913649320602417,
"learning_rate": 4.2580285707852554e-05,
"loss": 5.4042,
"step": 59950
},
{
"epoch": 2.761185959018374,
"grad_norm": 2.1212165355682373,
"learning_rate": 4.250813956633561e-05,
"loss": 5.4479,
"step": 60000
},
{
"epoch": 2.761185959018374,
"eval_loss": 5.63777494430542,
"eval_runtime": 33.443,
"eval_samples_per_second": 11.482,
"eval_steps_per_second": 5.741,
"eval_tts_loss": 8.638248861690514,
"step": 60000
},
{
"epoch": 2.7634870048437015,
"grad_norm": 2.0569007396698,
"learning_rate": 4.24360093777917e-05,
"loss": 5.4278,
"step": 60050
},
{
"epoch": 2.765788050669029,
"grad_norm": 1.98093843460083,
"learning_rate": 4.236389529581297e-05,
"loss": 5.4418,
"step": 60100
},
{
"epoch": 2.7680890964943567,
"grad_norm": 1.9621886014938354,
"learning_rate": 4.229179747395727e-05,
"loss": 5.4371,
"step": 60150
},
{
"epoch": 2.7703901423196844,
"grad_norm": 2.040923595428467,
"learning_rate": 4.221971606574785e-05,
"loss": 5.4686,
"step": 60200
},
{
"epoch": 2.772691188145012,
"grad_norm": 2.557758331298828,
"learning_rate": 4.214765122467297e-05,
"loss": 5.4559,
"step": 60250
},
{
"epoch": 2.7749922339703397,
"grad_norm": 1.9892888069152832,
"learning_rate": 4.207560310418564e-05,
"loss": 5.4328,
"step": 60300
},
{
"epoch": 2.777293279795667,
"grad_norm": 1.8860628604888916,
"learning_rate": 4.200357185770326e-05,
"loss": 5.4393,
"step": 60350
},
{
"epoch": 2.7795943256209945,
"grad_norm": 2.2031514644622803,
"learning_rate": 4.193155763860727e-05,
"loss": 5.4776,
"step": 60400
},
{
"epoch": 2.781895371446322,
"grad_norm": 2.3934335708618164,
"learning_rate": 4.1859560600242904e-05,
"loss": 5.4665,
"step": 60450
},
{
"epoch": 2.78419641727165,
"grad_norm": 2.1561715602874756,
"learning_rate": 4.1787580895918774e-05,
"loss": 5.408,
"step": 60500
},
{
"epoch": 2.7864974630969774,
"grad_norm": 2.138029098510742,
"learning_rate": 4.171561867890661e-05,
"loss": 5.5624,
"step": 60550
},
{
"epoch": 2.788798508922305,
"grad_norm": 1.9366000890731812,
"learning_rate": 4.164367410244087e-05,
"loss": 5.4615,
"step": 60600
},
{
"epoch": 2.7910995547476327,
"grad_norm": 2.0961766242980957,
"learning_rate": 4.1571747319718457e-05,
"loss": 5.4132,
"step": 60650
},
{
"epoch": 2.7934006005729604,
"grad_norm": 2.026878833770752,
"learning_rate": 4.1499838483898426e-05,
"loss": 5.4426,
"step": 60700
},
{
"epoch": 2.795701646398288,
"grad_norm": 2.110774278640747,
"learning_rate": 4.142794774810156e-05,
"loss": 5.4435,
"step": 60750
},
{
"epoch": 2.7980026922236156,
"grad_norm": 1.7830619812011719,
"learning_rate": 4.135607526541013e-05,
"loss": 5.4528,
"step": 60800
},
{
"epoch": 2.8003037380489433,
"grad_norm": 1.9309149980545044,
"learning_rate": 4.128422118886754e-05,
"loss": 5.4216,
"step": 60850
},
{
"epoch": 2.802604783874271,
"grad_norm": 2.1686344146728516,
"learning_rate": 4.121238567147801e-05,
"loss": 5.4811,
"step": 60900
},
{
"epoch": 2.8049058296995986,
"grad_norm": 1.9331154823303223,
"learning_rate": 4.114056886620618e-05,
"loss": 5.4188,
"step": 60950
},
{
"epoch": 2.807206875524926,
"grad_norm": 2.110539674758911,
"learning_rate": 4.106877092597692e-05,
"loss": 5.4931,
"step": 61000
},
{
"epoch": 2.807206875524926,
"eval_loss": 5.6315765380859375,
"eval_runtime": 33.9234,
"eval_samples_per_second": 11.32,
"eval_steps_per_second": 5.66,
"eval_tts_loss": 8.616176136626422,
"step": 61000
},
{
"epoch": 2.809507921350254,
"grad_norm": 1.987831711769104,
"learning_rate": 4.099699200367488e-05,
"loss": 5.482,
"step": 61050
},
{
"epoch": 2.8118089671755815,
"grad_norm": 2.204674243927002,
"learning_rate": 4.0925232252144266e-05,
"loss": 5.4937,
"step": 61100
},
{
"epoch": 2.814110013000909,
"grad_norm": 2.2550504207611084,
"learning_rate": 4.085349182418841e-05,
"loss": 5.4465,
"step": 61150
},
{
"epoch": 2.8164110588262368,
"grad_norm": 2.125481367111206,
"learning_rate": 4.0781770872569514e-05,
"loss": 5.3974,
"step": 61200
},
{
"epoch": 2.8187121046515644,
"grad_norm": 1.787434458732605,
"learning_rate": 4.07100695500083e-05,
"loss": 5.395,
"step": 61250
},
{
"epoch": 2.8210131504768916,
"grad_norm": 2.028134346008301,
"learning_rate": 4.063982144515187e-05,
"loss": 5.4681,
"step": 61300
},
{
"epoch": 2.8233141963022192,
"grad_norm": 2.4000136852264404,
"learning_rate": 4.056815943851771e-05,
"loss": 5.4792,
"step": 61350
},
{
"epoch": 2.825615242127547,
"grad_norm": 1.750549077987671,
"learning_rate": 4.049651751579987e-05,
"loss": 5.5025,
"step": 61400
},
{
"epoch": 2.8279162879528745,
"grad_norm": 2.1570894718170166,
"learning_rate": 4.042489582955082e-05,
"loss": 5.4873,
"step": 61450
},
{
"epoch": 2.830217333778202,
"grad_norm": 1.8570895195007324,
"learning_rate": 4.0353294532279904e-05,
"loss": 5.4529,
"step": 61500
},
{
"epoch": 2.83251837960353,
"grad_norm": 2.0952110290527344,
"learning_rate": 4.028171377645307e-05,
"loss": 5.4145,
"step": 61550
},
{
"epoch": 2.8348194254288575,
"grad_norm": 1.6754381656646729,
"learning_rate": 4.021015371449254e-05,
"loss": 5.4637,
"step": 61600
},
{
"epoch": 2.837120471254185,
"grad_norm": 1.8659425973892212,
"learning_rate": 4.013861449877643e-05,
"loss": 5.4274,
"step": 61650
},
{
"epoch": 2.8394215170795127,
"grad_norm": 1.94052255153656,
"learning_rate": 4.006709628163849e-05,
"loss": 5.4469,
"step": 61700
},
{
"epoch": 2.8417225629048404,
"grad_norm": 2.1029059886932373,
"learning_rate": 3.999559921536776e-05,
"loss": 5.4863,
"step": 61750
},
{
"epoch": 2.844023608730168,
"grad_norm": 2.0127499103546143,
"learning_rate": 3.9924123452208226e-05,
"loss": 5.4694,
"step": 61800
},
{
"epoch": 2.846324654555495,
"grad_norm": 1.892794132232666,
"learning_rate": 3.985266914435853e-05,
"loss": 5.4628,
"step": 61850
},
{
"epoch": 2.848625700380823,
"grad_norm": 1.9065146446228027,
"learning_rate": 3.9781236443971624e-05,
"loss": 5.4541,
"step": 61900
},
{
"epoch": 2.8509267462061505,
"grad_norm": 1.7953850030899048,
"learning_rate": 3.970982550315445e-05,
"loss": 5.5099,
"step": 61950
},
{
"epoch": 2.853227792031478,
"grad_norm": 2.3671352863311768,
"learning_rate": 3.96384364739676e-05,
"loss": 5.4451,
"step": 62000
},
{
"epoch": 2.853227792031478,
"eval_loss": 5.629026889801025,
"eval_runtime": 33.1809,
"eval_samples_per_second": 11.573,
"eval_steps_per_second": 5.786,
"eval_tts_loss": 8.676706925015079,
"step": 62000
},
{
"epoch": 2.855528837856806,
"grad_norm": 2.3126471042633057,
"learning_rate": 3.9567069508425006e-05,
"loss": 5.4794,
"step": 62050
},
{
"epoch": 2.8578298836821334,
"grad_norm": 2.4819459915161133,
"learning_rate": 3.9495724758493645e-05,
"loss": 5.3972,
"step": 62100
},
{
"epoch": 2.860130929507461,
"grad_norm": 2.0319995880126953,
"learning_rate": 3.9424402376093166e-05,
"loss": 5.5088,
"step": 62150
},
{
"epoch": 2.8624319753327887,
"grad_norm": 2.0810019969940186,
"learning_rate": 3.9353102513095615e-05,
"loss": 5.4903,
"step": 62200
},
{
"epoch": 2.8647330211581163,
"grad_norm": 2.255082607269287,
"learning_rate": 3.928182532132506e-05,
"loss": 5.4857,
"step": 62250
},
{
"epoch": 2.867034066983444,
"grad_norm": 1.9748070240020752,
"learning_rate": 3.92105709525573e-05,
"loss": 5.4819,
"step": 62300
},
{
"epoch": 2.8693351128087716,
"grad_norm": 2.1835029125213623,
"learning_rate": 3.913933955851953e-05,
"loss": 5.4376,
"step": 62350
},
{
"epoch": 2.8716361586340993,
"grad_norm": 2.0077571868896484,
"learning_rate": 3.906813129089004e-05,
"loss": 5.4494,
"step": 62400
},
{
"epoch": 2.873937204459427,
"grad_norm": 1.9124616384506226,
"learning_rate": 3.8996946301297864e-05,
"loss": 5.5265,
"step": 62450
},
{
"epoch": 2.8762382502847545,
"grad_norm": 1.8583190441131592,
"learning_rate": 3.892578474132248e-05,
"loss": 5.4479,
"step": 62500
},
{
"epoch": 2.878539296110082,
"grad_norm": 2.0919296741485596,
"learning_rate": 3.885464676249345e-05,
"loss": 5.5268,
"step": 62550
},
{
"epoch": 2.88084034193541,
"grad_norm": 2.1244289875030518,
"learning_rate": 3.878353251629014e-05,
"loss": 5.4922,
"step": 62600
},
{
"epoch": 2.8831413877607375,
"grad_norm": 2.20938777923584,
"learning_rate": 3.871244215414138e-05,
"loss": 5.4315,
"step": 62650
},
{
"epoch": 2.885442433586065,
"grad_norm": 1.9381208419799805,
"learning_rate": 3.8641375827425155e-05,
"loss": 5.4294,
"step": 62700
},
{
"epoch": 2.8877434794113928,
"grad_norm": 2.0562620162963867,
"learning_rate": 3.8570333687468246e-05,
"loss": 5.5141,
"step": 62750
},
{
"epoch": 2.89004452523672,
"grad_norm": 2.103546619415283,
"learning_rate": 3.8499315885545936e-05,
"loss": 5.4004,
"step": 62800
},
{
"epoch": 2.8923455710620476,
"grad_norm": 2.0385918617248535,
"learning_rate": 3.8428322572881694e-05,
"loss": 5.4527,
"step": 62850
},
{
"epoch": 2.8946466168873752,
"grad_norm": 1.977550983428955,
"learning_rate": 3.835735390064682e-05,
"loss": 5.463,
"step": 62900
},
{
"epoch": 2.896947662712703,
"grad_norm": 2.7615981101989746,
"learning_rate": 3.828641001996018e-05,
"loss": 5.3981,
"step": 62950
},
{
"epoch": 2.8992487085380305,
"grad_norm": 1.8082493543624878,
"learning_rate": 3.821549108188784e-05,
"loss": 5.5473,
"step": 63000
},
{
"epoch": 2.8992487085380305,
"eval_loss": 5.622348308563232,
"eval_runtime": 34.1856,
"eval_samples_per_second": 11.233,
"eval_steps_per_second": 5.616,
"eval_tts_loss": 8.661594926281072,
"step": 63000
},
{
"epoch": 2.901549754363358,
"grad_norm": 1.863789677619934,
"learning_rate": 3.814459723744272e-05,
"loss": 5.4345,
"step": 63050
},
{
"epoch": 2.903850800188686,
"grad_norm": 1.918397068977356,
"learning_rate": 3.8073728637584364e-05,
"loss": 5.468,
"step": 63100
},
{
"epoch": 2.9061518460140134,
"grad_norm": 2.23038387298584,
"learning_rate": 3.8002885433218484e-05,
"loss": 5.481,
"step": 63150
},
{
"epoch": 2.908452891839341,
"grad_norm": 1.9242464303970337,
"learning_rate": 3.7932067775196786e-05,
"loss": 5.3977,
"step": 63200
},
{
"epoch": 2.9107539376646687,
"grad_norm": 2.0655956268310547,
"learning_rate": 3.786127581431653e-05,
"loss": 5.469,
"step": 63250
},
{
"epoch": 2.9130549834899964,
"grad_norm": 2.5878562927246094,
"learning_rate": 3.779050970132032e-05,
"loss": 5.4816,
"step": 63300
},
{
"epoch": 2.9153560293153236,
"grad_norm": 2.0930867195129395,
"learning_rate": 3.7719769586895644e-05,
"loss": 5.4662,
"step": 63350
},
{
"epoch": 2.917657075140651,
"grad_norm": 2.034477710723877,
"learning_rate": 3.764905562167468e-05,
"loss": 5.4143,
"step": 63400
},
{
"epoch": 2.919958120965979,
"grad_norm": 2.3117949962615967,
"learning_rate": 3.757836795623391e-05,
"loss": 5.4487,
"step": 63450
},
{
"epoch": 2.9222591667913065,
"grad_norm": 2.2001752853393555,
"learning_rate": 3.750770674109379e-05,
"loss": 5.4595,
"step": 63500
},
{
"epoch": 2.924560212616634,
"grad_norm": 2.0831422805786133,
"learning_rate": 3.743707212671851e-05,
"loss": 5.5518,
"step": 63550
},
{
"epoch": 2.9268612584419618,
"grad_norm": 2.453908681869507,
"learning_rate": 3.736646426351556e-05,
"loss": 5.442,
"step": 63600
},
{
"epoch": 2.9291623042672894,
"grad_norm": 2.2254319190979004,
"learning_rate": 3.72958833018355e-05,
"loss": 5.4166,
"step": 63650
},
{
"epoch": 2.931463350092617,
"grad_norm": 2.0115628242492676,
"learning_rate": 3.7225329391971595e-05,
"loss": 5.4603,
"step": 63700
},
{
"epoch": 2.9337643959179447,
"grad_norm": 2.1176035404205322,
"learning_rate": 3.715480268415951e-05,
"loss": 5.4859,
"step": 63750
},
{
"epoch": 2.9360654417432723,
"grad_norm": 2.4771876335144043,
"learning_rate": 3.708430332857698e-05,
"loss": 5.4187,
"step": 63800
},
{
"epoch": 2.9383664875686,
"grad_norm": 2.307610273361206,
"learning_rate": 3.7013831475343505e-05,
"loss": 5.4417,
"step": 63850
},
{
"epoch": 2.9406675333939276,
"grad_norm": 2.129941463470459,
"learning_rate": 3.694338727452001e-05,
"loss": 5.4669,
"step": 63900
},
{
"epoch": 2.9429685792192553,
"grad_norm": 2.152264356613159,
"learning_rate": 3.687297087610857e-05,
"loss": 5.4354,
"step": 63950
},
{
"epoch": 2.945269625044583,
"grad_norm": 1.7948946952819824,
"learning_rate": 3.680258243005201e-05,
"loss": 5.4489,
"step": 64000
},
{
"epoch": 2.945269625044583,
"eval_loss": 5.6132378578186035,
"eval_runtime": 34.2421,
"eval_samples_per_second": 11.214,
"eval_steps_per_second": 5.607,
"eval_tts_loss": 8.660041312188223,
"step": 64000
},
{
"epoch": 2.9475706708699105,
"grad_norm": 2.310068130493164,
"learning_rate": 3.673222208623367e-05,
"loss": 5.4306,
"step": 64050
},
{
"epoch": 2.949871716695238,
"grad_norm": 1.8985226154327393,
"learning_rate": 3.666188999447704e-05,
"loss": 5.4886,
"step": 64100
},
{
"epoch": 2.952172762520566,
"grad_norm": 1.9499763250350952,
"learning_rate": 3.659158630454546e-05,
"loss": 5.4375,
"step": 64150
},
{
"epoch": 2.9544738083458935,
"grad_norm": 2.236942768096924,
"learning_rate": 3.652131116614176e-05,
"loss": 5.4618,
"step": 64200
},
{
"epoch": 2.9567748541712207,
"grad_norm": 1.9788742065429688,
"learning_rate": 3.6451064728908005e-05,
"loss": 5.4773,
"step": 64250
},
{
"epoch": 2.9590758999965483,
"grad_norm": 2.213575601577759,
"learning_rate": 3.638084714242513e-05,
"loss": 5.5173,
"step": 64300
},
{
"epoch": 2.961376945821876,
"grad_norm": 2.0651750564575195,
"learning_rate": 3.631065855621263e-05,
"loss": 5.4846,
"step": 64350
},
{
"epoch": 2.9636779916472036,
"grad_norm": 2.2218382358551025,
"learning_rate": 3.624190202182424e-05,
"loss": 5.4418,
"step": 64400
},
{
"epoch": 2.965979037472531,
"grad_norm": 1.9353138208389282,
"learning_rate": 3.6171771297017563e-05,
"loss": 5.4387,
"step": 64450
},
{
"epoch": 2.968280083297859,
"grad_norm": 2.242788791656494,
"learning_rate": 3.6101670017681946e-05,
"loss": 5.4293,
"step": 64500
},
{
"epoch": 2.9705811291231865,
"grad_norm": 1.8731865882873535,
"learning_rate": 3.603159833308924e-05,
"loss": 5.5088,
"step": 64550
},
{
"epoch": 2.972882174948514,
"grad_norm": 2.418881893157959,
"learning_rate": 3.5961556392448265e-05,
"loss": 5.4744,
"step": 64600
},
{
"epoch": 2.975183220773842,
"grad_norm": 1.8968603610992432,
"learning_rate": 3.58915443449045e-05,
"loss": 5.4569,
"step": 64650
},
{
"epoch": 2.9774842665991694,
"grad_norm": 2.053711175918579,
"learning_rate": 3.58215623395398e-05,
"loss": 5.4719,
"step": 64700
},
{
"epoch": 2.979785312424497,
"grad_norm": 2.1504952907562256,
"learning_rate": 3.575161052537203e-05,
"loss": 5.4488,
"step": 64750
},
{
"epoch": 2.9820863582498247,
"grad_norm": 1.9735080003738403,
"learning_rate": 3.568168905135475e-05,
"loss": 5.3378,
"step": 64800
},
{
"epoch": 2.984387404075152,
"grad_norm": 2.453549861907959,
"learning_rate": 3.5611798066376935e-05,
"loss": 5.4512,
"step": 64850
},
{
"epoch": 2.9866884499004795,
"grad_norm": 2.467222213745117,
"learning_rate": 3.554193771926263e-05,
"loss": 5.4896,
"step": 64900
},
{
"epoch": 2.988989495725807,
"grad_norm": 1.9303815364837646,
"learning_rate": 3.5472108158770665e-05,
"loss": 5.4282,
"step": 64950
},
{
"epoch": 2.991290541551135,
"grad_norm": 2.030698776245117,
"learning_rate": 3.5402309533594276e-05,
"loss": 5.5008,
"step": 65000
},
{
"epoch": 2.991290541551135,
"eval_loss": 5.609447479248047,
"eval_runtime": 32.7857,
"eval_samples_per_second": 11.712,
"eval_steps_per_second": 5.856,
"eval_tts_loss": 8.679980597459995,
"step": 65000
},
{
"epoch": 2.9935915873764625,
"grad_norm": 2.023101568222046,
"learning_rate": 3.533254199236084e-05,
"loss": 5.4982,
"step": 65050
},
{
"epoch": 2.99589263320179,
"grad_norm": 2.273653745651245,
"learning_rate": 3.526280568363155e-05,
"loss": 5.4369,
"step": 65100
},
{
"epoch": 2.9981936790271178,
"grad_norm": 1.9517712593078613,
"learning_rate": 3.51931007559011e-05,
"loss": 5.447,
"step": 65150
},
{
"epoch": 3.0004602091650656,
"grad_norm": 2.0129427909851074,
"learning_rate": 3.5123427357597344e-05,
"loss": 5.4359,
"step": 65200
},
{
"epoch": 3.0027612549903933,
"grad_norm": 2.1433475017547607,
"learning_rate": 3.505378563708101e-05,
"loss": 5.1849,
"step": 65250
},
{
"epoch": 3.005062300815721,
"grad_norm": 2.248459577560425,
"learning_rate": 3.4984175742645374e-05,
"loss": 5.1406,
"step": 65300
},
{
"epoch": 3.0073633466410485,
"grad_norm": 1.8070954084396362,
"learning_rate": 3.491459782251593e-05,
"loss": 5.1737,
"step": 65350
},
{
"epoch": 3.009664392466376,
"grad_norm": 2.1215107440948486,
"learning_rate": 3.484505202485009e-05,
"loss": 5.2554,
"step": 65400
},
{
"epoch": 3.0119654382917034,
"grad_norm": 1.9235641956329346,
"learning_rate": 3.477553849773687e-05,
"loss": 5.2332,
"step": 65450
},
{
"epoch": 3.014266484117031,
"grad_norm": 2.419490098953247,
"learning_rate": 3.470605738919657e-05,
"loss": 5.1585,
"step": 65500
},
{
"epoch": 3.0165675299423587,
"grad_norm": 2.0951311588287354,
"learning_rate": 3.463660884718046e-05,
"loss": 5.2394,
"step": 65550
},
{
"epoch": 3.0188685757676863,
"grad_norm": 2.0080130100250244,
"learning_rate": 3.456719301957048e-05,
"loss": 5.1791,
"step": 65600
},
{
"epoch": 3.021169621593014,
"grad_norm": 2.4206161499023438,
"learning_rate": 3.4497810054178864e-05,
"loss": 5.2573,
"step": 65650
},
{
"epoch": 3.0234706674183416,
"grad_norm": 2.1025230884552,
"learning_rate": 3.442846009874791e-05,
"loss": 5.2453,
"step": 65700
},
{
"epoch": 3.0257717132436692,
"grad_norm": 2.4409613609313965,
"learning_rate": 3.435914330094959e-05,
"loss": 5.1971,
"step": 65750
},
{
"epoch": 3.028072759068997,
"grad_norm": 1.6904783248901367,
"learning_rate": 3.428985980838533e-05,
"loss": 5.1878,
"step": 65800
},
{
"epoch": 3.0303738048943245,
"grad_norm": 2.728496551513672,
"learning_rate": 3.4220609768585574e-05,
"loss": 5.2017,
"step": 65850
},
{
"epoch": 3.032674850719652,
"grad_norm": 2.500746250152588,
"learning_rate": 3.415139332900957e-05,
"loss": 5.1878,
"step": 65900
},
{
"epoch": 3.03497589654498,
"grad_norm": 2.43241024017334,
"learning_rate": 3.4082210637045e-05,
"loss": 5.1689,
"step": 65950
},
{
"epoch": 3.0372769423703074,
"grad_norm": 2.33240008354187,
"learning_rate": 3.4013061840007684e-05,
"loss": 5.226,
"step": 66000
},
{
"epoch": 3.0372769423703074,
"eval_loss": 5.651639938354492,
"eval_runtime": 33.64,
"eval_samples_per_second": 11.415,
"eval_steps_per_second": 5.707,
"eval_tts_loss": 8.971735107629264,
"step": 66000
},
{
"epoch": 3.039577988195635,
"grad_norm": 2.2935338020324707,
"learning_rate": 3.3943947085141306e-05,
"loss": 5.217,
"step": 66050
},
{
"epoch": 3.0418790340209627,
"grad_norm": 2.198589324951172,
"learning_rate": 3.3874866519616986e-05,
"loss": 5.237,
"step": 66100
},
{
"epoch": 3.0441800798462904,
"grad_norm": 1.9840471744537354,
"learning_rate": 3.380582029053312e-05,
"loss": 5.2196,
"step": 66150
},
{
"epoch": 3.0464811256716176,
"grad_norm": 2.086278200149536,
"learning_rate": 3.373680854491493e-05,
"loss": 5.1712,
"step": 66200
},
{
"epoch": 3.048782171496945,
"grad_norm": 2.9629907608032227,
"learning_rate": 3.366783142971427e-05,
"loss": 5.2284,
"step": 66250
},
{
"epoch": 3.051083217322273,
"grad_norm": 2.235788345336914,
"learning_rate": 3.359888909180918e-05,
"loss": 5.2337,
"step": 66300
},
{
"epoch": 3.0533842631476005,
"grad_norm": 2.0074462890625,
"learning_rate": 3.352998167800371e-05,
"loss": 5.1967,
"step": 66350
},
{
"epoch": 3.055685308972928,
"grad_norm": 1.99937105178833,
"learning_rate": 3.3461109335027506e-05,
"loss": 5.2483,
"step": 66400
},
{
"epoch": 3.0579863547982558,
"grad_norm": 2.276930093765259,
"learning_rate": 3.339227220953555e-05,
"loss": 5.2434,
"step": 66450
},
{
"epoch": 3.0602874006235834,
"grad_norm": 2.292680501937866,
"learning_rate": 3.332347044810782e-05,
"loss": 5.1313,
"step": 66500
},
{
"epoch": 3.062588446448911,
"grad_norm": 2.2346673011779785,
"learning_rate": 3.3254704197249006e-05,
"loss": 5.2221,
"step": 66550
},
{
"epoch": 3.0648894922742387,
"grad_norm": 2.0256075859069824,
"learning_rate": 3.3185973603388165e-05,
"loss": 5.2076,
"step": 66600
},
{
"epoch": 3.0671905380995663,
"grad_norm": 2.891855478286743,
"learning_rate": 3.311727881287846e-05,
"loss": 5.2096,
"step": 66650
},
{
"epoch": 3.069491583924894,
"grad_norm": 2.2586610317230225,
"learning_rate": 3.304861997199679e-05,
"loss": 5.166,
"step": 66700
},
{
"epoch": 3.0717926297502216,
"grad_norm": 1.9196617603302002,
"learning_rate": 3.2979997226943496e-05,
"loss": 5.171,
"step": 66750
},
{
"epoch": 3.0740936755755492,
"grad_norm": 2.212149143218994,
"learning_rate": 3.291141072384207e-05,
"loss": 5.2251,
"step": 66800
},
{
"epoch": 3.076394721400877,
"grad_norm": 2.19577693939209,
"learning_rate": 3.284286060873884e-05,
"loss": 5.1944,
"step": 66850
},
{
"epoch": 3.078695767226204,
"grad_norm": 2.422126054763794,
"learning_rate": 3.277434702760264e-05,
"loss": 5.2308,
"step": 66900
},
{
"epoch": 3.0809968130515317,
"grad_norm": 2.0679380893707275,
"learning_rate": 3.27058701263245e-05,
"loss": 5.25,
"step": 66950
},
{
"epoch": 3.0832978588768594,
"grad_norm": 1.9856886863708496,
"learning_rate": 3.2637430050717374e-05,
"loss": 5.2306,
"step": 67000
},
{
"epoch": 3.0832978588768594,
"eval_loss": 5.655904293060303,
"eval_runtime": 33.6757,
"eval_samples_per_second": 11.403,
"eval_steps_per_second": 5.701,
"eval_tts_loss": 8.988588783504623,
"step": 67000
},
{
"epoch": 3.085598904702187,
"grad_norm": 2.128262996673584,
"learning_rate": 3.2569026946515744e-05,
"loss": 5.2399,
"step": 67050
},
{
"epoch": 3.0878999505275146,
"grad_norm": 2.2318902015686035,
"learning_rate": 3.250066095937545e-05,
"loss": 5.2388,
"step": 67100
},
{
"epoch": 3.0902009963528423,
"grad_norm": 2.4614462852478027,
"learning_rate": 3.243233223487323e-05,
"loss": 5.2573,
"step": 67150
},
{
"epoch": 3.09250204217817,
"grad_norm": 2.1323800086975098,
"learning_rate": 3.236404091850649e-05,
"loss": 5.1872,
"step": 67200
},
{
"epoch": 3.0948030880034976,
"grad_norm": 2.0967583656311035,
"learning_rate": 3.2295787155692985e-05,
"loss": 5.198,
"step": 67250
},
{
"epoch": 3.097104133828825,
"grad_norm": 2.0951972007751465,
"learning_rate": 3.22275710917705e-05,
"loss": 5.1414,
"step": 67300
},
{
"epoch": 3.099405179654153,
"grad_norm": 2.2159359455108643,
"learning_rate": 3.2159392871996574e-05,
"loss": 5.2265,
"step": 67350
},
{
"epoch": 3.1017062254794805,
"grad_norm": 2.2702057361602783,
"learning_rate": 3.209125264154812e-05,
"loss": 5.1164,
"step": 67400
},
{
"epoch": 3.104007271304808,
"grad_norm": 2.1611013412475586,
"learning_rate": 3.202315054552117e-05,
"loss": 5.2414,
"step": 67450
},
{
"epoch": 3.1063083171301358,
"grad_norm": 2.3985390663146973,
"learning_rate": 3.1955086728930584e-05,
"loss": 5.228,
"step": 67500
},
{
"epoch": 3.1086093629554634,
"grad_norm": 2.3189857006073,
"learning_rate": 3.188842146705822e-05,
"loss": 5.228,
"step": 67550
},
{
"epoch": 3.110910408780791,
"grad_norm": 2.1470682621002197,
"learning_rate": 3.182043387125502e-05,
"loss": 5.211,
"step": 67600
},
{
"epoch": 3.1132114546061187,
"grad_norm": 2.4096908569335938,
"learning_rate": 3.175248498654779e-05,
"loss": 5.2127,
"step": 67650
},
{
"epoch": 3.115512500431446,
"grad_norm": 2.0130157470703125,
"learning_rate": 3.168457495762513e-05,
"loss": 5.2123,
"step": 67700
},
{
"epoch": 3.1178135462567735,
"grad_norm": 2.5356950759887695,
"learning_rate": 3.161670392909286e-05,
"loss": 5.1882,
"step": 67750
},
{
"epoch": 3.120114592082101,
"grad_norm": 2.213794708251953,
"learning_rate": 3.1548872045473806e-05,
"loss": 5.1411,
"step": 67800
},
{
"epoch": 3.122415637907429,
"grad_norm": 2.2162258625030518,
"learning_rate": 3.148107945120743e-05,
"loss": 5.1809,
"step": 67850
},
{
"epoch": 3.1247166837327565,
"grad_norm": 2.2594213485717773,
"learning_rate": 3.1413326290649514e-05,
"loss": 5.1974,
"step": 67900
},
{
"epoch": 3.127017729558084,
"grad_norm": 2.3324978351593018,
"learning_rate": 3.134561270807186e-05,
"loss": 5.2048,
"step": 67950
},
{
"epoch": 3.1293187753834117,
"grad_norm": 2.50360107421875,
"learning_rate": 3.127793884766203e-05,
"loss": 5.1518,
"step": 68000
},
{
"epoch": 3.1293187753834117,
"eval_loss": 5.650763988494873,
"eval_runtime": 34.0787,
"eval_samples_per_second": 11.268,
"eval_steps_per_second": 5.634,
"eval_tts_loss": 8.96684687412939,
"step": 68000
},
{
"epoch": 3.1316198212087394,
"grad_norm": 2.1465678215026855,
"learning_rate": 3.121030485352299e-05,
"loss": 5.1121,
"step": 68050
},
{
"epoch": 3.133920867034067,
"grad_norm": 2.2086095809936523,
"learning_rate": 3.114271086967279e-05,
"loss": 5.2291,
"step": 68100
},
{
"epoch": 3.1362219128593947,
"grad_norm": 2.5457003116607666,
"learning_rate": 3.107515704004432e-05,
"loss": 5.2126,
"step": 68150
},
{
"epoch": 3.1385229586847223,
"grad_norm": 2.3758766651153564,
"learning_rate": 3.100764350848494e-05,
"loss": 5.2588,
"step": 68200
},
{
"epoch": 3.14082400451005,
"grad_norm": 2.52122163772583,
"learning_rate": 3.0940170418756233e-05,
"loss": 5.2151,
"step": 68250
},
{
"epoch": 3.1431250503353776,
"grad_norm": 2.119819164276123,
"learning_rate": 3.0872737914533614e-05,
"loss": 5.1885,
"step": 68300
},
{
"epoch": 3.1454260961607052,
"grad_norm": 2.662466049194336,
"learning_rate": 3.0805346139406126e-05,
"loss": 5.2543,
"step": 68350
},
{
"epoch": 3.1477271419860324,
"grad_norm": 2.391465902328491,
"learning_rate": 3.073799523687606e-05,
"loss": 5.1837,
"step": 68400
},
{
"epoch": 3.15002818781136,
"grad_norm": 2.0778284072875977,
"learning_rate": 3.067068535035866e-05,
"loss": 5.1969,
"step": 68450
},
{
"epoch": 3.1523292336366877,
"grad_norm": 2.5482311248779297,
"learning_rate": 3.060341662318188e-05,
"loss": 5.2041,
"step": 68500
},
{
"epoch": 3.1546302794620154,
"grad_norm": 2.060176372528076,
"learning_rate": 3.053618919858599e-05,
"loss": 5.2056,
"step": 68550
},
{
"epoch": 3.156931325287343,
"grad_norm": 2.577627658843994,
"learning_rate": 3.046900321972333e-05,
"loss": 5.1266,
"step": 68600
},
{
"epoch": 3.1592323711126706,
"grad_norm": 2.43023943901062,
"learning_rate": 3.040185882965796e-05,
"loss": 5.1717,
"step": 68650
},
{
"epoch": 3.1615334169379983,
"grad_norm": 2.572831630706787,
"learning_rate": 3.0334756171365403e-05,
"loss": 5.2253,
"step": 68700
},
{
"epoch": 3.163834462763326,
"grad_norm": 2.3290741443634033,
"learning_rate": 3.026769538773232e-05,
"loss": 5.2018,
"step": 68750
},
{
"epoch": 3.1661355085886536,
"grad_norm": 2.153881072998047,
"learning_rate": 3.0200676621556214e-05,
"loss": 5.2132,
"step": 68800
},
{
"epoch": 3.168436554413981,
"grad_norm": 1.900630235671997,
"learning_rate": 3.013370001554508e-05,
"loss": 5.1798,
"step": 68850
},
{
"epoch": 3.170737600239309,
"grad_norm": 2.3022022247314453,
"learning_rate": 3.006676571231719e-05,
"loss": 5.2217,
"step": 68900
},
{
"epoch": 3.1730386460646365,
"grad_norm": 2.353461742401123,
"learning_rate": 2.9999873854400694e-05,
"loss": 5.1732,
"step": 68950
},
{
"epoch": 3.175339691889964,
"grad_norm": 2.6645305156707764,
"learning_rate": 2.9933024584233395e-05,
"loss": 5.2012,
"step": 69000
},
{
"epoch": 3.175339691889964,
"eval_loss": 5.656210422515869,
"eval_runtime": 33.5674,
"eval_samples_per_second": 11.44,
"eval_steps_per_second": 5.72,
"eval_tts_loss": 8.98287127706553,
"step": 69000
},
{
"epoch": 3.1776407377152918,
"grad_norm": 2.240955114364624,
"learning_rate": 2.9866218044162358e-05,
"loss": 5.2563,
"step": 69050
},
{
"epoch": 3.1799417835406194,
"grad_norm": 1.9387973546981812,
"learning_rate": 2.9799454376443735e-05,
"loss": 5.2699,
"step": 69100
},
{
"epoch": 3.182242829365947,
"grad_norm": 2.1009185314178467,
"learning_rate": 2.9732733723242322e-05,
"loss": 5.1805,
"step": 69150
},
{
"epoch": 3.1845438751912742,
"grad_norm": 2.2868292331695557,
"learning_rate": 2.9666056226631356e-05,
"loss": 5.1707,
"step": 69200
},
{
"epoch": 3.186844921016602,
"grad_norm": 2.269939661026001,
"learning_rate": 2.959942202859216e-05,
"loss": 5.2238,
"step": 69250
},
{
"epoch": 3.1891459668419295,
"grad_norm": 2.072275400161743,
"learning_rate": 2.953283127101386e-05,
"loss": 5.1747,
"step": 69300
},
{
"epoch": 3.191447012667257,
"grad_norm": 2.3713185787200928,
"learning_rate": 2.94662840956931e-05,
"loss": 5.2188,
"step": 69350
},
{
"epoch": 3.193748058492585,
"grad_norm": 2.2737011909484863,
"learning_rate": 2.9399780644333695e-05,
"loss": 5.2647,
"step": 69400
},
{
"epoch": 3.1960491043179124,
"grad_norm": 2.3678393363952637,
"learning_rate": 2.9333321058546372e-05,
"loss": 5.2281,
"step": 69450
},
{
"epoch": 3.19835015014324,
"grad_norm": 2.1628551483154297,
"learning_rate": 2.926690547984845e-05,
"loss": 5.2008,
"step": 69500
},
{
"epoch": 3.2006511959685677,
"grad_norm": 2.6043412685394287,
"learning_rate": 2.920053404966352e-05,
"loss": 5.2402,
"step": 69550
},
{
"epoch": 3.2029522417938954,
"grad_norm": 2.384507179260254,
"learning_rate": 2.9134206909321215e-05,
"loss": 5.205,
"step": 69600
},
{
"epoch": 3.205253287619223,
"grad_norm": 2.3745009899139404,
"learning_rate": 2.9067924200056774e-05,
"loss": 5.2248,
"step": 69650
},
{
"epoch": 3.2075543334445507,
"grad_norm": 1.8591980934143066,
"learning_rate": 2.9001686063010953e-05,
"loss": 5.1882,
"step": 69700
},
{
"epoch": 3.2098553792698783,
"grad_norm": 2.359219789505005,
"learning_rate": 2.893549263922945e-05,
"loss": 5.1539,
"step": 69750
},
{
"epoch": 3.212156425095206,
"grad_norm": 2.183582305908203,
"learning_rate": 2.886934406966285e-05,
"loss": 5.232,
"step": 69800
},
{
"epoch": 3.2144574709205336,
"grad_norm": 2.331078052520752,
"learning_rate": 2.88032404951662e-05,
"loss": 5.1943,
"step": 69850
},
{
"epoch": 3.2167585167458608,
"grad_norm": 1.92499577999115,
"learning_rate": 2.8737182056498728e-05,
"loss": 5.1943,
"step": 69900
},
{
"epoch": 3.2190595625711884,
"grad_norm": 2.7040982246398926,
"learning_rate": 2.8671168894323562e-05,
"loss": 5.212,
"step": 69950
},
{
"epoch": 3.221360608396516,
"grad_norm": 2.5483896732330322,
"learning_rate": 2.8605201149207416e-05,
"loss": 5.1936,
"step": 70000
},
{
"epoch": 3.221360608396516,
"eval_loss": 5.647762298583984,
"eval_runtime": 34.2747,
"eval_samples_per_second": 11.204,
"eval_steps_per_second": 5.602,
"eval_tts_loss": 8.978084567315824,
"step": 70000
},
{
"epoch": 3.2236616542218437,
"grad_norm": 2.2737507820129395,
"learning_rate": 2.8539278961620275e-05,
"loss": 5.1953,
"step": 70050
},
{
"epoch": 3.2259627000471713,
"grad_norm": 2.2077796459198,
"learning_rate": 2.8473402471935164e-05,
"loss": 5.1825,
"step": 70100
},
{
"epoch": 3.228263745872499,
"grad_norm": 1.9657273292541504,
"learning_rate": 2.8407571820427757e-05,
"loss": 5.2468,
"step": 70150
},
{
"epoch": 3.2305647916978266,
"grad_norm": 2.0409305095672607,
"learning_rate": 2.8341787147276134e-05,
"loss": 5.1942,
"step": 70200
},
{
"epoch": 3.2328658375231543,
"grad_norm": 2.118298053741455,
"learning_rate": 2.827604859256046e-05,
"loss": 5.1739,
"step": 70250
},
{
"epoch": 3.235166883348482,
"grad_norm": 2.3376784324645996,
"learning_rate": 2.821035629626272e-05,
"loss": 5.2008,
"step": 70300
},
{
"epoch": 3.2374679291738095,
"grad_norm": 2.356640100479126,
"learning_rate": 2.8144710398266373e-05,
"loss": 5.2111,
"step": 70350
},
{
"epoch": 3.239768974999137,
"grad_norm": 1.9011934995651245,
"learning_rate": 2.80791110383561e-05,
"loss": 5.2511,
"step": 70400
},
{
"epoch": 3.242070020824465,
"grad_norm": 2.2594473361968994,
"learning_rate": 2.801355835621746e-05,
"loss": 5.2381,
"step": 70450
},
{
"epoch": 3.2443710666497925,
"grad_norm": 1.9861502647399902,
"learning_rate": 2.794805249143662e-05,
"loss": 5.2064,
"step": 70500
},
{
"epoch": 3.24667211247512,
"grad_norm": 2.493450880050659,
"learning_rate": 2.788259358350011e-05,
"loss": 5.2375,
"step": 70550
},
{
"epoch": 3.2489731583004477,
"grad_norm": 2.1547906398773193,
"learning_rate": 2.7817181771794405e-05,
"loss": 5.2282,
"step": 70600
},
{
"epoch": 3.2512742041257754,
"grad_norm": 2.2504146099090576,
"learning_rate": 2.7751817195605716e-05,
"loss": 5.2275,
"step": 70650
},
{
"epoch": 3.2535752499511026,
"grad_norm": 2.092921257019043,
"learning_rate": 2.7686499994119674e-05,
"loss": 5.2166,
"step": 70700
},
{
"epoch": 3.2558762957764302,
"grad_norm": 2.1429293155670166,
"learning_rate": 2.762123030642104e-05,
"loss": 5.1866,
"step": 70750
},
{
"epoch": 3.258177341601758,
"grad_norm": 2.242461681365967,
"learning_rate": 2.7556008271493406e-05,
"loss": 5.2111,
"step": 70800
},
{
"epoch": 3.2604783874270855,
"grad_norm": 2.464775323867798,
"learning_rate": 2.7490834028218832e-05,
"loss": 5.2219,
"step": 70850
},
{
"epoch": 3.262779433252413,
"grad_norm": 2.140676736831665,
"learning_rate": 2.7425707715377667e-05,
"loss": 5.2287,
"step": 70900
},
{
"epoch": 3.265080479077741,
"grad_norm": 2.3947184085845947,
"learning_rate": 2.7361930564549054e-05,
"loss": 5.1812,
"step": 70950
},
{
"epoch": 3.2673815249030684,
"grad_norm": 2.0517752170562744,
"learning_rate": 2.7296899562996115e-05,
"loss": 5.1908,
"step": 71000
},
{
"epoch": 3.2673815249030684,
"eval_loss": 5.633924961090088,
"eval_runtime": 35.1466,
"eval_samples_per_second": 10.926,
"eval_steps_per_second": 5.463,
"eval_tts_loss": 8.900792966671826,
"step": 71000
},
{
"epoch": 3.269682570728396,
"grad_norm": 2.8329784870147705,
"learning_rate": 2.7231916904835607e-05,
"loss": 5.2412,
"step": 71050
},
{
"epoch": 3.2719836165537237,
"grad_norm": 2.969275712966919,
"learning_rate": 2.716698272843994e-05,
"loss": 5.2071,
"step": 71100
},
{
"epoch": 3.2742846623790514,
"grad_norm": 2.4925220012664795,
"learning_rate": 2.7102097172078234e-05,
"loss": 5.2071,
"step": 71150
},
{
"epoch": 3.276585708204379,
"grad_norm": 2.5884768962860107,
"learning_rate": 2.7037260373916108e-05,
"loss": 5.1944,
"step": 71200
},
{
"epoch": 3.2788867540297066,
"grad_norm": 2.3826942443847656,
"learning_rate": 2.6972472472015388e-05,
"loss": 5.2356,
"step": 71250
},
{
"epoch": 3.2811877998550343,
"grad_norm": 2.155606269836426,
"learning_rate": 2.690773360433373e-05,
"loss": 5.2248,
"step": 71300
},
{
"epoch": 3.2834888456803615,
"grad_norm": 2.455932855606079,
"learning_rate": 2.6843043908724398e-05,
"loss": 5.2249,
"step": 71350
},
{
"epoch": 3.285789891505689,
"grad_norm": 1.9317978620529175,
"learning_rate": 2.6778403522935952e-05,
"loss": 5.1775,
"step": 71400
},
{
"epoch": 3.2880909373310168,
"grad_norm": 2.2273571491241455,
"learning_rate": 2.6713812584611953e-05,
"loss": 5.2158,
"step": 71450
},
{
"epoch": 3.2903919831563444,
"grad_norm": 2.5110819339752197,
"learning_rate": 2.664927123129065e-05,
"loss": 5.2597,
"step": 71500
},
{
"epoch": 3.292693028981672,
"grad_norm": 2.180302619934082,
"learning_rate": 2.6584779600404752e-05,
"loss": 5.2747,
"step": 71550
},
{
"epoch": 3.2949940748069997,
"grad_norm": 2.3025472164154053,
"learning_rate": 2.652033782928104e-05,
"loss": 5.206,
"step": 71600
},
{
"epoch": 3.2972951206323273,
"grad_norm": 2.1360127925872803,
"learning_rate": 2.6455946055140142e-05,
"loss": 5.2514,
"step": 71650
},
{
"epoch": 3.299596166457655,
"grad_norm": 2.336792230606079,
"learning_rate": 2.639160441509626e-05,
"loss": 5.187,
"step": 71700
},
{
"epoch": 3.3018972122829826,
"grad_norm": 2.4843273162841797,
"learning_rate": 2.6327313046156797e-05,
"loss": 5.1613,
"step": 71750
},
{
"epoch": 3.3041982581083102,
"grad_norm": 2.6479268074035645,
"learning_rate": 2.626307208522212e-05,
"loss": 5.1823,
"step": 71800
},
{
"epoch": 3.306499303933638,
"grad_norm": 2.1863152980804443,
"learning_rate": 2.619888166908527e-05,
"loss": 5.2027,
"step": 71850
},
{
"epoch": 3.3088003497589655,
"grad_norm": 2.1743619441986084,
"learning_rate": 2.613474193443166e-05,
"loss": 5.1915,
"step": 71900
},
{
"epoch": 3.311101395584293,
"grad_norm": 2.2786686420440674,
"learning_rate": 2.6070653017838753e-05,
"loss": 5.2057,
"step": 71950
},
{
"epoch": 3.313402441409621,
"grad_norm": 2.755693197250366,
"learning_rate": 2.6006615055775822e-05,
"loss": 5.1556,
"step": 72000
},
{
"epoch": 3.313402441409621,
"eval_loss": 5.637906551361084,
"eval_runtime": 34.4223,
"eval_samples_per_second": 11.156,
"eval_steps_per_second": 5.578,
"eval_tts_loss": 8.920327802164545,
"step": 72000
},
{
"epoch": 3.3157034872349485,
"grad_norm": 2.590268135070801,
"learning_rate": 2.594262818460364e-05,
"loss": 5.2122,
"step": 72050
},
{
"epoch": 3.318004533060276,
"grad_norm": 2.144188404083252,
"learning_rate": 2.5878692540574216e-05,
"loss": 5.2054,
"step": 72100
},
{
"epoch": 3.3203055788856037,
"grad_norm": 2.2111215591430664,
"learning_rate": 2.5814808259830437e-05,
"loss": 5.2366,
"step": 72150
},
{
"epoch": 3.322606624710931,
"grad_norm": 2.2800168991088867,
"learning_rate": 2.5750975478405824e-05,
"loss": 5.1807,
"step": 72200
},
{
"epoch": 3.3249076705362586,
"grad_norm": 2.367213010787964,
"learning_rate": 2.5687194332224252e-05,
"loss": 5.2799,
"step": 72250
},
{
"epoch": 3.327208716361586,
"grad_norm": 2.457364559173584,
"learning_rate": 2.5623464957099628e-05,
"loss": 5.206,
"step": 72300
},
{
"epoch": 3.329509762186914,
"grad_norm": 2.512826442718506,
"learning_rate": 2.5559787488735642e-05,
"loss": 5.1831,
"step": 72350
},
{
"epoch": 3.3318108080122415,
"grad_norm": 2.583421230316162,
"learning_rate": 2.5496162062725427e-05,
"loss": 5.2227,
"step": 72400
},
{
"epoch": 3.334111853837569,
"grad_norm": 2.224989891052246,
"learning_rate": 2.5432588814551316e-05,
"loss": 5.1983,
"step": 72450
},
{
"epoch": 3.336412899662897,
"grad_norm": 2.566269874572754,
"learning_rate": 2.536906787958452e-05,
"loss": 5.1781,
"step": 72500
},
{
"epoch": 3.3387139454882244,
"grad_norm": 2.3426711559295654,
"learning_rate": 2.53055993930849e-05,
"loss": 5.1926,
"step": 72550
},
{
"epoch": 3.341014991313552,
"grad_norm": 2.5896639823913574,
"learning_rate": 2.524218349020059e-05,
"loss": 5.1832,
"step": 72600
},
{
"epoch": 3.3433160371388797,
"grad_norm": 2.0832769870758057,
"learning_rate": 2.5178820305967764e-05,
"loss": 5.1218,
"step": 72650
},
{
"epoch": 3.3456170829642073,
"grad_norm": 2.4810876846313477,
"learning_rate": 2.5115509975310336e-05,
"loss": 5.2014,
"step": 72700
},
{
"epoch": 3.347918128789535,
"grad_norm": 2.3670923709869385,
"learning_rate": 2.5052252633039685e-05,
"loss": 5.1575,
"step": 72750
},
{
"epoch": 3.3502191746148626,
"grad_norm": 2.1796634197235107,
"learning_rate": 2.498904841385435e-05,
"loss": 5.2025,
"step": 72800
},
{
"epoch": 3.35252022044019,
"grad_norm": 2.492060899734497,
"learning_rate": 2.4925897452339758e-05,
"loss": 5.1873,
"step": 72850
},
{
"epoch": 3.3548212662655175,
"grad_norm": 2.886307716369629,
"learning_rate": 2.486279988296792e-05,
"loss": 5.2142,
"step": 72900
},
{
"epoch": 3.357122312090845,
"grad_norm": 2.1251938343048096,
"learning_rate": 2.4799755840097167e-05,
"loss": 5.1861,
"step": 72950
},
{
"epoch": 3.3594233579161727,
"grad_norm": 2.2697665691375732,
"learning_rate": 2.4736765457971844e-05,
"loss": 5.2057,
"step": 73000
},
{
"epoch": 3.3594233579161727,
"eval_loss": 5.634683132171631,
"eval_runtime": 33.3076,
"eval_samples_per_second": 11.529,
"eval_steps_per_second": 5.764,
"eval_tts_loss": 8.970257855765373,
"step": 73000
},
{
"epoch": 3.3617244037415004,
"grad_norm": 2.1380538940429688,
"learning_rate": 2.4673828870722034e-05,
"loss": 5.219,
"step": 73050
},
{
"epoch": 3.364025449566828,
"grad_norm": 2.223712205886841,
"learning_rate": 2.4610946212363273e-05,
"loss": 5.2005,
"step": 73100
},
{
"epoch": 3.3663264953921557,
"grad_norm": 2.7483530044555664,
"learning_rate": 2.4548117616796263e-05,
"loss": 5.2475,
"step": 73150
},
{
"epoch": 3.3686275412174833,
"grad_norm": 2.250915765762329,
"learning_rate": 2.448534321780658e-05,
"loss": 5.2745,
"step": 73200
},
{
"epoch": 3.370928587042811,
"grad_norm": 2.6626806259155273,
"learning_rate": 2.4422623149064394e-05,
"loss": 5.2455,
"step": 73250
},
{
"epoch": 3.3732296328681386,
"grad_norm": 2.2990479469299316,
"learning_rate": 2.43599575441242e-05,
"loss": 5.1909,
"step": 73300
},
{
"epoch": 3.3755306786934662,
"grad_norm": 2.3052256107330322,
"learning_rate": 2.4297346536424486e-05,
"loss": 5.1939,
"step": 73350
},
{
"epoch": 3.377831724518794,
"grad_norm": 2.1850342750549316,
"learning_rate": 2.423479025928755e-05,
"loss": 5.2347,
"step": 73400
},
{
"epoch": 3.3801327703441215,
"grad_norm": 2.586782455444336,
"learning_rate": 2.417228884591907e-05,
"loss": 5.1781,
"step": 73450
},
{
"epoch": 3.382433816169449,
"grad_norm": 2.611459732055664,
"learning_rate": 2.4109842429407946e-05,
"loss": 5.2054,
"step": 73500
},
{
"epoch": 3.384734861994777,
"grad_norm": 2.115934133529663,
"learning_rate": 2.404745114272596e-05,
"loss": 5.194,
"step": 73550
},
{
"epoch": 3.3870359078201044,
"grad_norm": 2.2402496337890625,
"learning_rate": 2.3985115118727476e-05,
"loss": 5.1598,
"step": 73600
},
{
"epoch": 3.389336953645432,
"grad_norm": 2.0667028427124023,
"learning_rate": 2.3922834490149222e-05,
"loss": 5.2018,
"step": 73650
},
{
"epoch": 3.3916379994707593,
"grad_norm": 2.5982606410980225,
"learning_rate": 2.386060938960995e-05,
"loss": 5.1964,
"step": 73700
},
{
"epoch": 3.393939045296087,
"grad_norm": 2.2558162212371826,
"learning_rate": 2.3798439949610164e-05,
"loss": 5.2415,
"step": 73750
},
{
"epoch": 3.3962400911214146,
"grad_norm": 2.270799160003662,
"learning_rate": 2.3736326302531864e-05,
"loss": 5.2191,
"step": 73800
},
{
"epoch": 3.398541136946742,
"grad_norm": 2.231250762939453,
"learning_rate": 2.367426858063821e-05,
"loss": 5.1996,
"step": 73850
},
{
"epoch": 3.40084218277207,
"grad_norm": 2.2880544662475586,
"learning_rate": 2.3612266916073344e-05,
"loss": 5.1638,
"step": 73900
},
{
"epoch": 3.4031432285973975,
"grad_norm": 1.8324086666107178,
"learning_rate": 2.355032144086199e-05,
"loss": 5.1729,
"step": 73950
},
{
"epoch": 3.405444274422725,
"grad_norm": 2.1723499298095703,
"learning_rate": 2.3488432286909224e-05,
"loss": 5.1841,
"step": 74000
},
{
"epoch": 3.405444274422725,
"eval_loss": 5.629993915557861,
"eval_runtime": 34.5361,
"eval_samples_per_second": 11.119,
"eval_steps_per_second": 5.559,
"eval_tts_loss": 8.953473981543894,
"step": 74000
},
{
"epoch": 3.4077453202480528,
"grad_norm": 2.0692930221557617,
"learning_rate": 2.3426599586000203e-05,
"loss": 5.1673,
"step": 74050
},
{
"epoch": 3.4100463660733804,
"grad_norm": 2.055471181869507,
"learning_rate": 2.3364823469799895e-05,
"loss": 5.1185,
"step": 74100
},
{
"epoch": 3.412347411898708,
"grad_norm": 2.4703516960144043,
"learning_rate": 2.330310406985273e-05,
"loss": 5.2096,
"step": 74150
},
{
"epoch": 3.4146484577240357,
"grad_norm": 2.161728620529175,
"learning_rate": 2.324144151758238e-05,
"loss": 5.1937,
"step": 74200
},
{
"epoch": 3.4169495035493633,
"grad_norm": 2.2135536670684814,
"learning_rate": 2.3179835944291524e-05,
"loss": 5.2268,
"step": 74250
},
{
"epoch": 3.419250549374691,
"grad_norm": 2.2817091941833496,
"learning_rate": 2.3118287481161456e-05,
"loss": 5.1812,
"step": 74300
},
{
"epoch": 3.421551595200018,
"grad_norm": 2.546320915222168,
"learning_rate": 2.3056796259251874e-05,
"loss": 5.1771,
"step": 74350
},
{
"epoch": 3.423852641025346,
"grad_norm": 1.982683539390564,
"learning_rate": 2.299536240950058e-05,
"loss": 5.2161,
"step": 74400
},
{
"epoch": 3.4261536868506735,
"grad_norm": 1.9768283367156982,
"learning_rate": 2.293398606272323e-05,
"loss": 5.2182,
"step": 74450
},
{
"epoch": 3.428454732676001,
"grad_norm": 2.2743465900421143,
"learning_rate": 2.287266734961302e-05,
"loss": 5.2402,
"step": 74500
},
{
"epoch": 3.4307557785013287,
"grad_norm": 2.384610176086426,
"learning_rate": 2.281140640074042e-05,
"loss": 5.2114,
"step": 74550
},
{
"epoch": 3.4330568243266564,
"grad_norm": 2.158867120742798,
"learning_rate": 2.2750203346552908e-05,
"loss": 5.1912,
"step": 74600
},
{
"epoch": 3.435357870151984,
"grad_norm": 2.0050137042999268,
"learning_rate": 2.2689058317374674e-05,
"loss": 5.2088,
"step": 74650
},
{
"epoch": 3.4376589159773117,
"grad_norm": 2.704558849334717,
"learning_rate": 2.2627971443406344e-05,
"loss": 5.1843,
"step": 74700
},
{
"epoch": 3.4399599618026393,
"grad_norm": 2.4487814903259277,
"learning_rate": 2.256694285472475e-05,
"loss": 5.2682,
"step": 74750
},
{
"epoch": 3.442261007627967,
"grad_norm": 2.0930614471435547,
"learning_rate": 2.2505972681282566e-05,
"loss": 5.1559,
"step": 74800
},
{
"epoch": 3.4445620534532946,
"grad_norm": 2.091146945953369,
"learning_rate": 2.24450610529081e-05,
"loss": 5.2461,
"step": 74850
},
{
"epoch": 3.4468630992786222,
"grad_norm": 2.681933641433716,
"learning_rate": 2.2384208099304994e-05,
"loss": 5.2385,
"step": 74900
},
{
"epoch": 3.44916414510395,
"grad_norm": 2.2790756225585938,
"learning_rate": 2.232341395005194e-05,
"loss": 5.2283,
"step": 74950
},
{
"epoch": 3.4514651909292775,
"grad_norm": 2.6264398097991943,
"learning_rate": 2.226267873460242e-05,
"loss": 5.222,
"step": 75000
},
{
"epoch": 3.4514651909292775,
"eval_loss": 5.62860631942749,
"eval_runtime": 33.0203,
"eval_samples_per_second": 11.629,
"eval_steps_per_second": 5.815,
"eval_tts_loss": 8.956448180894176,
"step": 75000
},
{
"epoch": 3.453766236754605,
"grad_norm": 2.6697559356689453,
"learning_rate": 2.220200258228444e-05,
"loss": 5.183,
"step": 75050
},
{
"epoch": 3.456067282579933,
"grad_norm": 2.3682973384857178,
"learning_rate": 2.21413856223002e-05,
"loss": 5.2183,
"step": 75100
},
{
"epoch": 3.4583683284052604,
"grad_norm": 2.0403831005096436,
"learning_rate": 2.208082798372589e-05,
"loss": 5.2039,
"step": 75150
},
{
"epoch": 3.4606693742305876,
"grad_norm": 2.470094919204712,
"learning_rate": 2.2022748579976672e-05,
"loss": 5.1217,
"step": 75200
},
{
"epoch": 3.4629704200559153,
"grad_norm": 2.267977476119995,
"learning_rate": 2.196230758530608e-05,
"loss": 5.2224,
"step": 75250
},
{
"epoch": 3.465271465881243,
"grad_norm": 2.673049211502075,
"learning_rate": 2.1901926293369545e-05,
"loss": 5.1764,
"step": 75300
},
{
"epoch": 3.4675725117065705,
"grad_norm": 2.249359607696533,
"learning_rate": 2.184160483274142e-05,
"loss": 5.1799,
"step": 75350
},
{
"epoch": 3.469873557531898,
"grad_norm": 2.004697561264038,
"learning_rate": 2.178134333186865e-05,
"loss": 5.1787,
"step": 75400
},
{
"epoch": 3.472174603357226,
"grad_norm": 2.118774890899658,
"learning_rate": 2.1721141919070507e-05,
"loss": 5.1152,
"step": 75450
},
{
"epoch": 3.4744756491825535,
"grad_norm": 2.2793705463409424,
"learning_rate": 2.1661000722538326e-05,
"loss": 5.1887,
"step": 75500
},
{
"epoch": 3.476776695007881,
"grad_norm": 2.111333131790161,
"learning_rate": 2.1600919870335214e-05,
"loss": 5.2267,
"step": 75550
},
{
"epoch": 3.4790777408332088,
"grad_norm": 1.9498274326324463,
"learning_rate": 2.1540899490395755e-05,
"loss": 5.1931,
"step": 75600
},
{
"epoch": 3.4813787866585364,
"grad_norm": 2.229842185974121,
"learning_rate": 2.1480939710525795e-05,
"loss": 5.1317,
"step": 75650
},
{
"epoch": 3.483679832483864,
"grad_norm": 2.236582040786743,
"learning_rate": 2.142104065840215e-05,
"loss": 5.202,
"step": 75700
},
{
"epoch": 3.4859808783091917,
"grad_norm": 2.623948812484741,
"learning_rate": 2.136120246157229e-05,
"loss": 5.1442,
"step": 75750
},
{
"epoch": 3.4882819241345193,
"grad_norm": 2.466200113296509,
"learning_rate": 2.1301425247454127e-05,
"loss": 5.1478,
"step": 75800
},
{
"epoch": 3.4905829699598465,
"grad_norm": 2.081251382827759,
"learning_rate": 2.1241709143335696e-05,
"loss": 5.2129,
"step": 75850
},
{
"epoch": 3.492884015785174,
"grad_norm": 2.99832820892334,
"learning_rate": 2.118205427637493e-05,
"loss": 5.2158,
"step": 75900
},
{
"epoch": 3.495185061610502,
"grad_norm": 1.9218469858169556,
"learning_rate": 2.1122460773599345e-05,
"loss": 5.2348,
"step": 75950
},
{
"epoch": 3.4974861074358294,
"grad_norm": 2.370565176010132,
"learning_rate": 2.1062928761905805e-05,
"loss": 5.2643,
"step": 76000
},
{
"epoch": 3.4974861074358294,
"eval_loss": 5.631509780883789,
"eval_runtime": 33.4738,
"eval_samples_per_second": 11.472,
"eval_steps_per_second": 5.736,
"eval_tts_loss": 8.98740807562086,
"step": 76000
},
{
"epoch": 3.499787153261157,
"grad_norm": 2.2371163368225098,
"learning_rate": 2.100345836806022e-05,
"loss": 5.2018,
"step": 76050
},
{
"epoch": 3.5020881990864847,
"grad_norm": 2.599576950073242,
"learning_rate": 2.094404971869731e-05,
"loss": 5.1568,
"step": 76100
},
{
"epoch": 3.5043892449118124,
"grad_norm": 2.2047317028045654,
"learning_rate": 2.088470294032029e-05,
"loss": 5.2178,
"step": 76150
},
{
"epoch": 3.50669029073714,
"grad_norm": 2.1263697147369385,
"learning_rate": 2.0826603246530247e-05,
"loss": 5.2013,
"step": 76200
},
{
"epoch": 3.5089913365624676,
"grad_norm": 2.0238592624664307,
"learning_rate": 2.076737934539928e-05,
"loss": 5.1675,
"step": 76250
},
{
"epoch": 3.5112923823877953,
"grad_norm": 2.288980722427368,
"learning_rate": 2.0708217691451527e-05,
"loss": 5.2044,
"step": 76300
},
{
"epoch": 3.513593428213123,
"grad_norm": 2.3009746074676514,
"learning_rate": 2.0649118410664274e-05,
"loss": 5.2159,
"step": 76350
},
{
"epoch": 3.5158944740384506,
"grad_norm": 2.0506417751312256,
"learning_rate": 2.0590081628881996e-05,
"loss": 5.1579,
"step": 76400
},
{
"epoch": 3.518195519863778,
"grad_norm": 2.861734628677368,
"learning_rate": 2.0531107471816084e-05,
"loss": 5.1541,
"step": 76450
},
{
"epoch": 3.520496565689106,
"grad_norm": 2.453308343887329,
"learning_rate": 2.047219606504458e-05,
"loss": 5.1878,
"step": 76500
},
{
"epoch": 3.5227976115144335,
"grad_norm": 2.1938374042510986,
"learning_rate": 2.0413347534011896e-05,
"loss": 5.2198,
"step": 76550
},
{
"epoch": 3.525098657339761,
"grad_norm": 2.3495683670043945,
"learning_rate": 2.0354562004028566e-05,
"loss": 5.2011,
"step": 76600
},
{
"epoch": 3.5273997031650888,
"grad_norm": 2.0548179149627686,
"learning_rate": 2.0295839600270972e-05,
"loss": 5.2454,
"step": 76650
},
{
"epoch": 3.5297007489904164,
"grad_norm": 2.2966115474700928,
"learning_rate": 2.0237180447781083e-05,
"loss": 5.2069,
"step": 76700
},
{
"epoch": 3.5320017948157436,
"grad_norm": 2.0031216144561768,
"learning_rate": 2.0178584671466165e-05,
"loss": 5.2371,
"step": 76750
},
{
"epoch": 3.5343028406410713,
"grad_norm": 2.0526063442230225,
"learning_rate": 2.0120052396098528e-05,
"loss": 5.2393,
"step": 76800
},
{
"epoch": 3.536603886466399,
"grad_norm": 2.4385509490966797,
"learning_rate": 2.0061583746315317e-05,
"loss": 5.1446,
"step": 76850
},
{
"epoch": 3.5389049322917265,
"grad_norm": 2.629765510559082,
"learning_rate": 2.0003178846618137e-05,
"loss": 5.2291,
"step": 76900
},
{
"epoch": 3.541205978117054,
"grad_norm": 2.386939525604248,
"learning_rate": 1.9944837821372857e-05,
"loss": 5.2247,
"step": 76950
},
{
"epoch": 3.543507023942382,
"grad_norm": 2.1514835357666016,
"learning_rate": 1.9886560794809355e-05,
"loss": 5.1999,
"step": 77000
},
{
"epoch": 3.543507023942382,
"eval_loss": 5.626565456390381,
"eval_runtime": 32.058,
"eval_samples_per_second": 11.978,
"eval_steps_per_second": 5.989,
"eval_tts_loss": 8.992654431853728,
"step": 77000
},
{
"epoch": 3.5458080697677095,
"grad_norm": 2.399937868118286,
"learning_rate": 1.982834789102121e-05,
"loss": 5.1952,
"step": 77050
},
{
"epoch": 3.548109115593037,
"grad_norm": 2.338700532913208,
"learning_rate": 1.977019923396547e-05,
"loss": 5.1768,
"step": 77100
},
{
"epoch": 3.5504101614183647,
"grad_norm": 2.1986091136932373,
"learning_rate": 1.9712114947462366e-05,
"loss": 5.1892,
"step": 77150
},
{
"epoch": 3.5527112072436924,
"grad_norm": 2.2410266399383545,
"learning_rate": 1.9654095155195086e-05,
"loss": 5.1718,
"step": 77200
},
{
"epoch": 3.55501225306902,
"grad_norm": 2.3475921154022217,
"learning_rate": 1.9596139980709448e-05,
"loss": 5.2131,
"step": 77250
},
{
"epoch": 3.557313298894347,
"grad_norm": 2.4052350521087646,
"learning_rate": 1.95382495474137e-05,
"loss": 5.2332,
"step": 77300
},
{
"epoch": 3.559614344719675,
"grad_norm": 2.216869831085205,
"learning_rate": 1.9480423978578245e-05,
"loss": 5.1678,
"step": 77350
},
{
"epoch": 3.5619153905450025,
"grad_norm": 2.421912908554077,
"learning_rate": 1.9422663397335354e-05,
"loss": 5.1614,
"step": 77400
},
{
"epoch": 3.56421643637033,
"grad_norm": 2.5841050148010254,
"learning_rate": 1.9364967926678877e-05,
"loss": 5.2593,
"step": 77450
},
{
"epoch": 3.566517482195658,
"grad_norm": 1.9473857879638672,
"learning_rate": 1.9307337689464065e-05,
"loss": 5.2039,
"step": 77500
},
{
"epoch": 3.5688185280209854,
"grad_norm": 2.1319632530212402,
"learning_rate": 1.9249772808407245e-05,
"loss": 5.1229,
"step": 77550
},
{
"epoch": 3.571119573846313,
"grad_norm": 2.149664878845215,
"learning_rate": 1.9192273406085588e-05,
"loss": 5.2108,
"step": 77600
},
{
"epoch": 3.5734206196716407,
"grad_norm": 2.748605966567993,
"learning_rate": 1.91348396049368e-05,
"loss": 5.1874,
"step": 77650
},
{
"epoch": 3.5757216654969683,
"grad_norm": 2.3713808059692383,
"learning_rate": 1.9077471527258967e-05,
"loss": 5.1959,
"step": 77700
},
{
"epoch": 3.578022711322296,
"grad_norm": 2.1292314529418945,
"learning_rate": 1.9020169295210162e-05,
"loss": 5.2464,
"step": 77750
},
{
"epoch": 3.5803237571476236,
"grad_norm": 2.0167505741119385,
"learning_rate": 1.8962933030808277e-05,
"loss": 5.1975,
"step": 77800
},
{
"epoch": 3.5826248029729513,
"grad_norm": 2.4200239181518555,
"learning_rate": 1.890576285593072e-05,
"loss": 5.1826,
"step": 77850
},
{
"epoch": 3.584925848798279,
"grad_norm": 2.4658398628234863,
"learning_rate": 1.884865889231418e-05,
"loss": 5.1882,
"step": 77900
},
{
"epoch": 3.5872268946236066,
"grad_norm": 3.0140624046325684,
"learning_rate": 1.8791621261554354e-05,
"loss": 5.197,
"step": 77950
},
{
"epoch": 3.589527940448934,
"grad_norm": 2.1732044219970703,
"learning_rate": 1.873465008510569e-05,
"loss": 5.215,
"step": 78000
},
{
"epoch": 3.589527940448934,
"eval_loss": 5.619250774383545,
"eval_runtime": 34.2668,
"eval_samples_per_second": 11.206,
"eval_steps_per_second": 5.603,
"eval_tts_loss": 8.996872534198715,
"step": 78000
},
{
"epoch": 3.591828986274262,
"grad_norm": 2.3192954063415527,
"learning_rate": 1.8677745484281135e-05,
"loss": 5.243,
"step": 78050
},
{
"epoch": 3.5941300320995895,
"grad_norm": 2.143805742263794,
"learning_rate": 1.8620907580251863e-05,
"loss": 5.1755,
"step": 78100
},
{
"epoch": 3.596431077924917,
"grad_norm": 2.305589437484741,
"learning_rate": 1.856413649404702e-05,
"loss": 5.2402,
"step": 78150
},
{
"epoch": 3.5987321237502448,
"grad_norm": 2.2067654132843018,
"learning_rate": 1.8507432346553504e-05,
"loss": 5.1785,
"step": 78200
},
{
"epoch": 3.601033169575572,
"grad_norm": 2.503913640975952,
"learning_rate": 1.845079525851565e-05,
"loss": 5.2286,
"step": 78250
},
{
"epoch": 3.6033342154008996,
"grad_norm": 2.5515477657318115,
"learning_rate": 1.8394225350535006e-05,
"loss": 5.1196,
"step": 78300
},
{
"epoch": 3.6056352612262272,
"grad_norm": 2.8195738792419434,
"learning_rate": 1.8337722743070047e-05,
"loss": 5.2123,
"step": 78350
},
{
"epoch": 3.607936307051555,
"grad_norm": 2.128828525543213,
"learning_rate": 1.8281287556435972e-05,
"loss": 5.2335,
"step": 78400
},
{
"epoch": 3.6102373528768825,
"grad_norm": 2.656996726989746,
"learning_rate": 1.8224919910804395e-05,
"loss": 5.204,
"step": 78450
},
{
"epoch": 3.61253839870221,
"grad_norm": 2.313681125640869,
"learning_rate": 1.8168619926203124e-05,
"loss": 5.193,
"step": 78500
},
{
"epoch": 3.614839444527538,
"grad_norm": 2.0406699180603027,
"learning_rate": 1.8112387722515872e-05,
"loss": 5.2394,
"step": 78550
},
{
"epoch": 3.6171404903528654,
"grad_norm": 2.2279775142669678,
"learning_rate": 1.8056223419482043e-05,
"loss": 5.179,
"step": 78600
},
{
"epoch": 3.619441536178193,
"grad_norm": 2.2751965522766113,
"learning_rate": 1.8000127136696448e-05,
"loss": 5.231,
"step": 78650
},
{
"epoch": 3.6217425820035207,
"grad_norm": 2.374161958694458,
"learning_rate": 1.7944098993609042e-05,
"loss": 5.1862,
"step": 78700
},
{
"epoch": 3.6240436278288484,
"grad_norm": 2.4539670944213867,
"learning_rate": 1.788813910952471e-05,
"loss": 5.2105,
"step": 78750
},
{
"epoch": 3.6263446736541756,
"grad_norm": 2.1793766021728516,
"learning_rate": 1.783224760360297e-05,
"loss": 5.1908,
"step": 78800
},
{
"epoch": 3.628645719479503,
"grad_norm": 2.398728132247925,
"learning_rate": 1.7776424594857738e-05,
"loss": 5.2523,
"step": 78850
},
{
"epoch": 3.630946765304831,
"grad_norm": 2.1088905334472656,
"learning_rate": 1.772067020215709e-05,
"loss": 5.1547,
"step": 78900
},
{
"epoch": 3.6332478111301585,
"grad_norm": 1.9953407049179077,
"learning_rate": 1.766498454422298e-05,
"loss": 5.1972,
"step": 78950
},
{
"epoch": 3.635548856955486,
"grad_norm": 2.152448892593384,
"learning_rate": 1.7609367739630976e-05,
"loss": 5.2446,
"step": 79000
},
{
"epoch": 3.635548856955486,
"eval_loss": 5.617215633392334,
"eval_runtime": 32.3456,
"eval_samples_per_second": 11.872,
"eval_steps_per_second": 5.936,
"eval_tts_loss": 8.97757097060575,
"step": 79000
},
{
"epoch": 3.6378499027808138,
"grad_norm": 2.023789644241333,
"learning_rate": 1.75538199068101e-05,
"loss": 5.19,
"step": 79050
},
{
"epoch": 3.6401509486061414,
"grad_norm": 2.7019007205963135,
"learning_rate": 1.7498341164042442e-05,
"loss": 5.2325,
"step": 79100
},
{
"epoch": 3.642451994431469,
"grad_norm": 1.858165979385376,
"learning_rate": 1.7442931629463e-05,
"loss": 5.1992,
"step": 79150
},
{
"epoch": 3.6447530402567967,
"grad_norm": 2.1814475059509277,
"learning_rate": 1.7387591421059395e-05,
"loss": 5.2014,
"step": 79200
},
{
"epoch": 3.6470540860821243,
"grad_norm": 2.373379707336426,
"learning_rate": 1.733232065667163e-05,
"loss": 5.1136,
"step": 79250
},
{
"epoch": 3.649355131907452,
"grad_norm": 2.069084644317627,
"learning_rate": 1.7277119453991834e-05,
"loss": 5.1485,
"step": 79300
},
{
"epoch": 3.6516561777327796,
"grad_norm": 2.114830255508423,
"learning_rate": 1.722198793056402e-05,
"loss": 5.19,
"step": 79350
},
{
"epoch": 3.6539572235581073,
"grad_norm": 1.8629753589630127,
"learning_rate": 1.7166926203783822e-05,
"loss": 5.1707,
"step": 79400
},
{
"epoch": 3.656258269383435,
"grad_norm": 2.268953323364258,
"learning_rate": 1.7111934390898225e-05,
"loss": 5.2116,
"step": 79450
},
{
"epoch": 3.6585593152087625,
"grad_norm": 2.427870988845825,
"learning_rate": 1.7057012609005406e-05,
"loss": 5.1697,
"step": 79500
},
{
"epoch": 3.66086036103409,
"grad_norm": 2.392437696456909,
"learning_rate": 1.7002160975054366e-05,
"loss": 5.1588,
"step": 79550
},
{
"epoch": 3.663161406859418,
"grad_norm": 2.81585955619812,
"learning_rate": 1.6947379605844766e-05,
"loss": 5.1922,
"step": 79600
},
{
"epoch": 3.6654624526847455,
"grad_norm": 2.525327205657959,
"learning_rate": 1.6892668618026596e-05,
"loss": 5.2282,
"step": 79650
},
{
"epoch": 3.6677634985100727,
"grad_norm": 2.584698438644409,
"learning_rate": 1.6838028128100032e-05,
"loss": 5.1869,
"step": 79700
},
{
"epoch": 3.6700645443354003,
"grad_norm": 2.473557710647583,
"learning_rate": 1.6783458252415108e-05,
"loss": 5.2196,
"step": 79750
},
{
"epoch": 3.672365590160728,
"grad_norm": 2.360694408416748,
"learning_rate": 1.6728959107171504e-05,
"loss": 5.1844,
"step": 79800
},
{
"epoch": 3.6746666359860556,
"grad_norm": 2.617858648300171,
"learning_rate": 1.667453080841827e-05,
"loss": 5.1233,
"step": 79850
},
{
"epoch": 3.6769676818113832,
"grad_norm": 2.8224070072174072,
"learning_rate": 1.662017347205364e-05,
"loss": 5.2381,
"step": 79900
},
{
"epoch": 3.679268727636711,
"grad_norm": 1.9658337831497192,
"learning_rate": 1.6565887213824704e-05,
"loss": 5.1793,
"step": 79950
},
{
"epoch": 3.6815697734620385,
"grad_norm": 2.1769447326660156,
"learning_rate": 1.6511672149327206e-05,
"loss": 5.1578,
"step": 80000
},
{
"epoch": 3.6815697734620385,
"eval_loss": 5.613487243652344,
"eval_runtime": 33.8101,
"eval_samples_per_second": 11.358,
"eval_steps_per_second": 5.679,
"eval_tts_loss": 8.99393554956893,
"step": 80000
},
{
"epoch": 3.683870819287366,
"grad_norm": 2.6718637943267822,
"learning_rate": 1.6457528394005296e-05,
"loss": 5.2155,
"step": 80050
},
{
"epoch": 3.686171865112694,
"grad_norm": 2.3300375938415527,
"learning_rate": 1.640345606315128e-05,
"loss": 5.2027,
"step": 80100
},
{
"epoch": 3.6884729109380214,
"grad_norm": 2.3993618488311768,
"learning_rate": 1.634945527190538e-05,
"loss": 5.2122,
"step": 80150
},
{
"epoch": 3.690773956763349,
"grad_norm": 2.0944197177886963,
"learning_rate": 1.6295526135255472e-05,
"loss": 5.2506,
"step": 80200
},
{
"epoch": 3.6930750025886767,
"grad_norm": 2.1264808177948,
"learning_rate": 1.6241668768036862e-05,
"loss": 5.1621,
"step": 80250
},
{
"epoch": 3.695376048414004,
"grad_norm": 2.0797226428985596,
"learning_rate": 1.6187883284932015e-05,
"loss": 5.1699,
"step": 80300
},
{
"epoch": 3.6976770942393316,
"grad_norm": 2.431781768798828,
"learning_rate": 1.613416980047037e-05,
"loss": 5.2216,
"step": 80350
},
{
"epoch": 3.699978140064659,
"grad_norm": 2.323768377304077,
"learning_rate": 1.6080528429028015e-05,
"loss": 5.2144,
"step": 80400
},
{
"epoch": 3.702279185889987,
"grad_norm": 2.5759477615356445,
"learning_rate": 1.6026959284827485e-05,
"loss": 5.2031,
"step": 80450
},
{
"epoch": 3.7045802317153145,
"grad_norm": 2.581083297729492,
"learning_rate": 1.597346248193753e-05,
"loss": 5.181,
"step": 80500
},
{
"epoch": 3.706881277540642,
"grad_norm": 1.9495331048965454,
"learning_rate": 1.5920038134272853e-05,
"loss": 5.1314,
"step": 80550
},
{
"epoch": 3.7091823233659698,
"grad_norm": 2.2382566928863525,
"learning_rate": 1.5866686355593874e-05,
"loss": 5.2137,
"step": 80600
},
{
"epoch": 3.7114833691912974,
"grad_norm": 2.1297311782836914,
"learning_rate": 1.581340725950648e-05,
"loss": 5.2077,
"step": 80650
},
{
"epoch": 3.713784415016625,
"grad_norm": 2.0254764556884766,
"learning_rate": 1.5760200959461797e-05,
"loss": 5.2052,
"step": 80700
},
{
"epoch": 3.7160854608419527,
"grad_norm": 2.093926191329956,
"learning_rate": 1.5707067568755933e-05,
"loss": 5.1405,
"step": 80750
},
{
"epoch": 3.7183865066672803,
"grad_norm": 2.399249792098999,
"learning_rate": 1.5654007200529763e-05,
"loss": 5.1569,
"step": 80800
},
{
"epoch": 3.720687552492608,
"grad_norm": 2.097546100616455,
"learning_rate": 1.5601019967768648e-05,
"loss": 5.2015,
"step": 80850
},
{
"epoch": 3.7229885983179356,
"grad_norm": 2.278393268585205,
"learning_rate": 1.5548105983302224e-05,
"loss": 5.1624,
"step": 80900
},
{
"epoch": 3.7252896441432632,
"grad_norm": 2.1321325302124023,
"learning_rate": 1.549526535980416e-05,
"loss": 5.2292,
"step": 80950
},
{
"epoch": 3.727590689968591,
"grad_norm": 2.526561737060547,
"learning_rate": 1.5442498209791905e-05,
"loss": 5.2001,
"step": 81000
},
{
"epoch": 3.727590689968591,
"eval_loss": 5.611248016357422,
"eval_runtime": 33.0436,
"eval_samples_per_second": 11.621,
"eval_steps_per_second": 5.811,
"eval_tts_loss": 8.977350676607491,
"step": 81000
},
{
"epoch": 3.7298917357939185,
"grad_norm": 2.070404052734375,
"learning_rate": 1.5389804645626453e-05,
"loss": 5.1921,
"step": 81050
},
{
"epoch": 3.732192781619246,
"grad_norm": 2.317744255065918,
"learning_rate": 1.5337184779512125e-05,
"loss": 5.1866,
"step": 81100
},
{
"epoch": 3.734493827444574,
"grad_norm": 1.896338701248169,
"learning_rate": 1.5284638723496268e-05,
"loss": 5.2012,
"step": 81150
},
{
"epoch": 3.736794873269901,
"grad_norm": 2.6844286918640137,
"learning_rate": 1.5232166589469127e-05,
"loss": 5.1391,
"step": 81200
},
{
"epoch": 3.7390959190952286,
"grad_norm": 2.197406768798828,
"learning_rate": 1.5179768489163481e-05,
"loss": 5.1941,
"step": 81250
},
{
"epoch": 3.7413969649205563,
"grad_norm": 2.0176520347595215,
"learning_rate": 1.512744453415449e-05,
"loss": 5.1764,
"step": 81300
},
{
"epoch": 3.743698010745884,
"grad_norm": 1.9290133714675903,
"learning_rate": 1.5075194835859418e-05,
"loss": 5.1211,
"step": 81350
},
{
"epoch": 3.7459990565712116,
"grad_norm": 1.9766486883163452,
"learning_rate": 1.5023019505537422e-05,
"loss": 5.1829,
"step": 81400
},
{
"epoch": 3.748300102396539,
"grad_norm": 2.4024932384490967,
"learning_rate": 1.4970918654289296e-05,
"loss": 5.1121,
"step": 81450
},
{
"epoch": 3.750601148221867,
"grad_norm": 2.7011728286743164,
"learning_rate": 1.4918892393057227e-05,
"loss": 5.1788,
"step": 81500
},
{
"epoch": 3.7529021940471945,
"grad_norm": 2.2868247032165527,
"learning_rate": 1.4866940832624598e-05,
"loss": 5.1545,
"step": 81550
},
{
"epoch": 3.755203239872522,
"grad_norm": 2.1724374294281006,
"learning_rate": 1.481506408361571e-05,
"loss": 5.1113,
"step": 81600
},
{
"epoch": 3.7575042856978498,
"grad_norm": 2.5170412063598633,
"learning_rate": 1.4763262256495547e-05,
"loss": 5.1083,
"step": 81650
},
{
"epoch": 3.7598053315231774,
"grad_norm": 2.5945959091186523,
"learning_rate": 1.4711535461569609e-05,
"loss": 5.0641,
"step": 81700
},
{
"epoch": 3.762106377348505,
"grad_norm": 2.386784791946411,
"learning_rate": 1.4659883808983565e-05,
"loss": 5.1631,
"step": 81750
},
{
"epoch": 3.7644074231738323,
"grad_norm": 2.390890598297119,
"learning_rate": 1.4608307408723126e-05,
"loss": 5.1748,
"step": 81800
},
{
"epoch": 3.76670846899916,
"grad_norm": 2.576148271560669,
"learning_rate": 1.4556806370613707e-05,
"loss": 5.1687,
"step": 81850
},
{
"epoch": 3.7690095148244875,
"grad_norm": 2.564168691635132,
"learning_rate": 1.4505380804320295e-05,
"loss": 5.133,
"step": 81900
},
{
"epoch": 3.771310560649815,
"grad_norm": 2.1351702213287354,
"learning_rate": 1.4454030819347158e-05,
"loss": 5.2315,
"step": 81950
},
{
"epoch": 3.773611606475143,
"grad_norm": 2.4622039794921875,
"learning_rate": 1.440275652503762e-05,
"loss": 5.1477,
"step": 82000
},
{
"epoch": 3.773611606475143,
"eval_loss": 5.609696865081787,
"eval_runtime": 34.3084,
"eval_samples_per_second": 11.193,
"eval_steps_per_second": 5.596,
"eval_tts_loss": 8.997026660141712,
"step": 82000
},
{
"epoch": 3.7759126523004705,
"grad_norm": 2.6888697147369385,
"learning_rate": 1.4351558030573814e-05,
"loss": 5.168,
"step": 82050
},
{
"epoch": 3.778213698125798,
"grad_norm": 2.8415167331695557,
"learning_rate": 1.4300435444976513e-05,
"loss": 5.2276,
"step": 82100
},
{
"epoch": 3.7805147439511257,
"grad_norm": 2.3651936054229736,
"learning_rate": 1.4249388877104814e-05,
"loss": 5.185,
"step": 82150
},
{
"epoch": 3.7828157897764534,
"grad_norm": 2.442162275314331,
"learning_rate": 1.4198418435655947e-05,
"loss": 5.1614,
"step": 82200
},
{
"epoch": 3.785116835601781,
"grad_norm": 1.8948955535888672,
"learning_rate": 1.4147524229165049e-05,
"loss": 5.1829,
"step": 82250
},
{
"epoch": 3.7874178814271087,
"grad_norm": 2.331746816635132,
"learning_rate": 1.409670636600492e-05,
"loss": 5.1549,
"step": 82300
},
{
"epoch": 3.7897189272524363,
"grad_norm": 2.317063093185425,
"learning_rate": 1.4045964954385804e-05,
"loss": 5.1202,
"step": 82350
},
{
"epoch": 3.792019973077764,
"grad_norm": 2.8078041076660156,
"learning_rate": 1.3995300102355136e-05,
"loss": 5.1692,
"step": 82400
},
{
"epoch": 3.7943210189030916,
"grad_norm": 2.278687000274658,
"learning_rate": 1.3945722929450245e-05,
"loss": 5.1753,
"step": 82450
},
{
"epoch": 3.7966220647284192,
"grad_norm": 2.2202608585357666,
"learning_rate": 1.3895209983528002e-05,
"loss": 5.2014,
"step": 82500
},
{
"epoch": 3.798923110553747,
"grad_norm": 2.7063558101654053,
"learning_rate": 1.3844773918207926e-05,
"loss": 5.1813,
"step": 82550
},
{
"epoch": 3.8012241563790745,
"grad_norm": 2.3519999980926514,
"learning_rate": 1.379441484088727e-05,
"loss": 5.2055,
"step": 82600
},
{
"epoch": 3.803525202204402,
"grad_norm": 2.1331920623779297,
"learning_rate": 1.3744132858799346e-05,
"loss": 5.1639,
"step": 82650
},
{
"epoch": 3.8058262480297294,
"grad_norm": 2.179452657699585,
"learning_rate": 1.3693928079013301e-05,
"loss": 5.1549,
"step": 82700
},
{
"epoch": 3.808127293855057,
"grad_norm": 2.223172903060913,
"learning_rate": 1.3643800608433888e-05,
"loss": 5.2003,
"step": 82750
},
{
"epoch": 3.8104283396803846,
"grad_norm": 2.3072891235351562,
"learning_rate": 1.3593750553801227e-05,
"loss": 5.2101,
"step": 82800
},
{
"epoch": 3.8127293855057123,
"grad_norm": 2.3883447647094727,
"learning_rate": 1.3543778021690629e-05,
"loss": 5.1857,
"step": 82850
},
{
"epoch": 3.81503043133104,
"grad_norm": 2.0408358573913574,
"learning_rate": 1.3493883118512313e-05,
"loss": 5.2079,
"step": 82900
},
{
"epoch": 3.8173314771563676,
"grad_norm": 2.2231454849243164,
"learning_rate": 1.3444065950511159e-05,
"loss": 5.0707,
"step": 82950
},
{
"epoch": 3.819632522981695,
"grad_norm": 2.178635358810425,
"learning_rate": 1.3394326623766563e-05,
"loss": 5.179,
"step": 83000
},
{
"epoch": 3.819632522981695,
"eval_loss": 5.609046459197998,
"eval_runtime": 35.4306,
"eval_samples_per_second": 10.838,
"eval_steps_per_second": 5.419,
"eval_tts_loss": 9.01051702197809,
"step": 83000
},
{
"epoch": 3.821933568807023,
"grad_norm": 2.3109848499298096,
"learning_rate": 1.3344665244192155e-05,
"loss": 5.2137,
"step": 83050
},
{
"epoch": 3.8242346146323505,
"grad_norm": 2.1816065311431885,
"learning_rate": 1.329508191753559e-05,
"loss": 5.136,
"step": 83100
},
{
"epoch": 3.826535660457678,
"grad_norm": 2.9564876556396484,
"learning_rate": 1.3245576749378297e-05,
"loss": 5.1775,
"step": 83150
},
{
"epoch": 3.8288367062830058,
"grad_norm": 2.3853161334991455,
"learning_rate": 1.319614984513533e-05,
"loss": 5.2273,
"step": 83200
},
{
"epoch": 3.831137752108333,
"grad_norm": 2.4375455379486084,
"learning_rate": 1.314680131005504e-05,
"loss": 5.0597,
"step": 83250
},
{
"epoch": 3.8334387979336606,
"grad_norm": 2.3408586978912354,
"learning_rate": 1.3097531249218914e-05,
"loss": 5.1273,
"step": 83300
},
{
"epoch": 3.8357398437589882,
"grad_norm": 2.303234815597534,
"learning_rate": 1.304833976754134e-05,
"loss": 5.2115,
"step": 83350
},
{
"epoch": 3.838040889584316,
"grad_norm": 2.0342304706573486,
"learning_rate": 1.2999226969769379e-05,
"loss": 5.1762,
"step": 83400
},
{
"epoch": 3.8403419354096435,
"grad_norm": 2.351905584335327,
"learning_rate": 1.295019296048255e-05,
"loss": 5.18,
"step": 83450
},
{
"epoch": 3.842642981234971,
"grad_norm": 2.341549873352051,
"learning_rate": 1.29012378440926e-05,
"loss": 5.2408,
"step": 83500
},
{
"epoch": 3.844944027060299,
"grad_norm": 2.472702741622925,
"learning_rate": 1.2852361724843282e-05,
"loss": 5.1864,
"step": 83550
},
{
"epoch": 3.8472450728856264,
"grad_norm": 2.308812141418457,
"learning_rate": 1.2803564706810135e-05,
"loss": 5.2345,
"step": 83600
},
{
"epoch": 3.849546118710954,
"grad_norm": 2.3374664783477783,
"learning_rate": 1.275484689390024e-05,
"loss": 5.2227,
"step": 83650
},
{
"epoch": 3.8518471645362817,
"grad_norm": 2.4744300842285156,
"learning_rate": 1.2706208389852087e-05,
"loss": 5.2145,
"step": 83700
},
{
"epoch": 3.8541482103616094,
"grad_norm": 2.243946075439453,
"learning_rate": 1.2657649298235213e-05,
"loss": 5.1909,
"step": 83750
},
{
"epoch": 3.856449256186937,
"grad_norm": 2.732558012008667,
"learning_rate": 1.2609169722450093e-05,
"loss": 5.1886,
"step": 83800
},
{
"epoch": 3.8587503020122647,
"grad_norm": 2.3516011238098145,
"learning_rate": 1.2560769765727881e-05,
"loss": 5.146,
"step": 83850
},
{
"epoch": 3.8610513478375923,
"grad_norm": 2.6143431663513184,
"learning_rate": 1.2512449531130183e-05,
"loss": 5.1711,
"step": 83900
},
{
"epoch": 3.86335239366292,
"grad_norm": 2.5239765644073486,
"learning_rate": 1.2464209121548848e-05,
"loss": 5.1856,
"step": 83950
},
{
"epoch": 3.8656534394882476,
"grad_norm": 2.2582340240478516,
"learning_rate": 1.2416048639705757e-05,
"loss": 5.1832,
"step": 84000
},
{
"epoch": 3.8656534394882476,
"eval_loss": 5.601240158081055,
"eval_runtime": 34.4022,
"eval_samples_per_second": 11.162,
"eval_steps_per_second": 5.581,
"eval_tts_loss": 9.00224707553787,
"step": 84000
},
{
"epoch": 3.867954485313575,
"grad_norm": 2.666868209838867,
"learning_rate": 1.236796818815259e-05,
"loss": 5.1795,
"step": 84050
},
{
"epoch": 3.870255531138903,
"grad_norm": 1.95122492313385,
"learning_rate": 1.231996786927061e-05,
"loss": 5.1342,
"step": 84100
},
{
"epoch": 3.8725565769642305,
"grad_norm": 2.664674758911133,
"learning_rate": 1.2272047785270446e-05,
"loss": 5.2027,
"step": 84150
},
{
"epoch": 3.8748576227895577,
"grad_norm": 2.152007818222046,
"learning_rate": 1.2224208038191886e-05,
"loss": 5.1717,
"step": 84200
},
{
"epoch": 3.8771586686148853,
"grad_norm": 2.2395548820495605,
"learning_rate": 1.217644872990364e-05,
"loss": 5.1786,
"step": 84250
},
{
"epoch": 3.879459714440213,
"grad_norm": 2.2887299060821533,
"learning_rate": 1.2128769962103142e-05,
"loss": 5.1917,
"step": 84300
},
{
"epoch": 3.8817607602655406,
"grad_norm": 2.3832855224609375,
"learning_rate": 1.2081171836316325e-05,
"loss": 5.2202,
"step": 84350
},
{
"epoch": 3.8840618060908683,
"grad_norm": 2.2544591426849365,
"learning_rate": 1.2033654453897391e-05,
"loss": 5.1446,
"step": 84400
},
{
"epoch": 3.886362851916196,
"grad_norm": 2.403583526611328,
"learning_rate": 1.198621791602863e-05,
"loss": 5.2115,
"step": 84450
},
{
"epoch": 3.8886638977415235,
"grad_norm": 2.506659507751465,
"learning_rate": 1.1939808641647338e-05,
"loss": 5.1118,
"step": 84500
},
{
"epoch": 3.890964943566851,
"grad_norm": 2.4026944637298584,
"learning_rate": 1.1892532473821888e-05,
"loss": 5.1479,
"step": 84550
},
{
"epoch": 3.893265989392179,
"grad_norm": 2.1828866004943848,
"learning_rate": 1.1845337451048095e-05,
"loss": 5.1982,
"step": 84600
},
{
"epoch": 3.8955670352175065,
"grad_norm": 2.180025339126587,
"learning_rate": 1.17982236738218e-05,
"loss": 5.1594,
"step": 84650
},
{
"epoch": 3.897868081042834,
"grad_norm": 2.3280653953552246,
"learning_rate": 1.175119124246587e-05,
"loss": 5.2365,
"step": 84700
},
{
"epoch": 3.9001691268681613,
"grad_norm": 2.364051580429077,
"learning_rate": 1.1704240257129934e-05,
"loss": 5.1986,
"step": 84750
},
{
"epoch": 3.902470172693489,
"grad_norm": 2.603224754333496,
"learning_rate": 1.1657370817790186e-05,
"loss": 5.1579,
"step": 84800
},
{
"epoch": 3.9047712185188166,
"grad_norm": 2.6435306072235107,
"learning_rate": 1.1610583024249227e-05,
"loss": 5.1749,
"step": 84850
},
{
"epoch": 3.9070722643441442,
"grad_norm": 2.8594369888305664,
"learning_rate": 1.1563876976135751e-05,
"loss": 5.1947,
"step": 84900
},
{
"epoch": 3.909373310169472,
"grad_norm": 2.1866133213043213,
"learning_rate": 1.1517252772904408e-05,
"loss": 5.186,
"step": 84950
},
{
"epoch": 3.9116743559947995,
"grad_norm": 2.3272008895874023,
"learning_rate": 1.1470710513835559e-05,
"loss": 5.1384,
"step": 85000
},
{
"epoch": 3.9116743559947995,
"eval_loss": 5.6002326011657715,
"eval_runtime": 33.5323,
"eval_samples_per_second": 11.452,
"eval_steps_per_second": 5.726,
"eval_tts_loss": 9.014526556404022,
"step": 85000
},
{
"epoch": 3.913975401820127,
"grad_norm": 2.3301563262939453,
"learning_rate": 1.1424250298035083e-05,
"loss": 5.2276,
"step": 85050
},
{
"epoch": 3.916276447645455,
"grad_norm": 2.2211480140686035,
"learning_rate": 1.1377872224434178e-05,
"loss": 5.2068,
"step": 85100
},
{
"epoch": 3.9185774934707824,
"grad_norm": 2.2685728073120117,
"learning_rate": 1.1331576391789062e-05,
"loss": 5.1322,
"step": 85150
},
{
"epoch": 3.92087853929611,
"grad_norm": 2.3483150005340576,
"learning_rate": 1.1285362898680923e-05,
"loss": 5.1722,
"step": 85200
},
{
"epoch": 3.9231795851214377,
"grad_norm": 2.068774938583374,
"learning_rate": 1.1239231843515552e-05,
"loss": 5.1993,
"step": 85250
},
{
"epoch": 3.9254806309467654,
"grad_norm": 2.5643553733825684,
"learning_rate": 1.1193183324523232e-05,
"loss": 5.169,
"step": 85300
},
{
"epoch": 3.927781676772093,
"grad_norm": 2.1620678901672363,
"learning_rate": 1.1147217439758473e-05,
"loss": 5.1113,
"step": 85350
},
{
"epoch": 3.9300827225974206,
"grad_norm": 2.5162718296051025,
"learning_rate": 1.1101334287099835e-05,
"loss": 5.1935,
"step": 85400
},
{
"epoch": 3.9323837684227483,
"grad_norm": 2.4817066192626953,
"learning_rate": 1.105553396424972e-05,
"loss": 5.1891,
"step": 85450
},
{
"epoch": 3.934684814248076,
"grad_norm": 2.493964433670044,
"learning_rate": 1.1009816568734143e-05,
"loss": 5.1429,
"step": 85500
},
{
"epoch": 3.9369858600734036,
"grad_norm": 2.5530591011047363,
"learning_rate": 1.096418219790254e-05,
"loss": 5.1996,
"step": 85550
},
{
"epoch": 3.939286905898731,
"grad_norm": 2.2496776580810547,
"learning_rate": 1.0918630948927554e-05,
"loss": 5.1454,
"step": 85600
},
{
"epoch": 3.941587951724059,
"grad_norm": 2.241255521774292,
"learning_rate": 1.0873162918804814e-05,
"loss": 5.1635,
"step": 85650
},
{
"epoch": 3.943888997549386,
"grad_norm": 2.414360284805298,
"learning_rate": 1.082777820435279e-05,
"loss": 5.132,
"step": 85700
},
{
"epoch": 3.9461900433747137,
"grad_norm": 2.702965259552002,
"learning_rate": 1.0782476902212507e-05,
"loss": 5.1775,
"step": 85750
},
{
"epoch": 3.9484910892000413,
"grad_norm": 2.7579190731048584,
"learning_rate": 1.073725910884738e-05,
"loss": 5.1708,
"step": 85800
},
{
"epoch": 3.950792135025369,
"grad_norm": 2.170175313949585,
"learning_rate": 1.0692124920542996e-05,
"loss": 5.1411,
"step": 85850
},
{
"epoch": 3.9530931808506966,
"grad_norm": 2.2946743965148926,
"learning_rate": 1.0647074433406934e-05,
"loss": 5.1557,
"step": 85900
},
{
"epoch": 3.9553942266760242,
"grad_norm": 2.442544937133789,
"learning_rate": 1.0602107743368522e-05,
"loss": 5.1201,
"step": 85950
},
{
"epoch": 3.957695272501352,
"grad_norm": 2.3080036640167236,
"learning_rate": 1.0557224946178667e-05,
"loss": 5.1919,
"step": 86000
},
{
"epoch": 3.957695272501352,
"eval_loss": 5.599061489105225,
"eval_runtime": 34.3103,
"eval_samples_per_second": 11.192,
"eval_steps_per_second": 5.596,
"eval_tts_loss": 9.014872747805995,
"step": 86000
},
{
"epoch": 3.9599963183266795,
"grad_norm": 2.5096185207366943,
"learning_rate": 1.051242613740963e-05,
"loss": 5.1564,
"step": 86050
},
{
"epoch": 3.962297364152007,
"grad_norm": 2.575779914855957,
"learning_rate": 1.0467711412454823e-05,
"loss": 5.1363,
"step": 86100
},
{
"epoch": 3.964598409977335,
"grad_norm": 2.8018746376037598,
"learning_rate": 1.042308086652864e-05,
"loss": 5.1536,
"step": 86150
},
{
"epoch": 3.9668994558026625,
"grad_norm": 2.1475179195404053,
"learning_rate": 1.0378534594666206e-05,
"loss": 5.2278,
"step": 86200
},
{
"epoch": 3.9692005016279897,
"grad_norm": 2.4993419647216797,
"learning_rate": 1.0334072691723168e-05,
"loss": 5.1654,
"step": 86250
},
{
"epoch": 3.9715015474533173,
"grad_norm": 2.0179409980773926,
"learning_rate": 1.028969525237557e-05,
"loss": 5.2237,
"step": 86300
},
{
"epoch": 3.973802593278645,
"grad_norm": 2.1702675819396973,
"learning_rate": 1.0245402371119567e-05,
"loss": 5.109,
"step": 86350
},
{
"epoch": 3.9761036391039726,
"grad_norm": 2.7319464683532715,
"learning_rate": 1.0201194142271281e-05,
"loss": 5.1879,
"step": 86400
},
{
"epoch": 3.9784046849293,
"grad_norm": 2.3209543228149414,
"learning_rate": 1.015707065996656e-05,
"loss": 5.0868,
"step": 86450
},
{
"epoch": 3.980705730754628,
"grad_norm": 2.6626105308532715,
"learning_rate": 1.0113032018160783e-05,
"loss": 5.1558,
"step": 86500
},
{
"epoch": 3.9830067765799555,
"grad_norm": 2.4389021396636963,
"learning_rate": 1.0069078310628732e-05,
"loss": 5.2351,
"step": 86550
},
{
"epoch": 3.985307822405283,
"grad_norm": 2.3185486793518066,
"learning_rate": 1.002608617067996e-05,
"loss": 5.144,
"step": 86600
},
{
"epoch": 3.987608868230611,
"grad_norm": 2.1815450191497803,
"learning_rate": 9.982300908956022e-06,
"loss": 5.1203,
"step": 86650
},
{
"epoch": 3.9899099140559384,
"grad_norm": 2.7808938026428223,
"learning_rate": 9.938600859881219e-06,
"loss": 5.1624,
"step": 86700
},
{
"epoch": 3.992210959881266,
"grad_norm": 2.6030192375183105,
"learning_rate": 9.894986116509297e-06,
"loss": 5.2076,
"step": 86750
},
{
"epoch": 3.9945120057065937,
"grad_norm": 2.2136263847351074,
"learning_rate": 9.851456771712342e-06,
"loss": 5.2135,
"step": 86800
},
{
"epoch": 3.9968130515319213,
"grad_norm": 2.76670241355896,
"learning_rate": 9.808012918180643e-06,
"loss": 5.2533,
"step": 86850
},
{
"epoch": 3.999114097357249,
"grad_norm": 2.3571465015411377,
"learning_rate": 9.764654648422394e-06,
"loss": 5.2091,
"step": 86900
},
{
"epoch": 4.001380627495196,
"grad_norm": 2.9005565643310547,
"learning_rate": 9.721382054763572e-06,
"loss": 5.0138,
"step": 86950
},
{
"epoch": 4.003681673320524,
"grad_norm": 2.7534894943237305,
"learning_rate": 9.678195229347714e-06,
"loss": 4.9149,
"step": 87000
},
{
"epoch": 4.003681673320524,
"eval_loss": 5.650022029876709,
"eval_runtime": 34.0508,
"eval_samples_per_second": 11.277,
"eval_steps_per_second": 5.639,
"eval_tts_loss": 9.280170685445977,
"step": 87000
},
{
"epoch": 4.005982719145852,
"grad_norm": 2.4095113277435303,
"learning_rate": 9.635094264135724e-06,
"loss": 4.9212,
"step": 87050
},
{
"epoch": 4.008283764971179,
"grad_norm": 2.556870222091675,
"learning_rate": 9.592079250905684e-06,
"loss": 4.9187,
"step": 87100
},
{
"epoch": 4.010584810796507,
"grad_norm": 2.400278091430664,
"learning_rate": 9.549150281252633e-06,
"loss": 4.9294,
"step": 87150
},
{
"epoch": 4.012885856621835,
"grad_norm": 2.6151013374328613,
"learning_rate": 9.506307446588414e-06,
"loss": 4.933,
"step": 87200
},
{
"epoch": 4.015186902447162,
"grad_norm": 3.015284299850464,
"learning_rate": 9.463550838141427e-06,
"loss": 4.9152,
"step": 87250
},
{
"epoch": 4.01748794827249,
"grad_norm": 2.4685239791870117,
"learning_rate": 9.420880546956528e-06,
"loss": 4.9318,
"step": 87300
},
{
"epoch": 4.0197889940978175,
"grad_norm": 2.854645252227783,
"learning_rate": 9.378296663894692e-06,
"loss": 4.9648,
"step": 87350
},
{
"epoch": 4.022090039923145,
"grad_norm": 2.287388563156128,
"learning_rate": 9.335799279632934e-06,
"loss": 4.954,
"step": 87400
},
{
"epoch": 4.024391085748473,
"grad_norm": 2.6801013946533203,
"learning_rate": 9.293388484664089e-06,
"loss": 4.9327,
"step": 87450
},
{
"epoch": 4.0266921315738005,
"grad_norm": 2.134859561920166,
"learning_rate": 9.251064369296597e-06,
"loss": 4.9183,
"step": 87500
},
{
"epoch": 4.028993177399128,
"grad_norm": 2.5280423164367676,
"learning_rate": 9.208827023654336e-06,
"loss": 4.9208,
"step": 87550
},
{
"epoch": 4.031294223224456,
"grad_norm": 2.4778597354888916,
"learning_rate": 9.166676537676416e-06,
"loss": 4.9699,
"step": 87600
},
{
"epoch": 4.033595269049783,
"grad_norm": 2.7387208938598633,
"learning_rate": 9.12461300111696e-06,
"loss": 4.9429,
"step": 87650
},
{
"epoch": 4.035896314875111,
"grad_norm": 2.26796555519104,
"learning_rate": 9.08263650354501e-06,
"loss": 4.932,
"step": 87700
},
{
"epoch": 4.038197360700439,
"grad_norm": 2.425043821334839,
"learning_rate": 9.040747134344208e-06,
"loss": 4.8945,
"step": 87750
},
{
"epoch": 4.040498406525766,
"grad_norm": 2.616593599319458,
"learning_rate": 8.998944982712692e-06,
"loss": 4.9216,
"step": 87800
},
{
"epoch": 4.042799452351094,
"grad_norm": 2.8648416996002197,
"learning_rate": 8.957230137662881e-06,
"loss": 4.9731,
"step": 87850
},
{
"epoch": 4.045100498176422,
"grad_norm": 2.239473581314087,
"learning_rate": 8.91560268802128e-06,
"loss": 4.9001,
"step": 87900
},
{
"epoch": 4.047401544001749,
"grad_norm": 2.3506898880004883,
"learning_rate": 8.874892663824048e-06,
"loss": 4.9684,
"step": 87950
},
{
"epoch": 4.049702589827076,
"grad_norm": 2.2261645793914795,
"learning_rate": 8.833438518418107e-06,
"loss": 5.0119,
"step": 88000
},
{
"epoch": 4.049702589827076,
"eval_loss": 5.670416355133057,
"eval_runtime": 33.0376,
"eval_samples_per_second": 11.623,
"eval_steps_per_second": 5.812,
"eval_tts_loss": 9.288442969832703,
"step": 88000
},
{
"epoch": 4.052003635652404,
"grad_norm": 2.7639026641845703,
"learning_rate": 8.792072032019038e-06,
"loss": 4.9038,
"step": 88050
},
{
"epoch": 4.054304681477731,
"grad_norm": 2.569985866546631,
"learning_rate": 8.750793292711563e-06,
"loss": 4.977,
"step": 88100
},
{
"epoch": 4.056605727303059,
"grad_norm": 3.0355279445648193,
"learning_rate": 8.70960238839355e-06,
"loss": 4.9272,
"step": 88150
},
{
"epoch": 4.0589067731283865,
"grad_norm": 2.587646245956421,
"learning_rate": 8.668499406775849e-06,
"loss": 4.9789,
"step": 88200
},
{
"epoch": 4.061207818953714,
"grad_norm": 2.128152847290039,
"learning_rate": 8.627484435382072e-06,
"loss": 4.9165,
"step": 88250
},
{
"epoch": 4.063508864779042,
"grad_norm": 2.4285528659820557,
"learning_rate": 8.586557561548442e-06,
"loss": 4.9074,
"step": 88300
},
{
"epoch": 4.0658099106043695,
"grad_norm": 2.435657501220703,
"learning_rate": 8.545718872423581e-06,
"loss": 4.9301,
"step": 88350
},
{
"epoch": 4.068110956429697,
"grad_norm": 2.365025043487549,
"learning_rate": 8.504968454968326e-06,
"loss": 4.94,
"step": 88400
},
{
"epoch": 4.070412002255025,
"grad_norm": 2.667067766189575,
"learning_rate": 8.464306395955568e-06,
"loss": 4.9485,
"step": 88450
},
{
"epoch": 4.072713048080352,
"grad_norm": 2.2761590480804443,
"learning_rate": 8.423732781970029e-06,
"loss": 4.9872,
"step": 88500
},
{
"epoch": 4.07501409390568,
"grad_norm": 2.8008153438568115,
"learning_rate": 8.383247699408115e-06,
"loss": 4.9176,
"step": 88550
},
{
"epoch": 4.077315139731008,
"grad_norm": 2.3409249782562256,
"learning_rate": 8.34285123447771e-06,
"loss": 4.9863,
"step": 88600
},
{
"epoch": 4.079616185556335,
"grad_norm": 2.465102195739746,
"learning_rate": 8.302543473197993e-06,
"loss": 4.9414,
"step": 88650
},
{
"epoch": 4.081917231381663,
"grad_norm": 2.3558099269866943,
"learning_rate": 8.262324501399265e-06,
"loss": 4.8924,
"step": 88700
},
{
"epoch": 4.084218277206991,
"grad_norm": 2.590440034866333,
"learning_rate": 8.222194404722767e-06,
"loss": 4.9291,
"step": 88750
},
{
"epoch": 4.086519323032318,
"grad_norm": 2.5797536373138428,
"learning_rate": 8.182153268620462e-06,
"loss": 4.9662,
"step": 88800
},
{
"epoch": 4.088820368857646,
"grad_norm": 2.68279767036438,
"learning_rate": 8.142201178354936e-06,
"loss": 4.9116,
"step": 88850
},
{
"epoch": 4.0911214146829735,
"grad_norm": 2.7302539348602295,
"learning_rate": 8.102338218999118e-06,
"loss": 5.0191,
"step": 88900
},
{
"epoch": 4.093422460508301,
"grad_norm": 2.9300999641418457,
"learning_rate": 8.062564475436157e-06,
"loss": 4.9311,
"step": 88950
},
{
"epoch": 4.095723506333629,
"grad_norm": 2.688512086868286,
"learning_rate": 8.022880032359231e-06,
"loss": 4.863,
"step": 89000
},
{
"epoch": 4.095723506333629,
"eval_loss": 5.680548191070557,
"eval_runtime": 34.1834,
"eval_samples_per_second": 11.234,
"eval_steps_per_second": 5.617,
"eval_tts_loss": 9.326126809397413,
"step": 89000
},
{
"epoch": 4.0980245521589564,
"grad_norm": 2.644500732421875,
"learning_rate": 7.983284974271365e-06,
"loss": 4.943,
"step": 89050
},
{
"epoch": 4.100325597984284,
"grad_norm": 2.59487247467041,
"learning_rate": 7.943779385485244e-06,
"loss": 4.9148,
"step": 89100
},
{
"epoch": 4.102626643809612,
"grad_norm": 2.785487174987793,
"learning_rate": 7.904363350123045e-06,
"loss": 4.8976,
"step": 89150
},
{
"epoch": 4.104927689634939,
"grad_norm": 2.6682450771331787,
"learning_rate": 7.86503695211625e-06,
"loss": 4.9197,
"step": 89200
},
{
"epoch": 4.107228735460267,
"grad_norm": 3.0894181728363037,
"learning_rate": 7.825800275205459e-06,
"loss": 4.9131,
"step": 89250
},
{
"epoch": 4.109529781285595,
"grad_norm": 2.421407461166382,
"learning_rate": 7.786653402940247e-06,
"loss": 4.9777,
"step": 89300
},
{
"epoch": 4.111830827110922,
"grad_norm": 2.4230687618255615,
"learning_rate": 7.747596418678949e-06,
"loss": 4.9121,
"step": 89350
},
{
"epoch": 4.11413187293625,
"grad_norm": 2.8178746700286865,
"learning_rate": 7.708629405588486e-06,
"loss": 4.9246,
"step": 89400
},
{
"epoch": 4.116432918761577,
"grad_norm": 2.6915271282196045,
"learning_rate": 7.669752446644196e-06,
"loss": 4.9551,
"step": 89450
},
{
"epoch": 4.118733964586904,
"grad_norm": 2.7895405292510986,
"learning_rate": 7.630965624629693e-06,
"loss": 4.9911,
"step": 89500
},
{
"epoch": 4.121035010412232,
"grad_norm": 2.5442700386047363,
"learning_rate": 7.592269022136594e-06,
"loss": 4.9471,
"step": 89550
},
{
"epoch": 4.12333605623756,
"grad_norm": 2.405792713165283,
"learning_rate": 7.5536627215644585e-06,
"loss": 4.9543,
"step": 89600
},
{
"epoch": 4.125637102062887,
"grad_norm": 2.431641101837158,
"learning_rate": 7.515146805120521e-06,
"loss": 4.9375,
"step": 89650
},
{
"epoch": 4.127938147888215,
"grad_norm": 2.513671398162842,
"learning_rate": 7.476721354819604e-06,
"loss": 4.9233,
"step": 89700
},
{
"epoch": 4.1302391937135425,
"grad_norm": 2.221925735473633,
"learning_rate": 7.4383864524838474e-06,
"loss": 4.9451,
"step": 89750
},
{
"epoch": 4.13254023953887,
"grad_norm": 2.2546591758728027,
"learning_rate": 7.4001421797425975e-06,
"loss": 4.9119,
"step": 89800
},
{
"epoch": 4.134841285364198,
"grad_norm": 2.906501054763794,
"learning_rate": 7.361988618032212e-06,
"loss": 4.8577,
"step": 89850
},
{
"epoch": 4.1371423311895255,
"grad_norm": 2.6346490383148193,
"learning_rate": 7.323925848595903e-06,
"loss": 4.9964,
"step": 89900
},
{
"epoch": 4.139443377014853,
"grad_norm": 2.4832003116607666,
"learning_rate": 7.285953952483537e-06,
"loss": 4.9329,
"step": 89950
},
{
"epoch": 4.141744422840181,
"grad_norm": 2.5018067359924316,
"learning_rate": 7.248073010551481e-06,
"loss": 4.9203,
"step": 90000
},
{
"epoch": 4.141744422840181,
"eval_loss": 5.682537078857422,
"eval_runtime": 34.224,
"eval_samples_per_second": 11.22,
"eval_steps_per_second": 5.61,
"eval_tts_loss": 9.324252976809897,
"step": 90000
},
{
"epoch": 4.144045468665508,
"grad_norm": 2.496283531188965,
"learning_rate": 7.210283103462434e-06,
"loss": 4.9322,
"step": 90050
},
{
"epoch": 4.146346514490836,
"grad_norm": 2.9973666667938232,
"learning_rate": 7.172584311685232e-06,
"loss": 4.9629,
"step": 90100
},
{
"epoch": 4.148647560316164,
"grad_norm": 2.648648500442505,
"learning_rate": 7.134976715494723e-06,
"loss": 4.9944,
"step": 90150
},
{
"epoch": 4.150948606141491,
"grad_norm": 3.1687586307525635,
"learning_rate": 7.097460394971534e-06,
"loss": 4.8967,
"step": 90200
},
{
"epoch": 4.153249651966819,
"grad_norm": 3.1536190509796143,
"learning_rate": 7.060035430001943e-06,
"loss": 4.9063,
"step": 90250
},
{
"epoch": 4.155550697792147,
"grad_norm": 2.530733823776245,
"learning_rate": 7.022701900277701e-06,
"loss": 4.8927,
"step": 90300
},
{
"epoch": 4.157851743617474,
"grad_norm": 2.789060592651367,
"learning_rate": 6.985459885295853e-06,
"loss": 4.9174,
"step": 90350
},
{
"epoch": 4.160152789442802,
"grad_norm": 2.395866632461548,
"learning_rate": 6.9483094643585826e-06,
"loss": 4.9536,
"step": 90400
},
{
"epoch": 4.1624538352681295,
"grad_norm": 2.376652717590332,
"learning_rate": 6.911250716573031e-06,
"loss": 4.9544,
"step": 90450
},
{
"epoch": 4.164754881093457,
"grad_norm": 2.4753246307373047,
"learning_rate": 6.874283720851132e-06,
"loss": 4.9249,
"step": 90500
},
{
"epoch": 4.167055926918785,
"grad_norm": 2.8535377979278564,
"learning_rate": 6.837408555909447e-06,
"loss": 5.0122,
"step": 90550
},
{
"epoch": 4.169356972744112,
"grad_norm": 2.5676331520080566,
"learning_rate": 6.800625300268987e-06,
"loss": 4.913,
"step": 90600
},
{
"epoch": 4.17165801856944,
"grad_norm": 2.30092716217041,
"learning_rate": 6.763934032255076e-06,
"loss": 4.942,
"step": 90650
},
{
"epoch": 4.173959064394768,
"grad_norm": 3.224811553955078,
"learning_rate": 6.727334829997134e-06,
"loss": 4.9063,
"step": 90700
},
{
"epoch": 4.176260110220095,
"grad_norm": 2.8014469146728516,
"learning_rate": 6.690827771428559e-06,
"loss": 4.9177,
"step": 90750
},
{
"epoch": 4.178561156045423,
"grad_norm": 2.4171247482299805,
"learning_rate": 6.65441293428653e-06,
"loss": 4.9258,
"step": 90800
},
{
"epoch": 4.180862201870751,
"grad_norm": 2.6210904121398926,
"learning_rate": 6.618090396111859e-06,
"loss": 4.885,
"step": 90850
},
{
"epoch": 4.183163247696078,
"grad_norm": 2.62601637840271,
"learning_rate": 6.58186023424881e-06,
"loss": 4.9679,
"step": 90900
},
{
"epoch": 4.185464293521406,
"grad_norm": 2.9907238483428955,
"learning_rate": 6.545722525844944e-06,
"loss": 4.9243,
"step": 90950
},
{
"epoch": 4.187765339346733,
"grad_norm": 2.3237407207489014,
"learning_rate": 6.509677347850973e-06,
"loss": 4.96,
"step": 91000
},
{
"epoch": 4.187765339346733,
"eval_loss": 5.679426670074463,
"eval_runtime": 35.225,
"eval_samples_per_second": 10.901,
"eval_steps_per_second": 5.451,
"eval_tts_loss": 9.328892827813839,
"step": 91000
},
{
"epoch": 4.19006638517206,
"grad_norm": 2.776470422744751,
"learning_rate": 6.473724777020557e-06,
"loss": 4.9766,
"step": 91050
},
{
"epoch": 4.192367430997388,
"grad_norm": 2.488623857498169,
"learning_rate": 6.437864889910172e-06,
"loss": 4.9468,
"step": 91100
},
{
"epoch": 4.194668476822716,
"grad_norm": 2.666916608810425,
"learning_rate": 6.402097762878917e-06,
"loss": 4.896,
"step": 91150
},
{
"epoch": 4.196969522648043,
"grad_norm": 2.505510091781616,
"learning_rate": 6.366423472088396e-06,
"loss": 4.9794,
"step": 91200
},
{
"epoch": 4.199270568473371,
"grad_norm": 2.4801712036132812,
"learning_rate": 6.33084209350251e-06,
"loss": 4.9606,
"step": 91250
},
{
"epoch": 4.2015716142986985,
"grad_norm": 2.5550425052642822,
"learning_rate": 6.2953537028873245e-06,
"loss": 4.8826,
"step": 91300
},
{
"epoch": 4.203872660124026,
"grad_norm": 3.3445262908935547,
"learning_rate": 6.259958375810898e-06,
"loss": 4.9318,
"step": 91350
},
{
"epoch": 4.206173705949354,
"grad_norm": 2.3678958415985107,
"learning_rate": 6.2246561876431185e-06,
"loss": 4.9158,
"step": 91400
},
{
"epoch": 4.208474751774681,
"grad_norm": 2.5698840618133545,
"learning_rate": 6.189447213555538e-06,
"loss": 4.9356,
"step": 91450
},
{
"epoch": 4.210775797600009,
"grad_norm": 2.515235185623169,
"learning_rate": 6.154331528521251e-06,
"loss": 4.9391,
"step": 91500
},
{
"epoch": 4.213076843425337,
"grad_norm": 3.0568439960479736,
"learning_rate": 6.119309207314683e-06,
"loss": 4.8527,
"step": 91550
},
{
"epoch": 4.215377889250664,
"grad_norm": 2.7625505924224854,
"learning_rate": 6.084380324511446e-06,
"loss": 4.8952,
"step": 91600
},
{
"epoch": 4.217678935075992,
"grad_norm": 2.4264955520629883,
"learning_rate": 6.049544954488201e-06,
"loss": 4.9947,
"step": 91650
},
{
"epoch": 4.21997998090132,
"grad_norm": 2.910615921020508,
"learning_rate": 6.014803171422489e-06,
"loss": 4.9183,
"step": 91700
},
{
"epoch": 4.222281026726647,
"grad_norm": 2.3442256450653076,
"learning_rate": 5.9801550492925404e-06,
"loss": 4.8731,
"step": 91750
},
{
"epoch": 4.224582072551975,
"grad_norm": 2.891833782196045,
"learning_rate": 5.945600661877165e-06,
"loss": 4.9631,
"step": 91800
},
{
"epoch": 4.226883118377303,
"grad_norm": 2.956780433654785,
"learning_rate": 5.911140082755595e-06,
"loss": 4.9261,
"step": 91850
},
{
"epoch": 4.22918416420263,
"grad_norm": 2.5378317832946777,
"learning_rate": 5.876773385307294e-06,
"loss": 5.0156,
"step": 91900
},
{
"epoch": 4.231485210027958,
"grad_norm": 2.6025545597076416,
"learning_rate": 5.842500642711807e-06,
"loss": 4.9972,
"step": 91950
},
{
"epoch": 4.2337862558532855,
"grad_norm": 3.2855794429779053,
"learning_rate": 5.808321927948618e-06,
"loss": 4.9,
"step": 92000
},
{
"epoch": 4.2337862558532855,
"eval_loss": 5.68091344833374,
"eval_runtime": 35.0218,
"eval_samples_per_second": 10.965,
"eval_steps_per_second": 5.482,
"eval_tts_loss": 9.340122075743121,
"step": 92000
},
{
"epoch": 4.236087301678613,
"grad_norm": 2.4028210639953613,
"learning_rate": 5.774237313797004e-06,
"loss": 4.9617,
"step": 92050
},
{
"epoch": 4.238388347503941,
"grad_norm": 2.223472833633423,
"learning_rate": 5.7402468728358524e-06,
"loss": 4.9744,
"step": 92100
},
{
"epoch": 4.240689393329268,
"grad_norm": 2.3175554275512695,
"learning_rate": 5.706350677443522e-06,
"loss": 4.9287,
"step": 92150
},
{
"epoch": 4.242990439154596,
"grad_norm": 2.640993356704712,
"learning_rate": 5.6725487997977e-06,
"loss": 4.8685,
"step": 92200
},
{
"epoch": 4.245291484979924,
"grad_norm": 2.1567435264587402,
"learning_rate": 5.638841311875215e-06,
"loss": 4.9202,
"step": 92250
},
{
"epoch": 4.247592530805251,
"grad_norm": 2.5117077827453613,
"learning_rate": 5.605899619794419e-06,
"loss": 4.9533,
"step": 92300
},
{
"epoch": 4.249893576630579,
"grad_norm": 2.356736421585083,
"learning_rate": 5.5723792350834196e-06,
"loss": 4.9547,
"step": 92350
},
{
"epoch": 4.252194622455907,
"grad_norm": 2.7864232063293457,
"learning_rate": 5.538953453394236e-06,
"loss": 4.9201,
"step": 92400
},
{
"epoch": 4.254495668281233,
"grad_norm": 2.60893177986145,
"learning_rate": 5.50562234590285e-06,
"loss": 4.9713,
"step": 92450
},
{
"epoch": 4.256796714106561,
"grad_norm": 3.0111494064331055,
"learning_rate": 5.472385983583667e-06,
"loss": 4.8794,
"step": 92500
},
{
"epoch": 4.259097759931889,
"grad_norm": 2.4298832416534424,
"learning_rate": 5.439244437209329e-06,
"loss": 4.9719,
"step": 92550
},
{
"epoch": 4.261398805757216,
"grad_norm": 2.4979469776153564,
"learning_rate": 5.406197777350574e-06,
"loss": 4.9165,
"step": 92600
},
{
"epoch": 4.263699851582544,
"grad_norm": 2.7282731533050537,
"learning_rate": 5.3732460743761255e-06,
"loss": 4.959,
"step": 92650
},
{
"epoch": 4.266000897407872,
"grad_norm": 2.674407720565796,
"learning_rate": 5.340389398452472e-06,
"loss": 4.9879,
"step": 92700
},
{
"epoch": 4.268301943233199,
"grad_norm": 2.50618577003479,
"learning_rate": 5.307627819543759e-06,
"loss": 4.9555,
"step": 92750
},
{
"epoch": 4.270602989058527,
"grad_norm": 2.716242790222168,
"learning_rate": 5.27496140741166e-06,
"loss": 4.9114,
"step": 92800
},
{
"epoch": 4.2729040348838545,
"grad_norm": 2.615030527114868,
"learning_rate": 5.2423902316151575e-06,
"loss": 4.9386,
"step": 92850
},
{
"epoch": 4.275205080709182,
"grad_norm": 2.4920854568481445,
"learning_rate": 5.209914361510487e-06,
"loss": 4.9112,
"step": 92900
},
{
"epoch": 4.27750612653451,
"grad_norm": 2.827641725540161,
"learning_rate": 5.177533866250905e-06,
"loss": 4.9114,
"step": 92950
},
{
"epoch": 4.279807172359837,
"grad_norm": 2.755664587020874,
"learning_rate": 5.145248814786619e-06,
"loss": 4.9433,
"step": 93000
},
{
"epoch": 4.279807172359837,
"eval_loss": 5.679144382476807,
"eval_runtime": 34.4128,
"eval_samples_per_second": 11.159,
"eval_steps_per_second": 5.579,
"eval_tts_loss": 9.318372178651252,
"step": 93000
},
{
"epoch": 4.282108218185165,
"grad_norm": 2.069977045059204,
"learning_rate": 5.113059275864573e-06,
"loss": 4.8856,
"step": 93050
},
{
"epoch": 4.284409264010493,
"grad_norm": 2.4945602416992188,
"learning_rate": 5.08096531802833e-06,
"loss": 4.937,
"step": 93100
},
{
"epoch": 4.28671030983582,
"grad_norm": 2.9363367557525635,
"learning_rate": 5.048967009617933e-06,
"loss": 4.952,
"step": 93150
},
{
"epoch": 4.289011355661148,
"grad_norm": 2.544668436050415,
"learning_rate": 5.0170644187697455e-06,
"loss": 4.8924,
"step": 93200
},
{
"epoch": 4.291312401486476,
"grad_norm": 2.6560354232788086,
"learning_rate": 4.9852576134163224e-06,
"loss": 4.8599,
"step": 93250
},
{
"epoch": 4.293613447311803,
"grad_norm": 2.506303548812866,
"learning_rate": 4.95354666128624e-06,
"loss": 4.9508,
"step": 93300
},
{
"epoch": 4.295914493137131,
"grad_norm": 2.6585988998413086,
"learning_rate": 4.921931629903986e-06,
"loss": 4.8903,
"step": 93350
},
{
"epoch": 4.298215538962459,
"grad_norm": 2.9596993923187256,
"learning_rate": 4.890412586589777e-06,
"loss": 4.9403,
"step": 93400
},
{
"epoch": 4.300516584787786,
"grad_norm": 2.412736177444458,
"learning_rate": 4.858989598459446e-06,
"loss": 4.9516,
"step": 93450
},
{
"epoch": 4.302817630613114,
"grad_norm": 2.6511383056640625,
"learning_rate": 4.827662732424299e-06,
"loss": 4.9228,
"step": 93500
},
{
"epoch": 4.3051186764384415,
"grad_norm": 3.0368809700012207,
"learning_rate": 4.796432055190947e-06,
"loss": 4.8955,
"step": 93550
},
{
"epoch": 4.307419722263769,
"grad_norm": 2.5606002807617188,
"learning_rate": 4.7652976332611785e-06,
"loss": 4.9485,
"step": 93600
},
{
"epoch": 4.309720768089097,
"grad_norm": 2.8457987308502197,
"learning_rate": 4.7342595329318225e-06,
"loss": 4.9452,
"step": 93650
},
{
"epoch": 4.312021813914424,
"grad_norm": 2.4588279724121094,
"learning_rate": 4.703317820294612e-06,
"loss": 4.9085,
"step": 93700
},
{
"epoch": 4.314322859739752,
"grad_norm": 2.9369304180145264,
"learning_rate": 4.6724725612360186e-06,
"loss": 4.936,
"step": 93750
},
{
"epoch": 4.31662390556508,
"grad_norm": 2.102524995803833,
"learning_rate": 4.641723821437132e-06,
"loss": 4.9798,
"step": 93800
},
{
"epoch": 4.318924951390407,
"grad_norm": 2.3589839935302734,
"learning_rate": 4.611071666373534e-06,
"loss": 4.934,
"step": 93850
},
{
"epoch": 4.321225997215734,
"grad_norm": 2.485720634460449,
"learning_rate": 4.5805161613151124e-06,
"loss": 4.8953,
"step": 93900
},
{
"epoch": 4.323527043041063,
"grad_norm": 2.6778573989868164,
"learning_rate": 4.5500573713259776e-06,
"loss": 4.866,
"step": 93950
},
{
"epoch": 4.325828088866389,
"grad_norm": 2.9395782947540283,
"learning_rate": 4.519695361264281e-06,
"loss": 4.9388,
"step": 94000
},
{
"epoch": 4.325828088866389,
"eval_loss": 5.68023157119751,
"eval_runtime": 34.8801,
"eval_samples_per_second": 11.009,
"eval_steps_per_second": 5.505,
"eval_tts_loss": 9.319495803010797,
"step": 94000
},
{
"epoch": 4.328129134691717,
"grad_norm": 2.3470025062561035,
"learning_rate": 4.489430195782096e-06,
"loss": 4.9749,
"step": 94050
},
{
"epoch": 4.330430180517045,
"grad_norm": 2.6966683864593506,
"learning_rate": 4.459261939325282e-06,
"loss": 4.9726,
"step": 94100
},
{
"epoch": 4.332731226342372,
"grad_norm": 2.843234062194824,
"learning_rate": 4.4291906561333415e-06,
"loss": 4.9067,
"step": 94150
},
{
"epoch": 4.3350322721677,
"grad_norm": 2.3041133880615234,
"learning_rate": 4.399216410239288e-06,
"loss": 4.869,
"step": 94200
},
{
"epoch": 4.337333317993028,
"grad_norm": 2.9902963638305664,
"learning_rate": 4.369339265469496e-06,
"loss": 4.8756,
"step": 94250
},
{
"epoch": 4.339634363818355,
"grad_norm": 2.85142183303833,
"learning_rate": 4.339559285443573e-06,
"loss": 4.9174,
"step": 94300
},
{
"epoch": 4.341935409643683,
"grad_norm": 3.339543581008911,
"learning_rate": 4.309876533574258e-06,
"loss": 4.9478,
"step": 94350
},
{
"epoch": 4.3442364554690105,
"grad_norm": 2.467912435531616,
"learning_rate": 4.280291073067216e-06,
"loss": 4.8815,
"step": 94400
},
{
"epoch": 4.346537501294338,
"grad_norm": 3.1716277599334717,
"learning_rate": 4.250802966920964e-06,
"loss": 4.893,
"step": 94450
},
{
"epoch": 4.348838547119666,
"grad_norm": 2.9320003986358643,
"learning_rate": 4.221412277926706e-06,
"loss": 4.963,
"step": 94500
},
{
"epoch": 4.351139592944993,
"grad_norm": 2.2422378063201904,
"learning_rate": 4.19211906866821e-06,
"loss": 4.9208,
"step": 94550
},
{
"epoch": 4.353440638770321,
"grad_norm": 2.2541680335998535,
"learning_rate": 4.162923401521684e-06,
"loss": 4.9744,
"step": 94600
},
{
"epoch": 4.355741684595649,
"grad_norm": 2.8150317668914795,
"learning_rate": 4.1338253386556166e-06,
"loss": 4.9426,
"step": 94650
},
{
"epoch": 4.358042730420976,
"grad_norm": 2.552889108657837,
"learning_rate": 4.104824942030666e-06,
"loss": 4.9327,
"step": 94700
},
{
"epoch": 4.360343776246304,
"grad_norm": 2.3204643726348877,
"learning_rate": 4.075922273399524e-06,
"loss": 4.9854,
"step": 94750
},
{
"epoch": 4.362644822071632,
"grad_norm": 2.234245777130127,
"learning_rate": 4.047117394306777e-06,
"loss": 4.9436,
"step": 94800
},
{
"epoch": 4.364945867896959,
"grad_norm": 2.6050384044647217,
"learning_rate": 4.018410366088804e-06,
"loss": 4.9197,
"step": 94850
},
{
"epoch": 4.367246913722287,
"grad_norm": 2.8665106296539307,
"learning_rate": 3.989801249873593e-06,
"loss": 4.9342,
"step": 94900
},
{
"epoch": 4.3695479595476145,
"grad_norm": 2.768986701965332,
"learning_rate": 3.961859368918858e-06,
"loss": 4.8924,
"step": 94950
},
{
"epoch": 4.371849005372942,
"grad_norm": 2.7984561920166016,
"learning_rate": 3.9334442979928e-06,
"loss": 4.9995,
"step": 95000
},
{
"epoch": 4.371849005372942,
"eval_loss": 5.678844451904297,
"eval_runtime": 33.1028,
"eval_samples_per_second": 11.6,
"eval_steps_per_second": 5.8,
"eval_tts_loss": 9.321806622066815,
"step": 95000
},
{
"epoch": 4.37415005119827,
"grad_norm": 2.769041061401367,
"learning_rate": 3.9051273199940455e-06,
"loss": 4.9815,
"step": 95050
},
{
"epoch": 4.3764510970235975,
"grad_norm": 2.3030972480773926,
"learning_rate": 3.876908495220033e-06,
"loss": 4.9004,
"step": 95100
},
{
"epoch": 4.378752142848925,
"grad_norm": 2.612943172454834,
"learning_rate": 3.848787883759192e-06,
"loss": 4.9345,
"step": 95150
},
{
"epoch": 4.381053188674253,
"grad_norm": 2.815553665161133,
"learning_rate": 3.820765545490823e-06,
"loss": 4.969,
"step": 95200
},
{
"epoch": 4.38335423449958,
"grad_norm": 2.8260498046875,
"learning_rate": 3.7928415400849683e-06,
"loss": 4.9135,
"step": 95250
},
{
"epoch": 4.385655280324908,
"grad_norm": 2.7037949562072754,
"learning_rate": 3.7650159270022745e-06,
"loss": 4.9189,
"step": 95300
},
{
"epoch": 4.387956326150236,
"grad_norm": 2.645465850830078,
"learning_rate": 3.737288765493885e-06,
"loss": 4.9661,
"step": 95350
},
{
"epoch": 4.390257371975563,
"grad_norm": 2.275775194168091,
"learning_rate": 3.7096601146012877e-06,
"loss": 4.9557,
"step": 95400
},
{
"epoch": 4.39255841780089,
"grad_norm": 2.8104965686798096,
"learning_rate": 3.6821300331562224e-06,
"loss": 4.9196,
"step": 95450
},
{
"epoch": 4.394859463626218,
"grad_norm": 2.825328826904297,
"learning_rate": 3.65469857978053e-06,
"loss": 4.9656,
"step": 95500
},
{
"epoch": 4.397160509451545,
"grad_norm": 2.970402240753174,
"learning_rate": 3.6273658128860357e-06,
"loss": 5.0275,
"step": 95550
},
{
"epoch": 4.399461555276873,
"grad_norm": 2.230492115020752,
"learning_rate": 3.6001317906744216e-06,
"loss": 5.0396,
"step": 95600
},
{
"epoch": 4.401762601102201,
"grad_norm": 2.658487558364868,
"learning_rate": 3.572996571137105e-06,
"loss": 4.9603,
"step": 95650
},
{
"epoch": 4.404063646927528,
"grad_norm": 2.7010092735290527,
"learning_rate": 3.545960212055116e-06,
"loss": 4.9494,
"step": 95700
},
{
"epoch": 4.406364692752856,
"grad_norm": 2.616330623626709,
"learning_rate": 3.5190227709989863e-06,
"loss": 4.9521,
"step": 95750
},
{
"epoch": 4.408665738578184,
"grad_norm": 2.6494617462158203,
"learning_rate": 3.4921843053285886e-06,
"loss": 4.9551,
"step": 95800
},
{
"epoch": 4.410966784403511,
"grad_norm": 2.8855628967285156,
"learning_rate": 3.465444872193069e-06,
"loss": 4.9162,
"step": 95850
},
{
"epoch": 4.413267830228839,
"grad_norm": 2.9472038745880127,
"learning_rate": 3.4388045285306593e-06,
"loss": 4.9305,
"step": 95900
},
{
"epoch": 4.4155688760541665,
"grad_norm": 2.7934985160827637,
"learning_rate": 3.4122633310686435e-06,
"loss": 4.9496,
"step": 95950
},
{
"epoch": 4.417869921879494,
"grad_norm": 2.439134120941162,
"learning_rate": 3.3858213363231415e-06,
"loss": 4.9566,
"step": 96000
},
{
"epoch": 4.417869921879494,
"eval_loss": 5.680164337158203,
"eval_runtime": 33.1921,
"eval_samples_per_second": 11.569,
"eval_steps_per_second": 5.785,
"eval_tts_loss": 9.328472778896515,
"step": 96000
},
{
"epoch": 4.420170967704822,
"grad_norm": 2.765312433242798,
"learning_rate": 3.3594786005990586e-06,
"loss": 4.9377,
"step": 96050
},
{
"epoch": 4.422472013530149,
"grad_norm": 2.464045524597168,
"learning_rate": 3.3332351799899354e-06,
"loss": 4.9079,
"step": 96100
},
{
"epoch": 4.424773059355477,
"grad_norm": 2.796971321105957,
"learning_rate": 3.307091130377815e-06,
"loss": 4.8846,
"step": 96150
},
{
"epoch": 4.427074105180805,
"grad_norm": 2.300090789794922,
"learning_rate": 3.281046507433161e-06,
"loss": 4.9408,
"step": 96200
},
{
"epoch": 4.429375151006132,
"grad_norm": 2.481484889984131,
"learning_rate": 3.2551013666147266e-06,
"loss": 4.9107,
"step": 96250
},
{
"epoch": 4.43167619683146,
"grad_norm": 2.670475959777832,
"learning_rate": 3.229255763169403e-06,
"loss": 4.9368,
"step": 96300
},
{
"epoch": 4.433977242656788,
"grad_norm": 2.8900489807128906,
"learning_rate": 3.2035097521321665e-06,
"loss": 4.9616,
"step": 96350
},
{
"epoch": 4.436278288482115,
"grad_norm": 2.6688849925994873,
"learning_rate": 3.1778633883259026e-06,
"loss": 4.9095,
"step": 96400
},
{
"epoch": 4.438579334307443,
"grad_norm": 2.6334035396575928,
"learning_rate": 3.1523167263613042e-06,
"loss": 4.9026,
"step": 96450
},
{
"epoch": 4.4408803801327705,
"grad_norm": 2.841860055923462,
"learning_rate": 3.1268698206367687e-06,
"loss": 4.9327,
"step": 96500
},
{
"epoch": 4.443181425958098,
"grad_norm": 2.5172505378723145,
"learning_rate": 3.101522725338285e-06,
"loss": 4.8795,
"step": 96550
},
{
"epoch": 4.445482471783426,
"grad_norm": 2.743206024169922,
"learning_rate": 3.0762754944392958e-06,
"loss": 4.9006,
"step": 96600
},
{
"epoch": 4.4477835176087535,
"grad_norm": 2.925295114517212,
"learning_rate": 3.051128181700591e-06,
"loss": 4.963,
"step": 96650
},
{
"epoch": 4.450084563434081,
"grad_norm": 2.951690196990967,
"learning_rate": 3.02608084067022e-06,
"loss": 4.939,
"step": 96700
},
{
"epoch": 4.452385609259409,
"grad_norm": 3.2038767337799072,
"learning_rate": 3.0011335246833193e-06,
"loss": 4.9009,
"step": 96750
},
{
"epoch": 4.454686655084736,
"grad_norm": 2.2985875606536865,
"learning_rate": 2.976286286862079e-06,
"loss": 4.9868,
"step": 96800
},
{
"epoch": 4.456987700910064,
"grad_norm": 2.6529111862182617,
"learning_rate": 2.951539180115548e-06,
"loss": 4.8608,
"step": 96850
},
{
"epoch": 4.459288746735391,
"grad_norm": 2.827261209487915,
"learning_rate": 2.9268922571395797e-06,
"loss": 4.8992,
"step": 96900
},
{
"epoch": 4.461589792560719,
"grad_norm": 2.702986717224121,
"learning_rate": 2.902345570416687e-06,
"loss": 4.9245,
"step": 96950
},
{
"epoch": 4.463890838386046,
"grad_norm": 2.3246467113494873,
"learning_rate": 2.8778991722159476e-06,
"loss": 4.9339,
"step": 97000
},
{
"epoch": 4.463890838386046,
"eval_loss": 5.68049955368042,
"eval_runtime": 33.2125,
"eval_samples_per_second": 11.562,
"eval_steps_per_second": 5.781,
"eval_tts_loss": 9.329744280777785,
"step": 97000
},
{
"epoch": 4.466191884211374,
"grad_norm": 2.637733221054077,
"learning_rate": 2.8535531145928884e-06,
"loss": 4.8988,
"step": 97050
},
{
"epoch": 4.468492930036701,
"grad_norm": 2.5782065391540527,
"learning_rate": 2.829791378513441e-06,
"loss": 4.9646,
"step": 97100
},
{
"epoch": 4.470793975862029,
"grad_norm": 2.7204387187957764,
"learning_rate": 2.805644147972042e-06,
"loss": 4.9783,
"step": 97150
},
{
"epoch": 4.473095021687357,
"grad_norm": 2.453364372253418,
"learning_rate": 2.7815974118662824e-06,
"loss": 4.9549,
"step": 97200
},
{
"epoch": 4.475396067512684,
"grad_norm": 3.313816547393799,
"learning_rate": 2.75765122140067e-06,
"loss": 4.9501,
"step": 97250
},
{
"epoch": 4.477697113338012,
"grad_norm": 3.019824981689453,
"learning_rate": 2.7338056275656056e-06,
"loss": 4.8977,
"step": 97300
},
{
"epoch": 4.4799981591633395,
"grad_norm": 2.641376256942749,
"learning_rate": 2.7100606811372664e-06,
"loss": 4.9102,
"step": 97350
},
{
"epoch": 4.482299204988667,
"grad_norm": 2.7181406021118164,
"learning_rate": 2.6864164326775354e-06,
"loss": 4.9606,
"step": 97400
},
{
"epoch": 4.484600250813995,
"grad_norm": 2.706712245941162,
"learning_rate": 2.6628729325338442e-06,
"loss": 4.9555,
"step": 97450
},
{
"epoch": 4.4869012966393225,
"grad_norm": 2.717064380645752,
"learning_rate": 2.639430230839135e-06,
"loss": 4.9223,
"step": 97500
},
{
"epoch": 4.48920234246465,
"grad_norm": 2.9090964794158936,
"learning_rate": 2.6160883775116718e-06,
"loss": 4.9483,
"step": 97550
},
{
"epoch": 4.491503388289978,
"grad_norm": 2.4921481609344482,
"learning_rate": 2.5928474222550012e-06,
"loss": 4.929,
"step": 97600
},
{
"epoch": 4.493804434115305,
"grad_norm": 2.7136213779449463,
"learning_rate": 2.569707414557798e-06,
"loss": 4.8908,
"step": 97650
},
{
"epoch": 4.496105479940633,
"grad_norm": 2.4080166816711426,
"learning_rate": 2.546668403693797e-06,
"loss": 4.8863,
"step": 97700
},
{
"epoch": 4.498406525765961,
"grad_norm": 2.7067811489105225,
"learning_rate": 2.5237304387216777e-06,
"loss": 4.9743,
"step": 97750
},
{
"epoch": 4.500707571591288,
"grad_norm": 2.41428542137146,
"learning_rate": 2.5008935684849357e-06,
"loss": 4.8811,
"step": 97800
},
{
"epoch": 4.503008617416616,
"grad_norm": 2.815544843673706,
"learning_rate": 2.4786115646308226e-06,
"loss": 4.998,
"step": 97850
},
{
"epoch": 4.505309663241944,
"grad_norm": 1.8985440731048584,
"learning_rate": 2.4559750052256082e-06,
"loss": 4.9947,
"step": 97900
},
{
"epoch": 4.507610709067271,
"grad_norm": 2.4579977989196777,
"learning_rate": 2.4334396848324337e-06,
"loss": 4.9729,
"step": 97950
},
{
"epoch": 4.509911754892599,
"grad_norm": 2.3383259773254395,
"learning_rate": 2.4110056514374313e-06,
"loss": 4.9476,
"step": 98000
},
{
"epoch": 4.509911754892599,
"eval_loss": 5.68021821975708,
"eval_runtime": 34.9078,
"eval_samples_per_second": 11.0,
"eval_steps_per_second": 5.5,
"eval_tts_loss": 9.33405117698406,
"step": 98000
},
{
"epoch": 4.5122128007179265,
"grad_norm": 2.444965362548828,
"learning_rate": 2.3886729528110495e-06,
"loss": 4.8585,
"step": 98050
},
{
"epoch": 4.514513846543254,
"grad_norm": 2.5604538917541504,
"learning_rate": 2.366441636507949e-06,
"loss": 5.0007,
"step": 98100
},
{
"epoch": 4.516814892368582,
"grad_norm": 2.7612502574920654,
"learning_rate": 2.3443117498669178e-06,
"loss": 4.9423,
"step": 98150
},
{
"epoch": 4.519115938193909,
"grad_norm": 2.63486647605896,
"learning_rate": 2.3222833400107615e-06,
"loss": 4.949,
"step": 98200
},
{
"epoch": 4.521416984019237,
"grad_norm": 2.565378427505493,
"learning_rate": 2.3003564538462252e-06,
"loss": 4.8717,
"step": 98250
},
{
"epoch": 4.523718029844565,
"grad_norm": 2.9743268489837646,
"learning_rate": 2.2785311380638218e-06,
"loss": 4.9079,
"step": 98300
},
{
"epoch": 4.5260190756698915,
"grad_norm": 2.619223117828369,
"learning_rate": 2.256807439137826e-06,
"loss": 4.9379,
"step": 98350
},
{
"epoch": 4.52832012149522,
"grad_norm": 2.2771501541137695,
"learning_rate": 2.2351854033261122e-06,
"loss": 4.9124,
"step": 98400
},
{
"epoch": 4.530621167320547,
"grad_norm": 2.3378474712371826,
"learning_rate": 2.2136650766700908e-06,
"loss": 4.9547,
"step": 98450
},
{
"epoch": 4.532922213145875,
"grad_norm": 2.4339568614959717,
"learning_rate": 2.1922465049945773e-06,
"loss": 4.8822,
"step": 98500
},
{
"epoch": 4.535223258971202,
"grad_norm": 2.3399932384490967,
"learning_rate": 2.170929733907723e-06,
"loss": 4.904,
"step": 98550
},
{
"epoch": 4.53752430479653,
"grad_norm": 2.650240659713745,
"learning_rate": 2.1497148088009068e-06,
"loss": 4.9557,
"step": 98600
},
{
"epoch": 4.539825350621857,
"grad_norm": 2.8209807872772217,
"learning_rate": 2.1286017748486488e-06,
"loss": 4.8756,
"step": 98650
},
{
"epoch": 4.542126396447185,
"grad_norm": 2.7290356159210205,
"learning_rate": 2.1075906770084863e-06,
"loss": 4.8983,
"step": 98700
},
{
"epoch": 4.544427442272513,
"grad_norm": 2.846173048019409,
"learning_rate": 2.0866815600209034e-06,
"loss": 4.9577,
"step": 98750
},
{
"epoch": 4.54672848809784,
"grad_norm": 2.816352128982544,
"learning_rate": 2.0658744684092403e-06,
"loss": 4.9076,
"step": 98800
},
{
"epoch": 4.549029533923168,
"grad_norm": 2.602374315261841,
"learning_rate": 2.0451694464795725e-06,
"loss": 4.9418,
"step": 98850
},
{
"epoch": 4.5513305797484955,
"grad_norm": 2.383106231689453,
"learning_rate": 2.0245665383206325e-06,
"loss": 4.8878,
"step": 98900
},
{
"epoch": 4.553631625573823,
"grad_norm": 2.5587480068206787,
"learning_rate": 2.004065787803716e-06,
"loss": 4.9493,
"step": 98950
},
{
"epoch": 4.555932671399151,
"grad_norm": 2.5952608585357666,
"learning_rate": 1.9836672385825983e-06,
"loss": 4.9341,
"step": 99000
},
{
"epoch": 4.555932671399151,
"eval_loss": 5.680870056152344,
"eval_runtime": 32.9306,
"eval_samples_per_second": 11.661,
"eval_steps_per_second": 5.83,
"eval_tts_loss": 9.328455072720036,
"step": 99000
},
{
"epoch": 4.5582337172244785,
"grad_norm": 2.5696303844451904,
"learning_rate": 1.9633709340934116e-06,
"loss": 4.9358,
"step": 99050
},
{
"epoch": 4.560534763049806,
"grad_norm": 3.0702054500579834,
"learning_rate": 1.9431769175545846e-06,
"loss": 4.9556,
"step": 99100
},
{
"epoch": 4.562835808875134,
"grad_norm": 2.621656656265259,
"learning_rate": 1.9230852319667316e-06,
"loss": 4.9203,
"step": 99150
},
{
"epoch": 4.565136854700461,
"grad_norm": 3.1234617233276367,
"learning_rate": 1.9030959201125631e-06,
"loss": 4.9338,
"step": 99200
},
{
"epoch": 4.567437900525789,
"grad_norm": 2.6556556224823,
"learning_rate": 1.8832090245568034e-06,
"loss": 4.8807,
"step": 99250
},
{
"epoch": 4.569738946351117,
"grad_norm": 2.8958590030670166,
"learning_rate": 1.8634245876460953e-06,
"loss": 4.9994,
"step": 99300
},
{
"epoch": 4.572039992176444,
"grad_norm": 2.4126973152160645,
"learning_rate": 1.8437426515089006e-06,
"loss": 4.9562,
"step": 99350
},
{
"epoch": 4.574341038001772,
"grad_norm": 2.059985876083374,
"learning_rate": 1.8241632580554169e-06,
"loss": 4.9111,
"step": 99400
},
{
"epoch": 4.5766420838271,
"grad_norm": 2.619472026824951,
"learning_rate": 1.804686448977494e-06,
"loss": 4.8848,
"step": 99450
},
{
"epoch": 4.578943129652427,
"grad_norm": 2.500753879547119,
"learning_rate": 1.7853122657485567e-06,
"loss": 4.9496,
"step": 99500
},
{
"epoch": 4.581244175477755,
"grad_norm": 2.4524269104003906,
"learning_rate": 1.7660407496234876e-06,
"loss": 4.9079,
"step": 99550
},
{
"epoch": 4.5835452213030825,
"grad_norm": 2.433594226837158,
"learning_rate": 1.7468719416385393e-06,
"loss": 4.937,
"step": 99600
},
{
"epoch": 4.58584626712841,
"grad_norm": 2.4936323165893555,
"learning_rate": 1.7278058826112886e-06,
"loss": 4.9127,
"step": 99650
},
{
"epoch": 4.588147312953738,
"grad_norm": 2.664189577102661,
"learning_rate": 1.7088426131404933e-06,
"loss": 4.9445,
"step": 99700
},
{
"epoch": 4.590448358779065,
"grad_norm": 2.1606667041778564,
"learning_rate": 1.6899821736060529e-06,
"loss": 4.9695,
"step": 99750
},
{
"epoch": 4.592749404604393,
"grad_norm": 2.475398302078247,
"learning_rate": 1.6712246041688972e-06,
"loss": 4.9838,
"step": 99800
},
{
"epoch": 4.595050450429721,
"grad_norm": 2.5283830165863037,
"learning_rate": 1.6525699447709098e-06,
"loss": 4.9915,
"step": 99850
},
{
"epoch": 4.5973514962550475,
"grad_norm": 2.233774185180664,
"learning_rate": 1.6340182351348266e-06,
"loss": 4.9693,
"step": 99900
},
{
"epoch": 4.599652542080376,
"grad_norm": 2.912449598312378,
"learning_rate": 1.6155695147641925e-06,
"loss": 4.9467,
"step": 99950
},
{
"epoch": 4.601953587905703,
"grad_norm": 2.5109288692474365,
"learning_rate": 1.5972238229432279e-06,
"loss": 4.9133,
"step": 100000
},
{
"epoch": 4.601953587905703,
"eval_loss": 5.6785407066345215,
"eval_runtime": 34.8739,
"eval_samples_per_second": 11.011,
"eval_steps_per_second": 5.506,
"eval_tts_loss": 9.332583423361912,
"step": 100000
},
{
"epoch": 4.60425463373103,
"grad_norm": 2.881924867630005,
"learning_rate": 1.5789811987367675e-06,
"loss": 4.8805,
"step": 100050
},
{
"epoch": 4.606555679556358,
"grad_norm": 2.44765305519104,
"learning_rate": 1.5608416809901882e-06,
"loss": 4.8991,
"step": 100100
},
{
"epoch": 4.608856725381686,
"grad_norm": 2.5008325576782227,
"learning_rate": 1.542805308329298e-06,
"loss": 4.9804,
"step": 100150
},
{
"epoch": 4.611157771207013,
"grad_norm": 2.615161180496216,
"learning_rate": 1.524872119160292e-06,
"loss": 4.8991,
"step": 100200
},
{
"epoch": 4.613458817032341,
"grad_norm": 2.3106582164764404,
"learning_rate": 1.5070421516696353e-06,
"loss": 4.8969,
"step": 100250
},
{
"epoch": 4.615759862857669,
"grad_norm": 2.350234031677246,
"learning_rate": 1.489315443823991e-06,
"loss": 4.9601,
"step": 100300
},
{
"epoch": 4.618060908682996,
"grad_norm": 2.6425936222076416,
"learning_rate": 1.4716920333701534e-06,
"loss": 4.9984,
"step": 100350
},
{
"epoch": 4.620361954508324,
"grad_norm": 2.9497785568237305,
"learning_rate": 1.4541719578349488e-06,
"loss": 4.8953,
"step": 100400
},
{
"epoch": 4.6226630003336515,
"grad_norm": 2.536822557449341,
"learning_rate": 1.4367552545251905e-06,
"loss": 4.9373,
"step": 100450
},
{
"epoch": 4.624964046158979,
"grad_norm": 2.1799700260162354,
"learning_rate": 1.41944196052754e-06,
"loss": 4.9222,
"step": 100500
},
{
"epoch": 4.627265091984307,
"grad_norm": 2.7928173542022705,
"learning_rate": 1.4022321127084737e-06,
"loss": 4.9494,
"step": 100550
},
{
"epoch": 4.629566137809634,
"grad_norm": 2.839073896408081,
"learning_rate": 1.3851257477142e-06,
"loss": 4.8917,
"step": 100600
},
{
"epoch": 4.631867183634962,
"grad_norm": 2.6892263889312744,
"learning_rate": 1.3681229019705643e-06,
"loss": 4.9696,
"step": 100650
},
{
"epoch": 4.63416822946029,
"grad_norm": 2.6419594287872314,
"learning_rate": 1.351223611682989e-06,
"loss": 4.9139,
"step": 100700
},
{
"epoch": 4.636469275285617,
"grad_norm": 2.7339677810668945,
"learning_rate": 1.3344279128363778e-06,
"loss": 5.0286,
"step": 100750
},
{
"epoch": 4.638770321110945,
"grad_norm": 2.5983216762542725,
"learning_rate": 1.3177358411950668e-06,
"loss": 4.9363,
"step": 100800
},
{
"epoch": 4.641071366936273,
"grad_norm": 2.3199923038482666,
"learning_rate": 1.3011474323027074e-06,
"loss": 4.9999,
"step": 100850
},
{
"epoch": 4.6433724127616,
"grad_norm": 2.283487558364868,
"learning_rate": 1.2846627214822382e-06,
"loss": 4.8808,
"step": 100900
},
{
"epoch": 4.645673458586928,
"grad_norm": 2.314702272415161,
"learning_rate": 1.2682817438357642e-06,
"loss": 4.9613,
"step": 100950
},
{
"epoch": 4.647974504412256,
"grad_norm": 2.938246965408325,
"learning_rate": 1.2520045342445164e-06,
"loss": 4.9292,
"step": 101000
},
{
"epoch": 4.647974504412256,
"eval_loss": 5.677786350250244,
"eval_runtime": 34.7565,
"eval_samples_per_second": 11.048,
"eval_steps_per_second": 5.524,
"eval_tts_loss": 9.33160708535609,
"step": 101000
},
{
"epoch": 4.650275550237583,
"grad_norm": 3.057169198989868,
"learning_rate": 1.2358311273687696e-06,
"loss": 4.9679,
"step": 101050
},
{
"epoch": 4.652576596062911,
"grad_norm": 2.5875933170318604,
"learning_rate": 1.2197615576477417e-06,
"loss": 4.9252,
"step": 101100
},
{
"epoch": 4.6548776418882385,
"grad_norm": 2.5108046531677246,
"learning_rate": 1.2037958592995724e-06,
"loss": 4.8977,
"step": 101150
},
{
"epoch": 4.657178687713566,
"grad_norm": 2.690922498703003,
"learning_rate": 1.1879340663212002e-06,
"loss": 4.9365,
"step": 101200
},
{
"epoch": 4.659479733538894,
"grad_norm": 2.1565539836883545,
"learning_rate": 1.1721762124883019e-06,
"loss": 4.8915,
"step": 101250
},
{
"epoch": 4.661780779364221,
"grad_norm": 2.7533767223358154,
"learning_rate": 1.1565223313552643e-06,
"loss": 4.912,
"step": 101300
},
{
"epoch": 4.664081825189548,
"grad_norm": 3.154649257659912,
"learning_rate": 1.1409724562550406e-06,
"loss": 4.9524,
"step": 101350
},
{
"epoch": 4.666382871014877,
"grad_norm": 2.9312243461608887,
"learning_rate": 1.1255266202991389e-06,
"loss": 4.8855,
"step": 101400
},
{
"epoch": 4.6686839168402035,
"grad_norm": 2.4187536239624023,
"learning_rate": 1.1101848563775163e-06,
"loss": 4.9802,
"step": 101450
},
{
"epoch": 4.670984962665532,
"grad_norm": 2.9323229789733887,
"learning_rate": 1.0949471971585246e-06,
"loss": 4.9253,
"step": 101500
},
{
"epoch": 4.673286008490859,
"grad_norm": 2.3062210083007812,
"learning_rate": 1.0798136750888476e-06,
"loss": 4.844,
"step": 101550
},
{
"epoch": 4.675587054316186,
"grad_norm": 3.0856621265411377,
"learning_rate": 1.0647843223934028e-06,
"loss": 4.9659,
"step": 101600
},
{
"epoch": 4.677888100141514,
"grad_norm": 2.496002435684204,
"learning_rate": 1.0498591710753126e-06,
"loss": 4.9148,
"step": 101650
},
{
"epoch": 4.680189145966842,
"grad_norm": 2.5412819385528564,
"learning_rate": 1.0350382529158043e-06,
"loss": 4.9533,
"step": 101700
},
{
"epoch": 4.682490191792169,
"grad_norm": 2.6318113803863525,
"learning_rate": 1.0203215994741556e-06,
"loss": 4.9336,
"step": 101750
},
{
"epoch": 4.684791237617497,
"grad_norm": 3.0665981769561768,
"learning_rate": 1.0057092420876269e-06,
"loss": 4.9408,
"step": 101800
},
{
"epoch": 4.687092283442825,
"grad_norm": 2.4351532459259033,
"learning_rate": 9.912012118713898e-07,
"loss": 4.9675,
"step": 101850
},
{
"epoch": 4.689393329268152,
"grad_norm": 2.9481053352355957,
"learning_rate": 9.77084590253763e-07,
"loss": 4.8896,
"step": 101900
},
{
"epoch": 4.69169437509348,
"grad_norm": 2.7210726737976074,
"learning_rate": 9.62783218761143e-07,
"loss": 4.9012,
"step": 101950
},
{
"epoch": 4.6939954209188075,
"grad_norm": 2.8957622051239014,
"learning_rate": 9.485862658443812e-07,
"loss": 4.9575,
"step": 102000
},
{
"epoch": 4.6939954209188075,
"eval_loss": 5.678743362426758,
"eval_runtime": 33.3391,
"eval_samples_per_second": 11.518,
"eval_steps_per_second": 5.759,
"eval_tts_loss": 9.337093838506295,
"step": 102000
},
{
"epoch": 4.696296466744135,
"grad_norm": 2.884580135345459,
"learning_rate": 9.344937617340898e-07,
"loss": 4.9248,
"step": 102050
},
{
"epoch": 4.698597512569463,
"grad_norm": 2.360043525695801,
"learning_rate": 9.205057364384872e-07,
"loss": 4.9822,
"step": 102100
},
{
"epoch": 4.70089855839479,
"grad_norm": 2.644174337387085,
"learning_rate": 9.066222197433138e-07,
"loss": 4.8702,
"step": 102150
},
{
"epoch": 4.703199604220118,
"grad_norm": 2.578793525695801,
"learning_rate": 8.92843241211766e-07,
"loss": 4.9325,
"step": 102200
},
{
"epoch": 4.705500650045446,
"grad_norm": 2.5854337215423584,
"learning_rate": 8.791688301844403e-07,
"loss": 4.9065,
"step": 102250
},
{
"epoch": 4.707801695870773,
"grad_norm": 2.5102357864379883,
"learning_rate": 8.655990157792726e-07,
"loss": 4.9669,
"step": 102300
},
{
"epoch": 4.710102741696101,
"grad_norm": 3.339261770248413,
"learning_rate": 8.521338268914824e-07,
"loss": 4.9129,
"step": 102350
},
{
"epoch": 4.712403787521429,
"grad_norm": 2.8844716548919678,
"learning_rate": 8.387732921934788e-07,
"loss": 4.9146,
"step": 102400
},
{
"epoch": 4.714704833346756,
"grad_norm": 2.4359424114227295,
"learning_rate": 8.255174401348431e-07,
"loss": 4.9007,
"step": 102450
},
{
"epoch": 4.717005879172084,
"grad_norm": 2.860389232635498,
"learning_rate": 8.12366298942252e-07,
"loss": 4.9032,
"step": 102500
},
{
"epoch": 4.719306924997412,
"grad_norm": 2.6299808025360107,
"learning_rate": 7.993198966193938e-07,
"loss": 4.9112,
"step": 102550
},
{
"epoch": 4.721607970822739,
"grad_norm": 2.6053991317749023,
"learning_rate": 7.863782609469406e-07,
"loss": 4.8932,
"step": 102600
},
{
"epoch": 4.723909016648067,
"grad_norm": 3.325043201446533,
"learning_rate": 7.735414194824819e-07,
"loss": 4.9195,
"step": 102650
},
{
"epoch": 4.7262100624733945,
"grad_norm": 2.7334275245666504,
"learning_rate": 7.608093995604415e-07,
"loss": 4.9408,
"step": 102700
},
{
"epoch": 4.728511108298722,
"grad_norm": 2.373932123184204,
"learning_rate": 7.481822282920547e-07,
"loss": 4.9523,
"step": 102750
},
{
"epoch": 4.73081215412405,
"grad_norm": 2.570046901702881,
"learning_rate": 7.356599325653024e-07,
"loss": 4.9191,
"step": 102800
},
{
"epoch": 4.733113199949377,
"grad_norm": 2.2466907501220703,
"learning_rate": 7.232425390448272e-07,
"loss": 4.9136,
"step": 102850
},
{
"epoch": 4.735414245774704,
"grad_norm": 2.5075948238372803,
"learning_rate": 7.109300741719116e-07,
"loss": 4.9233,
"step": 102900
},
{
"epoch": 4.737715291600033,
"grad_norm": 3.1715610027313232,
"learning_rate": 6.987225641643891e-07,
"loss": 4.939,
"step": 102950
},
{
"epoch": 4.740016337425359,
"grad_norm": 2.431623697280884,
"learning_rate": 6.866200350166218e-07,
"loss": 4.9124,
"step": 103000
},
{
"epoch": 4.740016337425359,
"eval_loss": 5.6778388023376465,
"eval_runtime": 32.9075,
"eval_samples_per_second": 11.669,
"eval_steps_per_second": 5.835,
"eval_tts_loss": 9.333679660500833,
"step": 103000
},
{
"epoch": 4.742317383250687,
"grad_norm": 2.0969090461730957,
"learning_rate": 6.746225124994176e-07,
"loss": 4.9657,
"step": 103050
},
{
"epoch": 4.744618429076015,
"grad_norm": 2.663628101348877,
"learning_rate": 6.627300221599908e-07,
"loss": 4.9273,
"step": 103100
},
{
"epoch": 4.746919474901342,
"grad_norm": 3.7632904052734375,
"learning_rate": 6.509425893219012e-07,
"loss": 4.9684,
"step": 103150
},
{
"epoch": 4.74922052072667,
"grad_norm": 2.8351118564605713,
"learning_rate": 6.392602390849988e-07,
"loss": 4.9627,
"step": 103200
},
{
"epoch": 4.751521566551998,
"grad_norm": 2.918797254562378,
"learning_rate": 6.276829963253794e-07,
"loss": 4.9821,
"step": 103250
},
{
"epoch": 4.753822612377325,
"grad_norm": 3.037306308746338,
"learning_rate": 6.162108856953286e-07,
"loss": 4.9707,
"step": 103300
},
{
"epoch": 4.756123658202653,
"grad_norm": 2.749039888381958,
"learning_rate": 6.048439316232502e-07,
"loss": 4.9084,
"step": 103350
},
{
"epoch": 4.758424704027981,
"grad_norm": 2.600416898727417,
"learning_rate": 5.935821583136492e-07,
"loss": 4.9592,
"step": 103400
},
{
"epoch": 4.760725749853308,
"grad_norm": 2.7607455253601074,
"learning_rate": 5.824255897470487e-07,
"loss": 4.9454,
"step": 103450
},
{
"epoch": 4.763026795678636,
"grad_norm": 2.995832920074463,
"learning_rate": 5.713742496799623e-07,
"loss": 4.9448,
"step": 103500
},
{
"epoch": 4.7653278415039635,
"grad_norm": 2.9551029205322266,
"learning_rate": 5.60428161644816e-07,
"loss": 4.9486,
"step": 103550
},
{
"epoch": 4.767628887329291,
"grad_norm": 2.6423027515411377,
"learning_rate": 5.49587348949937e-07,
"loss": 4.9013,
"step": 103600
},
{
"epoch": 4.769929933154619,
"grad_norm": 2.4621798992156982,
"learning_rate": 5.388518346794602e-07,
"loss": 4.9932,
"step": 103650
},
{
"epoch": 4.772230978979946,
"grad_norm": 2.4298455715179443,
"learning_rate": 5.282216416933161e-07,
"loss": 4.9677,
"step": 103700
},
{
"epoch": 4.774532024805274,
"grad_norm": 2.3390984535217285,
"learning_rate": 5.176967926271647e-07,
"loss": 4.928,
"step": 103750
},
{
"epoch": 4.776833070630602,
"grad_norm": 2.9800028800964355,
"learning_rate": 5.072773098923401e-07,
"loss": 4.8998,
"step": 103800
},
{
"epoch": 4.779134116455929,
"grad_norm": 2.249694347381592,
"learning_rate": 4.969632156758219e-07,
"loss": 4.9175,
"step": 103850
},
{
"epoch": 4.781435162281257,
"grad_norm": 2.700409173965454,
"learning_rate": 4.867545319401701e-07,
"loss": 4.9035,
"step": 103900
},
{
"epoch": 4.783736208106585,
"grad_norm": 2.519153118133545,
"learning_rate": 4.7665128042349573e-07,
"loss": 4.9496,
"step": 103950
},
{
"epoch": 4.786037253931912,
"grad_norm": 3.556790828704834,
"learning_rate": 4.6665348263939536e-07,
"loss": 4.9067,
"step": 104000
},
{
"epoch": 4.786037253931912,
"eval_loss": 5.678002834320068,
"eval_runtime": 34.3512,
"eval_samples_per_second": 11.179,
"eval_steps_per_second": 5.589,
"eval_tts_loss": 9.335159319797523,
"step": 104000
},
{
"epoch": 4.78833829975724,
"grad_norm": 2.6343655586242676,
"learning_rate": 4.567611598769228e-07,
"loss": 4.9074,
"step": 104050
},
{
"epoch": 4.7906393455825675,
"grad_norm": 2.6444284915924072,
"learning_rate": 4.469743332005338e-07,
"loss": 4.9405,
"step": 104100
},
{
"epoch": 4.792940391407895,
"grad_norm": 2.464726686477661,
"learning_rate": 4.3729302345004166e-07,
"loss": 4.9708,
"step": 104150
},
{
"epoch": 4.795241437233223,
"grad_norm": 3.8831639289855957,
"learning_rate": 4.2771725124057806e-07,
"loss": 4.8792,
"step": 104200
},
{
"epoch": 4.7975424830585505,
"grad_norm": 2.8383708000183105,
"learning_rate": 4.184354066495755e-07,
"loss": 4.9004,
"step": 104250
},
{
"epoch": 4.799843528883878,
"grad_norm": 2.8688228130340576,
"learning_rate": 4.090686587104875e-07,
"loss": 4.9201,
"step": 104300
},
{
"epoch": 4.802144574709205,
"grad_norm": 2.409350633621216,
"learning_rate": 3.998075084126529e-07,
"loss": 4.8971,
"step": 104350
},
{
"epoch": 4.804445620534533,
"grad_norm": 2.6027088165283203,
"learning_rate": 3.906519754765248e-07,
"loss": 5.0035,
"step": 104400
},
{
"epoch": 4.80674666635986,
"grad_norm": 2.6355624198913574,
"learning_rate": 3.816020793976638e-07,
"loss": 4.919,
"step": 104450
},
{
"epoch": 4.809047712185188,
"grad_norm": 2.6161468029022217,
"learning_rate": 3.726578394466773e-07,
"loss": 4.9573,
"step": 104500
},
{
"epoch": 4.811348758010515,
"grad_norm": 2.807112455368042,
"learning_rate": 3.638192746692026e-07,
"loss": 4.968,
"step": 104550
},
{
"epoch": 4.813649803835843,
"grad_norm": 2.690364122390747,
"learning_rate": 3.550864038858459e-07,
"loss": 4.9339,
"step": 104600
},
{
"epoch": 4.815950849661171,
"grad_norm": 2.5228426456451416,
"learning_rate": 3.4645924569216e-07,
"loss": 4.8549,
"step": 104650
},
{
"epoch": 4.818251895486498,
"grad_norm": 2.660872459411621,
"learning_rate": 3.379378184585835e-07,
"loss": 4.9103,
"step": 104700
},
{
"epoch": 4.820552941311826,
"grad_norm": 2.904383659362793,
"learning_rate": 3.295221403304294e-07,
"loss": 4.9719,
"step": 104750
},
{
"epoch": 4.822853987137154,
"grad_norm": 2.389962673187256,
"learning_rate": 3.212122292278241e-07,
"loss": 4.9299,
"step": 104800
},
{
"epoch": 4.825155032962481,
"grad_norm": 2.7333154678344727,
"learning_rate": 3.130081028456688e-07,
"loss": 4.9329,
"step": 104850
},
{
"epoch": 4.827456078787809,
"grad_norm": 2.3956751823425293,
"learning_rate": 3.0490977865362815e-07,
"loss": 4.9459,
"step": 104900
},
{
"epoch": 4.8297571246131366,
"grad_norm": 2.398859977722168,
"learning_rate": 2.969172738960524e-07,
"loss": 4.9526,
"step": 104950
},
{
"epoch": 4.832058170438464,
"grad_norm": 2.804928779602051,
"learning_rate": 2.8903060559198335e-07,
"loss": 4.9288,
"step": 105000
},
{
"epoch": 4.832058170438464,
"eval_loss": 5.678267002105713,
"eval_runtime": 34.178,
"eval_samples_per_second": 11.235,
"eval_steps_per_second": 5.618,
"eval_tts_loss": 9.336976003650609,
"step": 105000
},
{
"epoch": 4.834359216263792,
"grad_norm": 2.7038073539733887,
"learning_rate": 2.812497905350819e-07,
"loss": 4.8927,
"step": 105050
},
{
"epoch": 4.8366602620891195,
"grad_norm": 2.4471280574798584,
"learning_rate": 2.735748452936171e-07,
"loss": 4.9623,
"step": 105100
},
{
"epoch": 4.838961307914447,
"grad_norm": 2.022722005844116,
"learning_rate": 2.660057862104104e-07,
"loss": 4.9395,
"step": 105150
},
{
"epoch": 4.841262353739775,
"grad_norm": 2.6244399547576904,
"learning_rate": 2.585426294028304e-07,
"loss": 4.9201,
"step": 105200
},
{
"epoch": 4.843563399565102,
"grad_norm": 2.5239951610565186,
"learning_rate": 2.511853907627204e-07,
"loss": 4.9401,
"step": 105250
},
{
"epoch": 4.84586444539043,
"grad_norm": 1.9543540477752686,
"learning_rate": 2.4393408595639875e-07,
"loss": 4.9751,
"step": 105300
},
{
"epoch": 4.848165491215758,
"grad_norm": 2.3956873416900635,
"learning_rate": 2.3678873042460302e-07,
"loss": 4.8881,
"step": 105350
},
{
"epoch": 4.850466537041085,
"grad_norm": 2.3740742206573486,
"learning_rate": 2.297493393824679e-07,
"loss": 4.8846,
"step": 105400
},
{
"epoch": 4.852767582866413,
"grad_norm": 2.7427990436553955,
"learning_rate": 2.228159278194919e-07,
"loss": 4.9626,
"step": 105450
},
{
"epoch": 4.855068628691741,
"grad_norm": 2.6279170513153076,
"learning_rate": 2.1598851049949854e-07,
"loss": 4.947,
"step": 105500
},
{
"epoch": 4.857369674517068,
"grad_norm": 2.6171083450317383,
"learning_rate": 2.0926710196061404e-07,
"loss": 4.9544,
"step": 105550
},
{
"epoch": 4.859670720342396,
"grad_norm": 2.7386815547943115,
"learning_rate": 2.026517165152342e-07,
"loss": 4.9288,
"step": 105600
},
{
"epoch": 4.8619717661677235,
"grad_norm": 2.7789573669433594,
"learning_rate": 1.9614236824998523e-07,
"loss": 4.9173,
"step": 105650
},
{
"epoch": 4.864272811993051,
"grad_norm": 2.6970458030700684,
"learning_rate": 1.8973907102570189e-07,
"loss": 4.9651,
"step": 105700
},
{
"epoch": 4.866573857818379,
"grad_norm": 2.866112232208252,
"learning_rate": 1.8344183847740503e-07,
"loss": 4.922,
"step": 105750
},
{
"epoch": 4.868874903643706,
"grad_norm": 2.490802764892578,
"learning_rate": 1.7725068401425739e-07,
"loss": 4.8887,
"step": 105800
},
{
"epoch": 4.871175949469034,
"grad_norm": 2.6515631675720215,
"learning_rate": 1.7116562081954113e-07,
"loss": 4.8661,
"step": 105850
},
{
"epoch": 4.873476995294361,
"grad_norm": 2.1467347145080566,
"learning_rate": 1.6518666185063037e-07,
"loss": 4.957,
"step": 105900
},
{
"epoch": 4.875778041119689,
"grad_norm": 3.2990024089813232,
"learning_rate": 1.5931381983896877e-07,
"loss": 4.918,
"step": 105950
},
{
"epoch": 4.878079086945016,
"grad_norm": 2.501412868499756,
"learning_rate": 1.535471072900252e-07,
"loss": 4.8902,
"step": 106000
},
{
"epoch": 4.878079086945016,
"eval_loss": 5.677829742431641,
"eval_runtime": 32.6971,
"eval_samples_per_second": 11.744,
"eval_steps_per_second": 5.872,
"eval_tts_loss": 9.334307465951118,
"step": 106000
},
{
"epoch": 4.880380132770344,
"grad_norm": 3.173262119293213,
"learning_rate": 1.478865364832993e-07,
"loss": 4.9177,
"step": 106050
},
{
"epoch": 4.882681178595671,
"grad_norm": 2.9706802368164062,
"learning_rate": 1.4233211947224934e-07,
"loss": 4.99,
"step": 106100
},
{
"epoch": 4.884982224420999,
"grad_norm": 3.015517473220825,
"learning_rate": 1.3688386808431985e-07,
"loss": 4.859,
"step": 106150
},
{
"epoch": 4.887283270246327,
"grad_norm": 3.1198346614837646,
"learning_rate": 1.3154179392086962e-07,
"loss": 4.8891,
"step": 106200
},
{
"epoch": 4.889584316071654,
"grad_norm": 3.3529605865478516,
"learning_rate": 1.2630590835717715e-07,
"loss": 4.958,
"step": 106250
},
{
"epoch": 4.891885361896982,
"grad_norm": 3.0760412216186523,
"learning_rate": 1.211762225423907e-07,
"loss": 4.9196,
"step": 106300
},
{
"epoch": 4.89418640772231,
"grad_norm": 2.513789653778076,
"learning_rate": 1.1615274739954495e-07,
"loss": 4.8735,
"step": 106350
},
{
"epoch": 4.896487453547637,
"grad_norm": 2.1810100078582764,
"learning_rate": 1.1123549362548336e-07,
"loss": 4.848,
"step": 106400
},
{
"epoch": 4.898788499372965,
"grad_norm": 2.3476059436798096,
"learning_rate": 1.0642447169089132e-07,
"loss": 4.9512,
"step": 106450
},
{
"epoch": 4.9010895451982925,
"grad_norm": 2.690737009048462,
"learning_rate": 1.0171969184021857e-07,
"loss": 4.9523,
"step": 106500
},
{
"epoch": 4.90339059102362,
"grad_norm": 2.9952900409698486,
"learning_rate": 9.712116409171801e-08,
"loss": 4.87,
"step": 106550
},
{
"epoch": 4.905691636848948,
"grad_norm": 2.496338367462158,
"learning_rate": 9.262889823736243e-08,
"loss": 4.8978,
"step": 106600
},
{
"epoch": 4.9079926826742755,
"grad_norm": 2.607712984085083,
"learning_rate": 8.824290384287227e-08,
"loss": 4.9671,
"step": 106650
},
{
"epoch": 4.910293728499603,
"grad_norm": 2.359705686569214,
"learning_rate": 8.396319024766564e-08,
"loss": 4.8706,
"step": 106700
},
{
"epoch": 4.912594774324931,
"grad_norm": 2.2572617530822754,
"learning_rate": 7.978976656485837e-08,
"loss": 4.9829,
"step": 106750
},
{
"epoch": 4.914895820150258,
"grad_norm": 2.9874086380004883,
"learning_rate": 7.572264168123067e-08,
"loss": 4.9902,
"step": 106800
},
{
"epoch": 4.917196865975586,
"grad_norm": 2.823206901550293,
"learning_rate": 7.17618242572049e-08,
"loss": 4.944,
"step": 106850
},
{
"epoch": 4.919497911800914,
"grad_norm": 2.656986951828003,
"learning_rate": 6.790732272685119e-08,
"loss": 4.9266,
"step": 106900
},
{
"epoch": 4.921798957626241,
"grad_norm": 2.7516186237335205,
"learning_rate": 6.415914529784295e-08,
"loss": 4.9177,
"step": 106950
},
{
"epoch": 4.924100003451569,
"grad_norm": 3.0680367946624756,
"learning_rate": 6.05172999514514e-08,
"loss": 4.9755,
"step": 107000
},
{
"epoch": 4.924100003451569,
"eval_loss": 5.677335262298584,
"eval_runtime": 34.2155,
"eval_samples_per_second": 11.223,
"eval_steps_per_second": 5.611,
"eval_tts_loss": 9.332648532018462,
"step": 107000
},
{
"epoch": 4.926401049276897,
"grad_norm": 2.8450987339019775,
"learning_rate": 5.6981794442528824e-08,
"loss": 4.9353,
"step": 107050
},
{
"epoch": 4.928702095102224,
"grad_norm": 2.833491086959839,
"learning_rate": 5.355263629948648e-08,
"loss": 4.9363,
"step": 107100
},
{
"epoch": 4.931003140927552,
"grad_norm": 2.4455604553222656,
"learning_rate": 5.0229832824283396e-08,
"loss": 4.9182,
"step": 107150
},
{
"epoch": 4.9333041867528795,
"grad_norm": 2.173336982727051,
"learning_rate": 4.701339109240977e-08,
"loss": 4.951,
"step": 107200
},
{
"epoch": 4.935605232578207,
"grad_norm": 2.993123769760132,
"learning_rate": 4.3903317952881385e-08,
"loss": 4.8801,
"step": 107250
},
{
"epoch": 4.937906278403535,
"grad_norm": 2.1944525241851807,
"learning_rate": 4.0899620028200804e-08,
"loss": 4.921,
"step": 107300
},
{
"epoch": 4.9402073242288616,
"grad_norm": 2.7427523136138916,
"learning_rate": 3.8002303714362864e-08,
"loss": 4.9377,
"step": 107350
},
{
"epoch": 4.94250837005419,
"grad_norm": 2.353147506713867,
"learning_rate": 3.5211375180843606e-08,
"loss": 4.9212,
"step": 107400
},
{
"epoch": 4.944809415879517,
"grad_norm": 2.595211982727051,
"learning_rate": 3.252684037056697e-08,
"loss": 4.9031,
"step": 107450
},
{
"epoch": 4.9471104617048445,
"grad_norm": 2.404395341873169,
"learning_rate": 2.994870499992142e-08,
"loss": 4.9661,
"step": 107500
},
{
"epoch": 4.949411507530172,
"grad_norm": 2.551229476928711,
"learning_rate": 2.7476974558710013e-08,
"loss": 4.918,
"step": 107550
},
{
"epoch": 4.9517125533555,
"grad_norm": 2.5151207447052,
"learning_rate": 2.5111654310178144e-08,
"loss": 4.9241,
"step": 107600
},
{
"epoch": 4.954013599180827,
"grad_norm": 2.604194402694702,
"learning_rate": 2.2852749290974696e-08,
"loss": 4.9312,
"step": 107650
},
{
"epoch": 4.956314645006155,
"grad_norm": 3.0182950496673584,
"learning_rate": 2.0700264311157568e-08,
"loss": 4.8306,
"step": 107700
},
{
"epoch": 4.958615690831483,
"grad_norm": 2.690030336380005,
"learning_rate": 1.865420395416595e-08,
"loss": 4.8744,
"step": 107750
},
{
"epoch": 4.96091673665681,
"grad_norm": 2.436689853668213,
"learning_rate": 1.67145725768314e-08,
"loss": 4.8807,
"step": 107800
},
{
"epoch": 4.963217782482138,
"grad_norm": 2.4815070629119873,
"learning_rate": 1.4881374309355656e-08,
"loss": 4.8777,
"step": 107850
},
{
"epoch": 4.965518828307466,
"grad_norm": 2.296535015106201,
"learning_rate": 1.3154613055299525e-08,
"loss": 4.8823,
"step": 107900
},
{
"epoch": 4.967819874132793,
"grad_norm": 2.163233518600464,
"learning_rate": 1.1534292491588438e-08,
"loss": 4.994,
"step": 107950
},
{
"epoch": 4.970120919958121,
"grad_norm": 2.8297841548919678,
"learning_rate": 1.0020416068490245e-08,
"loss": 4.9759,
"step": 108000
},
{
"epoch": 4.970120919958121,
"eval_loss": 5.677609920501709,
"eval_runtime": 33.7841,
"eval_samples_per_second": 11.366,
"eval_steps_per_second": 5.683,
"eval_tts_loss": 9.33482673060466,
"step": 108000
},
{
"epoch": 4.9724219657834485,
"grad_norm": 2.1278953552246094,
"learning_rate": 8.612987009609663e-09,
"loss": 4.9734,
"step": 108050
},
{
"epoch": 4.974723011608776,
"grad_norm": 2.8732552528381348,
"learning_rate": 7.312008311888274e-09,
"loss": 4.986,
"step": 108100
},
{
"epoch": 4.977024057434104,
"grad_norm": 2.6053032875061035,
"learning_rate": 6.117482745610082e-09,
"loss": 4.9327,
"step": 108150
},
{
"epoch": 4.9793251032594315,
"grad_norm": 2.6100637912750244,
"learning_rate": 5.029412854351545e-09,
"loss": 4.9559,
"step": 108200
},
{
"epoch": 4.981626149084759,
"grad_norm": 2.834904193878174,
"learning_rate": 4.066389890794442e-09,
"loss": 4.8548,
"step": 108250
},
{
"epoch": 4.983927194910087,
"grad_norm": 2.3319642543792725,
"learning_rate": 3.189108852957867e-09,
"loss": 4.9586,
"step": 108300
},
{
"epoch": 4.986228240735414,
"grad_norm": 2.7623283863067627,
"learning_rate": 2.4182897257529757e-09,
"loss": 4.9031,
"step": 108350
},
{
"epoch": 4.988529286560742,
"grad_norm": 2.888908624649048,
"learning_rate": 1.7539341505390383e-09,
"loss": 4.926,
"step": 108400
},
{
"epoch": 4.99083033238607,
"grad_norm": 2.4926486015319824,
"learning_rate": 1.196043541973335e-09,
"loss": 4.8925,
"step": 108450
},
{
"epoch": 4.993131378211397,
"grad_norm": 2.533823251724243,
"learning_rate": 7.446190880222581e-10,
"loss": 4.9152,
"step": 108500
},
{
"epoch": 4.995432424036725,
"grad_norm": 2.3294498920440674,
"learning_rate": 3.996617499280042e-10,
"loss": 4.9371,
"step": 108550
},
{
"epoch": 4.997733469862053,
"grad_norm": 3.2538695335388184,
"learning_rate": 1.6117226224188208e-10,
"loss": 4.9338,
"step": 108600
}
],
"logging_steps": 50,
"max_steps": 108645,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 5000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.757344604245459e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}