{ "train_log": [ { "step": 1, "loss": 11.233720779418945, "lm_loss": 11.233201026916504, "ppl": 75599.20353921076, "gate_mean": 0.5205078125, "lr": 1.8000000000000002e-07, "steps_per_second": 0.1909228282952979 }, { "step": 10, "loss": 10.273698806762695, "lm_loss": 10.273258209228516, "ppl": 28948.0524897914, "gate_mean": 0.4423828125, "lr": 1.8000000000000001e-06, "steps_per_second": 1.4129946263942752 }, { "step": 20, "loss": 9.363899230957031, "lm_loss": 9.363483428955078, "ppl": 11654.916987385875, "gate_mean": 0.416259765625, "lr": 3.6000000000000003e-06, "steps_per_second": 2.1864083493060154 }, { "step": 30, "loss": 8.880487442016602, "lm_loss": 8.880151748657227, "ppl": 7187.88140439652, "gate_mean": 0.3345947265625, "lr": 5.4e-06, "steps_per_second": 2.705438685768979 }, { "step": 40, "loss": 8.729056358337402, "lm_loss": 8.728835105895996, "ppl": 6178.526588316003, "gate_mean": 0.22198486328125, "lr": 7.2000000000000005e-06, "steps_per_second": 3.042588877017156 }, { "step": 50, "loss": 8.576288223266602, "lm_loss": 8.576188087463379, "ppl": 5303.849133035767, "gate_mean": 0.100006103515625, "lr": 9e-06, "steps_per_second": 3.311641103371472 }, { "step": 60, "loss": 8.3945951461792, "lm_loss": 8.394566535949707, "ppl": 4422.969296000518, "gate_mean": 0.02899169921875, "lr": 1.08e-05, "steps_per_second": 3.510756786542193 }, { "step": 70, "loss": 8.277499198913574, "lm_loss": 8.277486801147461, "ppl": 3934.294282855093, "gate_mean": 0.01195526123046875, "lr": 1.26e-05, "steps_per_second": 3.663828617825931 }, { "step": 80, "loss": 8.030324935913086, "lm_loss": 8.030316352844238, "ppl": 3072.7135812120177, "gate_mean": 0.007982254028320312, "lr": 1.4400000000000001e-05, "steps_per_second": 3.8058175608419376 }, { "step": 90, "loss": 7.84493350982666, "lm_loss": 7.8449273109436035, "ppl": 2552.752099658049, "gate_mean": 0.0061931610107421875, "lr": 1.6200000000000004e-05, "steps_per_second": 3.918434011377292 }, { "step": 100, "loss": 7.645411968231201, "lm_loss": 7.645407199859619, "ppl": 2091.019865495556, "gate_mean": 0.0048370361328125, "lr": 1.8e-05, "steps_per_second": 4.023377679271962 }, { "step": 110, "loss": 7.52968692779541, "lm_loss": 7.529682636260986, "ppl": 1862.5143152435571, "gate_mean": 0.004425048828125, "lr": 1.98e-05, "steps_per_second": 4.108769796812219 }, { "step": 120, "loss": 7.445955276489258, "lm_loss": 7.445951461791992, "ppl": 1712.9142895648097, "gate_mean": 0.0033674240112304688, "lr": 2.16e-05, "steps_per_second": 4.178962922636927 }, { "step": 130, "loss": 7.372180461883545, "lm_loss": 7.372176647186279, "ppl": 1591.0932653214777, "gate_mean": 0.0033359527587890625, "lr": 2.34e-05, "steps_per_second": 4.224253044244577 }, { "step": 140, "loss": 7.301083564758301, "lm_loss": 7.301080703735352, "ppl": 1481.9005579935733, "gate_mean": 0.0030231475830078125, "lr": 2.52e-05, "steps_per_second": 4.281567951471963 }, { "step": 150, "loss": 7.245668411254883, "lm_loss": 7.245664596557617, "ppl": 1402.0133596698925, "gate_mean": 0.00354766845703125, "lr": 2.7000000000000002e-05, "steps_per_second": 4.332507374834405 }, { "step": 160, "loss": 7.255303382873535, "lm_loss": 7.255300521850586, "ppl": 1415.5883544436072, "gate_mean": 0.0019216537475585938, "lr": 2.8800000000000002e-05, "steps_per_second": 4.378480428092798 }, { "step": 170, "loss": 7.168065071105957, "lm_loss": 7.168062686920166, "ppl": 1297.3288345554747, "gate_mean": 0.0025844573974609375, "lr": 3.0600000000000005e-05, "steps_per_second": 4.416959416605673 }, { "step": 180, "loss": 6.88969087600708, "lm_loss": 6.889688968658447, "ppl": 982.0959071015918, "gate_mean": 0.00168609619140625, "lr": 3.240000000000001e-05, "steps_per_second": 4.451087720428561 }, { "step": 190, "loss": 6.992722988128662, "lm_loss": 6.992722034454346, "ppl": 1088.6808734152355, "gate_mean": 0.0016126632690429688, "lr": 3.4200000000000005e-05, "steps_per_second": 4.487236262358404 }, { "step": 200, "loss": 7.0246195793151855, "lm_loss": 7.024619102478027, "ppl": 1123.9663614587846, "gate_mean": 0.0009412765502929688, "lr": 3.6e-05, "steps_per_second": 4.5211997152562065 }, { "step": 210, "loss": 6.902554988861084, "lm_loss": 6.902554035186768, "ppl": 994.8122592521157, "gate_mean": 0.0012059211730957031, "lr": 3.78e-05, "steps_per_second": 4.550802983825748 }, { "step": 220, "loss": 6.8810553550720215, "lm_loss": 6.881054878234863, "ppl": 973.6529035136056, "gate_mean": 0.0010366439819335938, "lr": 3.96e-05, "steps_per_second": 4.57299282947344 }, { "step": 230, "loss": 6.8949666023254395, "lm_loss": 6.894965171813965, "ppl": 987.2913386491568, "gate_mean": 0.0012125968933105469, "lr": 4.14e-05, "steps_per_second": 4.595653554377861 }, { "step": 240, "loss": 7.0583696365356445, "lm_loss": 7.058369159698486, "ppl": 1162.5476893286382, "gate_mean": 0.0009522438049316406, "lr": 4.32e-05, "steps_per_second": 4.618841973189477 }, { "step": 250, "loss": 6.820333003997803, "lm_loss": 6.8203325271606445, "ppl": 916.289650623565, "gate_mean": 0.0007138252258300781, "lr": 4.5e-05, "steps_per_second": 4.637938759778652 }, { "step": 260, "loss": 6.769063949584961, "lm_loss": 6.769063472747803, "ppl": 870.4962686441833, "gate_mean": 0.0007138252258300781, "lr": 4.68e-05, "steps_per_second": 4.655918336319353 }, { "step": 270, "loss": 6.780544281005859, "lm_loss": 6.780542850494385, "ppl": 880.5465995455008, "gate_mean": 0.0008058547973632812, "lr": 4.86e-05, "steps_per_second": 4.674539741272961 }, { "step": 280, "loss": 6.780987739562988, "lm_loss": 6.780986785888672, "ppl": 880.9375921286602, "gate_mean": 0.0008051395416259766, "lr": 5.04e-05, "steps_per_second": 4.69256362327752 }, { "step": 290, "loss": 6.698133945465088, "lm_loss": 6.6981329917907715, "ppl": 810.8904718514455, "gate_mean": 0.0008447170257568359, "lr": 5.22e-05, "steps_per_second": 4.705744024732873 }, { "step": 300, "loss": 6.715235233306885, "lm_loss": 6.715234756469727, "ppl": 824.8773894734313, "gate_mean": 0.0006525516510009766, "lr": 5.4000000000000005e-05, "steps_per_second": 4.72349925518496 }, { "step": 310, "loss": 6.792382717132568, "lm_loss": 6.792381763458252, "ppl": 891.0332669858354, "gate_mean": 0.0006488561630249023, "lr": 5.58e-05, "steps_per_second": 4.7310606053025595 }, { "step": 320, "loss": 6.6053266525268555, "lm_loss": 6.605326175689697, "ppl": 739.020880548951, "gate_mean": 0.0004658699035644531, "lr": 5.7600000000000004e-05, "steps_per_second": 4.7422458648211 }, { "step": 330, "loss": 6.520622730255127, "lm_loss": 6.520622253417969, "ppl": 679.0007644588765, "gate_mean": 0.0007431507110595703, "lr": 5.94e-05, "steps_per_second": 4.753137228842241 }, { "step": 340, "loss": 6.6657938957214355, "lm_loss": 6.665793418884277, "ppl": 785.0861200886151, "gate_mean": 0.00027751922607421875, "lr": 6.120000000000001e-05, "steps_per_second": 4.765112542914431 }, { "step": 350, "loss": 6.475612640380859, "lm_loss": 6.475612163543701, "ppl": 649.1164714488118, "gate_mean": 0.0004177093505859375, "lr": 6.3e-05, "steps_per_second": 4.770983457512478 }, { "step": 360, "loss": 6.4616193771362305, "lm_loss": 6.461618900299072, "ppl": 640.0964705950587, "gate_mean": 0.00026786327362060547, "lr": 6.480000000000002e-05, "steps_per_second": 4.780779437709777 }, { "step": 370, "loss": 6.582224369049072, "lm_loss": 6.582221984863281, "ppl": 722.1421367797358, "gate_mean": 0.0005065202713012695, "lr": 6.66e-05, "steps_per_second": 4.790715917387122 }, { "step": 380, "loss": 6.633972644805908, "lm_loss": 6.63397216796875, "ppl": 760.4970007987946, "gate_mean": 0.0003714561462402344, "lr": 6.840000000000001e-05, "steps_per_second": 4.801153099335523 }, { "step": 390, "loss": 6.3759894371032715, "lm_loss": 6.3759894371032715, "ppl": 587.5665035261446, "gate_mean": 0.00031125545501708984, "lr": 7.02e-05, "steps_per_second": 4.805492681383727 }, { "step": 400, "loss": 6.543242454528809, "lm_loss": 6.543242454528809, "ppl": 694.5349289045686, "gate_mean": 0.00015485286712646484, "lr": 7.2e-05, "steps_per_second": 4.809572286175367 }, { "step": 410, "loss": 6.600608825683594, "lm_loss": 6.600606918334961, "ppl": 735.541467403766, "gate_mean": 0.00022661685943603516, "lr": 7.38e-05, "steps_per_second": 4.8183139031724265 }, { "step": 420, "loss": 6.626696586608887, "lm_loss": 6.626696586608887, "ppl": 754.9840223757608, "gate_mean": 0.00022941827774047852, "lr": 7.56e-05, "steps_per_second": 4.826733608990607 }, { "step": 430, "loss": 6.440532207489014, "lm_loss": 6.4405317306518555, "ppl": 626.739968077519, "gate_mean": 0.0002549886703491211, "lr": 7.740000000000001e-05, "steps_per_second": 4.833983184928327 }, { "step": 440, "loss": 6.407415390014648, "lm_loss": 6.407415390014648, "ppl": 606.3245416635633, "gate_mean": 0.00018483400344848633, "lr": 7.92e-05, "steps_per_second": 4.839980193753232 }, { "step": 450, "loss": 6.468347549438477, "lm_loss": 6.468347549438477, "ppl": 644.4179778093901, "gate_mean": 0.0001609325408935547, "lr": 8.1e-05, "steps_per_second": 4.846769334667692 }, { "step": 460, "loss": 6.490776538848877, "lm_loss": 6.490776062011719, "ppl": 659.0346165377227, "gate_mean": 0.0002632737159729004, "lr": 8.28e-05, "steps_per_second": 4.8538372299972945 }, { "step": 470, "loss": 6.353217124938965, "lm_loss": 6.353217124938965, "ppl": 574.3374553839144, "gate_mean": 0.00018274784088134766, "lr": 8.460000000000001e-05, "steps_per_second": 4.858504303457794 }, { "step": 480, "loss": 6.243738651275635, "lm_loss": 6.243738651275635, "ppl": 514.7794987832533, "gate_mean": 0.00013846158981323242, "lr": 8.64e-05, "steps_per_second": 4.864264535577768 }, { "step": 490, "loss": 6.2320098876953125, "lm_loss": 6.2320098876953125, "ppl": 508.7770412585936, "gate_mean": 0.00014024972915649414, "lr": 8.82e-05, "steps_per_second": 4.87004485788904 }, { "step": 500, "loss": 6.340229034423828, "lm_loss": 6.340229034423828, "ppl": 566.9261421154872, "gate_mean": 0.0001201629638671875, "lr": 9e-05, "steps_per_second": 4.87633681845079 }, { "step": 510, "loss": 6.318823337554932, "lm_loss": 6.318823337554932, "ppl": 554.9196550298757, "gate_mean": 0.00011736154556274414, "lr": 9.180000000000001e-05, "steps_per_second": 4.880745725828352 }, { "step": 520, "loss": 6.190738677978516, "lm_loss": 6.190738677978516, "ppl": 488.2066005182155, "gate_mean": 8.845329284667969e-05, "lr": 9.36e-05, "steps_per_second": 4.886251693285725 }, { "step": 530, "loss": 6.247452259063721, "lm_loss": 6.247452259063721, "ppl": 516.6947419690131, "gate_mean": 5.8144330978393555e-05, "lr": 9.540000000000001e-05, "steps_per_second": 4.892139403856039 }, { "step": 540, "loss": 6.3264312744140625, "lm_loss": 6.3264312744140625, "ppl": 559.1575491014855, "gate_mean": 0.00013211369514465332, "lr": 9.72e-05, "steps_per_second": 4.896324467290218 }, { "step": 550, "loss": 6.254067420959473, "lm_loss": 6.254067420959473, "ppl": 520.1240916838111, "gate_mean": 8.024275302886963e-05, "lr": 9.900000000000001e-05, "steps_per_second": 4.901188487226059 }, { "step": 560, "loss": 6.135735511779785, "lm_loss": 6.135735511779785, "ppl": 462.0788334876185, "gate_mean": 5.066394805908203e-05, "lr": 0.0001008, "steps_per_second": 4.903215717882523 }, { "step": 570, "loss": 6.264244556427002, "lm_loss": 6.264244556427002, "ppl": 525.4444923226006, "gate_mean": 9.036064147949219e-05, "lr": 0.00010260000000000001, "steps_per_second": 4.9056304100595804 }, { "step": 580, "loss": 6.222881317138672, "lm_loss": 6.222881317138672, "ppl": 504.1537681821487, "gate_mean": 9.129941463470459e-05, "lr": 0.0001044, "steps_per_second": 4.910017362256976 }, { "step": 590, "loss": 6.1265692710876465, "lm_loss": 6.1265692710876465, "ppl": 457.8626604298379, "gate_mean": 0.00010713934898376465, "lr": 0.0001062, "steps_per_second": 4.912867259773288 }, { "step": 600, "loss": 6.231856346130371, "lm_loss": 6.231856346130371, "ppl": 508.69892883237804, "gate_mean": 4.604458808898926e-05, "lr": 0.00010800000000000001, "steps_per_second": 4.917047551806766 }, { "step": 610, "loss": 6.15090799331665, "lm_loss": 6.15090799331665, "ppl": 469.1431723129678, "gate_mean": 5.297362804412842e-05, "lr": 0.00010980000000000001, "steps_per_second": 4.920405184347364 }, { "step": 620, "loss": 6.14816427230835, "lm_loss": 6.14816427230835, "ppl": 467.8577385773603, "gate_mean": 3.819167613983154e-05, "lr": 0.0001116, "steps_per_second": 4.924744951270379 }, { "step": 630, "loss": 6.250295162200928, "lm_loss": 6.250295162200928, "ppl": 518.1657450408266, "gate_mean": 4.941225051879883e-05, "lr": 0.0001134, "steps_per_second": 4.926561479195798 }, { "step": 640, "loss": 6.061864852905273, "lm_loss": 6.061864852905273, "ppl": 429.1750393760701, "gate_mean": 3.866851329803467e-05, "lr": 0.00011520000000000001, "steps_per_second": 4.930282071090148 }, { "step": 650, "loss": 5.999403953552246, "lm_loss": 5.999403953552246, "ppl": 403.18840284256567, "gate_mean": 3.1463801860809326e-05, "lr": 0.00011700000000000001, "steps_per_second": 4.933352641639311 }, { "step": 660, "loss": 5.9969940185546875, "lm_loss": 5.9969940185546875, "ppl": 402.21791487610204, "gate_mean": 3.626197576522827e-05, "lr": 0.0001188, "steps_per_second": 4.93724111722979 }, { "step": 670, "loss": 5.972357749938965, "lm_loss": 5.972357749938965, "ppl": 392.4298322639633, "gate_mean": 3.121793270111084e-05, "lr": 0.00012060000000000002, "steps_per_second": 4.9405679383021415 }, { "step": 680, "loss": 5.946016311645508, "lm_loss": 5.946016311645508, "ppl": 382.227626352996, "gate_mean": 1.457706093788147e-05, "lr": 0.00012240000000000002, "steps_per_second": 4.944847239529045 }, { "step": 690, "loss": 6.094211578369141, "lm_loss": 6.094211578369141, "ppl": 443.28441234687716, "gate_mean": 1.8831342458724976e-05, "lr": 0.0001242, "steps_per_second": 4.947100742264488 }, { "step": 700, "loss": 5.899153709411621, "lm_loss": 5.899153709411621, "ppl": 364.728670776334, "gate_mean": 1.9885599613189697e-05, "lr": 0.000126, "steps_per_second": 4.950276117897427 }, { "step": 710, "loss": 5.904991149902344, "lm_loss": 5.904991149902344, "ppl": 366.86397899105884, "gate_mean": 3.9599835872650146e-05, "lr": 0.0001278, "steps_per_second": 4.953562070564359 }, { "step": 720, "loss": 5.916191101074219, "lm_loss": 5.916191101074219, "ppl": 370.99593329346544, "gate_mean": 1.1980533599853516e-05, "lr": 0.00012960000000000003, "steps_per_second": 4.9560163519552445 }, { "step": 730, "loss": 5.946123123168945, "lm_loss": 5.946123123168945, "ppl": 382.2684548485048, "gate_mean": 1.588091254234314e-05, "lr": 0.00013140000000000002, "steps_per_second": 4.958446951854931 }, { "step": 740, "loss": 5.8896484375, "lm_loss": 5.8896484375, "ppl": 361.2782501585402, "gate_mean": 1.4740973711013794e-05, "lr": 0.0001332, "steps_per_second": 4.961046612450754 }, { "step": 750, "loss": 6.007106781005859, "lm_loss": 6.007106781005859, "ppl": 406.3060856119231, "gate_mean": 2.481788396835327e-05, "lr": 0.000135, "steps_per_second": 4.961530250064908 }, { "step": 760, "loss": 5.971518516540527, "lm_loss": 5.971518516540527, "ppl": 392.1006302001893, "gate_mean": 1.0406598448753357e-05, "lr": 0.00013680000000000002, "steps_per_second": 4.964229958657293 }, { "step": 770, "loss": 6.0307841300964355, "lm_loss": 6.0307841300964355, "ppl": 416.04113188474946, "gate_mean": 1.0922551155090332e-05, "lr": 0.0001386, "steps_per_second": 4.962763180733292 }, { "step": 780, "loss": 5.97560977935791, "lm_loss": 5.97560977935791, "ppl": 393.7081029838007, "gate_mean": 9.417533874511719e-06, "lr": 0.0001404, "steps_per_second": 4.965057524753104 }, { "step": 790, "loss": 5.9480881690979, "lm_loss": 5.9480881690979, "ppl": 383.0203684498999, "gate_mean": 1.0721385478973389e-05, "lr": 0.00014220000000000001, "steps_per_second": 4.967158367700966 }, { "step": 800, "loss": 6.000330924987793, "lm_loss": 6.000330924987793, "ppl": 403.56232025374936, "gate_mean": 6.203353404998779e-05, "lr": 0.000144, "steps_per_second": 4.9694996847842825 }, { "step": 810, "loss": 6.088346481323242, "lm_loss": 6.088346481323242, "ppl": 440.6921157162923, "gate_mean": 9.085983037948608e-06, "lr": 0.00014580000000000002, "steps_per_second": 4.969279725129938 }, { "step": 820, "loss": 5.9758100509643555, "lm_loss": 5.9758100509643555, "ppl": 393.7869594341462, "gate_mean": 7.3052942752838135e-06, "lr": 0.0001476, "steps_per_second": 4.968076433588739 }, { "step": 830, "loss": 5.83988094329834, "lm_loss": 5.83988094329834, "ppl": 343.7384138669212, "gate_mean": 7.733702659606934e-06, "lr": 0.0001494, "steps_per_second": 4.970148576030047 }, { "step": 840, "loss": 5.914444446563721, "lm_loss": 5.914444446563721, "ppl": 370.3484971614221, "gate_mean": 7.834285497665405e-06, "lr": 0.0001512, "steps_per_second": 4.971735857087139 }, { "step": 850, "loss": 5.84937047958374, "lm_loss": 5.84937047958374, "ppl": 347.0158581363505, "gate_mean": 5.418434739112854e-06, "lr": 0.000153, "steps_per_second": 4.974128391617983 }, { "step": 860, "loss": 5.876492023468018, "lm_loss": 5.876492023468018, "ppl": 356.5562542961586, "gate_mean": 5.5246055126190186e-06, "lr": 0.00015480000000000002, "steps_per_second": 4.9756370170859014 }, { "step": 870, "loss": 5.9336957931518555, "lm_loss": 5.9336957931518555, "ppl": 377.5472751946416, "gate_mean": 5.018897354602814e-06, "lr": 0.0001566, "steps_per_second": 4.977257049643528 }, { "step": 880, "loss": 5.712811470031738, "lm_loss": 5.712811470031738, "ppl": 302.7209639129396, "gate_mean": 4.859641194343567e-06, "lr": 0.0001584, "steps_per_second": 4.979398508805002 }, { "step": 890, "loss": 5.6356706619262695, "lm_loss": 5.6356706619262695, "ppl": 280.2468051609242, "gate_mean": 3.0812807381153107e-06, "lr": 0.00016020000000000002, "steps_per_second": 4.977052998955405 }, { "step": 900, "loss": 5.7885565757751465, "lm_loss": 5.7885565757751465, "ppl": 326.54134635238097, "gate_mean": 5.493871867656708e-06, "lr": 0.000162, "steps_per_second": 4.978690602810935 }, { "step": 910, "loss": 5.815799713134766, "lm_loss": 5.815799713134766, "ppl": 335.559642681363, "gate_mean": 2.983957529067993e-06, "lr": 0.0001638, "steps_per_second": 4.979821899993001 }, { "step": 920, "loss": 5.698291778564453, "lm_loss": 5.698291778564453, "ppl": 298.3573050687551, "gate_mean": 3.497116267681122e-06, "lr": 0.0001656, "steps_per_second": 4.978450025958899 }, { "step": 930, "loss": 5.755545139312744, "lm_loss": 5.755545139312744, "ppl": 315.93773066540643, "gate_mean": 2.1886080503463745e-06, "lr": 0.00016740000000000003, "steps_per_second": 4.978128499783527 }, { "step": 940, "loss": 5.667996406555176, "lm_loss": 5.667996406555176, "ppl": 289.4540048666361, "gate_mean": 3.1525269150733948e-06, "lr": 0.00016920000000000002, "steps_per_second": 4.980585995611729 }, { "step": 950, "loss": 5.734357833862305, "lm_loss": 5.734357833862305, "ppl": 309.3142758146188, "gate_mean": 3.0170194804668427e-06, "lr": 0.000171, "steps_per_second": 4.982936888479588 }, { "step": 960, "loss": 5.686685085296631, "lm_loss": 5.686685085296631, "ppl": 294.91438246900543, "gate_mean": 2.4600885808467865e-06, "lr": 0.0001728, "steps_per_second": 4.984488219537185 }, { "step": 970, "loss": 5.7012834548950195, "lm_loss": 5.7012834548950195, "ppl": 299.2512300567664, "gate_mean": 1.8677674233913422e-06, "lr": 0.00017460000000000002, "steps_per_second": 4.984704007140753 }, { "step": 980, "loss": 5.623720169067383, "lm_loss": 5.623720169067383, "ppl": 276.91764986125554, "gate_mean": 1.7276033759117126e-06, "lr": 0.0001764, "steps_per_second": 4.985313274603363 }, { "step": 990, "loss": 5.56369686126709, "lm_loss": 5.56369686126709, "ppl": 260.78514296396327, "gate_mean": 1.426786184310913e-06, "lr": 0.00017820000000000002, "steps_per_second": 4.985963236011533 }, { "step": 1000, "loss": 5.543086528778076, "lm_loss": 5.543086528778076, "ppl": 255.4652847974373, "gate_mean": 1.7085112631320953e-06, "lr": 0.00018, "steps_per_second": 4.987849680878828 }, { "step": 1010, "loss": 5.667332172393799, "lm_loss": 5.667332172393799, "ppl": 289.26180346889015, "gate_mean": 1.2083910405635834e-06, "lr": 0.00017999991031237878, "steps_per_second": 4.9889628636678385 }, { "step": 1020, "loss": 5.489459991455078, "lm_loss": 5.489459991455078, "ppl": 242.12642121204325, "gate_mean": 2.2281892597675323e-06, "lr": 0.00017999960028135053, "steps_per_second": 4.991366812447148 }, { "step": 1030, "loss": 5.4231343269348145, "lm_loss": 5.4231343269348145, "ppl": 226.58821219866303, "gate_mean": 2.0712614059448242e-06, "lr": 0.00017999906880050807, "steps_per_second": 4.991325009386117 }, { "step": 1040, "loss": 5.465798854827881, "lm_loss": 5.465798854827881, "ppl": 236.46468062847327, "gate_mean": 2.6640482246875763e-06, "lr": 0.00017999831587130443, "steps_per_second": 4.993330734526731 }, { "step": 1050, "loss": 5.486927509307861, "lm_loss": 5.486927509307861, "ppl": 241.51401615269728, "gate_mean": 1.966487616300583e-06, "lr": 0.0001799973414957981, "steps_per_second": 4.995363553794485 }, { "step": 1060, "loss": 5.517140865325928, "lm_loss": 5.517140865325928, "ppl": 248.92231636693035, "gate_mean": 1.5436671674251556e-06, "lr": 0.00017999614567665303, "steps_per_second": 4.996965497808028 }, { "step": 1070, "loss": 5.256202697753906, "lm_loss": 5.256202697753906, "ppl": 191.75196688696684, "gate_mean": 1.0782387107610703e-06, "lr": 0.0001799947284171385, "steps_per_second": 4.996503286808864 }, { "step": 1080, "loss": 5.446969509124756, "lm_loss": 5.446969509124756, "ppl": 232.05386214182792, "gate_mean": 2.7208589017391205e-06, "lr": 0.00017999308972112924, "steps_per_second": 4.996779076457794 }, { "step": 1090, "loss": 5.439919948577881, "lm_loss": 5.439919948577881, "ppl": 230.4237369744834, "gate_mean": 1.41095370054245e-06, "lr": 0.0001799912295931054, "steps_per_second": 4.998187888349153 }, { "step": 1100, "loss": 5.503893852233887, "lm_loss": 5.503893852233887, "ppl": 245.64658392131642, "gate_mean": 1.185806468129158e-06, "lr": 0.00017998914803815254, "steps_per_second": 4.997251775719242 }, { "step": 1110, "loss": 5.750015735626221, "lm_loss": 5.750015735626221, "ppl": 314.19560431138507, "gate_mean": 2.248212695121765e-06, "lr": 0.00017998684506196147, "steps_per_second": 4.998207219828242 }, { "step": 1120, "loss": 5.523724555969238, "lm_loss": 5.523724555969238, "ppl": 250.56655051749868, "gate_mean": 1.307111233472824e-06, "lr": 0.00017998432067082846, "steps_per_second": 4.999876115879734 }, { "step": 1130, "loss": 5.414978981018066, "lm_loss": 5.414978981018066, "ppl": 224.7478216585658, "gate_mean": 9.515788406133652e-07, "lr": 0.00017998157487165512, "steps_per_second": 4.99955020474015 }, { "step": 1140, "loss": 5.439026832580566, "lm_loss": 5.439026832580566, "ppl": 230.21803372093711, "gate_mean": 1.1676456779241562e-06, "lr": 0.0001799786076719484, "steps_per_second": 5.000769567336301 }, { "step": 1150, "loss": 5.399542808532715, "lm_loss": 5.399542808532715, "ppl": 221.30521421600187, "gate_mean": 1.3026874512434006e-06, "lr": 0.0001799754190798204, "steps_per_second": 5.001387711655579 }, { "step": 1160, "loss": 5.446412086486816, "lm_loss": 5.446412086486816, "ppl": 231.92454611103872, "gate_mean": 8.28644260764122e-07, "lr": 0.0001799720091039887, "steps_per_second": 5.002295241058596 }, { "step": 1170, "loss": 5.411089897155762, "lm_loss": 5.411089897155762, "ppl": 223.87545598344195, "gate_mean": 1.285923644900322e-06, "lr": 0.000179968377753776, "steps_per_second": 5.0029146432567835 }, { "step": 1180, "loss": 5.35762882232666, "lm_loss": 5.35762882232666, "ppl": 212.22113535482865, "gate_mean": 1.0456424206495285e-06, "lr": 0.00017996452503911035, "steps_per_second": 5.004185934145227 }, { "step": 1190, "loss": 5.304915428161621, "lm_loss": 5.304915428161621, "ppl": 201.32397535499598, "gate_mean": 1.110835000872612e-06, "lr": 0.00017996045097052484, "steps_per_second": 5.0057241337197995 }, { "step": 1200, "loss": 5.383498668670654, "lm_loss": 5.383498668670654, "ppl": 217.78289426158355, "gate_mean": 1.271488144993782e-06, "lr": 0.0001799561555591578, "steps_per_second": 5.0060185630179515 }, { "step": 1210, "loss": 5.301718711853027, "lm_loss": 5.301718711853027, "ppl": 200.68142728879178, "gate_mean": 1.3390090316534042e-06, "lr": 0.0001799516388167528, "steps_per_second": 5.007523824744479 }, { "step": 1220, "loss": 5.374334812164307, "lm_loss": 5.374334812164307, "ppl": 215.7962794977028, "gate_mean": 1.0747462511062622e-06, "lr": 0.00017994690075565843, "steps_per_second": 5.009186347225098 }, { "step": 1230, "loss": 5.276976585388184, "lm_loss": 5.276976585388184, "ppl": 195.77706440954702, "gate_mean": 8.849892765283585e-07, "lr": 0.0001799419413888283, "steps_per_second": 5.010867242509591 }, { "step": 1240, "loss": 5.295742511749268, "lm_loss": 5.295742511749268, "ppl": 199.48569145930338, "gate_mean": 1.1094380170106888e-06, "lr": 0.0001799367607298212, "steps_per_second": 5.012286693764793 }, { "step": 1250, "loss": 5.250421524047852, "lm_loss": 5.250421524047852, "ppl": 190.6466136560214, "gate_mean": 1.2046657502651215e-06, "lr": 0.00017993135879280082, "steps_per_second": 5.013517749635097 }, { "step": 1260, "loss": 5.273642539978027, "lm_loss": 5.273642539978027, "ppl": 195.12542169337257, "gate_mean": 6.41215592622757e-07, "lr": 0.00017992573559253585, "steps_per_second": 5.015198053580968 }, { "step": 1270, "loss": 5.288361072540283, "lm_loss": 5.288361072540283, "ppl": 198.01862116102856, "gate_mean": 1.043081283569336e-06, "lr": 0.0001799198911443999, "steps_per_second": 5.016221838487786 }, { "step": 1280, "loss": 5.241655349731445, "lm_loss": 5.241655349731445, "ppl": 188.9826760469858, "gate_mean": 1.0707881301641464e-06, "lr": 0.0001799138254643715, "steps_per_second": 5.017498274712779 }, { "step": 1290, "loss": 5.112671852111816, "lm_loss": 5.112671852111816, "ppl": 166.1135934315204, "gate_mean": 7.454073056578636e-07, "lr": 0.00017990753856903397, "steps_per_second": 5.019036110633409 }, { "step": 1300, "loss": 5.140140533447266, "lm_loss": 5.140140533447266, "ppl": 170.73976128261495, "gate_mean": 1.1471565812826157e-06, "lr": 0.0001799010304755754, "steps_per_second": 5.020226523912804 }, { "step": 1310, "loss": 5.200603485107422, "lm_loss": 5.200603485107422, "ppl": 181.38166998930933, "gate_mean": 1.671724021434784e-06, "lr": 0.00017989430120178877, "steps_per_second": 5.021537502743857 }, { "step": 1320, "loss": 5.1720428466796875, "lm_loss": 5.1720428466796875, "ppl": 176.27457184169214, "gate_mean": 1.0414514690637589e-06, "lr": 0.00017988735076607154, "steps_per_second": 5.022611006660186 }, { "step": 1330, "loss": 5.091760635375977, "lm_loss": 5.091760635375977, "ppl": 162.6760232281431, "gate_mean": 7.648486644029617e-07, "lr": 0.000179880179187426, "steps_per_second": 5.024392972972484 }, { "step": 1340, "loss": 5.079955101013184, "lm_loss": 5.079955101013184, "ppl": 160.76683749844105, "gate_mean": 1.11432746052742e-06, "lr": 0.00017987278648545898, "steps_per_second": 5.025492046759458 }, { "step": 1350, "loss": 5.080170631408691, "lm_loss": 5.080170631408691, "ppl": 160.80149137285912, "gate_mean": 1.221662387251854e-06, "lr": 0.00017986517268038185, "steps_per_second": 5.0267092006760326 }, { "step": 1360, "loss": 5.167274475097656, "lm_loss": 5.167274475097656, "ppl": 175.43603001105748, "gate_mean": 1.1802185326814651e-06, "lr": 0.0001798573377930104, "steps_per_second": 5.028071758160501 }, { "step": 1370, "loss": 5.109740734100342, "lm_loss": 5.109740734100342, "ppl": 165.62740776772725, "gate_mean": 1.4191027730703354e-06, "lr": 0.000179849281844765, "steps_per_second": 5.028886147314893 }, { "step": 1380, "loss": 5.06371545791626, "lm_loss": 5.06371545791626, "ppl": 158.17712634235988, "gate_mean": 1.8361024558544159e-06, "lr": 0.0001798410048576702, "steps_per_second": 5.030169592077488 }, { "step": 1390, "loss": 5.1354289054870605, "lm_loss": 5.1354289054870605, "ppl": 169.93719123988018, "gate_mean": 2.6563648134469986e-06, "lr": 0.00017983250685435509, "steps_per_second": 5.031615019135703 }, { "step": 1400, "loss": 5.102712631225586, "lm_loss": 5.102712631225586, "ppl": 164.46744226014542, "gate_mean": 1.300126314163208e-06, "lr": 0.0001798237878580528, "steps_per_second": 5.032963970357685 }, { "step": 1410, "loss": 4.87202787399292, "lm_loss": 4.87202787399292, "ppl": 130.58545943337603, "gate_mean": 1.3068784028291702e-06, "lr": 0.0001798148478926007, "steps_per_second": 5.033579219932048 }, { "step": 1420, "loss": 5.096330642700195, "lm_loss": 5.096330642700195, "ppl": 163.42115517816748, "gate_mean": 2.451939508318901e-06, "lr": 0.00017980568698244042, "steps_per_second": 5.034488915966207 }, { "step": 1430, "loss": 4.978705883026123, "lm_loss": 4.978705883026123, "ppl": 145.2862425503083, "gate_mean": 1.0496005415916443e-06, "lr": 0.0001797963051526175, "steps_per_second": 5.035534232250242 }, { "step": 1440, "loss": 4.989412307739258, "lm_loss": 4.989412307739258, "ppl": 146.85009546642726, "gate_mean": 4.214700311422348e-06, "lr": 0.00017978670242878146, "steps_per_second": 5.036865048960468 }, { "step": 1450, "loss": 4.998150825500488, "lm_loss": 4.998150825500488, "ppl": 148.138970862454, "gate_mean": 2.6908237487077713e-06, "lr": 0.0001797768788371858, "steps_per_second": 5.037788970514083 }, { "step": 1460, "loss": 5.096205234527588, "lm_loss": 5.096205234527588, "ppl": 163.40066211475687, "gate_mean": 1.632142812013626e-06, "lr": 0.00017976683440468788, "steps_per_second": 5.0387862535495485 }, { "step": 1470, "loss": 5.092321395874023, "lm_loss": 5.092321395874023, "ppl": 162.76727109765807, "gate_mean": 1.8118880689144135e-06, "lr": 0.0001797565691587488, "steps_per_second": 5.040003966067574 }, { "step": 1480, "loss": 5.11201810836792, "lm_loss": 5.11201810836792, "ppl": 166.00503319819146, "gate_mean": 2.2258609533309937e-06, "lr": 0.00017974608312743326, "steps_per_second": 5.04059204971844 }, { "step": 1490, "loss": 5.051664352416992, "lm_loss": 5.051664352416992, "ppl": 156.2823570686984, "gate_mean": 3.6226119846105576e-06, "lr": 0.00017973537633940976, "steps_per_second": 5.041111491770234 }, { "step": 1500, "loss": 5.1165008544921875, "lm_loss": 5.1165008544921875, "ppl": 166.75086204911204, "gate_mean": 2.410728484392166e-06, "lr": 0.00017972444882395023, "steps_per_second": 5.042086012870037 }, { "step": 1510, "loss": 4.905524253845215, "lm_loss": 4.905524253845215, "ppl": 135.03368338132896, "gate_mean": 2.8552021831274033e-06, "lr": 0.00017971330061093005, "steps_per_second": 5.043096086965011 }, { "step": 1520, "loss": 5.022196292877197, "lm_loss": 5.022196292877197, "ppl": 151.7442128111331, "gate_mean": 3.1802337616682053e-06, "lr": 0.00017970193173082806, "steps_per_second": 5.043399466095695 }, { "step": 1530, "loss": 5.0815863609313965, "lm_loss": 5.0815863609313965, "ppl": 161.0293040139813, "gate_mean": 2.4687033146619797e-06, "lr": 0.0001796903422147263, "steps_per_second": 5.044827578795577 }, { "step": 1540, "loss": 5.037697792053223, "lm_loss": 5.037697792053223, "ppl": 154.1148019738056, "gate_mean": 3.311317414045334e-06, "lr": 0.00017967853209431006, "steps_per_second": 5.044140511730603 }, { "step": 1550, "loss": 4.986074924468994, "lm_loss": 4.986074924468994, "ppl": 146.36081732306283, "gate_mean": 4.011206328868866e-06, "lr": 0.00017966650140186782, "steps_per_second": 5.04514025608307 }, { "step": 1560, "loss": 4.980757713317871, "lm_loss": 4.980757713317871, "ppl": 145.58465130111247, "gate_mean": 6.702030077576637e-06, "lr": 0.00017965425017029093, "steps_per_second": 5.045402719257565 }, { "step": 1570, "loss": 4.929959297180176, "lm_loss": 4.929959297180176, "ppl": 138.373880018229, "gate_mean": 2.2472813725471497e-06, "lr": 0.00017964177843307388, "steps_per_second": 5.046476707404617 }, { "step": 1580, "loss": 4.940421104431152, "lm_loss": 4.940421104431152, "ppl": 139.8291198258309, "gate_mean": 3.146938979625702e-06, "lr": 0.0001796290862243139, "steps_per_second": 5.047207845721918 }, { "step": 1590, "loss": 4.936993598937988, "lm_loss": 4.936993598937988, "ppl": 139.35067515378338, "gate_mean": 1.750187948346138e-06, "lr": 0.000179616173578711, "steps_per_second": 5.047159319573996 }, { "step": 1600, "loss": 4.86615514755249, "lm_loss": 4.86615514755249, "ppl": 129.8208142267347, "gate_mean": 1.8012942746281624e-06, "lr": 0.00017960304053156784, "steps_per_second": 5.047527529490109 }, { "step": 1610, "loss": 4.905635833740234, "lm_loss": 4.905635833740234, "ppl": 135.04875126616562, "gate_mean": 1.526903361082077e-06, "lr": 0.00017958968711878967, "steps_per_second": 5.0482833626417145 }, { "step": 1620, "loss": 4.884360313415527, "lm_loss": 4.884360313415527, "ppl": 132.20586795915452, "gate_mean": 3.209570422768593e-06, "lr": 0.00017957611337688425, "steps_per_second": 5.049160000303896 }, { "step": 1630, "loss": 4.877418041229248, "lm_loss": 4.877418041229248, "ppl": 131.29123731997555, "gate_mean": 2.8065405786037445e-06, "lr": 0.00017956231934296166, "steps_per_second": 5.046173519560606 }, { "step": 1640, "loss": 4.893198013305664, "lm_loss": 4.893198013305664, "ppl": 133.37944195278598, "gate_mean": 1.3636890798807144e-06, "lr": 0.00017954830505473424, "steps_per_second": 5.04594568937452 }, { "step": 1650, "loss": 4.829442024230957, "lm_loss": 4.829442024230957, "ppl": 125.14111546047637, "gate_mean": 1.0111834853887558e-06, "lr": 0.00017953407055051654, "steps_per_second": 5.045540402422997 }, { "step": 1660, "loss": 4.853125095367432, "lm_loss": 4.853125095367432, "ppl": 128.14021516730176, "gate_mean": 1.4193356037139893e-06, "lr": 0.0001795196158692251, "steps_per_second": 5.0466433739069725 }, { "step": 1670, "loss": 4.8245062828063965, "lm_loss": 4.8245062828063965, "ppl": 124.52497308355673, "gate_mean": 8.51927325129509e-07, "lr": 0.00017950494105037855, "steps_per_second": 5.047033491955557 }, { "step": 1680, "loss": 4.859042167663574, "lm_loss": 4.859042167663574, "ppl": 128.90067772168757, "gate_mean": 1.1364463716745377e-06, "lr": 0.0001794900461340972, "steps_per_second": 5.047485482546921 }, { "step": 1690, "loss": 4.789924621582031, "lm_loss": 4.789924621582031, "ppl": 120.2923008781281, "gate_mean": 1.3443641364574432e-06, "lr": 0.0001794749311611033, "steps_per_second": 5.047949354743044 }, { "step": 1700, "loss": 4.782723426818848, "lm_loss": 4.782723426818848, "ppl": 119.42916412889758, "gate_mean": 9.238719940185547e-07, "lr": 0.0001794595961727205, "steps_per_second": 5.048310449991816 }, { "step": 1710, "loss": 4.608159065246582, "lm_loss": 4.608159065246582, "ppl": 100.29933504115708, "gate_mean": 4.2121391743421555e-06, "lr": 0.00017944404121087405, "steps_per_second": 5.049056203036635 }, { "step": 1720, "loss": 4.6518964767456055, "lm_loss": 4.6518964767456055, "ppl": 104.78351676558829, "gate_mean": 2.6435591280460358e-06, "lr": 0.0001794282663180907, "steps_per_second": 5.049649446380586 }, { "step": 1730, "loss": 4.72628927230835, "lm_loss": 4.72628927230835, "ppl": 112.87593241582535, "gate_mean": 1.871492713689804e-06, "lr": 0.0001794122715374984, "steps_per_second": 5.050347911060359 }, { "step": 1740, "loss": 4.69297981262207, "lm_loss": 4.69297981262207, "ppl": 109.17802563604232, "gate_mean": 1.0915100574493408e-06, "lr": 0.00017939605691282627, "steps_per_second": 5.050748709938202 }, { "step": 1750, "loss": 4.699845790863037, "lm_loss": 4.699845790863037, "ppl": 109.930218900778, "gate_mean": 1.3741664588451385e-06, "lr": 0.00017937962248840443, "steps_per_second": 5.0486863613348 }, { "step": 1760, "loss": 4.78010892868042, "lm_loss": 4.78010892868042, "ppl": 119.11732463113596, "gate_mean": 1.389533281326294e-06, "lr": 0.000179362968309164, "steps_per_second": 5.049195472344436 }, { "step": 1770, "loss": 4.641818523406982, "lm_loss": 4.641818523406982, "ppl": 103.73281671884985, "gate_mean": 3.7045683711767197e-06, "lr": 0.0001793460944206369, "steps_per_second": 5.049781068658828 }, { "step": 1780, "loss": 4.527978420257568, "lm_loss": 4.527978420257568, "ppl": 92.57123166856275, "gate_mean": 1.2989621609449387e-06, "lr": 0.00017932900086895561, "steps_per_second": 5.050481581407798 }, { "step": 1790, "loss": 4.679769515991211, "lm_loss": 4.679769515991211, "ppl": 107.74523615535506, "gate_mean": 2.3329630494117737e-06, "lr": 0.00017931168770085333, "steps_per_second": 5.051222712375244 }, { "step": 1800, "loss": 4.79580020904541, "lm_loss": 4.79580020904541, "ppl": 121.0011692829386, "gate_mean": 8.777715265750885e-07, "lr": 0.00017929415496366352, "steps_per_second": 5.0514536451721535 }, { "step": 1810, "loss": 4.654391288757324, "lm_loss": 4.654391288757324, "ppl": 105.04525830405377, "gate_mean": 7.988419383764267e-07, "lr": 0.00017927640270532012, "steps_per_second": 5.009996806597256 }, { "step": 1820, "loss": 4.737006664276123, "lm_loss": 4.737006664276123, "ppl": 114.09217385367432, "gate_mean": 1.1869706213474274e-06, "lr": 0.00017925843097435704, "steps_per_second": 4.971191130024964 }, { "step": 1830, "loss": 4.67437219619751, "lm_loss": 4.67437219619751, "ppl": 107.1652672064458, "gate_mean": 7.988419383764267e-07, "lr": 0.00017924023981990837, "steps_per_second": 4.952504793096879 }, { "step": 1840, "loss": 4.738030910491943, "lm_loss": 4.738030910491943, "ppl": 114.2090921973628, "gate_mean": 1.0128132998943329e-06, "lr": 0.000179221829291708, "steps_per_second": 4.9327645930639115 }, { "step": 1850, "loss": 4.682421684265137, "lm_loss": 4.682421684265137, "ppl": 108.0313739273891, "gate_mean": 1.0756775736808777e-06, "lr": 0.00017920319944008956, "steps_per_second": 4.932943427747166 }, { "step": 1860, "loss": 4.589024543762207, "lm_loss": 4.589024543762207, "ppl": 98.3983999998818, "gate_mean": 7.725320756435394e-07, "lr": 0.0001791843503159864, "steps_per_second": 4.93307115672525 }, { "step": 1870, "loss": 4.729719638824463, "lm_loss": 4.729719638824463, "ppl": 113.26380312385149, "gate_mean": 1.1899974197149277e-06, "lr": 0.00017916528197093137, "steps_per_second": 4.933545192815622 }, { "step": 1880, "loss": 4.696857452392578, "lm_loss": 4.696857452392578, "ppl": 109.60220055759265, "gate_mean": 7.790513336658478e-07, "lr": 0.0001791459944570565, "steps_per_second": 4.91632086252201 }, { "step": 1890, "loss": 4.700132369995117, "lm_loss": 4.700132369995117, "ppl": 109.96172712208367, "gate_mean": 1.18953175842762e-06, "lr": 0.00017912648782709314, "steps_per_second": 4.917386209295038 }, { "step": 1900, "loss": 4.646261215209961, "lm_loss": 4.646261215209961, "ppl": 104.1946948848641, "gate_mean": 8.689239621162415e-07, "lr": 0.0001791067621343717, "steps_per_second": 4.91817654839558 }, { "step": 1910, "loss": 4.628539085388184, "lm_loss": 4.628539085388184, "ppl": 102.36440915948712, "gate_mean": 1.314561814069748e-06, "lr": 0.00017908681743282145, "steps_per_second": 4.9188335881234595 }, { "step": 1920, "loss": 4.6453704833984375, "lm_loss": 4.6453704833984375, "ppl": 104.10192667746844, "gate_mean": 8.007045835256577e-07, "lr": 0.00017906665377697044, "steps_per_second": 4.919807409848885 }, { "step": 1930, "loss": 4.66136360168457, "lm_loss": 4.66136360168457, "ppl": 105.78022595126511, "gate_mean": 1.110835000872612e-06, "lr": 0.00017904627122194535, "steps_per_second": 4.920786431155256 }, { "step": 1940, "loss": 4.591633319854736, "lm_loss": 4.591633319854736, "ppl": 98.65543452032533, "gate_mean": 6.502959877252579e-07, "lr": 0.00017902566982347132, "steps_per_second": 4.921527170983595 }, { "step": 1950, "loss": 4.616796970367432, "lm_loss": 4.616796970367432, "ppl": 101.16946381555918, "gate_mean": 1.1872034519910812e-06, "lr": 0.00017900484963787175, "steps_per_second": 4.920531561541742 }, { "step": 1960, "loss": 4.6992411613464355, "lm_loss": 4.6992411613464355, "ppl": 109.86377193558664, "gate_mean": 1.8579885363578796e-06, "lr": 0.00017898381072206822, "steps_per_second": 4.921587270014651 }, { "step": 1970, "loss": 4.711071014404297, "lm_loss": 4.711071014404297, "ppl": 111.17116208373453, "gate_mean": 1.5869736671447754e-06, "lr": 0.00017896255313358037, "steps_per_second": 4.923060238883022 }, { "step": 1980, "loss": 4.655065059661865, "lm_loss": 4.655065059661865, "ppl": 105.11605859166754, "gate_mean": 1.255422830581665e-06, "lr": 0.0001789410769305256, "steps_per_second": 4.924320099631113 }, { "step": 1990, "loss": 4.57464075088501, "lm_loss": 4.57464075088501, "ppl": 96.99318816021025, "gate_mean": 1.0258518159389496e-06, "lr": 0.00017891938217161904, "steps_per_second": 4.925173846349554 }, { "step": 2000, "loss": 4.543638706207275, "lm_loss": 4.543638706207275, "ppl": 94.03233441007362, "gate_mean": 9.683426469564438e-07, "lr": 0.00017889746891617338, "steps_per_second": 4.926081801290332 }, { "step": 2010, "loss": 4.524898529052734, "lm_loss": 4.524898529052734, "ppl": 92.28656094877492, "gate_mean": 9.380746632814407e-07, "lr": 0.00017887533722409855, "steps_per_second": 4.832531031038423 }, { "step": 2020, "loss": 4.499791145324707, "lm_loss": 4.499791145324707, "ppl": 89.99833276494259, "gate_mean": 1.2293457984924316e-06, "lr": 0.0001788529871559018, "steps_per_second": 4.832558564758683 }, { "step": 2030, "loss": 4.550511837005615, "lm_loss": 4.550511837005615, "ppl": 94.68085708130819, "gate_mean": 1.0242220014333725e-06, "lr": 0.00017883041877268734, "steps_per_second": 4.832887848694121 }, { "step": 2040, "loss": 4.538496971130371, "lm_loss": 4.538496971130371, "ppl": 93.55008591732044, "gate_mean": 1.023290678858757e-06, "lr": 0.00017880763213615626, "steps_per_second": 4.832875197007589 }, { "step": 2050, "loss": 4.615577697753906, "lm_loss": 4.615577697753906, "ppl": 101.0461858290126, "gate_mean": 1.2172386050224304e-06, "lr": 0.00017878462730860635, "steps_per_second": 4.833382349265833 }, { "step": 2060, "loss": 4.598205089569092, "lm_loss": 4.598205089569092, "ppl": 99.30591036472025, "gate_mean": 1.344829797744751e-06, "lr": 0.00017876140435293196, "steps_per_second": 4.834454080899951 }, { "step": 2070, "loss": 4.4929304122924805, "lm_loss": 4.4929304122924805, "ppl": 89.3829914902582, "gate_mean": 1.0777730494737625e-06, "lr": 0.0001787379633326238, "steps_per_second": 4.836031742900407 }, { "step": 2080, "loss": 4.5405378341674805, "lm_loss": 4.5405378341674805, "ppl": 93.74120378614886, "gate_mean": 9.592622518539429e-07, "lr": 0.0001787143043117686, "steps_per_second": 4.837976143569802 }, { "step": 2090, "loss": 4.4176411628723145, "lm_loss": 4.4176411628723145, "ppl": 82.90050575214804, "gate_mean": 1.1543743312358856e-06, "lr": 0.00017869042735504936, "steps_per_second": 4.839350094806646 }, { "step": 2100, "loss": 4.307323932647705, "lm_loss": 4.307323932647705, "ppl": 74.24154748729097, "gate_mean": 1.3471581041812897e-06, "lr": 0.00017866633252774464, "steps_per_second": 4.840855508001465 }, { "step": 2110, "loss": 4.497512340545654, "lm_loss": 4.497512340545654, "ppl": 89.7934776352056, "gate_mean": 1.3457611203193665e-06, "lr": 0.00017864201989572888, "steps_per_second": 4.841527513930918 }, { "step": 2120, "loss": 4.412723064422607, "lm_loss": 4.412723064422607, "ppl": 82.49379384770668, "gate_mean": 7.578637450933456e-07, "lr": 0.0001786174895254718, "steps_per_second": 4.84273974541953 }, { "step": 2130, "loss": 4.5941033363342285, "lm_loss": 4.5941033363342285, "ppl": 98.89941626480278, "gate_mean": 1.1511147022247314e-06, "lr": 0.0001785927414840386, "steps_per_second": 4.844245334345087 }, { "step": 2140, "loss": 4.48301362991333, "lm_loss": 4.48301362991333, "ppl": 88.50098039953137, "gate_mean": 8.409842848777771e-07, "lr": 0.00017856777583908942, "steps_per_second": 4.845269006483785 }, { "step": 2150, "loss": 4.528942108154297, "lm_loss": 4.528942108154297, "ppl": 92.6604844431089, "gate_mean": 1.085689291357994e-06, "lr": 0.00017854259265887937, "steps_per_second": 4.845689706306823 }, { "step": 2160, "loss": 4.470763206481934, "lm_loss": 4.470763206481934, "ppl": 87.42341967911165, "gate_mean": 1.2326054275035858e-06, "lr": 0.00017851719201225833, "steps_per_second": 4.847138626329638 }, { "step": 2170, "loss": 4.435583591461182, "lm_loss": 4.435583591461182, "ppl": 84.40136643518358, "gate_mean": 9.192153811454773e-07, "lr": 0.00017849157396867066, "steps_per_second": 4.848549326609348 }, { "step": 2180, "loss": 4.539826393127441, "lm_loss": 4.539826393127441, "ppl": 93.67453616448574, "gate_mean": 1.575332134962082e-06, "lr": 0.00017846573859815515, "steps_per_second": 4.850287312883877 }, { "step": 2190, "loss": 4.523014068603516, "lm_loss": 4.523014068603516, "ppl": 92.11281433540037, "gate_mean": 9.764917194843292e-07, "lr": 0.0001784396859713447, "steps_per_second": 4.851488179354604 }, { "step": 2200, "loss": 4.545041084289551, "lm_loss": 4.545041084289551, "ppl": 94.16429580313118, "gate_mean": 8.330680429935455e-07, "lr": 0.00017841341615946622, "steps_per_second": 4.852078529017708 }, { "step": 2210, "loss": 4.511151313781738, "lm_loss": 4.511151313781738, "ppl": 91.02655833587404, "gate_mean": 1.3690441846847534e-06, "lr": 0.00017838692923434032, "steps_per_second": 4.853505917515411 }, { "step": 2220, "loss": 4.505857467651367, "lm_loss": 4.505857467651367, "ppl": 90.5459509953318, "gate_mean": 8.76840204000473e-07, "lr": 0.00017836022526838128, "steps_per_second": 4.854307802767528 }, { "step": 2230, "loss": 4.585221290588379, "lm_loss": 4.585221290588379, "ppl": 98.02487672481615, "gate_mean": 1.4256220310926437e-06, "lr": 0.00017833330433459675, "steps_per_second": 4.855062339324707 }, { "step": 2240, "loss": 4.595559120178223, "lm_loss": 4.595559120178223, "ppl": 99.04349728714782, "gate_mean": 1.5080440789461136e-06, "lr": 0.00017830616650658747, "steps_per_second": 4.8562766353933124 }, { "step": 2250, "loss": 4.519858360290527, "lm_loss": 4.519858360290527, "ppl": 91.82259133189045, "gate_mean": 1.11432746052742e-06, "lr": 0.0001782788118585473, "steps_per_second": 4.857744565675766 }, { "step": 2260, "loss": 4.44379186630249, "lm_loss": 4.44379186630249, "ppl": 85.0970071471696, "gate_mean": 1.1548399925231934e-06, "lr": 0.00017825124046526273, "steps_per_second": 4.858873483942737 }, { "step": 2270, "loss": 4.638138294219971, "lm_loss": 4.638138294219971, "ppl": 103.35175780127658, "gate_mean": 1.5937257558107376e-06, "lr": 0.00017822345240211293, "steps_per_second": 4.859872890314451 }, { "step": 2280, "loss": 4.390237331390381, "lm_loss": 4.390237331390381, "ppl": 80.65955975449795, "gate_mean": 1.3508833944797516e-06, "lr": 0.00017819544774506943, "steps_per_second": 4.86107920613953 }, { "step": 2290, "loss": 4.40644645690918, "lm_loss": 4.40644645690918, "ppl": 81.97763424160426, "gate_mean": 8.859205991029739e-07, "lr": 0.00017816722657069587, "steps_per_second": 4.862086962277082 }, { "step": 2300, "loss": 4.384567737579346, "lm_loss": 4.384567737579346, "ppl": 80.20354673951417, "gate_mean": 1.266133040189743e-06, "lr": 0.0001781387889561478, "steps_per_second": 4.863343713039956 }, { "step": 2310, "loss": 4.437526702880859, "lm_loss": 4.437526702880859, "ppl": 84.56552713375267, "gate_mean": 9.41101461648941e-07, "lr": 0.00017811013497917266, "steps_per_second": 4.864414691331872 }, { "step": 2320, "loss": 4.495616912841797, "lm_loss": 4.495616912841797, "ppl": 89.62344178630602, "gate_mean": 9.157229214906693e-07, "lr": 0.0001780812647181093, "steps_per_second": 4.865755204450426 }, { "step": 2330, "loss": 4.444353103637695, "lm_loss": 4.444353103637695, "ppl": 85.14478016944268, "gate_mean": 8.016359061002731e-07, "lr": 0.00017805217825188794, "steps_per_second": 4.866963673815909 }, { "step": 2340, "loss": 4.3367438316345215, "lm_loss": 4.3367438316345215, "ppl": 76.45817288521488, "gate_mean": 1.2121163308620453e-06, "lr": 0.0001780228756600298, "steps_per_second": 4.867956936254912 }, { "step": 2350, "loss": 4.487395286560059, "lm_loss": 4.487395286560059, "ppl": 88.88961211212887, "gate_mean": 9.231735020875931e-07, "lr": 0.0001779933570226471, "steps_per_second": 4.868959075614053 }, { "step": 2360, "loss": 4.349601745605469, "lm_loss": 4.349601745605469, "ppl": 77.44761293040011, "gate_mean": 1.3932585716247559e-06, "lr": 0.00017796362242044266, "steps_per_second": 4.870153925762001 }, { "step": 2370, "loss": 4.389095306396484, "lm_loss": 4.389095306396484, "ppl": 80.56749710019358, "gate_mean": 8.025672286748886e-07, "lr": 0.00017793367193470974, "steps_per_second": 4.870732431033898 }, { "step": 2380, "loss": 4.481957912445068, "lm_loss": 4.481957912445068, "ppl": 88.40759767012786, "gate_mean": 1.1962838470935822e-06, "lr": 0.00017790350564733182, "steps_per_second": 4.871942933621001 }, { "step": 2390, "loss": 4.458811283111572, "lm_loss": 4.458811283111572, "ppl": 86.38476101491734, "gate_mean": 1.2011732906103134e-06, "lr": 0.00017787312364078237, "steps_per_second": 4.872989903249481 }, { "step": 2400, "loss": 4.327341079711914, "lm_loss": 4.327341079711914, "ppl": 75.74262498471332, "gate_mean": 9.26898792386055e-07, "lr": 0.0001778425259981247, "steps_per_second": 4.874207201063873 }, { "step": 2410, "loss": 4.299369812011719, "lm_loss": 4.299369812011719, "ppl": 73.65336360624765, "gate_mean": 1.100124791264534e-06, "lr": 0.00017781171280301146, "steps_per_second": 4.875457191046164 }, { "step": 2420, "loss": 4.3979339599609375, "lm_loss": 4.3979339599609375, "ppl": 81.28276162682307, "gate_mean": 8.279457688331604e-07, "lr": 0.00017778068413968492, "steps_per_second": 4.876584405240261 }, { "step": 2430, "loss": 4.366809844970703, "lm_loss": 4.366809844970703, "ppl": 78.79187205003525, "gate_mean": 1.3825483620166779e-06, "lr": 0.00017774944009297614, "steps_per_second": 4.877110899888731 }, { "step": 2440, "loss": 4.343160152435303, "lm_loss": 4.343160152435303, "ppl": 76.95033028168321, "gate_mean": 9.776558727025986e-07, "lr": 0.00017771798074830517, "steps_per_second": 4.878168504936347 }, { "step": 2450, "loss": 4.284587860107422, "lm_loss": 4.284587860107422, "ppl": 72.57263047920996, "gate_mean": 8.882489055395126e-07, "lr": 0.00017768630619168069, "steps_per_second": 4.879121537274207 }, { "step": 2460, "loss": 4.232186317443848, "lm_loss": 4.232186317443848, "ppl": 68.86763421062943, "gate_mean": 9.869690984487534e-07, "lr": 0.0001776544165096997, "steps_per_second": 4.880367675319887 }, { "step": 2470, "loss": 4.209272384643555, "lm_loss": 4.209272384643555, "ppl": 67.30754798315799, "gate_mean": 9.087380021810532e-07, "lr": 0.0001776223117895474, "steps_per_second": 4.881307953043909 }, { "step": 2480, "loss": 4.252678871154785, "lm_loss": 4.252678871154785, "ppl": 70.29346748890326, "gate_mean": 1.2086238712072372e-06, "lr": 0.00017758999211899684, "steps_per_second": 4.882344835348772 }, { "step": 2490, "loss": 4.251650333404541, "lm_loss": 4.251650333404541, "ppl": 70.22120517262593, "gate_mean": 1.0735820978879929e-06, "lr": 0.0001775574575864088, "steps_per_second": 4.8833062781859295 }, { "step": 2500, "loss": 4.371561527252197, "lm_loss": 4.371561527252197, "ppl": 79.16715690346209, "gate_mean": 1.1266674846410751e-06, "lr": 0.0001775247082807314, "steps_per_second": 4.884641687707504 }, { "step": 2510, "loss": 4.2640700340271, "lm_loss": 4.2640700340271, "ppl": 71.09876978283613, "gate_mean": 1.0763760656118393e-06, "lr": 0.00017749174429150004, "steps_per_second": 4.885755916994938 }, { "step": 2520, "loss": 4.230929851531982, "lm_loss": 4.230929851531982, "ppl": 68.7811587139465, "gate_mean": 9.709037840366364e-07, "lr": 0.00017745856570883695, "steps_per_second": 4.887063335095113 }, { "step": 2530, "loss": 4.351896286010742, "lm_loss": 4.351896286010742, "ppl": 77.62552364116101, "gate_mean": 1.5543773770332336e-06, "lr": 0.0001774251726234512, "steps_per_second": 4.888084861662312 }, { "step": 2540, "loss": 4.307230472564697, "lm_loss": 4.307230472564697, "ppl": 74.23460919033214, "gate_mean": 1.093139871954918e-06, "lr": 0.0001773915651266381, "steps_per_second": 4.889085419112022 }, { "step": 2550, "loss": 4.414677143096924, "lm_loss": 4.414677143096924, "ppl": 82.65515081178508, "gate_mean": 1.2691598385572433e-06, "lr": 0.00017735774331027935, "steps_per_second": 4.889577249009781 }, { "step": 2560, "loss": 4.377175331115723, "lm_loss": 4.377175331115723, "ppl": 79.61283560069185, "gate_mean": 1.35740265250206e-06, "lr": 0.00017732370726684246, "steps_per_second": 4.890305263650183 }, { "step": 2570, "loss": 4.309380054473877, "lm_loss": 4.309380054473877, "ppl": 74.39435419425678, "gate_mean": 1.159030944108963e-06, "lr": 0.0001772894570893807, "steps_per_second": 4.890911858236983 }, { "step": 2580, "loss": 4.503558158874512, "lm_loss": 4.503558158874512, "ppl": 90.33799706226917, "gate_mean": 1.6146805137395859e-06, "lr": 0.00017725499287153275, "steps_per_second": 4.891610654210613 }, { "step": 2590, "loss": 4.436933517456055, "lm_loss": 4.436933517456055, "ppl": 84.51537897066618, "gate_mean": 1.6060657799243927e-06, "lr": 0.0001772203147075224, "steps_per_second": 4.8924954805493055 }, { "step": 2600, "loss": 4.291831970214844, "lm_loss": 4.291831970214844, "ppl": 73.10026341333561, "gate_mean": 1.2633390724658966e-06, "lr": 0.00017718542269215852, "steps_per_second": 4.893151990115591 }, { "step": 2610, "loss": 4.321216106414795, "lm_loss": 4.321216106414795, "ppl": 75.28012128654156, "gate_mean": 1.110835000872612e-06, "lr": 0.00017715031692083446, "steps_per_second": 4.89402630020983 }, { "step": 2620, "loss": 4.370292663574219, "lm_loss": 4.370292663574219, "ppl": 79.06676827678842, "gate_mean": 1.2347009032964706e-06, "lr": 0.00017711499748952812, "steps_per_second": 4.894307811007526 }, { "step": 2630, "loss": 4.357864856719971, "lm_loss": 4.357864856719971, "ppl": 78.0902224822712, "gate_mean": 9.236391633749008e-07, "lr": 0.00017707946449480147, "steps_per_second": 4.894847612869601 }, { "step": 2640, "loss": 4.45888614654541, "lm_loss": 4.45888614654541, "ppl": 86.39122831683738, "gate_mean": 1.0998919606208801e-06, "lr": 0.00017704371803380035, "steps_per_second": 4.895859745811479 }, { "step": 2650, "loss": 4.431661605834961, "lm_loss": 4.431661605834961, "ppl": 84.07099377109311, "gate_mean": 1.3513490557670593e-06, "lr": 0.00017700775820425418, "steps_per_second": 4.896525918307369 }, { "step": 2660, "loss": 4.369226455688477, "lm_loss": 4.369226455688477, "ppl": 78.98251159050514, "gate_mean": 1.1711381375789642e-06, "lr": 0.00017697158510447585, "steps_per_second": 4.89761025176822 }, { "step": 2670, "loss": 4.360993385314941, "lm_loss": 4.360993385314941, "ppl": 78.3349125366314, "gate_mean": 1.1154916137456894e-06, "lr": 0.00017693519883336112, "steps_per_second": 4.898433160359219 }, { "step": 2680, "loss": 4.326424598693848, "lm_loss": 4.326424598693848, "ppl": 75.67324010647481, "gate_mean": 9.327195584774017e-07, "lr": 0.0001768985994903888, "steps_per_second": 4.89906403772424 }, { "step": 2690, "loss": 4.247864246368408, "lm_loss": 4.247864246368408, "ppl": 69.95584423484706, "gate_mean": 8.677598088979721e-07, "lr": 0.00017686178717562, "steps_per_second": 4.898799179022909 }, { "step": 2700, "loss": 4.3213725090026855, "lm_loss": 4.3213725090026855, "ppl": 75.29189621311798, "gate_mean": 9.688083082437515e-07, "lr": 0.00017682476198969816, "steps_per_second": 4.899899120744084 }, { "step": 2710, "loss": 4.323512554168701, "lm_loss": 4.323512554168701, "ppl": 75.4531968053651, "gate_mean": 1.0719522833824158e-06, "lr": 0.0001767875240338488, "steps_per_second": 4.900778840859838 }, { "step": 2720, "loss": 4.250503063201904, "lm_loss": 4.250503063201904, "ppl": 70.1406886722605, "gate_mean": 1.267995685338974e-06, "lr": 0.00017675007340987905, "steps_per_second": 4.901425680352983 }, { "step": 2730, "loss": 4.246930122375488, "lm_loss": 4.246930122375488, "ppl": 69.8905273141033, "gate_mean": 1.057283952832222e-06, "lr": 0.00017671241022017747, "steps_per_second": 4.902522543049397 }, { "step": 2740, "loss": 4.322524547576904, "lm_loss": 4.322524547576904, "ppl": 75.37868536450786, "gate_mean": 1.2917444109916687e-06, "lr": 0.0001766745345677138, "steps_per_second": 4.903367743862377 }, { "step": 2750, "loss": 4.309090614318848, "lm_loss": 4.309090614318848, "ppl": 74.37282459676081, "gate_mean": 1.0302755981683731e-06, "lr": 0.0001766364465560386, "steps_per_second": 4.903584005026796 }, { "step": 2760, "loss": 4.242462635040283, "lm_loss": 4.242462635040283, "ppl": 69.57898868407078, "gate_mean": 1.2987293303012848e-06, "lr": 0.00017659814628928307, "steps_per_second": 4.904589030336818 }, { "step": 2770, "loss": 4.36271333694458, "lm_loss": 4.36271333694458, "ppl": 78.46976073004869, "gate_mean": 8.388888090848923e-07, "lr": 0.00017655963387215866, "steps_per_second": 4.90558111300528 }, { "step": 2780, "loss": 4.323798179626465, "lm_loss": 4.323798179626465, "ppl": 75.47475123734307, "gate_mean": 8.454080671072006e-07, "lr": 0.00017652090940995684, "steps_per_second": 4.905978619377272 }, { "step": 2790, "loss": 4.224799156188965, "lm_loss": 4.224799156188965, "ppl": 68.36077233164892, "gate_mean": 8.295755833387375e-07, "lr": 0.00017648197300854882, "steps_per_second": 4.906998833999751 }, { "step": 2800, "loss": 4.303623676300049, "lm_loss": 4.303623676300049, "ppl": 73.96734235744306, "gate_mean": 1.135747879743576e-06, "lr": 0.00017644282477438528, "steps_per_second": 4.90802886724499 }, { "step": 2810, "loss": 4.286107063293457, "lm_loss": 4.286107063293457, "ppl": 72.68296684110803, "gate_mean": 7.664784789085388e-07, "lr": 0.00017640346481449592, "steps_per_second": 4.9089097926237875 }, { "step": 2820, "loss": 4.126556396484375, "lm_loss": 4.126556396484375, "ppl": 61.96417506343467, "gate_mean": 8.265487849712372e-07, "lr": 0.00017636389323648938, "steps_per_second": 4.909555545358945 }, { "step": 2830, "loss": 4.1531291007995605, "lm_loss": 4.1531291007995605, "ppl": 63.63280255311248, "gate_mean": 1.123407855629921e-06, "lr": 0.00017632411014855288, "steps_per_second": 4.910407611481571 }, { "step": 2840, "loss": 4.2061872482299805, "lm_loss": 4.2061872482299805, "ppl": 67.10021500567677, "gate_mean": 8.984934538602829e-07, "lr": 0.00017628411565945187, "steps_per_second": 4.910456350171639 }, { "step": 2850, "loss": 4.265985012054443, "lm_loss": 4.265985012054443, "ppl": 71.23505281259776, "gate_mean": 1.405598595738411e-06, "lr": 0.00017624390987852972, "steps_per_second": 4.911481174687552 }, { "step": 2860, "loss": 4.373809337615967, "lm_loss": 4.373809337615967, "ppl": 79.34530981118398, "gate_mean": 1.4887191355228424e-06, "lr": 0.0001762034929157075, "steps_per_second": 4.912313556195647 }, { "step": 2870, "loss": 4.142016887664795, "lm_loss": 4.142016887664795, "ppl": 62.92961550680081, "gate_mean": 1.1047814041376114e-06, "lr": 0.00017616286488148365, "steps_per_second": 4.913064479336927 }, { "step": 2880, "loss": 4.184587001800537, "lm_loss": 4.184587001800537, "ppl": 65.66637522095999, "gate_mean": 9.522773325443268e-07, "lr": 0.00017612202588693367, "steps_per_second": 4.914005500359746 }, { "step": 2890, "loss": 4.274256229400635, "lm_loss": 4.274256229400635, "ppl": 71.82669685229018, "gate_mean": 1.491047441959381e-06, "lr": 0.00017608097604370984, "steps_per_second": 4.914712707138527 }, { "step": 2900, "loss": 4.173416614532471, "lm_loss": 4.173416614532471, "ppl": 64.93693801207385, "gate_mean": 1.1569354683160782e-06, "lr": 0.00017603971546404086, "steps_per_second": 4.9155369586018605 }, { "step": 2910, "loss": 4.132335186004639, "lm_loss": 4.132335186004639, "ppl": 62.32328961325396, "gate_mean": 1.1078082025051117e-06, "lr": 0.0001759982442607315, "steps_per_second": 4.916440639007143 }, { "step": 2920, "loss": 4.164177417755127, "lm_loss": 4.164177417755127, "ppl": 64.33973594559468, "gate_mean": 9.830109775066376e-07, "lr": 0.0001759565625471625, "steps_per_second": 4.917014302764926 }, { "step": 2930, "loss": 4.151350021362305, "lm_loss": 4.151350021362305, "ppl": 63.51969538570632, "gate_mean": 1.287786290049553e-06, "lr": 0.00017591467043729013, "steps_per_second": 4.917803344443261 }, { "step": 2940, "loss": 4.366632461547852, "lm_loss": 4.366632461547852, "ppl": 78.7778969175932, "gate_mean": 1.8402934074401855e-06, "lr": 0.00017587256804564572, "steps_per_second": 4.918570930667327 }, { "step": 2950, "loss": 4.284486770629883, "lm_loss": 4.284486770629883, "ppl": 72.56529452071153, "gate_mean": 1.078704372048378e-06, "lr": 0.00017583025548733566, "steps_per_second": 4.919164967467688 }, { "step": 2960, "loss": 4.20198392868042, "lm_loss": 4.20198392868042, "ppl": 66.81876329030061, "gate_mean": 1.1301599442958832e-06, "lr": 0.00017578773287804086, "steps_per_second": 4.91995153656228 }, { "step": 2970, "loss": 4.280450344085693, "lm_loss": 4.280450344085693, "ppl": 72.27298038886478, "gate_mean": 1.1676456779241562e-06, "lr": 0.0001757450003340165, "steps_per_second": 4.920045126257289 }, { "step": 2980, "loss": 4.216513633728027, "lm_loss": 4.216513633728027, "ppl": 67.79670762933914, "gate_mean": 9.904615581035614e-07, "lr": 0.0001757020579720917, "steps_per_second": 4.9208679286727754 }, { "step": 2990, "loss": 4.384860992431641, "lm_loss": 4.384860992431641, "ppl": 80.22707026779261, "gate_mean": 9.98144969344139e-07, "lr": 0.00017565890590966928, "steps_per_second": 4.921507212657577 }, { "step": 3000, "loss": 4.175650119781494, "lm_loss": 4.175650119781494, "ppl": 65.08213709507532, "gate_mean": 1.0903459042310715e-06, "lr": 0.0001756155442647253, "steps_per_second": 4.922085012515507 }, { "step": 3010, "loss": 4.236281394958496, "lm_loss": 4.236281394958496, "ppl": 69.15023074340543, "gate_mean": 9.527429938316345e-07, "lr": 0.00017557197315580888, "steps_per_second": 4.922836243066976 }, { "step": 3020, "loss": 4.135134696960449, "lm_loss": 4.135134696960449, "ppl": 62.49800879535068, "gate_mean": 1.1438969522714615e-06, "lr": 0.0001755281927020418, "steps_per_second": 4.923372766314328 }, { "step": 3030, "loss": 4.294905662536621, "lm_loss": 4.294905662536621, "ppl": 73.32529679623119, "gate_mean": 1.362757757306099e-06, "lr": 0.00017548420302311801, "steps_per_second": 4.924120678025066 }, { "step": 3040, "loss": 4.214450836181641, "lm_loss": 4.214450836181641, "ppl": 67.65700089008695, "gate_mean": 9.23406332731247e-07, "lr": 0.00017544000423930372, "steps_per_second": 4.924919416177766 }, { "step": 3050, "loss": 4.225525379180908, "lm_loss": 4.225525379180908, "ppl": 68.41043552735776, "gate_mean": 1.1182855814695358e-06, "lr": 0.0001753955964714367, "steps_per_second": 4.925455075027713 }, { "step": 3060, "loss": 4.109155654907227, "lm_loss": 4.109155654907227, "ppl": 60.89527922663384, "gate_mean": 1.1618249118328094e-06, "lr": 0.00017535097984092607, "steps_per_second": 4.926320451506833 }, { "step": 3070, "loss": 4.193411827087402, "lm_loss": 4.193411827087402, "ppl": 66.24843401565421, "gate_mean": 1.1371448636054993e-06, "lr": 0.00017530615446975206, "steps_per_second": 4.9269222533885975 }, { "step": 3080, "loss": 4.192971229553223, "lm_loss": 4.192971229553223, "ppl": 66.2192515483173, "gate_mean": 1.2097880244255066e-06, "lr": 0.0001752611204804654, "steps_per_second": 4.9272683680123475 }, { "step": 3090, "loss": 4.125191688537598, "lm_loss": 4.125191688537598, "ppl": 61.87966973697081, "gate_mean": 1.000240445137024e-06, "lr": 0.00017521587799618744, "steps_per_second": 4.9279578451870325 }, { "step": 3100, "loss": 4.298957347869873, "lm_loss": 4.298957347869873, "ppl": 73.62299049917314, "gate_mean": 1.6167759895324707e-06, "lr": 0.00017517042714060928, "steps_per_second": 4.928777027442263 }, { "step": 3110, "loss": 4.219235420227051, "lm_loss": 4.219235420227051, "ppl": 67.9814871439646, "gate_mean": 1.325272023677826e-06, "lr": 0.0001751247680379919, "steps_per_second": 4.929494658399665 }, { "step": 3120, "loss": 4.131433963775635, "lm_loss": 4.131433963775635, "ppl": 62.267147781203235, "gate_mean": 1.2773089110851288e-06, "lr": 0.00017507890081316555, "steps_per_second": 4.930085566006946 }, { "step": 3130, "loss": 4.126554012298584, "lm_loss": 4.126554012298584, "ppl": 61.96402732950505, "gate_mean": 1.257285475730896e-06, "lr": 0.00017503282559152937, "steps_per_second": 4.930758691895599 }, { "step": 3140, "loss": 4.159948348999023, "lm_loss": 4.159948348999023, "ppl": 64.06821332712461, "gate_mean": 8.731149137020111e-07, "lr": 0.0001749865424990514, "steps_per_second": 4.931538927609051 }, { "step": 3150, "loss": 4.037411689758301, "lm_loss": 4.037411689758301, "ppl": 56.67944878604264, "gate_mean": 1.0028015822172165e-06, "lr": 0.00017494005166226773, "steps_per_second": 4.93225715445644 }, { "step": 3160, "loss": 4.078077793121338, "lm_loss": 4.078077793121338, "ppl": 59.03188921860271, "gate_mean": 8.351635187864304e-07, "lr": 0.0001748933532082826, "steps_per_second": 4.933135956888405 }, { "step": 3170, "loss": 4.1059651374816895, "lm_loss": 4.1059651374816895, "ppl": 60.701301386516015, "gate_mean": 1.0135117918252945e-06, "lr": 0.00017484644726476777, "steps_per_second": 4.933987213005872 }, { "step": 3180, "loss": 4.186484336853027, "lm_loss": 4.186484336853027, "ppl": 65.79108460676966, "gate_mean": 9.210780262947083e-07, "lr": 0.00017479933395996234, "steps_per_second": 4.934598684643173 }, { "step": 3190, "loss": 4.122785568237305, "lm_loss": 4.122785568237305, "ppl": 61.73095878740435, "gate_mean": 1.3494864106178284e-06, "lr": 0.0001747520134226722, "steps_per_second": 4.935253338672111 }, { "step": 3200, "loss": 4.090444087982178, "lm_loss": 4.090444087982178, "ppl": 59.766427363755284, "gate_mean": 8.624047040939331e-07, "lr": 0.00017470448578227, "steps_per_second": 4.935878243107144 }, { "step": 3210, "loss": 4.05557918548584, "lm_loss": 4.05557918548584, "ppl": 57.71858308259901, "gate_mean": 6.596092134714127e-07, "lr": 0.0001746567511686944, "steps_per_second": 4.9366365928795135 }, { "step": 3220, "loss": 4.001755237579346, "lm_loss": 4.001755237579346, "ppl": 54.69406691167448, "gate_mean": 1.3690441846847534e-06, "lr": 0.00017460880971245006, "steps_per_second": 4.937329211107432 }, { "step": 3230, "loss": 4.086610317230225, "lm_loss": 4.086610317230225, "ppl": 59.53773523927423, "gate_mean": 9.457580745220184e-07, "lr": 0.0001745606615446071, "steps_per_second": 4.938000022526369 }, { "step": 3240, "loss": 3.9899673461914062, "lm_loss": 3.9899673461914062, "ppl": 54.05312429413404, "gate_mean": 1.2796372175216675e-06, "lr": 0.00017451230679680073, "steps_per_second": 4.938792341999311 }, { "step": 3250, "loss": 4.176294326782227, "lm_loss": 4.176294326782227, "ppl": 65.124076970945, "gate_mean": 8.596107363700867e-07, "lr": 0.000174463745601231, "steps_per_second": 4.939448577694594 }, { "step": 3260, "loss": 4.123126029968262, "lm_loss": 4.123126029968262, "ppl": 61.75197939463986, "gate_mean": 1.0165385901927948e-06, "lr": 0.0001744149780906624, "steps_per_second": 4.940173433850938 }, { "step": 3270, "loss": 4.02529239654541, "lm_loss": 4.02529239654541, "ppl": 55.99667962381509, "gate_mean": 1.001870259642601e-06, "lr": 0.00017436600439842343, "steps_per_second": 4.940876691826441 }, { "step": 3280, "loss": 4.188346862792969, "lm_loss": 4.188346862792969, "ppl": 65.91373639407774, "gate_mean": 1.1082738637924194e-06, "lr": 0.00017431682465840622, "steps_per_second": 4.941631558270952 }, { "step": 3290, "loss": 4.112817287445068, "lm_loss": 4.112817287445068, "ppl": 61.118664089505245, "gate_mean": 1.0654330253601074e-06, "lr": 0.0001742674390050664, "steps_per_second": 4.942295922035583 }, { "step": 3300, "loss": 4.152454376220703, "lm_loss": 4.152454376220703, "ppl": 63.58988241847149, "gate_mean": 1.0544899851083755e-06, "lr": 0.00017421784757342242, "steps_per_second": 4.9429655131895 }, { "step": 3310, "loss": 4.1832427978515625, "lm_loss": 4.1832427978515625, "ppl": 65.57816551926986, "gate_mean": 1.0933727025985718e-06, "lr": 0.00017416805049905534, "steps_per_second": 4.9436267317473686 }, { "step": 3320, "loss": 4.205841064453125, "lm_loss": 4.205841064453125, "ppl": 67.07699002010685, "gate_mean": 1.3420358300209045e-06, "lr": 0.00017411804791810852, "steps_per_second": 4.9441531866519846 }, { "step": 3330, "loss": 4.167230129241943, "lm_loss": 4.167230129241943, "ppl": 64.53644669439271, "gate_mean": 1.3513490557670593e-06, "lr": 0.00017406783996728702, "steps_per_second": 4.94477010107377 }, { "step": 3340, "loss": 4.0760698318481445, "lm_loss": 4.0760698318481445, "ppl": 58.913474397142615, "gate_mean": 8.079223334789276e-07, "lr": 0.00017401742678385755, "steps_per_second": 4.945220175847283 }, { "step": 3350, "loss": 4.200141906738281, "lm_loss": 4.200141906738281, "ppl": 66.6957949521316, "gate_mean": 1.1012889444828033e-06, "lr": 0.00017396680850564775, "steps_per_second": 4.9459723166299 }, { "step": 3360, "loss": 4.156731605529785, "lm_loss": 4.156731605529785, "ppl": 63.86245343613556, "gate_mean": 8.526258170604706e-07, "lr": 0.00017391598527104608, "steps_per_second": 4.946577997826781 }, { "step": 3370, "loss": 4.34261417388916, "lm_loss": 4.34261417388916, "ppl": 76.90832851930477, "gate_mean": 1.2244563549757004e-06, "lr": 0.00017386495721900138, "steps_per_second": 4.947050600029767 }, { "step": 3380, "loss": 4.144240856170654, "lm_loss": 4.144240856170654, "ppl": 63.06972473127631, "gate_mean": 1.0970979928970337e-06, "lr": 0.00017381372448902233, "steps_per_second": 4.947104977616697 }, { "step": 3390, "loss": 4.335396766662598, "lm_loss": 4.335396766662598, "ppl": 76.35524809745587, "gate_mean": 9.557697921991348e-07, "lr": 0.0001737622872211773, "steps_per_second": 4.947436387595985 }, { "step": 3400, "loss": 4.047310829162598, "lm_loss": 4.047310829162598, "ppl": 57.2433128307749, "gate_mean": 1.027015969157219e-06, "lr": 0.00017371064555609379, "steps_per_second": 4.947942381118773 }, { "step": 3410, "loss": 4.07283353805542, "lm_loss": 4.07283353805542, "ppl": 58.72312127110356, "gate_mean": 1.5993136912584305e-06, "lr": 0.00017365879963495817, "steps_per_second": 4.948428649061954 }, { "step": 3420, "loss": 4.126690864562988, "lm_loss": 4.126690864562988, "ppl": 61.97250782723113, "gate_mean": 9.499490261077881e-07, "lr": 0.00017360674959951517, "steps_per_second": 4.947936496623056 }, { "step": 3430, "loss": 4.101974964141846, "lm_loss": 4.101974964141846, "ppl": 60.45957525732762, "gate_mean": 8.062925189733505e-07, "lr": 0.0001735544955920676, "steps_per_second": 4.948577616690028 }, { "step": 3440, "loss": 4.066504001617432, "lm_loss": 4.066504001617432, "ppl": 58.35260497047823, "gate_mean": 1.0477378964424133e-06, "lr": 0.00017350203775547596, "steps_per_second": 4.948776793622482 }, { "step": 3450, "loss": 4.008937358856201, "lm_loss": 4.008937358856201, "ppl": 55.08830035491952, "gate_mean": 1.0121148079633713e-06, "lr": 0.00017344937623315794, "steps_per_second": 4.949460662661733 }, { "step": 3460, "loss": 4.121732234954834, "lm_loss": 4.121732234954834, "ppl": 61.66596974752294, "gate_mean": 8.130446076393127e-07, "lr": 0.00017339651116908816, "steps_per_second": 4.950041064321908 }, { "step": 3470, "loss": 4.168971538543701, "lm_loss": 4.168971538543701, "ppl": 64.64892897338692, "gate_mean": 1.1879019439220428e-06, "lr": 0.00017334344270779762, "steps_per_second": 4.950133858962267 }, { "step": 3480, "loss": 4.012760639190674, "lm_loss": 4.012760639190674, "ppl": 55.29932150979219, "gate_mean": 1.0987278074026108e-06, "lr": 0.00017329017099437354, "steps_per_second": 4.95057155200192 }, { "step": 3490, "loss": 4.088432788848877, "lm_loss": 4.088432788848877, "ppl": 59.64634000647957, "gate_mean": 7.436610758304596e-07, "lr": 0.00017323669617445874, "steps_per_second": 4.950785233420792 }, { "step": 3500, "loss": 4.023845195770264, "lm_loss": 4.023845195770264, "ppl": 55.915699796825685, "gate_mean": 9.695068001747131e-07, "lr": 0.00017318301839425133, "steps_per_second": 4.951420043791511 }, { "step": 3510, "loss": 4.104953289031982, "lm_loss": 4.104953289031982, "ppl": 60.63991193246256, "gate_mean": 9.282957762479782e-07, "lr": 0.00017312913780050427, "steps_per_second": 4.950265782490799 }, { "step": 3520, "loss": 4.035820484161377, "lm_loss": 4.035820484161377, "ppl": 56.58933184620744, "gate_mean": 9.667128324508667e-07, "lr": 0.00017307505454052508, "steps_per_second": 4.951040617202593 }, { "step": 3530, "loss": 3.970242977142334, "lm_loss": 3.970242977142334, "ppl": 52.99740643374167, "gate_mean": 9.34116542339325e-07, "lr": 0.00017302076876217536, "steps_per_second": 4.951457772797803 }, { "step": 3540, "loss": 4.023186683654785, "lm_loss": 4.023186683654785, "ppl": 55.87889075199516, "gate_mean": 1.0468065738677979e-06, "lr": 0.00017296628061387029, "steps_per_second": 4.951974916830992 }, { "step": 3550, "loss": 3.963839530944824, "lm_loss": 3.963839530944824, "ppl": 52.65912463360064, "gate_mean": 1.2544915080070496e-06, "lr": 0.00017291159024457838, "steps_per_second": 4.952121117135869 }, { "step": 3560, "loss": 4.167809963226318, "lm_loss": 4.167809963226318, "ppl": 64.57387797033113, "gate_mean": 1.2563541531562805e-06, "lr": 0.0001728566978038211, "steps_per_second": 4.952560439269556 }, { "step": 3570, "loss": 3.983485221862793, "lm_loss": 3.983485221862793, "ppl": 53.70387837273323, "gate_mean": 1.0584481060504913e-06, "lr": 0.00017280160344167224, "steps_per_second": 4.952541573332746 }, { "step": 3580, "loss": 4.011145114898682, "lm_loss": 4.011145114898682, "ppl": 55.210056237085276, "gate_mean": 1.248437911272049e-06, "lr": 0.00017274630730875767, "steps_per_second": 4.953255267655375 }, { "step": 3590, "loss": 4.067841529846191, "lm_loss": 4.067841529846191, "ppl": 58.430705445999905, "gate_mean": 1.1632218956947327e-06, "lr": 0.0001726908095562549, "steps_per_second": 4.953644639153577 }, { "step": 3600, "loss": 4.046589374542236, "lm_loss": 4.046589374542236, "ppl": 57.20202927214606, "gate_mean": 1.1243391782045364e-06, "lr": 0.00017263511033589266, "steps_per_second": 4.954438987439343 }, { "step": 3610, "loss": 3.917083740234375, "lm_loss": 3.917083740234375, "ppl": 50.2536780976877, "gate_mean": 1.0039657354354858e-06, "lr": 0.00017257920979995054, "steps_per_second": 4.943282932299448 }, { "step": 3620, "loss": 4.1021246910095215, "lm_loss": 4.1021246910095215, "ppl": 60.468628357881194, "gate_mean": 8.707866072654724e-07, "lr": 0.00017252310810125844, "steps_per_second": 4.930481965860148 }, { "step": 3630, "loss": 4.027927398681641, "lm_loss": 4.027927398681641, "ppl": 56.14442556419368, "gate_mean": 8.707866072654724e-07, "lr": 0.00017246680539319628, "steps_per_second": 4.904301962138823 }, { "step": 3640, "loss": 3.9672305583953857, "lm_loss": 3.9672305583953857, "ppl": 52.8379962786803, "gate_mean": 1.3704411685466766e-06, "lr": 0.00017241030182969347, "steps_per_second": 4.9046564445036624 }, { "step": 3650, "loss": 3.9281704425811768, "lm_loss": 3.9281704425811768, "ppl": 50.81392557856421, "gate_mean": 1.1266674846410751e-06, "lr": 0.00017235359756522864, "steps_per_second": 4.895122513563373 }, { "step": 3660, "loss": 4.112855911254883, "lm_loss": 4.112855911254883, "ppl": 61.12102477075211, "gate_mean": 9.404029697179794e-07, "lr": 0.00017229669275482907, "steps_per_second": 4.89503950114585 }, { "step": 3670, "loss": 4.004656791687012, "lm_loss": 4.004656791687012, "ppl": 54.852995164147295, "gate_mean": 9.438954293727875e-07, "lr": 0.0001722395875540703, "steps_per_second": 4.8847948773329515 }, { "step": 3680, "loss": 4.002162933349609, "lm_loss": 4.002162933349609, "ppl": 54.71636999754085, "gate_mean": 8.880160748958588e-07, "lr": 0.00017218228211907582, "steps_per_second": 4.885073799966694 }, { "step": 3690, "loss": 3.9389753341674805, "lm_loss": 3.9389753341674805, "ppl": 51.365941400625886, "gate_mean": 9.688083082437515e-07, "lr": 0.00017212477660651652, "steps_per_second": 4.885551382572565 }, { "step": 3700, "loss": 4.003444194793701, "lm_loss": 4.003444194793701, "ppl": 54.7865209040084, "gate_mean": 1.0279472917318344e-06, "lr": 0.0001720670711736102, "steps_per_second": 4.885928151174489 }, { "step": 3710, "loss": 3.858193874359131, "lm_loss": 3.858193874359131, "ppl": 47.37970035222547, "gate_mean": 9.324867278337479e-07, "lr": 0.00017200916597812137, "steps_per_second": 4.886540473565213 }, { "step": 3720, "loss": 3.949772834777832, "lm_loss": 3.949772834777832, "ppl": 51.92357026562188, "gate_mean": 1.2675300240516663e-06, "lr": 0.0001719510611783606, "steps_per_second": 4.887010810564461 }, { "step": 3730, "loss": 3.9947593212127686, "lm_loss": 3.9947593212127686, "ppl": 54.31276711969688, "gate_mean": 1.0575167834758759e-06, "lr": 0.0001718927569331842, "steps_per_second": 4.887521254339038 }, { "step": 3740, "loss": 4.02522611618042, "lm_loss": 4.02522611618042, "ppl": 55.992968266447804, "gate_mean": 9.296927601099014e-07, "lr": 0.00017183425340199375, "steps_per_second": 4.888171164790843 }, { "step": 3750, "loss": 3.998255968093872, "lm_loss": 3.998255968093872, "ppl": 54.50301210337804, "gate_mean": 9.932555258274078e-07, "lr": 0.00017177555074473567, "steps_per_second": 4.888750153661606 }, { "step": 3760, "loss": 4.082256317138672, "lm_loss": 4.082256317138672, "ppl": 59.27907145429414, "gate_mean": 1.207459717988968e-06, "lr": 0.00017171664912190074, "steps_per_second": 4.87957187510015 }, { "step": 3770, "loss": 3.9432811737060547, "lm_loss": 3.9432811737060547, "ppl": 51.58759175502025, "gate_mean": 1.0624062269926071e-06, "lr": 0.00017165754869452382, "steps_per_second": 4.879940753910524 }, { "step": 3780, "loss": 4.069271087646484, "lm_loss": 4.069271087646484, "ppl": 58.514295250739956, "gate_mean": 8.798670023679733e-07, "lr": 0.00017159824962418321, "steps_per_second": 4.879213049221145 }, { "step": 3790, "loss": 4.149206161499023, "lm_loss": 4.149206161499023, "ppl": 63.38366392853322, "gate_mean": 1.0437797755002975e-06, "lr": 0.00017153875207300025, "steps_per_second": 4.879803454619101 }, { "step": 3800, "loss": 4.037104606628418, "lm_loss": 4.037104606628418, "ppl": 56.662046155673245, "gate_mean": 9.166542440652847e-07, "lr": 0.00017147905620363904, "steps_per_second": 4.880259902140134 }, { "step": 3810, "loss": 4.068886756896973, "lm_loss": 4.068886756896973, "ppl": 58.49181072881244, "gate_mean": 1.046573743224144e-06, "lr": 0.00017141916217930576, "steps_per_second": 4.880645568301059 }, { "step": 3820, "loss": 4.024337291717529, "lm_loss": 4.024337291717529, "ppl": 55.94322245742369, "gate_mean": 1.1236406862735748e-06, "lr": 0.0001713590701637484, "steps_per_second": 4.881283239585063 }, { "step": 3830, "loss": 3.877730369567871, "lm_loss": 3.877730369567871, "ppl": 48.31443462681146, "gate_mean": 9.860377758741379e-07, "lr": 0.00017129878032125622, "steps_per_second": 4.881885707807878 }, { "step": 3840, "loss": 4.002173900604248, "lm_loss": 4.002173900604248, "ppl": 54.71697008919419, "gate_mean": 1.0647345334291458e-06, "lr": 0.0001712382928166594, "steps_per_second": 4.882233414669145 }, { "step": 3850, "loss": 4.05836296081543, "lm_loss": 4.05836296081543, "ppl": 57.87948250025172, "gate_mean": 9.532086551189423e-07, "lr": 0.0001711776078153284, "steps_per_second": 4.8830614852756975 }, { "step": 3860, "loss": 4.0644450187683105, "lm_loss": 4.0644450187683105, "ppl": 58.23258156311348, "gate_mean": 1.0866206139326096e-06, "lr": 0.00017111672548317375, "steps_per_second": 4.883775286731948 }, { "step": 3870, "loss": 3.857032299041748, "lm_loss": 3.857032299041748, "ppl": 47.324697213080185, "gate_mean": 8.374918252229691e-07, "lr": 0.0001710556459866454, "steps_per_second": 4.884152421837107 }, { "step": 3880, "loss": 3.939443588256836, "lm_loss": 3.939443588256836, "ppl": 51.38999934491614, "gate_mean": 9.175855666399002e-07, "lr": 0.00017099436949273233, "steps_per_second": 4.884969166575539 }, { "step": 3890, "loss": 3.956021308898926, "lm_loss": 3.956021308898926, "ppl": 52.24902910234239, "gate_mean": 9.154900908470154e-07, "lr": 0.0001709328961689622, "steps_per_second": 4.88558021231377 }, { "step": 3900, "loss": 3.79995059967041, "lm_loss": 3.79995059967041, "ppl": 44.69897629459712, "gate_mean": 1.0505318641662598e-06, "lr": 0.0001708712261834007, "steps_per_second": 4.886179739364542 }, { "step": 3910, "loss": 3.929058074951172, "lm_loss": 3.929058074951172, "ppl": 50.85904968760145, "gate_mean": 1.1334195733070374e-06, "lr": 0.00017080935970465117, "steps_per_second": 4.886648036976197 }, { "step": 3920, "loss": 3.9861090183258057, "lm_loss": 3.9861090183258057, "ppl": 53.84497143765602, "gate_mean": 8.831266313791275e-07, "lr": 0.00017074729690185424, "steps_per_second": 4.887096142259921 }, { "step": 3930, "loss": 3.943912982940674, "lm_loss": 3.943912982940674, "ppl": 51.62019557049411, "gate_mean": 1.1241063475608826e-06, "lr": 0.00017068503794468728, "steps_per_second": 4.887615259045542 }, { "step": 3940, "loss": 3.898549795150757, "lm_loss": 3.898549795150757, "ppl": 49.33085735816945, "gate_mean": 1.048436388373375e-06, "lr": 0.00017062258300336387, "steps_per_second": 4.8879615657016675 }, { "step": 3950, "loss": 4.037543773651123, "lm_loss": 4.037543773651123, "ppl": 56.6869357227223, "gate_mean": 1.1695083230733871e-06, "lr": 0.00017055993224863338, "steps_per_second": 4.888485950001366 }, { "step": 3960, "loss": 4.023759841918945, "lm_loss": 4.023759841918945, "ppl": 55.91092738017383, "gate_mean": 1.1259689927101135e-06, "lr": 0.00017049708585178069, "steps_per_second": 4.889025085355368 }, { "step": 3970, "loss": 3.8465588092803955, "lm_loss": 3.8465588092803955, "ppl": 46.83162906069214, "gate_mean": 1.0470394045114517e-06, "lr": 0.00017043404398462534, "steps_per_second": 4.8898749195885935 }, { "step": 3980, "loss": 3.977687120437622, "lm_loss": 3.977687120437622, "ppl": 53.393398804823185, "gate_mean": 9.98377799987793e-07, "lr": 0.00017037080681952147, "steps_per_second": 4.890371832083627 }, { "step": 3990, "loss": 3.9411110877990723, "lm_loss": 3.9411110877990723, "ppl": 51.47576363138083, "gate_mean": 1.1010561138391495e-06, "lr": 0.00017030737452935704, "steps_per_second": 4.89110978028138 }, { "step": 4000, "loss": 3.869615316390991, "lm_loss": 3.869615316390991, "ppl": 47.92394697755497, "gate_mean": 1.032138243317604e-06, "lr": 0.0001702437472875535, "steps_per_second": 4.891720066610229 }, { "step": 4010, "loss": 4.063568592071533, "lm_loss": 4.063568592071533, "ppl": 58.1815673323916, "gate_mean": 1.2293457984924316e-06, "lr": 0.00017017992526806533, "steps_per_second": 4.850686706469467 }, { "step": 4020, "loss": 3.9166836738586426, "lm_loss": 3.9166836738586426, "ppl": 50.233577311916285, "gate_mean": 1.0263174772262573e-06, "lr": 0.00017011590864537946, "steps_per_second": 4.85093823230489 }, { "step": 4030, "loss": 4.082901477813721, "lm_loss": 4.082901477813721, "ppl": 59.317328319635415, "gate_mean": 9.224750101566315e-07, "lr": 0.0001700516975945149, "steps_per_second": 4.851100466333295 }, { "step": 4040, "loss": 3.9822869300842285, "lm_loss": 3.9822869300842285, "ppl": 53.63956399819106, "gate_mean": 9.57166776061058e-07, "lr": 0.00016998729229102222, "steps_per_second": 4.85153269297033 }, { "step": 4050, "loss": 3.9699416160583496, "lm_loss": 3.9699416160583496, "ppl": 52.98143748422127, "gate_mean": 1.0719522833824158e-06, "lr": 0.0001699226929109831, "steps_per_second": 4.851543005301255 }, { "step": 4060, "loss": 3.875509262084961, "lm_loss": 3.875509262084961, "ppl": 48.207242161590464, "gate_mean": 7.986091077327728e-07, "lr": 0.0001698578996310097, "steps_per_second": 4.851590826100895 }, { "step": 4070, "loss": 3.9253695011138916, "lm_loss": 3.9253695011138916, "ppl": 50.6717978858889, "gate_mean": 9.490177035331726e-07, "lr": 0.0001697929126282444, "steps_per_second": 4.852283644552969 }, { "step": 4080, "loss": 3.9765396118164062, "lm_loss": 3.9765396118164062, "ppl": 53.332164559510154, "gate_mean": 6.533227860927582e-07, "lr": 0.00016972773208035922, "steps_per_second": 4.852906330535051 }, { "step": 4090, "loss": 3.808655261993408, "lm_loss": 3.808655261993408, "ppl": 45.0897641600959, "gate_mean": 9.059440344572067e-07, "lr": 0.00016966235816555527, "steps_per_second": 4.853405391167718 }, { "step": 4100, "loss": 3.9703102111816406, "lm_loss": 3.9703102111816406, "ppl": 53.00096978323683, "gate_mean": 7.397029548883438e-07, "lr": 0.00016959679106256232, "steps_per_second": 4.853567769208118 }, { "step": 4110, "loss": 3.9643983840942383, "lm_loss": 3.9643983840942383, "ppl": 52.68856157594535, "gate_mean": 7.967464625835419e-07, "lr": 0.0001695310309506383, "steps_per_second": 4.85452269491947 }, { "step": 4120, "loss": 3.8977370262145996, "lm_loss": 3.8977370262145996, "ppl": 49.29077905911926, "gate_mean": 8.505303412675858e-07, "lr": 0.00016946507800956893, "steps_per_second": 4.8553051500251225 }, { "step": 4130, "loss": 3.967458963394165, "lm_loss": 3.967458963394165, "ppl": 52.85006611950935, "gate_mean": 7.336493581533432e-07, "lr": 0.00016939893241966697, "steps_per_second": 4.856005310876355 }, { "step": 4140, "loss": 3.8857483863830566, "lm_loss": 3.8857483863830566, "ppl": 48.70337776867616, "gate_mean": 8.591450750827789e-07, "lr": 0.00016933259436177197, "steps_per_second": 4.856732057630223 }, { "step": 4150, "loss": 3.9896891117095947, "lm_loss": 3.9896891117095947, "ppl": 54.03808694315755, "gate_mean": 1.146690919995308e-06, "lr": 0.0001692660640172496, "steps_per_second": 4.857263728669426 }, { "step": 4160, "loss": 3.884798526763916, "lm_loss": 3.884798526763916, "ppl": 48.6571383607677, "gate_mean": 1.12154521048069e-06, "lr": 0.00016919934156799132, "steps_per_second": 4.857800152928385 }, { "step": 4170, "loss": 3.9552979469299316, "lm_loss": 3.9552979469299316, "ppl": 52.211247808195985, "gate_mean": 8.868519216775894e-07, "lr": 0.00016913242719641375, "steps_per_second": 4.8581230840928615 }, { "step": 4180, "loss": 4.030788898468018, "lm_loss": 4.030788898468018, "ppl": 56.305312905717365, "gate_mean": 8.228234946727753e-07, "lr": 0.00016906532108545822, "steps_per_second": 4.858988865770863 }, { "step": 4190, "loss": 3.9020602703094482, "lm_loss": 3.9020602703094482, "ppl": 49.50433642630707, "gate_mean": 1.1236406862735748e-06, "lr": 0.00016899802341859027, "steps_per_second": 4.859751329219117 }, { "step": 4200, "loss": 3.9793038368225098, "lm_loss": 3.9793038368225098, "ppl": 53.479790604217214, "gate_mean": 1.225154846906662e-06, "lr": 0.00016893053437979917, "steps_per_second": 4.860301965784842 }, { "step": 4210, "loss": 3.969107151031494, "lm_loss": 3.969107151031494, "ppl": 52.93724476877029, "gate_mean": 8.570495992898941e-07, "lr": 0.00016886285415359736, "steps_per_second": 4.860975930759097 }, { "step": 4220, "loss": 3.888859987258911, "lm_loss": 3.888859987258911, "ppl": 48.85515926084713, "gate_mean": 9.797513484954834e-07, "lr": 0.00016879498292502005, "steps_per_second": 4.861677708562449 }, { "step": 4230, "loss": 3.9740898609161377, "lm_loss": 3.9740898609161377, "ppl": 53.20167394138106, "gate_mean": 1.0337680578231812e-06, "lr": 0.00016872692087962446, "steps_per_second": 4.862460249546664 }, { "step": 4240, "loss": 3.8785455226898193, "lm_loss": 3.8785455226898193, "ppl": 48.35383434525274, "gate_mean": 8.686911314725876e-07, "lr": 0.00016865866820348974, "steps_per_second": 4.862990076724695 }, { "step": 4250, "loss": 3.9026989936828613, "lm_loss": 3.9026989936828613, "ppl": 49.53596610329953, "gate_mean": 1.0980293154716492e-06, "lr": 0.0001685902250832161, "steps_per_second": 4.863754563260327 }, { "step": 4260, "loss": 3.8340752124786377, "lm_loss": 3.8340752124786377, "ppl": 46.25063587343056, "gate_mean": 8.186325430870056e-07, "lr": 0.0001685215917059243, "steps_per_second": 4.864051163599355 }, { "step": 4270, "loss": 3.78233003616333, "lm_loss": 3.78233003616333, "ppl": 43.918253729930434, "gate_mean": 9.315554052591324e-07, "lr": 0.0001684527682592555, "steps_per_second": 4.864883187700822 }, { "step": 4280, "loss": 3.721837043762207, "lm_loss": 3.721837043762207, "ppl": 41.34026827715539, "gate_mean": 1.0370276868343353e-06, "lr": 0.00016838375493137032, "steps_per_second": 4.865498483599293 }, { "step": 4290, "loss": 3.7959351539611816, "lm_loss": 3.7959351539611816, "ppl": 44.51984985895002, "gate_mean": 9.192153811454773e-07, "lr": 0.00016831455191094859, "steps_per_second": 4.8661526769682855 }, { "step": 4300, "loss": 3.823711395263672, "lm_loss": 3.823711395263672, "ppl": 45.773778039094665, "gate_mean": 1.0861549526453018e-06, "lr": 0.00016824515938718867, "steps_per_second": 4.8667869230692435 }, { "step": 4310, "loss": 3.854065179824829, "lm_loss": 3.854065179824829, "ppl": 47.18448730716245, "gate_mean": 9.371433407068253e-07, "lr": 0.0001681755775498071, "steps_per_second": 4.867392893234794 }, { "step": 4320, "loss": 3.7624711990356445, "lm_loss": 3.7624711990356445, "ppl": 43.05469133496843, "gate_mean": 1.0228250175714493e-06, "lr": 0.000168105806589038, "steps_per_second": 4.868098253418502 }, { "step": 4330, "loss": 3.7915115356445312, "lm_loss": 3.7915115356445312, "ppl": 44.32334598516587, "gate_mean": 1.0235235095024109e-06, "lr": 0.00016803584669563247, "steps_per_second": 4.868902981731021 }, { "step": 4340, "loss": 3.7774994373321533, "lm_loss": 3.7774994373321533, "ppl": 43.706613850019245, "gate_mean": 1.1760275810956955e-06, "lr": 0.0001679656980608582, "steps_per_second": 4.869678082381608 }, { "step": 4350, "loss": 3.8315622806549072, "lm_loss": 3.8315622806549072, "ppl": 46.1345570887972, "gate_mean": 8.360948413610458e-07, "lr": 0.00016789536087649887, "steps_per_second": 4.870353643262793 }, { "step": 4360, "loss": 4.023744106292725, "lm_loss": 4.023744106292725, "ppl": 55.91004759364094, "gate_mean": 1.1008232831954956e-06, "lr": 0.00016782483533485368, "steps_per_second": 4.870808241977131 }, { "step": 4370, "loss": 3.8727402687072754, "lm_loss": 3.8727402687072754, "ppl": 48.07394126710732, "gate_mean": 1.25030055642128e-06, "lr": 0.00016775412162873675, "steps_per_second": 4.871385035070689 }, { "step": 4380, "loss": 3.8151068687438965, "lm_loss": 3.8151068687438965, "ppl": 45.38160599903557, "gate_mean": 9.145587682723999e-07, "lr": 0.0001676832199514767, "steps_per_second": 4.871892558791145 }, { "step": 4390, "loss": 3.9215786457061768, "lm_loss": 3.9215786457061768, "ppl": 50.48007205889953, "gate_mean": 9.955838322639465e-07, "lr": 0.0001676121304969159, "steps_per_second": 4.872571364850964 }, { "step": 4400, "loss": 3.8078088760375977, "lm_loss": 3.8078088760375977, "ppl": 45.05161696286327, "gate_mean": 1.1015217751264572e-06, "lr": 0.0001675408534594103, "steps_per_second": 4.873212020616951 }, { "step": 4410, "loss": 3.935939073562622, "lm_loss": 3.935939073562622, "ppl": 51.210217545056906, "gate_mean": 1.7578713595867157e-06, "lr": 0.00016746938903382857, "steps_per_second": 4.873308121612406 }, { "step": 4420, "loss": 3.9951045513153076, "lm_loss": 3.9951045513153076, "ppl": 54.33152075883291, "gate_mean": 1.0307412594556808e-06, "lr": 0.0001673977374155517, "steps_per_second": 4.873154843192547 }, { "step": 4430, "loss": 3.876889228820801, "lm_loss": 3.876889228820801, "ppl": 48.27381247404423, "gate_mean": 9.648501873016357e-07, "lr": 0.00016732589880047254, "steps_per_second": 4.873617202018576 }, { "step": 4440, "loss": 3.994638681411743, "lm_loss": 3.994638681411743, "ppl": 54.306215233495486, "gate_mean": 8.319038897752762e-07, "lr": 0.00016725387338499502, "steps_per_second": 4.874282512027263 }, { "step": 4450, "loss": 3.9191055297851562, "lm_loss": 3.9191055297851562, "ppl": 50.35538323750396, "gate_mean": 1.1187512427568436e-06, "lr": 0.00016718166136603396, "steps_per_second": 4.874757776674216 }, { "step": 4460, "loss": 3.921060562133789, "lm_loss": 3.921060562133789, "ppl": 50.45392593635597, "gate_mean": 1.1273659765720367e-06, "lr": 0.00016710926294101423, "steps_per_second": 4.87515265552205 }, { "step": 4470, "loss": 3.934021472930908, "lm_loss": 3.934021472930908, "ppl": 51.112110894293124, "gate_mean": 1.2600794434547424e-06, "lr": 0.0001670366783078704, "steps_per_second": 4.875720672072406 }, { "step": 4480, "loss": 3.8877129554748535, "lm_loss": 3.8877129554748535, "ppl": 48.799152966999856, "gate_mean": 9.28761437535286e-07, "lr": 0.000166963907665046, "steps_per_second": 4.876194916437878 }, { "step": 4490, "loss": 3.9010024070739746, "lm_loss": 3.9010024070739746, "ppl": 49.45199529856367, "gate_mean": 6.913905963301659e-07, "lr": 0.00016689095121149328, "steps_per_second": 4.876068508775316 }, { "step": 4500, "loss": 3.7511887550354004, "lm_loss": 3.7511887550354004, "ppl": 42.57165920639934, "gate_mean": 8.221250027418137e-07, "lr": 0.0001668178091466723, "steps_per_second": 4.875961679922072 }, { "step": 4510, "loss": 3.916264295578003, "lm_loss": 3.916264295578003, "ppl": 50.2125148575095, "gate_mean": 1.242849975824356e-06, "lr": 0.00016674448167055078, "steps_per_second": 4.876333625381714 }, { "step": 4520, "loss": 3.879234790802002, "lm_loss": 3.879234790802002, "ppl": 48.387174590232576, "gate_mean": 9.639188647270203e-07, "lr": 0.00016667096898360318, "steps_per_second": 4.876982891178548 }, { "step": 4530, "loss": 3.945826292037964, "lm_loss": 3.945826292037964, "ppl": 51.71905550493051, "gate_mean": 8.605420589447021e-07, "lr": 0.00016659727128681036, "steps_per_second": 4.87758501725263 }, { "step": 4540, "loss": 4.271183490753174, "lm_loss": 4.271183490753174, "ppl": 71.60633092179445, "gate_mean": 9.711366146802902e-07, "lr": 0.00016652338878165907, "steps_per_second": 4.878105979574264 }, { "step": 4550, "loss": 3.855368137359619, "lm_loss": 3.855368137359619, "ppl": 47.24600676033859, "gate_mean": 8.908100426197052e-07, "lr": 0.00016644932167014123, "steps_per_second": 4.878470829933307 }, { "step": 4560, "loss": 3.8869822025299072, "lm_loss": 3.8869822025299072, "ppl": 48.76350586845575, "gate_mean": 1.0570511221885681e-06, "lr": 0.00016637507015475347, "steps_per_second": 4.8791278885408005 }, { "step": 4570, "loss": 4.026947975158691, "lm_loss": 4.026947975158691, "ppl": 56.08946331316804, "gate_mean": 8.444767445325851e-07, "lr": 0.00016630063443849662, "steps_per_second": 4.879632595254626 }, { "step": 4580, "loss": 3.969287633895874, "lm_loss": 3.969287633895874, "ppl": 52.94679989658102, "gate_mean": 9.67876985669136e-07, "lr": 0.0001662260147248751, "steps_per_second": 4.879507443475429 }, { "step": 4590, "loss": 3.8755977153778076, "lm_loss": 3.8755977153778076, "ppl": 48.21150643949065, "gate_mean": 8.882489055395126e-07, "lr": 0.0001661512112178963, "steps_per_second": 4.88001062810017 }, { "step": 4600, "loss": 3.8459272384643555, "lm_loss": 3.8459272384643555, "ppl": 46.802060908683586, "gate_mean": 9.180512279272079e-07, "lr": 0.00016607622412207024, "steps_per_second": 4.8805565023583055 }, { "step": 4610, "loss": 3.8167717456817627, "lm_loss": 3.8167717456817627, "ppl": 45.45722371789828, "gate_mean": 1.2798700481653214e-06, "lr": 0.0001660010536424087, "steps_per_second": 4.881108055143306 }, { "step": 4620, "loss": 4.010291576385498, "lm_loss": 4.010291576385498, "ppl": 55.162952433087014, "gate_mean": 1.0342337191104889e-06, "lr": 0.00016592569998442492, "steps_per_second": 4.881740318567205 }, { "step": 4630, "loss": 3.8214523792266846, "lm_loss": 3.8214523792266846, "ppl": 45.67049104783979, "gate_mean": 1.0668300092220306e-06, "lr": 0.00016585016335413297, "steps_per_second": 4.8824252994595 }, { "step": 4640, "loss": 3.775559186935425, "lm_loss": 3.775559186935425, "ppl": 43.621894290314486, "gate_mean": 1.1243391782045364e-06, "lr": 0.00016577444395804707, "steps_per_second": 4.882945295091029 }, { "step": 4650, "loss": 3.8330414295196533, "lm_loss": 3.8330414295196533, "ppl": 46.20284745990217, "gate_mean": 1.0302755981683731e-06, "lr": 0.00016569854200318114, "steps_per_second": 4.883281845940994 }, { "step": 4660, "loss": 3.784943103790283, "lm_loss": 3.784943103790283, "ppl": 44.033165167277105, "gate_mean": 1.0731164366006851e-06, "lr": 0.0001656224576970483, "steps_per_second": 4.883894680924248 }, { "step": 4670, "loss": 3.7809572219848633, "lm_loss": 3.7809572219848633, "ppl": 43.85800349416783, "gate_mean": 9.30391252040863e-07, "lr": 0.00016554619124766002, "steps_per_second": 4.884244498176915 }, { "step": 4680, "loss": 3.7172319889068604, "lm_loss": 3.7172319889068604, "ppl": 41.150331743739834, "gate_mean": 9.017530828714371e-07, "lr": 0.00016546974286352597, "steps_per_second": 4.8845140470984285 }, { "step": 4690, "loss": 3.7917935848236084, "lm_loss": 3.7917935848236084, "ppl": 44.33584911168034, "gate_mean": 8.989591151475906e-07, "lr": 0.00016539311275365307, "steps_per_second": 4.8845849545284885 }, { "step": 4700, "loss": 3.8100109100341797, "lm_loss": 3.8100109100341797, "ppl": 45.15093146183709, "gate_mean": 8.826609700918198e-07, "lr": 0.00016531630112754511, "steps_per_second": 4.88513695528181 }, { "step": 4710, "loss": 3.766402244567871, "lm_loss": 3.766402244567871, "ppl": 43.22427438795133, "gate_mean": 1.046573743224144e-06, "lr": 0.00016523930819520216, "steps_per_second": 4.885661681135932 }, { "step": 4720, "loss": 3.8533825874328613, "lm_loss": 3.8533825874328613, "ppl": 47.152290524997085, "gate_mean": 1.0381918400526047e-06, "lr": 0.0001651621341671199, "steps_per_second": 4.886200583529508 }, { "step": 4730, "loss": 3.9576926231384277, "lm_loss": 3.9576926231384277, "ppl": 52.33642666273098, "gate_mean": 8.931383490562439e-07, "lr": 0.0001650847792542893, "steps_per_second": 4.886545534757753 }, { "step": 4740, "loss": 3.8129961490631104, "lm_loss": 3.8129961490631104, "ppl": 45.28591916966952, "gate_mean": 9.131617844104767e-07, "lr": 0.00016500724366819562, "steps_per_second": 4.887293009559862 }, { "step": 4750, "loss": 3.6951143741607666, "lm_loss": 3.6951143741607666, "ppl": 40.2501759048062, "gate_mean": 1.0353978723287582e-06, "lr": 0.0001649295276208182, "steps_per_second": 4.887894697575828 }, { "step": 4760, "loss": 3.8406500816345215, "lm_loss": 3.8406500816345215, "ppl": 46.555729629365956, "gate_mean": 9.611248970031738e-07, "lr": 0.00016485163132462982, "steps_per_second": 4.888312797347341 }, { "step": 4770, "loss": 3.940300464630127, "lm_loss": 3.940300464630127, "ppl": 51.4340530927906, "gate_mean": 1.2295786291360855e-06, "lr": 0.0001647735549925959, "steps_per_second": 4.8886742879946885 }, { "step": 4780, "loss": 3.8058037757873535, "lm_loss": 3.8058037757873535, "ppl": 44.96137445728684, "gate_mean": 1.0463409125804901e-06, "lr": 0.00016469529883817423, "steps_per_second": 4.888943492449573 }, { "step": 4790, "loss": 3.9531235694885254, "lm_loss": 3.9531235694885254, "ppl": 52.09784418459488, "gate_mean": 1.0328367352485657e-06, "lr": 0.0001646168630753141, "steps_per_second": 4.889420651229884 }, { "step": 4800, "loss": 3.691230058670044, "lm_loss": 3.691230058670044, "ppl": 40.09413477571675, "gate_mean": 1.133885234594345e-06, "lr": 0.00016453824791845588, "steps_per_second": 4.889564267618029 }, { "step": 4810, "loss": 3.953394651412964, "lm_loss": 3.953394651412964, "ppl": 52.11196888284423, "gate_mean": 9.35746356844902e-07, "lr": 0.00016445945358253041, "steps_per_second": 4.889572315468664 }, { "step": 4820, "loss": 3.918822765350342, "lm_loss": 3.918822765350342, "ppl": 50.341146538933806, "gate_mean": 1.11432746052742e-06, "lr": 0.00016438048028295842, "steps_per_second": 4.890128690028202 }, { "step": 4830, "loss": 3.7559359073638916, "lm_loss": 3.7559359073638916, "ppl": 42.77423380333775, "gate_mean": 9.560026228427887e-07, "lr": 0.0001643013282356499, "steps_per_second": 4.890588900431178 }, { "step": 4840, "loss": 3.8140645027160645, "lm_loss": 3.8140645027160645, "ppl": 45.334326400258384, "gate_mean": 9.334180504083633e-07, "lr": 0.00016422199765700352, "steps_per_second": 4.891205574521638 }, { "step": 4850, "loss": 3.804772138595581, "lm_loss": 3.804772138595581, "ppl": 44.91501454861813, "gate_mean": 9.355135262012482e-07, "lr": 0.000164142488763906, "steps_per_second": 4.89174907745281 }, { "step": 4860, "loss": 3.9356415271759033, "lm_loss": 3.9356415271759033, "ppl": 51.19498239655739, "gate_mean": 1.1010561138391495e-06, "lr": 0.00016406280177373166, "steps_per_second": 4.892218070694928 }, { "step": 4870, "loss": 3.797189712524414, "lm_loss": 3.797189712524414, "ppl": 44.575737667758716, "gate_mean": 1.0512303560972214e-06, "lr": 0.00016398293690434172, "steps_per_second": 4.892873186846449 }, { "step": 4880, "loss": 3.855180501937866, "lm_loss": 3.855180501937866, "ppl": 47.23714256757825, "gate_mean": 1.064268872141838e-06, "lr": 0.0001639028943740836, "steps_per_second": 4.893009191641761 }, { "step": 4890, "loss": 3.6734960079193115, "lm_loss": 3.6734960079193115, "ppl": 39.38937098361103, "gate_mean": 1.3052485883235931e-06, "lr": 0.00016382267440179062, "steps_per_second": 4.893523879852258 }, { "step": 4900, "loss": 3.841787338256836, "lm_loss": 3.841787338256836, "ppl": 46.60870555909738, "gate_mean": 7.82310962677002e-07, "lr": 0.00016374227720678108, "steps_per_second": 4.894144516336343 }, { "step": 4910, "loss": 3.802259922027588, "lm_loss": 3.802259922027588, "ppl": 44.802319920843104, "gate_mean": 9.580980986356735e-07, "lr": 0.00016366170300885778, "steps_per_second": 4.89463129483489 }, { "step": 4920, "loss": 3.6992506980895996, "lm_loss": 3.6992506980895996, "ppl": 40.41700846944168, "gate_mean": 8.584465831518173e-07, "lr": 0.00016358095202830762, "steps_per_second": 4.895215629651517 }, { "step": 4930, "loss": 3.8181662559509277, "lm_loss": 3.8181662559509277, "ppl": 45.52065850313042, "gate_mean": 1.2330710887908936e-06, "lr": 0.0001635000244859006, "steps_per_second": 4.8956986596580325 }, { "step": 4940, "loss": 3.7421326637268066, "lm_loss": 3.7421326637268066, "ppl": 42.18786682587537, "gate_mean": 8.76840204000473e-07, "lr": 0.00016341892060288957, "steps_per_second": 4.89617392474436 }, { "step": 4950, "loss": 3.7797842025756836, "lm_loss": 3.7797842025756836, "ppl": 43.8065873667746, "gate_mean": 1.273350790143013e-06, "lr": 0.0001633376406010094, "steps_per_second": 4.896703986308368 }, { "step": 4960, "loss": 3.703075408935547, "lm_loss": 3.703075408935547, "ppl": 40.571887835688216, "gate_mean": 9.138602763414383e-07, "lr": 0.0001632561847024766, "steps_per_second": 4.89704362493259 }, { "step": 4970, "loss": 3.6530845165252686, "lm_loss": 3.6530845165252686, "ppl": 38.593525009240295, "gate_mean": 7.60890543460846e-07, "lr": 0.00016317455312998843, "steps_per_second": 4.8975816095501505 }, { "step": 4980, "loss": 3.7421774864196777, "lm_loss": 3.7421774864196777, "ppl": 42.18975784205289, "gate_mean": 8.17934051156044e-07, "lr": 0.0001630927461067225, "steps_per_second": 4.898150222116117 }, { "step": 4990, "loss": 3.7107925415039062, "lm_loss": 3.7107925415039062, "ppl": 40.88619769823378, "gate_mean": 9.308569133281708e-07, "lr": 0.0001630107638563361, "steps_per_second": 4.898723840449203 }, { "step": 5000, "loss": 3.72099232673645, "lm_loss": 3.72099232673645, "ppl": 41.30536219364944, "gate_mean": 8.030328899621964e-07, "lr": 0.00016292860660296557, "steps_per_second": 4.8992012743402915 }, { "step": 5010, "loss": 3.7164270877838135, "lm_loss": 3.7164270877838135, "ppl": 41.1172231218765, "gate_mean": 1.0342337191104889e-06, "lr": 0.00016284627457122567, "steps_per_second": 4.875074840830145 }, { "step": 5020, "loss": 3.793325901031494, "lm_loss": 3.793325901031494, "ppl": 44.4038377285913, "gate_mean": 1.1886004358530045e-06, "lr": 0.00016276376798620904, "steps_per_second": 4.875645952304466 }, { "step": 5030, "loss": 3.6495614051818848, "lm_loss": 3.6495614051818848, "ppl": 38.45779495993089, "gate_mean": 8.800998330116272e-07, "lr": 0.00016268108707348557, "steps_per_second": 4.876255576260304 }, { "step": 5040, "loss": 3.6559486389160156, "lm_loss": 3.6559486389160156, "ppl": 38.704220034736565, "gate_mean": 9.420327842235565e-07, "lr": 0.00016259823205910172, "steps_per_second": 4.876815630564467 }, { "step": 5050, "loss": 3.7308638095855713, "lm_loss": 3.7308638095855713, "ppl": 41.715126531445314, "gate_mean": 1.0388903319835663e-06, "lr": 0.00016251520316957986, "steps_per_second": 4.877362077306164 }, { "step": 5060, "loss": 3.745306968688965, "lm_loss": 3.745306968688965, "ppl": 42.3219967529033, "gate_mean": 1.0249204933643341e-06, "lr": 0.00016243200063191792, "steps_per_second": 4.877967729619611 }, { "step": 5070, "loss": 3.6724424362182617, "lm_loss": 3.6724424362182617, "ppl": 39.34789331070848, "gate_mean": 1.034000888466835e-06, "lr": 0.00016234862467358834, "steps_per_second": 4.878541393496528 }, { "step": 5080, "loss": 3.7983174324035645, "lm_loss": 3.7983174324035645, "ppl": 44.62603496855696, "gate_mean": 8.936040103435516e-07, "lr": 0.0001622650755225379, "steps_per_second": 4.879242397861303 }, { "step": 5090, "loss": 3.8981242179870605, "lm_loss": 3.8981242179870605, "ppl": 49.30986773848046, "gate_mean": 1.0256189852952957e-06, "lr": 0.00016218135340718675, "steps_per_second": 4.8795340114599375 }, { "step": 5100, "loss": 3.7206406593322754, "lm_loss": 3.7206406593322754, "ppl": 41.29083899796523, "gate_mean": 8.712522685527802e-07, "lr": 0.00016209745855642793, "steps_per_second": 4.8800350761245 }, { "step": 5110, "loss": 3.8013715744018555, "lm_loss": 3.8013715744018555, "ppl": 44.7625375592136, "gate_mean": 1.2104865163564682e-06, "lr": 0.0001620133911996268, "steps_per_second": 4.880393381436217 }, { "step": 5120, "loss": 3.675058364868164, "lm_loss": 3.675058364868164, "ppl": 39.45095934004864, "gate_mean": 1.010950654745102e-06, "lr": 0.0001619291515666203, "steps_per_second": 4.880926645645201 }, { "step": 5130, "loss": 3.740663528442383, "lm_loss": 3.740663528442383, "ppl": 42.1259326481295, "gate_mean": 1.2987293303012848e-06, "lr": 0.00016184473988771638, "steps_per_second": 4.881261243770809 }, { "step": 5140, "loss": 3.731778383255005, "lm_loss": 3.731778383255005, "ppl": 41.75329553931233, "gate_mean": 9.76957380771637e-07, "lr": 0.0001617601563936933, "steps_per_second": 4.881851022894757 }, { "step": 5150, "loss": 3.725090503692627, "lm_loss": 3.725090503692627, "ppl": 41.47498621438082, "gate_mean": 9.059440344572067e-07, "lr": 0.00016167540131579913, "steps_per_second": 4.882259962690299 }, { "step": 5160, "loss": 3.8300411701202393, "lm_loss": 3.8300411701202393, "ppl": 46.06443467349015, "gate_mean": 9.476207196712494e-07, "lr": 0.00016159047488575103, "steps_per_second": 4.882960195426013 }, { "step": 5170, "loss": 3.7151012420654297, "lm_loss": 3.7151012420654297, "ppl": 41.062744150984244, "gate_mean": 8.449424058198929e-07, "lr": 0.00016150537733573462, "steps_per_second": 4.883448709189283 }, { "step": 5180, "loss": 3.781453847885132, "lm_loss": 3.781453847885132, "ppl": 43.879789924042, "gate_mean": 9.71369445323944e-07, "lr": 0.0001614201088984033, "steps_per_second": 4.883905279032943 }, { "step": 5190, "loss": 3.9124808311462402, "lm_loss": 3.9124808311462402, "ppl": 50.022896526814186, "gate_mean": 9.855721145868301e-07, "lr": 0.00016133466980687775, "steps_per_second": 4.884317706907705 }, { "step": 5200, "loss": 3.6399972438812256, "lm_loss": 3.6399972438812256, "ppl": 38.09173173991734, "gate_mean": 1.0083895176649094e-06, "lr": 0.00016124906029474522, "steps_per_second": 4.884865460327298 }, { "step": 5210, "loss": 3.7254695892333984, "lm_loss": 3.7254695892333984, "ppl": 41.49071176243401, "gate_mean": 8.298084139823914e-07, "lr": 0.00016116328059605881, "steps_per_second": 4.885236568382957 }, { "step": 5220, "loss": 3.756033420562744, "lm_loss": 3.756033420562744, "ppl": 42.77840505907732, "gate_mean": 1.8451828509569168e-06, "lr": 0.00016107733094533695, "steps_per_second": 4.885598678151718 }, { "step": 5230, "loss": 3.8945693969726562, "lm_loss": 3.8945693969726562, "ppl": 49.13489117387192, "gate_mean": 9.73464921116829e-07, "lr": 0.00016099121157756268, "steps_per_second": 4.886099214483896 }, { "step": 5240, "loss": 3.705759048461914, "lm_loss": 3.705759048461914, "ppl": 40.68091438609007, "gate_mean": 1.0905787348747253e-06, "lr": 0.00016090492272818313, "steps_per_second": 4.886391601219893 }, { "step": 5250, "loss": 3.7738842964172363, "lm_loss": 3.7738842964172363, "ppl": 43.548893544376305, "gate_mean": 9.220093488693237e-07, "lr": 0.00016081846463310864, "steps_per_second": 4.88642862644345 }, { "step": 5260, "loss": 3.729475259780884, "lm_loss": 3.729475259780884, "ppl": 41.65724319689427, "gate_mean": 7.238704711198807e-07, "lr": 0.0001607318375287124, "steps_per_second": 4.886816007285462 }, { "step": 5270, "loss": 3.684347152709961, "lm_loss": 3.684347152709961, "ppl": 39.819118158987756, "gate_mean": 8.03731381893158e-07, "lr": 0.00016064504165182959, "steps_per_second": 4.887254952077446 }, { "step": 5280, "loss": 3.8054323196411133, "lm_loss": 3.8054323196411133, "ppl": 44.94467637989501, "gate_mean": 9.32253897190094e-07, "lr": 0.00016055807723975676, "steps_per_second": 4.887671536307626 }, { "step": 5290, "loss": 3.803891181945801, "lm_loss": 3.803891181945801, "ppl": 44.875463791685604, "gate_mean": 7.974449545145035e-07, "lr": 0.00016047094453025136, "steps_per_second": 4.888108129922872 }, { "step": 5300, "loss": 3.7514729499816895, "lm_loss": 3.7514729499816895, "ppl": 42.58375957615146, "gate_mean": 8.111819624900818e-07, "lr": 0.00016038364376153093, "steps_per_second": 4.888354486613239 }, { "step": 5310, "loss": 3.6545581817626953, "lm_loss": 3.6545581817626953, "ppl": 38.65044087260052, "gate_mean": 7.522758096456528e-07, "lr": 0.00016029617517227234, "steps_per_second": 4.888718688443221 }, { "step": 5320, "loss": 3.6718015670776367, "lm_loss": 3.6718015670776367, "ppl": 39.32268453876184, "gate_mean": 9.157229214906693e-07, "lr": 0.00016020853900161142, "steps_per_second": 4.889142468859835 }, { "step": 5330, "loss": 3.609685182571411, "lm_loss": 3.609685182571411, "ppl": 36.95441708879104, "gate_mean": 8.489005267620087e-07, "lr": 0.0001601207354891421, "steps_per_second": 4.889693306977078 }, { "step": 5340, "loss": 3.774317741394043, "lm_loss": 3.774317741394043, "ppl": 43.56777368498407, "gate_mean": 1.12154521048069e-06, "lr": 0.00016003276487491585, "steps_per_second": 4.890142567363723 }, { "step": 5350, "loss": 3.6643807888031006, "lm_loss": 3.6643807888031006, "ppl": 39.03195965223514, "gate_mean": 9.066425263881683e-07, "lr": 0.000159944627399441, "steps_per_second": 4.8904972435854415 }, { "step": 5360, "loss": 3.5991098880767822, "lm_loss": 3.5991098880767822, "ppl": 36.56567241290677, "gate_mean": 9.14325937628746e-07, "lr": 0.00015985632330368197, "steps_per_second": 4.890865170215047 }, { "step": 5370, "loss": 3.6141936779022217, "lm_loss": 3.6141936779022217, "ppl": 37.12140204829182, "gate_mean": 1.0097865015268326e-06, "lr": 0.00015976785282905881, "steps_per_second": 4.891421243305903 }, { "step": 5380, "loss": 3.6117823123931885, "lm_loss": 3.6117823123931885, "ppl": 37.03199661765351, "gate_mean": 9.094364941120148e-07, "lr": 0.00015967921621744645, "steps_per_second": 4.891974919996561 }, { "step": 5390, "loss": 3.5703043937683105, "lm_loss": 3.5703043937683105, "ppl": 35.52740582682744, "gate_mean": 1.0766088962554932e-06, "lr": 0.000159590413711174, "steps_per_second": 4.89086365347276 }, { "step": 5400, "loss": 3.682845115661621, "lm_loss": 3.682845115661621, "ppl": 39.759353264060294, "gate_mean": 1.0081566870212555e-06, "lr": 0.00015950144555302407, "steps_per_second": 4.891250935563239 }, { "step": 5410, "loss": 3.661684274673462, "lm_loss": 3.661684274673462, "ppl": 38.92685119842942, "gate_mean": 8.591450750827789e-07, "lr": 0.00015941231198623223, "steps_per_second": 4.8843343470613485 }, { "step": 5420, "loss": 3.7614309787750244, "lm_loss": 3.7614309787750244, "ppl": 43.00992825849294, "gate_mean": 1.1171214282512665e-06, "lr": 0.0001593230132544863, "steps_per_second": 4.878484265198772 }, { "step": 5430, "loss": 3.6645381450653076, "lm_loss": 3.6645381450653076, "ppl": 39.03810205877302, "gate_mean": 7.736962288618088e-07, "lr": 0.00015923354960192555, "steps_per_second": 4.878820743979011 }, { "step": 5440, "loss": 3.69882869720459, "lm_loss": 3.69882869720459, "ppl": 40.39995605441831, "gate_mean": 8.777715265750885e-07, "lr": 0.0001591439212731402, "steps_per_second": 4.860477796080047 }, { "step": 5450, "loss": 3.7843410968780518, "lm_loss": 3.7843410968780518, "ppl": 44.00666487495889, "gate_mean": 7.934868335723877e-07, "lr": 0.00015905412851317064, "steps_per_second": 4.860893737358188 }, { "step": 5460, "loss": 3.6460037231445312, "lm_loss": 3.6460037231445312, "ppl": 38.321217447343585, "gate_mean": 1.0416842997074127e-06, "lr": 0.00015896417156750694, "steps_per_second": 4.861284976385933 }, { "step": 5470, "loss": 3.6498074531555176, "lm_loss": 3.6498074531555176, "ppl": 38.467258586656506, "gate_mean": 1.0419171303510666e-06, "lr": 0.00015887405068208788, "steps_per_second": 4.861679051842315 }, { "step": 5480, "loss": 3.6331770420074463, "lm_loss": 3.6331770420074463, "ppl": 37.832822350480804, "gate_mean": 1.141335815191269e-06, "lr": 0.00015878376610330052, "steps_per_second": 4.856246893867966 }, { "step": 5490, "loss": 3.648341655731201, "lm_loss": 3.648341655731201, "ppl": 38.41091468256262, "gate_mean": 1.1387746781110764e-06, "lr": 0.0001586933180779795, "steps_per_second": 4.856586963454576 }, { "step": 5500, "loss": 3.8374123573303223, "lm_loss": 3.8374123573303223, "ppl": 46.40523876747031, "gate_mean": 7.620546966791153e-07, "lr": 0.00015860270685340622, "steps_per_second": 4.851121805294013 }, { "step": 5510, "loss": 3.670095920562744, "lm_loss": 3.670095920562744, "ppl": 39.255671105782746, "gate_mean": 1.0670628398656845e-06, "lr": 0.00015851193267730834, "steps_per_second": 4.851441198363815 }, { "step": 5520, "loss": 3.675457239151001, "lm_loss": 3.675457239151001, "ppl": 39.46669845191744, "gate_mean": 1.1003576219081879e-06, "lr": 0.00015842099579785898, "steps_per_second": 4.851764241395159 }, { "step": 5530, "loss": 3.692553758621216, "lm_loss": 3.692553758621216, "ppl": 40.14724252156741, "gate_mean": 8.489005267620087e-07, "lr": 0.00015832989646367605, "steps_per_second": 4.8519069546598566 }, { "step": 5540, "loss": 3.639893054962158, "lm_loss": 3.639893054962158, "ppl": 38.08776321030395, "gate_mean": 9.55304130911827e-07, "lr": 0.00015823863492382174, "steps_per_second": 4.852396330096052 }, { "step": 5550, "loss": 3.5817372798919678, "lm_loss": 3.5817372798919678, "ppl": 35.93591739530311, "gate_mean": 9.741634130477905e-07, "lr": 0.00015814721142780157, "steps_per_second": 4.852870830628661 }, { "step": 5560, "loss": 3.623408555984497, "lm_loss": 3.623408555984497, "ppl": 37.46505215761878, "gate_mean": 6.856862455606461e-07, "lr": 0.00015805562622556388, "steps_per_second": 4.853377330670862 }, { "step": 5570, "loss": 3.6866180896759033, "lm_loss": 3.6866180896759033, "ppl": 39.90964762081071, "gate_mean": 1.0842923074960709e-06, "lr": 0.0001579638795674991, "steps_per_second": 4.853746250745626 }, { "step": 5580, "loss": 3.675865650177002, "lm_loss": 3.675865650177002, "ppl": 39.482820378687286, "gate_mean": 1.1497177183628082e-06, "lr": 0.00015787197170443913, "steps_per_second": 4.854252099105669 }, { "step": 5590, "loss": 3.657503366470337, "lm_loss": 3.657503366470337, "ppl": 38.7644413538347, "gate_mean": 8.782371878623962e-07, "lr": 0.00015777990288765655, "steps_per_second": 4.854838867359454 }, { "step": 5600, "loss": 3.6502175331115723, "lm_loss": 3.6502175331115723, "ppl": 38.48303647324368, "gate_mean": 7.485505193471909e-07, "lr": 0.00015768767336886397, "steps_per_second": 4.854680949255983 }, { "step": 5610, "loss": 3.7111096382141113, "lm_loss": 3.7111096382141113, "ppl": 40.899164632794175, "gate_mean": 1.0291114449501038e-06, "lr": 0.00015759528340021338, "steps_per_second": 4.855037730819848 }, { "step": 5620, "loss": 3.667127847671509, "lm_loss": 3.667127847671509, "ppl": 39.13933015217007, "gate_mean": 9.993091225624084e-07, "lr": 0.00015750273323429545, "steps_per_second": 4.855596308941723 }, { "step": 5630, "loss": 3.641249656677246, "lm_loss": 3.641249656677246, "ppl": 38.139468198807265, "gate_mean": 1.037493348121643e-06, "lr": 0.00015741002312413875, "steps_per_second": 4.856121524201711 }, { "step": 5640, "loss": 3.7112765312194824, "lm_loss": 3.7112765312194824, "ppl": 40.90599098691644, "gate_mean": 1.0563526302576065e-06, "lr": 0.00015731715332320924, "steps_per_second": 4.850753530416661 }, { "step": 5650, "loss": 3.785691976547241, "lm_loss": 3.785691976547241, "ppl": 44.066152755284854, "gate_mean": 1.0922085493803024e-06, "lr": 0.00015722412408540934, "steps_per_second": 4.851241805595755 }, { "step": 5660, "loss": 3.860689163208008, "lm_loss": 3.860689163208008, "ppl": 47.49807401699957, "gate_mean": 1.044943928718567e-06, "lr": 0.0001571309356650775, "steps_per_second": 4.851517490301227 }, { "step": 5670, "loss": 3.6921286582946777, "lm_loss": 3.6921286582946777, "ppl": 40.13017954265779, "gate_mean": 8.139759302139282e-07, "lr": 0.00015703758831698726, "steps_per_second": 4.851976274221823 }, { "step": 5680, "loss": 3.673158645629883, "lm_loss": 3.673158645629883, "ppl": 39.37608473650223, "gate_mean": 1.0363291949033737e-06, "lr": 0.00015694408229634675, "steps_per_second": 4.8522734175886795 }, { "step": 5690, "loss": 3.5971226692199707, "lm_loss": 3.5971226692199707, "ppl": 36.49308057100339, "gate_mean": 9.93022695183754e-07, "lr": 0.00015685041785879782, "steps_per_second": 4.852721041687234 }, { "step": 5700, "loss": 3.5699710845947266, "lm_loss": 3.5699710845947266, "ppl": 35.515566189791144, "gate_mean": 8.388888090848923e-07, "lr": 0.00015675659526041545, "steps_per_second": 4.85310389115975 }, { "step": 5710, "loss": 3.5495591163635254, "lm_loss": 3.5495591163635254, "ppl": 34.7979722485713, "gate_mean": 9.157229214906693e-07, "lr": 0.00015666261475770707, "steps_per_second": 4.853686502809905 }, { "step": 5720, "loss": 3.481471061706543, "lm_loss": 3.481471061706543, "ppl": 32.50750746902825, "gate_mean": 8.642673492431641e-07, "lr": 0.00015656847660761183, "steps_per_second": 4.85393878964035 }, { "step": 5730, "loss": 3.5677084922790527, "lm_loss": 3.5677084922790527, "ppl": 35.435299781964574, "gate_mean": 8.749775588512421e-07, "lr": 0.00015647418106749976, "steps_per_second": 4.854363486814739 }, { "step": 5740, "loss": 3.5817453861236572, "lm_loss": 3.5817453861236572, "ppl": 35.93620870135618, "gate_mean": 8.300412446260452e-07, "lr": 0.00015637972839517127, "steps_per_second": 4.854855555541225 }, { "step": 5750, "loss": 3.5456783771514893, "lm_loss": 3.5456783771514893, "ppl": 34.66319208564915, "gate_mean": 7.550697773694992e-07, "lr": 0.00015628511884885644, "steps_per_second": 4.854830204871881 }, { "step": 5760, "loss": 3.6604459285736084, "lm_loss": 3.6604459285736084, "ppl": 38.878676118935545, "gate_mean": 9.853392839431763e-07, "lr": 0.0001561903526872141, "steps_per_second": 4.855282549614301 }, { "step": 5770, "loss": 3.4917678833007812, "lm_loss": 3.4917678833007812, "ppl": 32.843960700674536, "gate_mean": 1.005362719297409e-06, "lr": 0.00015609543016933128, "steps_per_second": 4.855757826301965 }, { "step": 5780, "loss": 3.4254071712493896, "lm_loss": 3.4254071712493896, "ppl": 30.735156776266813, "gate_mean": 9.126961231231689e-07, "lr": 0.00015600035155472259, "steps_per_second": 4.856277316468 }, { "step": 5790, "loss": 3.6888108253479004, "lm_loss": 3.6888108253479004, "ppl": 39.99725494355456, "gate_mean": 1.1152587831020355e-06, "lr": 0.00015590511710332928, "steps_per_second": 4.856787319235743 }, { "step": 5800, "loss": 3.7215352058410645, "lm_loss": 3.7215352058410645, "ppl": 41.32779209950562, "gate_mean": 9.499490261077881e-07, "lr": 0.00015580972707551874, "steps_per_second": 4.85732009264622 }, { "step": 5810, "loss": 3.604661464691162, "lm_loss": 3.604661464691162, "ppl": 36.76923406600585, "gate_mean": 9.026844054460526e-07, "lr": 0.0001557141817320837, "steps_per_second": 4.857572494272026 }, { "step": 5820, "loss": 3.632539749145508, "lm_loss": 3.632539749145508, "ppl": 37.80871944397076, "gate_mean": 9.30391252040863e-07, "lr": 0.00015561848133424134, "steps_per_second": 4.858144514430095 }, { "step": 5830, "loss": 3.632506847381592, "lm_loss": 3.632506847381592, "ppl": 37.807475490873884, "gate_mean": 1.2747477740049362e-06, "lr": 0.00015552262614363304, "steps_per_second": 4.858086357970053 }, { "step": 5840, "loss": 3.682499647140503, "lm_loss": 3.682499647140503, "ppl": 39.74562003142391, "gate_mean": 1.1110678315162659e-06, "lr": 0.00015542661642232316, "steps_per_second": 4.8585760548333345 }, { "step": 5850, "loss": 3.6035380363464355, "lm_loss": 3.6035380363464355, "ppl": 36.72794966061987, "gate_mean": 8.649658411741257e-07, "lr": 0.0001553304524327986, "steps_per_second": 4.859124532884961 }, { "step": 5860, "loss": 3.591963768005371, "lm_loss": 3.591963768005371, "ppl": 36.305301157510684, "gate_mean": 8.682254701852798e-07, "lr": 0.00015523413443796808, "steps_per_second": 4.859635073932559 }, { "step": 5870, "loss": 3.612504482269287, "lm_loss": 3.612504482269287, "ppl": 37.05874966902376, "gate_mean": 8.121132850646973e-07, "lr": 0.00015513766270116132, "steps_per_second": 4.860010277043731 }, { "step": 5880, "loss": 3.828115463256836, "lm_loss": 3.828115463256836, "ppl": 45.9758134321335, "gate_mean": 8.430797606706619e-07, "lr": 0.00015504103748612835, "steps_per_second": 4.860497638019894 }, { "step": 5890, "loss": 3.7095468044281006, "lm_loss": 3.7095468044281006, "ppl": 40.83529595754787, "gate_mean": 1.0959338396787643e-06, "lr": 0.0001549442590570388, "steps_per_second": 4.8606352754156115 }, { "step": 5900, "loss": 3.7620153427124023, "lm_loss": 3.7620153427124023, "ppl": 43.03506905448826, "gate_mean": 8.952338248491287e-07, "lr": 0.00015484732767848122, "steps_per_second": 4.861064810633026 }, { "step": 5910, "loss": 3.642925977706909, "lm_loss": 3.642925977706909, "ppl": 38.203455808312775, "gate_mean": 9.664800018072128e-07, "lr": 0.00015475024361546227, "steps_per_second": 4.86160791781983 }, { "step": 5920, "loss": 3.6388914585113525, "lm_loss": 3.6388914585113525, "ppl": 38.04963374021195, "gate_mean": 7.390044629573822e-07, "lr": 0.0001546530071334061, "steps_per_second": 4.862093844151292 }, { "step": 5930, "loss": 3.5921239852905273, "lm_loss": 3.5921239852905273, "ppl": 36.3111183602947, "gate_mean": 1.0735820978879929e-06, "lr": 0.00015455561849815356, "steps_per_second": 4.862561488405983 }, { "step": 5940, "loss": 3.7042746543884277, "lm_loss": 3.7042746543884277, "ppl": 40.62057267439084, "gate_mean": 1.101754605770111e-06, "lr": 0.00015445807797596139, "steps_per_second": 4.862922179158711 }, { "step": 5950, "loss": 3.5938220024108887, "lm_loss": 3.5938220024108887, "ppl": 36.372827637807795, "gate_mean": 1.1683441698551178e-06, "lr": 0.00015436038583350174, "steps_per_second": 4.863387714120139 }, { "step": 5960, "loss": 3.6074752807617188, "lm_loss": 3.6074752807617188, "ppl": 36.87284162567112, "gate_mean": 9.62754711508751e-07, "lr": 0.00015426254233786112, "steps_per_second": 4.86370306218532 }, { "step": 5970, "loss": 3.6708459854125977, "lm_loss": 3.6708459854125977, "ppl": 39.285126450164874, "gate_mean": 1.1459924280643463e-06, "lr": 0.00015416454775653995, "steps_per_second": 4.864103246564441 }, { "step": 5980, "loss": 3.5904650688171387, "lm_loss": 3.5904650688171387, "ppl": 36.250931184431344, "gate_mean": 1.0763760656118393e-06, "lr": 0.00015406640235745168, "steps_per_second": 4.864456175081811 }, { "step": 5990, "loss": 3.593047857284546, "lm_loss": 3.593047857284546, "ppl": 36.34468068687883, "gate_mean": 1.0123476386070251e-06, "lr": 0.00015396810640892205, "steps_per_second": 4.864638464070236 }, { "step": 6000, "loss": 3.5741004943847656, "lm_loss": 3.5741004943847656, "ppl": 35.66252773991335, "gate_mean": 8.938368409872055e-07, "lr": 0.00015386966017968848, "steps_per_second": 4.865159230839764 }, { "step": 6010, "loss": 3.5834667682647705, "lm_loss": 3.5834667682647705, "ppl": 35.998121922102044, "gate_mean": 8.344650268554688e-07, "lr": 0.00015377106393889913, "steps_per_second": 4.836793951349832 }, { "step": 6020, "loss": 3.62913179397583, "lm_loss": 3.62913179397583, "ppl": 37.68008833210926, "gate_mean": 1.2242235243320465e-06, "lr": 0.00015367231795611244, "steps_per_second": 4.836901423187808 }, { "step": 6030, "loss": 3.6031007766723633, "lm_loss": 3.6031007766723633, "ppl": 36.71189351992917, "gate_mean": 9.548384696245193e-07, "lr": 0.0001535734225012961, "steps_per_second": 4.836921944990496 }, { "step": 6040, "loss": 3.530775308609009, "lm_loss": 3.530775308609009, "ppl": 34.15043447928781, "gate_mean": 9.781215339899063e-07, "lr": 0.00015347437784482651, "steps_per_second": 4.836700340646346 }, { "step": 6050, "loss": 3.6399078369140625, "lm_loss": 3.6399078369140625, "ppl": 38.08832622594909, "gate_mean": 9.934883564710617e-07, "lr": 0.00015337518425748798, "steps_per_second": 4.83699734142049 }, { "step": 6060, "loss": 3.632335662841797, "lm_loss": 3.632335662841797, "ppl": 37.801003989507464, "gate_mean": 9.951181709766388e-07, "lr": 0.000153275842010472, "steps_per_second": 4.837394045295183 }, { "step": 6070, "loss": 3.4891014099121094, "lm_loss": 3.4891014099121094, "ppl": 32.75649981137569, "gate_mean": 9.599607437849045e-07, "lr": 0.0001531763513753765, "steps_per_second": 4.837884197659814 }, { "step": 6080, "loss": 3.4921183586120605, "lm_loss": 3.4921183586120605, "ppl": 32.855473715420615, "gate_mean": 1.0032672435045242e-06, "lr": 0.00015307671262420507, "steps_per_second": 4.838141476118305 }, { "step": 6090, "loss": 3.545943260192871, "lm_loss": 3.545943260192871, "ppl": 34.672374993537396, "gate_mean": 8.188653737306595e-07, "lr": 0.00015297692602936623, "steps_per_second": 4.838676737081657 }, { "step": 6100, "loss": 3.5517258644104004, "lm_loss": 3.5517258644104004, "ppl": 34.87345243071438, "gate_mean": 8.302740752696991e-07, "lr": 0.00015287699186367274, "steps_per_second": 4.839034453885153 }, { "step": 6110, "loss": 3.529745101928711, "lm_loss": 3.529745101928711, "ppl": 34.11527058969918, "gate_mean": 8.598435670137405e-07, "lr": 0.00015277691040034078, "steps_per_second": 4.839554600731712 }, { "step": 6120, "loss": 3.487312078475952, "lm_loss": 3.487312078475952, "ppl": 32.69793998361935, "gate_mean": 8.244533091783524e-07, "lr": 0.00015267668191298924, "steps_per_second": 4.8400347268639585 }, { "step": 6130, "loss": 3.53933048248291, "lm_loss": 3.53933048248291, "ppl": 34.4438507079374, "gate_mean": 1.0135117918252945e-06, "lr": 0.00015257630667563898, "steps_per_second": 4.840463577343854 }, { "step": 6140, "loss": 3.4554200172424316, "lm_loss": 3.4554200172424316, "ppl": 31.67158850226862, "gate_mean": 9.667128324508667e-07, "lr": 0.0001524757849627121, "steps_per_second": 4.840938581452954 }, { "step": 6150, "loss": 3.526820182800293, "lm_loss": 3.526820182800293, "ppl": 34.01563197066687, "gate_mean": 8.321367204189301e-07, "lr": 0.0001523751170490311, "steps_per_second": 4.841172743838544 }, { "step": 6160, "loss": 3.5607311725616455, "lm_loss": 3.5607311725616455, "ppl": 35.18891691168681, "gate_mean": 9.55304130911827e-07, "lr": 0.0001522743032098182, "steps_per_second": 4.841652296764152 }, { "step": 6170, "loss": 3.577845335006714, "lm_loss": 3.577845335006714, "ppl": 35.79632859755192, "gate_mean": 1.2558884918689728e-06, "lr": 0.00015217334372069456, "steps_per_second": 4.841918848168801 }, { "step": 6180, "loss": 3.5952558517456055, "lm_loss": 3.5952558517456055, "ppl": 36.42501820027802, "gate_mean": 9.522773325443268e-07, "lr": 0.00015207223885767965, "steps_per_second": 4.842209351051222 }, { "step": 6190, "loss": 3.6413533687591553, "lm_loss": 3.6413533687591553, "ppl": 38.14342392758196, "gate_mean": 9.150244295597076e-07, "lr": 0.00015197098889719026, "steps_per_second": 4.842781634964971 }, { "step": 6200, "loss": 3.5719356536865234, "lm_loss": 3.5719356536865234, "ppl": 35.585407555036106, "gate_mean": 1.0498333722352982e-06, "lr": 0.00015186959411603993, "steps_per_second": 4.843193864048773 }, { "step": 6210, "loss": 3.589246988296509, "lm_loss": 3.589246988296509, "ppl": 36.20680151350351, "gate_mean": 9.201467037200928e-07, "lr": 0.00015176805479143808, "steps_per_second": 4.843592377635446 }, { "step": 6220, "loss": 3.538533926010132, "lm_loss": 3.538533926010132, "ppl": 34.416425160157516, "gate_mean": 1.2258533388376236e-06, "lr": 0.0001516663712009894, "steps_per_second": 4.843852850421015 }, { "step": 6230, "loss": 3.4448883533477783, "lm_loss": 3.4448883533477783, "ppl": 31.339784269342278, "gate_mean": 9.62521880865097e-07, "lr": 0.00015156454362269287, "steps_per_second": 4.843996496763834 }, { "step": 6240, "loss": 3.5917017459869385, "lm_loss": 3.5917017459869385, "ppl": 36.29578961539274, "gate_mean": 1.1320225894451141e-06, "lr": 0.00015146257233494126, "steps_per_second": 4.843966119601933 }, { "step": 6250, "loss": 3.585085391998291, "lm_loss": 3.585085391998291, "ppl": 36.056436518570514, "gate_mean": 8.700881153345108e-07, "lr": 0.0001513604576165202, "steps_per_second": 4.844377465593935 }, { "step": 6260, "loss": 3.6042351722717285, "lm_loss": 3.6042351722717285, "ppl": 36.75356296072914, "gate_mean": 9.015202522277832e-07, "lr": 0.0001512581997466074, "steps_per_second": 4.844778303780228 }, { "step": 6270, "loss": 3.534895420074463, "lm_loss": 3.534895420074463, "ppl": 34.291428331678816, "gate_mean": 8.551869541406631e-07, "lr": 0.00015115579900477194, "steps_per_second": 4.8452668170779845 }, { "step": 6280, "loss": 3.589413642883301, "lm_loss": 3.589413642883301, "ppl": 36.21283604587608, "gate_mean": 1.0873191058635712e-06, "lr": 0.00015105325567097355, "steps_per_second": 4.84562765018183 }, { "step": 6290, "loss": 3.537067413330078, "lm_loss": 3.537067413330078, "ppl": 34.36599002717811, "gate_mean": 8.48318450152874e-07, "lr": 0.0001509505700255618, "steps_per_second": 4.845660354190271 }, { "step": 6300, "loss": 3.5725302696228027, "lm_loss": 3.5725302696228027, "ppl": 35.606573497647084, "gate_mean": 1.0165385901927948e-06, "lr": 0.00015084774234927538, "steps_per_second": 4.846224300714698 }, { "step": 6310, "loss": 3.5808229446411133, "lm_loss": 3.5808229446411133, "ppl": 35.90307493605394, "gate_mean": 9.320210665464401e-07, "lr": 0.00015074477292324114, "steps_per_second": 4.846617973115945 }, { "step": 6320, "loss": 3.5964462757110596, "lm_loss": 3.5964462757110596, "ppl": 36.468405234244564, "gate_mean": 8.856877684593201e-07, "lr": 0.00015064166202897363, "steps_per_second": 4.847016050493895 }, { "step": 6330, "loss": 3.55324387550354, "lm_loss": 3.55324387550354, "ppl": 34.926430919153404, "gate_mean": 9.764917194843292e-07, "lr": 0.00015053840994837402, "steps_per_second": 4.84742118633026 }, { "step": 6340, "loss": 3.524066925048828, "lm_loss": 3.524066925048828, "ppl": 33.922106976560094, "gate_mean": 1.1806841939687729e-06, "lr": 0.00015043501696372963, "steps_per_second": 4.847947058735399 }, { "step": 6350, "loss": 3.5232579708099365, "lm_loss": 3.5232579708099365, "ppl": 33.89467664076836, "gate_mean": 1.0221265256404877e-06, "lr": 0.00015033148335771287, "steps_per_second": 4.848359882837926 }, { "step": 6360, "loss": 3.5774972438812256, "lm_loss": 3.5774972438812256, "ppl": 35.78387038166504, "gate_mean": 1.0342337191104889e-06, "lr": 0.00015022780941338073, "steps_per_second": 4.848665195148747 }, { "step": 6370, "loss": 3.5382020473480225, "lm_loss": 3.5382020473480225, "ppl": 34.40500497818296, "gate_mean": 9.904615581035614e-07, "lr": 0.00015012399541417378, "steps_per_second": 4.84914926687324 }, { "step": 6380, "loss": 3.789072036743164, "lm_loss": 3.789072036743164, "ppl": 44.2153510116976, "gate_mean": 9.706709533929825e-07, "lr": 0.00015002004164391553, "steps_per_second": 4.849690571106875 }, { "step": 6390, "loss": 3.532297372817993, "lm_loss": 3.532297372817993, "ppl": 34.202453211212465, "gate_mean": 1.096632331609726e-06, "lr": 0.00014991594838681163, "steps_per_second": 4.850194827969498 }, { "step": 6400, "loss": 3.5853006839752197, "lm_loss": 3.5853006839752197, "ppl": 36.06420001574894, "gate_mean": 7.660128176212311e-07, "lr": 0.00014981171592744902, "steps_per_second": 4.850577456791358 }, { "step": 6410, "loss": 3.5432987213134766, "lm_loss": 3.5432987213134766, "ppl": 34.58080368513634, "gate_mean": 8.677598088979721e-07, "lr": 0.00014970734455079535, "steps_per_second": 4.850828711358833 }, { "step": 6420, "loss": 3.474682569503784, "lm_loss": 3.474682569503784, "ppl": 32.287577847898284, "gate_mean": 1.0489020496606827e-06, "lr": 0.0001496028345421979, "steps_per_second": 4.851092642485613 }, { "step": 6430, "loss": 3.5497684478759766, "lm_loss": 3.5497684478759766, "ppl": 34.80525732320357, "gate_mean": 9.378418326377869e-07, "lr": 0.0001494981861873831, "steps_per_second": 4.851395643900904 }, { "step": 6440, "loss": 3.529021739959717, "lm_loss": 3.529021739959717, "ppl": 34.09060182369189, "gate_mean": 7.00121745467186e-07, "lr": 0.0001493933997724555, "steps_per_second": 4.851795174503032 }, { "step": 6450, "loss": 3.5476887226104736, "lm_loss": 3.5476887226104736, "ppl": 34.73294716886697, "gate_mean": 1.0300427675247192e-06, "lr": 0.00014928847558389724, "steps_per_second": 4.852246160479137 }, { "step": 6460, "loss": 3.597761392593384, "lm_loss": 3.597761392593384, "ppl": 36.51639700011337, "gate_mean": 1.2079253792762756e-06, "lr": 0.00014918341390856698, "steps_per_second": 4.8525869803451 }, { "step": 6470, "loss": 3.527423143386841, "lm_loss": 3.527423143386841, "ppl": 34.03614824070122, "gate_mean": 1.119915395975113e-06, "lr": 0.00014907821503369931, "steps_per_second": 4.85307422262738 }, { "step": 6480, "loss": 3.4307632446289062, "lm_loss": 3.4307632446289062, "ppl": 30.900218177179124, "gate_mean": 9.73232090473175e-07, "lr": 0.00014897287924690403, "steps_per_second": 4.853491058755189 }, { "step": 6490, "loss": 3.4180777072906494, "lm_loss": 3.4180777072906494, "ppl": 30.51070810144577, "gate_mean": 9.974464774131775e-07, "lr": 0.00014886740683616505, "steps_per_second": 4.853971381131729 }, { "step": 6500, "loss": 3.556058645248413, "lm_loss": 3.556058645248413, "ppl": 35.024879269920696, "gate_mean": 1.0570511221885681e-06, "lr": 0.00014876179808983995, "steps_per_second": 4.8543482481047 }, { "step": 6510, "loss": 3.460913896560669, "lm_loss": 3.460913896560669, "ppl": 31.846067231003403, "gate_mean": 1.0174699127674103e-06, "lr": 0.00014865605329665905, "steps_per_second": 4.854796998003185 }, { "step": 6520, "loss": 3.4023489952087402, "lm_loss": 3.4023489952087402, "ppl": 30.034568307242584, "gate_mean": 1.0074581950902939e-06, "lr": 0.00014855017274572452, "steps_per_second": 4.855286937007006 }, { "step": 6530, "loss": 3.5584092140197754, "lm_loss": 3.5584092140197754, "ppl": 35.10730449247923, "gate_mean": 1.0130461305379868e-06, "lr": 0.0001484441567265098, "steps_per_second": 4.855680444588152 }, { "step": 6540, "loss": 3.444991111755371, "lm_loss": 3.444991111755371, "ppl": 31.343004861136706, "gate_mean": 8.190982043743134e-07, "lr": 0.00014833800552885862, "steps_per_second": 4.856098068712297 }, { "step": 6550, "loss": 3.6604411602020264, "lm_loss": 3.6604411602020264, "ppl": 38.87849073140319, "gate_mean": 9.955838322639465e-07, "lr": 0.0001482317194429843, "steps_per_second": 4.856550608040112 }, { "step": 6560, "loss": 3.565520763397217, "lm_loss": 3.565520763397217, "ppl": 35.357861690855195, "gate_mean": 1.0384246706962585e-06, "lr": 0.00014812529875946886, "steps_per_second": 4.856865408766063 }, { "step": 6570, "loss": 3.4983789920806885, "lm_loss": 3.4983789920806885, "ppl": 33.06181503350094, "gate_mean": 1.2123491615056992e-06, "lr": 0.00014801874376926255, "steps_per_second": 4.857286024419418 }, { "step": 6580, "loss": 3.5811119079589844, "lm_loss": 3.5811119079589844, "ppl": 35.9134511068034, "gate_mean": 1.1387746781110764e-06, "lr": 0.00014791205476368248, "steps_per_second": 4.857565716677911 }, { "step": 6590, "loss": 3.501673936843872, "lm_loss": 3.501673936843872, "ppl": 33.170931555543284, "gate_mean": 9.273644536733627e-07, "lr": 0.00014780523203441242, "steps_per_second": 4.857933117128305 }, { "step": 6600, "loss": 3.684685707092285, "lm_loss": 3.684685707092285, "ppl": 39.832601378213454, "gate_mean": 1.0700896382331848e-06, "lr": 0.0001476982758735016, "steps_per_second": 4.858270248367512 }, { "step": 6610, "loss": 3.5286712646484375, "lm_loss": 3.5286712646484375, "ppl": 34.07865600288594, "gate_mean": 1.077074557542801e-06, "lr": 0.00014759118657336405, "steps_per_second": 4.858425528753162 }, { "step": 6620, "loss": 3.4925858974456787, "lm_loss": 3.4925858974456787, "ppl": 32.87083851682027, "gate_mean": 8.984934538602829e-07, "lr": 0.0001474839644267779, "steps_per_second": 4.858864294565768 }, { "step": 6630, "loss": 3.5316288471221924, "lm_loss": 3.5316288471221924, "ppl": 34.17959563367162, "gate_mean": 1.2032687664031982e-06, "lr": 0.0001473766097268843, "steps_per_second": 4.859341985837337 }, { "step": 6640, "loss": 3.4281530380249023, "lm_loss": 3.4281530380249023, "ppl": 30.819667396451695, "gate_mean": 1.0498333722352982e-06, "lr": 0.00014726912276718702, "steps_per_second": 4.859678238449804 }, { "step": 6650, "loss": 3.576455593109131, "lm_loss": 3.576455593109131, "ppl": 35.746615492114245, "gate_mean": 9.017530828714371e-07, "lr": 0.00014716150384155125, "steps_per_second": 4.860000522303157 }, { "step": 6660, "loss": 3.4524145126342773, "lm_loss": 3.4524145126342773, "ppl": 31.576542299524128, "gate_mean": 7.664784789085388e-07, "lr": 0.000147053753244203, "steps_per_second": 4.860411627402166 }, { "step": 6670, "loss": 3.497128963470459, "lm_loss": 3.497128963470459, "ppl": 33.02051263876866, "gate_mean": 1.0817311704158783e-06, "lr": 0.00014694587126972832, "steps_per_second": 4.860834570330486 }, { "step": 6680, "loss": 3.5532054901123047, "lm_loss": 3.5532054901123047, "ppl": 34.92509028016876, "gate_mean": 9.043142199516296e-07, "lr": 0.0001468378582130724, "steps_per_second": 4.861130328564023 }, { "step": 6690, "loss": 3.524038314819336, "lm_loss": 3.524038314819336, "ppl": 33.92113647117789, "gate_mean": 1.0300427675247192e-06, "lr": 0.0001467297143695388, "steps_per_second": 4.861593375963783 }, { "step": 6700, "loss": 3.5127084255218506, "lm_loss": 3.5127084255218506, "ppl": 33.538982717940065, "gate_mean": 1.1846423149108887e-06, "lr": 0.00014662144003478865, "steps_per_second": 4.8619643404135555 }, { "step": 6710, "loss": 3.5681345462799072, "lm_loss": 3.5681345462799072, "ppl": 35.450400349808405, "gate_mean": 8.132774382829666e-07, "lr": 0.00014651303550483987, "steps_per_second": 4.86235763265616 }, { "step": 6720, "loss": 3.5027852058410645, "lm_loss": 3.5027852058410645, "ppl": 33.207813872681186, "gate_mean": 7.592607289552689e-07, "lr": 0.00014640450107606634, "steps_per_second": 4.86270889423124 }, { "step": 6730, "loss": 3.490851402282715, "lm_loss": 3.490851402282715, "ppl": 32.813873623357814, "gate_mean": 1.1262018233537674e-06, "lr": 0.00014629583704519694, "steps_per_second": 4.8631940900484185 }, { "step": 6740, "loss": 3.440936803817749, "lm_loss": 3.440936803817749, "ppl": 31.216187918918823, "gate_mean": 1.3192184269428253e-06, "lr": 0.0001461870437093151, "steps_per_second": 4.863502071707986 }, { "step": 6750, "loss": 3.433542490005493, "lm_loss": 3.433542490005493, "ppl": 30.9862169161264, "gate_mean": 1.1245720088481903e-06, "lr": 0.00014607812136585756, "steps_per_second": 4.863900676845356 }, { "step": 6760, "loss": 3.4393913745880127, "lm_loss": 3.4393913745880127, "ppl": 31.16798276808678, "gate_mean": 1.1003576219081879e-06, "lr": 0.0001459690703126139, "steps_per_second": 4.864365085014432 }, { "step": 6770, "loss": 3.4246129989624023, "lm_loss": 3.4246129989624023, "ppl": 30.710757456433114, "gate_mean": 9.017530828714371e-07, "lr": 0.0001458598908477255, "steps_per_second": 4.8647623609861075 }, { "step": 6780, "loss": 3.430760145187378, "lm_loss": 3.430760145187378, "ppl": 30.900122403908092, "gate_mean": 1.0249204933643341e-06, "lr": 0.0001457505832696849, "steps_per_second": 4.865182330739818 }, { "step": 6790, "loss": 3.471325635910034, "lm_loss": 3.471325635910034, "ppl": 32.17937231416948, "gate_mean": 1.0433141142129898e-06, "lr": 0.00014564114787733476, "steps_per_second": 4.865620785361362 }, { "step": 6800, "loss": 3.395725965499878, "lm_loss": 3.395725965499878, "ppl": 29.836305743200626, "gate_mean": 8.898787200450897e-07, "lr": 0.00014553158496986734, "steps_per_second": 4.8660886555809055 }, { "step": 6810, "loss": 3.351743698120117, "lm_loss": 3.351743698120117, "ppl": 28.552477163076976, "gate_mean": 1.044711098074913e-06, "lr": 0.00014542189484682342, "steps_per_second": 4.866423977846423 }, { "step": 6820, "loss": 3.541926145553589, "lm_loss": 3.541926145553589, "ppl": 34.5333714718405, "gate_mean": 1.0307412594556808e-06, "lr": 0.00014531207780809157, "steps_per_second": 4.866934380765048 }, { "step": 6830, "loss": 3.392061233520508, "lm_loss": 3.392061233520508, "ppl": 29.727163789551547, "gate_mean": 1.0514631867408752e-06, "lr": 0.00014520213415390745, "steps_per_second": 4.867298050360768 }, { "step": 6840, "loss": 3.398245334625244, "lm_loss": 3.398245334625244, "ppl": 29.911569179083884, "gate_mean": 1.1241063475608826e-06, "lr": 0.00014509206418485276, "steps_per_second": 4.867835211136296 }, { "step": 6850, "loss": 3.3977606296539307, "lm_loss": 3.3977606296539307, "ppl": 29.89707440593108, "gate_mean": 9.671784937381744e-07, "lr": 0.00014498186820185457, "steps_per_second": 4.868173392009629 }, { "step": 6860, "loss": 3.4982969760894775, "lm_loss": 3.4982969760894775, "ppl": 33.05910354716387, "gate_mean": 1.0153744369745255e-06, "lr": 0.0001448715465061846, "steps_per_second": 4.8684364267925035 }, { "step": 6870, "loss": 3.413881778717041, "lm_loss": 3.413881778717041, "ppl": 30.382955557229465, "gate_mean": 1.1557713150978088e-06, "lr": 0.00014476109939945804, "steps_per_second": 4.8689455323759425 }, { "step": 6880, "loss": 3.498904228210449, "lm_loss": 3.498904228210449, "ppl": 33.079184854501456, "gate_mean": 8.435454219579697e-07, "lr": 0.00014465052718363314, "steps_per_second": 4.869335271919537 }, { "step": 6890, "loss": 3.5256781578063965, "lm_loss": 3.5256781578063965, "ppl": 33.97680744228372, "gate_mean": 8.903443813323975e-07, "lr": 0.00014453983016101004, "steps_per_second": 4.86979435537627 }, { "step": 6900, "loss": 3.352719306945801, "lm_loss": 3.352719306945801, "ppl": 28.580346804515926, "gate_mean": 1.021428033709526e-06, "lr": 0.00014442900863423025, "steps_per_second": 4.870190666439783 }, { "step": 6910, "loss": 3.5197110176086426, "lm_loss": 3.5197110176086426, "ppl": 33.774666769468254, "gate_mean": 1.0603107511997223e-06, "lr": 0.00014431806290627554, "steps_per_second": 4.870479795868429 }, { "step": 6920, "loss": 3.546802520751953, "lm_loss": 3.546802520751953, "ppl": 34.70218040133088, "gate_mean": 1.1203810572624207e-06, "lr": 0.0001442069932804673, "steps_per_second": 4.870819973848814 }, { "step": 6930, "loss": 3.4738941192626953, "lm_loss": 3.4738941192626953, "ppl": 32.26213073257031, "gate_mean": 1.2745149433612823e-06, "lr": 0.00014409580006046567, "steps_per_second": 4.871241265107312 }, { "step": 6940, "loss": 3.4685661792755127, "lm_loss": 3.4685661792755127, "ppl": 32.09069713566606, "gate_mean": 9.636860340833664e-07, "lr": 0.00014398448355026867, "steps_per_second": 4.871594986519398 }, { "step": 6950, "loss": 3.382206439971924, "lm_loss": 3.382206439971924, "ppl": 29.43564750328986, "gate_mean": 8.656643331050873e-07, "lr": 0.00014387304405421132, "steps_per_second": 4.872031781093849 }, { "step": 6960, "loss": 3.518707036972046, "lm_loss": 3.518707036972046, "ppl": 33.74077467437468, "gate_mean": 1.1189840734004974e-06, "lr": 0.00014376148187696504, "steps_per_second": 4.872476271177499 }, { "step": 6970, "loss": 3.3853671550750732, "lm_loss": 3.3853671550750732, "ppl": 29.528832386781477, "gate_mean": 8.854549378156662e-07, "lr": 0.00014364979732353652, "steps_per_second": 4.872820850235949 }, { "step": 6980, "loss": 3.397162437438965, "lm_loss": 3.397162437438965, "ppl": 29.879195556798518, "gate_mean": 1.1422671377658844e-06, "lr": 0.00014353799069926707, "steps_per_second": 4.873303417051802 }, { "step": 6990, "loss": 3.5749785900115967, "lm_loss": 3.5749785900115967, "ppl": 35.693856602419075, "gate_mean": 9.040813893079758e-07, "lr": 0.00014342606230983168, "steps_per_second": 4.873537770484489 }, { "step": 7000, "loss": 3.5971498489379883, "lm_loss": 3.5971498489379883, "ppl": 36.494072456122424, "gate_mean": 1.0475050657987595e-06, "lr": 0.0001433140124612384, "steps_per_second": 4.873635604790191 }, { "step": 7010, "loss": 3.3158013820648193, "lm_loss": 3.3158013820648193, "ppl": 27.54445877747236, "gate_mean": 9.280629456043243e-07, "lr": 0.00014320184145982716, "steps_per_second": 4.873986367492737 }, { "step": 7020, "loss": 3.4104959964752197, "lm_loss": 3.4104959964752197, "ppl": 30.280259437304238, "gate_mean": 8.533243089914322e-07, "lr": 0.0001430895496122692, "steps_per_second": 4.874390902672539 }, { "step": 7030, "loss": 3.4954166412353516, "lm_loss": 3.4954166412353516, "ppl": 32.96401926199867, "gate_mean": 9.05478373169899e-07, "lr": 0.00014297713722556614, "steps_per_second": 4.8746516608427 }, { "step": 7040, "loss": 3.5023105144500732, "lm_loss": 3.5023105144500732, "ppl": 33.19205415012043, "gate_mean": 1.0782387107610703e-06, "lr": 0.0001428646046070492, "steps_per_second": 4.874898234091597 }, { "step": 7050, "loss": 3.420926570892334, "lm_loss": 3.420926570892334, "ppl": 30.597752877693534, "gate_mean": 1.261010766029358e-06, "lr": 0.00014275195206437817, "steps_per_second": 4.875190385437577 }, { "step": 7060, "loss": 3.4129750728607178, "lm_loss": 3.4129750728607178, "ppl": 30.355419638869012, "gate_mean": 1.0845251381397247e-06, "lr": 0.00014263917990554092, "steps_per_second": 4.875397955522672 }, { "step": 7070, "loss": 3.4587244987487793, "lm_loss": 3.4587244987487793, "ppl": 31.776419791886987, "gate_mean": 1.1597294360399246e-06, "lr": 0.0001425262884388522, "steps_per_second": 4.875824339444187 }, { "step": 7080, "loss": 3.4581520557403564, "lm_loss": 3.4581520557403564, "ppl": 31.75823480797445, "gate_mean": 1.0312069207429886e-06, "lr": 0.00014241327797295285, "steps_per_second": 4.875914455553766 }, { "step": 7090, "loss": 3.4497528076171875, "lm_loss": 3.4497528076171875, "ppl": 31.492606614035463, "gate_mean": 8.810311555862427e-07, "lr": 0.00014230014881680932, "steps_per_second": 4.876425305309435 }, { "step": 7100, "loss": 3.3919742107391357, "lm_loss": 3.3919742107391357, "ppl": 29.72457696163439, "gate_mean": 1.2016389518976212e-06, "lr": 0.00014218690127971228, "steps_per_second": 4.876504535412564 }, { "step": 7110, "loss": 3.455595016479492, "lm_loss": 3.455595016479492, "ppl": 31.67713149108827, "gate_mean": 1.0156072676181793e-06, "lr": 0.00014207353567127626, "steps_per_second": 4.876979658944451 }, { "step": 7120, "loss": 3.3752241134643555, "lm_loss": 3.3752241134643555, "ppl": 29.230834070693074, "gate_mean": 1.0181684046983719e-06, "lr": 0.0001419600523014385, "steps_per_second": 4.87734559845335 }, { "step": 7130, "loss": 3.4728050231933594, "lm_loss": 3.4728050231933594, "ppl": 32.22701329940144, "gate_mean": 9.152572602033615e-07, "lr": 0.00014184645148045813, "steps_per_second": 4.877176227196614 }, { "step": 7140, "loss": 3.3744914531707764, "lm_loss": 3.3744914531707764, "ppl": 29.209425642731492, "gate_mean": 8.861534297466278e-07, "lr": 0.00014173273351891552, "steps_per_second": 4.877509206910378 }, { "step": 7150, "loss": 3.4206724166870117, "lm_loss": 3.4206724166870117, "ppl": 30.589977318263674, "gate_mean": 9.473878890275955e-07, "lr": 0.0001416188987277112, "steps_per_second": 4.877569251044867 }, { "step": 7160, "loss": 3.275352954864502, "lm_loss": 3.275352954864502, "ppl": 26.452560392933734, "gate_mean": 9.424984455108643e-07, "lr": 0.00014150494741806515, "steps_per_second": 4.878037127521251 }, { "step": 7170, "loss": 3.4653987884521484, "lm_loss": 3.4653987884521484, "ppl": 31.989214159010523, "gate_mean": 1.1296942830085754e-06, "lr": 0.00014139087990151593, "steps_per_second": 4.878330366999085 }, { "step": 7180, "loss": 3.3648791313171387, "lm_loss": 3.3648791313171387, "ppl": 28.93000035668072, "gate_mean": 1.0959338396787643e-06, "lr": 0.00014127669648991977, "steps_per_second": 4.878653403731825 }, { "step": 7190, "loss": 3.3678245544433594, "lm_loss": 3.3678245544433594, "ppl": 29.015337063432654, "gate_mean": 1.1355150490999222e-06, "lr": 0.0001411623974954497, "steps_per_second": 4.878821818835778 }, { "step": 7200, "loss": 3.4691014289855957, "lm_loss": 3.4691014289855957, "ppl": 32.107878269692605, "gate_mean": 1.0435469448566437e-06, "lr": 0.00014104798323059484, "steps_per_second": 4.879113950923471 }, { "step": 7210, "loss": 3.228301525115967, "lm_loss": 3.228301525115967, "ppl": 25.236756551476564, "gate_mean": 1.1795200407505035e-06, "lr": 0.00014093345400815942, "steps_per_second": 4.874111567723982 }, { "step": 7220, "loss": 3.5009689331054688, "lm_loss": 3.5009689331054688, "ppl": 33.14755416633197, "gate_mean": 1.1527445167303085e-06, "lr": 0.000140818810141262, "steps_per_second": 4.873851199873741 }, { "step": 7230, "loss": 3.4182116985321045, "lm_loss": 3.4182116985321045, "ppl": 30.514796543003516, "gate_mean": 9.55536961555481e-07, "lr": 0.00014070405194333452, "steps_per_second": 4.869084992008133 }, { "step": 7240, "loss": 3.383227586746216, "lm_loss": 3.383227586746216, "ppl": 29.465720971886533, "gate_mean": 1.1818483471870422e-06, "lr": 0.00014058917972812143, "steps_per_second": 4.869332042343257 }, { "step": 7250, "loss": 3.3695151805877686, "lm_loss": 3.3695151805877686, "ppl": 29.064432640300033, "gate_mean": 1.0274816304445267e-06, "lr": 0.0001404741938096791, "steps_per_second": 4.864863393457371 }, { "step": 7260, "loss": 3.3673360347747803, "lm_loss": 3.3673360347747803, "ppl": 29.001165962299357, "gate_mean": 1.0707881301641464e-06, "lr": 0.00014035909450237454, "steps_per_second": 4.855467955154511 }, { "step": 7270, "loss": 3.3653101921081543, "lm_loss": 3.3653101921081543, "ppl": 28.942473633695737, "gate_mean": 1.3257376849651337e-06, "lr": 0.00014024388212088493, "steps_per_second": 4.855259236035231 }, { "step": 7280, "loss": 3.3232429027557373, "lm_loss": 3.3232429027557373, "ppl": 27.750195986823485, "gate_mean": 8.00006091594696e-07, "lr": 0.0001401285569801965, "steps_per_second": 4.855445154573136 }, { "step": 7290, "loss": 3.506408214569092, "lm_loss": 3.506408214569092, "ppl": 33.328344281700474, "gate_mean": 1.0440126061439514e-06, "lr": 0.00014001311939560373, "steps_per_second": 4.855678250199361 }, { "step": 7300, "loss": 3.2909529209136963, "lm_loss": 3.2909529209136963, "ppl": 26.86845497337405, "gate_mean": 1.1422671377658844e-06, "lr": 0.00013989756968270866, "steps_per_second": 4.851560944752897 }, { "step": 7310, "loss": 3.391021490097046, "lm_loss": 3.391021490097046, "ppl": 29.69627122945436, "gate_mean": 1.003500074148178e-06, "lr": 0.00013978190815741972, "steps_per_second": 4.85175531164794 }, { "step": 7320, "loss": 3.3878345489501953, "lm_loss": 3.3878345489501953, "ppl": 29.601781607172157, "gate_mean": 1.3825483620166779e-06, "lr": 0.00013966613513595116, "steps_per_second": 4.852111720029273 }, { "step": 7330, "loss": 3.3645923137664795, "lm_loss": 3.3645923137664795, "ppl": 28.92170391467731, "gate_mean": 1.05355866253376e-06, "lr": 0.00013955025093482197, "steps_per_second": 4.846558470650245 }, { "step": 7340, "loss": 3.372274875640869, "lm_loss": 3.372274875640869, "ppl": 29.144752389309946, "gate_mean": 1.1532101780176163e-06, "lr": 0.0001394342558708551, "steps_per_second": 4.846760068347935 }, { "step": 7350, "loss": 3.408329963684082, "lm_loss": 3.408329963684082, "ppl": 30.21474238409721, "gate_mean": 9.369105100631714e-07, "lr": 0.0001393181502611767, "steps_per_second": 4.847072088006224 }, { "step": 7360, "loss": 3.43841814994812, "lm_loss": 3.43841814994812, "ppl": 31.13766407512372, "gate_mean": 1.076841726899147e-06, "lr": 0.00013920193442321498, "steps_per_second": 4.847387062984769 }, { "step": 7370, "loss": 3.3232550621032715, "lm_loss": 3.3232550621032715, "ppl": 27.75053341315207, "gate_mean": 9.73232090473175e-07, "lr": 0.00013908560867469967, "steps_per_second": 4.847765292351972 }, { "step": 7380, "loss": 3.416278123855591, "lm_loss": 3.416278123855591, "ppl": 30.45585091140433, "gate_mean": 1.016305759549141e-06, "lr": 0.00013896917333366092, "steps_per_second": 4.847966284885395 }, { "step": 7390, "loss": 3.433666706085205, "lm_loss": 3.433666706085205, "ppl": 30.99006614157972, "gate_mean": 1.1175870895385742e-06, "lr": 0.0001388526287184285, "steps_per_second": 4.8483047259616106 }, { "step": 7400, "loss": 3.4904816150665283, "lm_loss": 3.4904816150665283, "ppl": 32.80174171562877, "gate_mean": 1.01444311439991e-06, "lr": 0.00013873597514763093, "steps_per_second": 4.84870469968498 }, { "step": 7410, "loss": 3.357114791870117, "lm_loss": 3.357114791870117, "ppl": 28.706247783250372, "gate_mean": 1.4044344425201416e-06, "lr": 0.0001386192129401946, "steps_per_second": 4.848939232353458 }, { "step": 7420, "loss": 3.4172005653381348, "lm_loss": 3.4172005653381348, "ppl": 30.483957613071965, "gate_mean": 1.1068768799304962e-06, "lr": 0.00013850234241534297, "steps_per_second": 4.848948894624685 }, { "step": 7430, "loss": 3.2699851989746094, "lm_loss": 3.2699851989746094, "ppl": 26.310949911346214, "gate_mean": 1.0761432349681854e-06, "lr": 0.00013838536389259556, "steps_per_second": 4.849349816650354 }, { "step": 7440, "loss": 3.2928524017333984, "lm_loss": 3.2928524017333984, "ppl": 26.919539590016196, "gate_mean": 1.0335352271795273e-06, "lr": 0.0001382682776917672, "steps_per_second": 4.849606545382341 }, { "step": 7450, "loss": 3.373922348022461, "lm_loss": 3.373922348022461, "ppl": 29.192807137505852, "gate_mean": 9.292270988225937e-07, "lr": 0.00013815108413296707, "steps_per_second": 4.849982177262389 }, { "step": 7460, "loss": 3.4499435424804688, "lm_loss": 3.4499435424804688, "ppl": 31.49861392493596, "gate_mean": 9.457580745220184e-07, "lr": 0.0001380337835365979, "steps_per_second": 4.850445086523673 }, { "step": 7470, "loss": 3.3696675300598145, "lm_loss": 3.3696675300598145, "ppl": 29.068860928583224, "gate_mean": 1.1618249118328094e-06, "lr": 0.00013791637622335507, "steps_per_second": 4.850729123888374 }, { "step": 7480, "loss": 3.4034833908081055, "lm_loss": 3.4034833908081055, "ppl": 30.06865872171145, "gate_mean": 9.390059858560562e-07, "lr": 0.00013779886251422566, "steps_per_second": 4.851113151512233 }, { "step": 7490, "loss": 3.3144731521606445, "lm_loss": 3.3144731521606445, "ppl": 27.507897689769894, "gate_mean": 1.0463409125804901e-06, "lr": 0.00013768124273048766, "steps_per_second": 4.8515014977717845 }, { "step": 7500, "loss": 3.367094039916992, "lm_loss": 3.367094039916992, "ppl": 28.99414867837418, "gate_mean": 8.600763976573944e-07, "lr": 0.00013756351719370905, "steps_per_second": 4.8519020958708055 }, { "step": 7510, "loss": 3.277336597442627, "lm_loss": 3.277336597442627, "ppl": 26.5050848957267, "gate_mean": 1.1206138879060745e-06, "lr": 0.000137445686225747, "steps_per_second": 4.852170874506314 }, { "step": 7520, "loss": 3.359496593475342, "lm_loss": 3.359496593475342, "ppl": 28.77470185995384, "gate_mean": 1.155305653810501e-06, "lr": 0.00013732775014874685, "steps_per_second": 4.848226479734475 }, { "step": 7530, "loss": 3.2172932624816895, "lm_loss": 3.2172932624816895, "ppl": 24.960467230125367, "gate_mean": 1.092907041311264e-06, "lr": 0.00013720970928514134, "steps_per_second": 4.848590038956373 }, { "step": 7540, "loss": 3.2558352947235107, "lm_loss": 3.2558352947235107, "ppl": 25.941274096054315, "gate_mean": 9.918585419654846e-07, "lr": 0.00013709156395764967, "steps_per_second": 4.848852433753917 }, { "step": 7550, "loss": 3.348295211791992, "lm_loss": 3.348295211791992, "ppl": 28.454183914816454, "gate_mean": 1.0796356946229935e-06, "lr": 0.00013697331448927667, "steps_per_second": 4.848691107159124 }, { "step": 7560, "loss": 3.2195396423339844, "lm_loss": 3.2195396423339844, "ppl": 25.016600946033947, "gate_mean": 1.244945451617241e-06, "lr": 0.00013685496120331188, "steps_per_second": 4.849089777571493 }, { "step": 7570, "loss": 3.2048258781433105, "lm_loss": 3.2048258781433105, "ppl": 24.651207328751997, "gate_mean": 1.0274816304445267e-06, "lr": 0.0001367365044233286, "steps_per_second": 4.849372381878767 }, { "step": 7580, "loss": 3.210144519805908, "lm_loss": 3.210144519805908, "ppl": 24.7826675520996, "gate_mean": 1.061009243130684e-06, "lr": 0.00013661794447318324, "steps_per_second": 4.849674823296176 }, { "step": 7590, "loss": 3.2904577255249023, "lm_loss": 3.2904577255249023, "ppl": 26.85515313214426, "gate_mean": 1.16787850856781e-06, "lr": 0.0001364992816770141, "steps_per_second": 4.849965568374952 }, { "step": 7600, "loss": 3.334439277648926, "lm_loss": 3.334439277648926, "ppl": 28.062643459976524, "gate_mean": 1.176726073026657e-06, "lr": 0.00013638051635924078, "steps_per_second": 4.850450437374866 }, { "step": 7610, "loss": 3.2626287937164307, "lm_loss": 3.2626287937164307, "ppl": 26.118106089395408, "gate_mean": 7.255002856254578e-07, "lr": 0.00013626164884456308, "steps_per_second": 4.850714569980561 }, { "step": 7620, "loss": 3.4304959774017334, "lm_loss": 3.4304959774017334, "ppl": 30.89196066507821, "gate_mean": 1.103617250919342e-06, "lr": 0.00013614267945796029, "steps_per_second": 4.851027229429182 }, { "step": 7630, "loss": 3.366689920425415, "lm_loss": 3.366689920425415, "ppl": 28.982433944987044, "gate_mean": 1.2260861694812775e-06, "lr": 0.00013602360852469015, "steps_per_second": 4.851408575842806 }, { "step": 7640, "loss": 3.373809337615967, "lm_loss": 3.373809337615967, "ppl": 29.189508232913322, "gate_mean": 1.043081283569336e-06, "lr": 0.00013590443637028803, "steps_per_second": 4.851665866495774 }, { "step": 7650, "loss": 3.3696072101593018, "lm_loss": 3.3696072101593018, "ppl": 29.067107550666314, "gate_mean": 1.0777730494737625e-06, "lr": 0.00013578516332056608, "steps_per_second": 4.851966421325282 }, { "step": 7660, "loss": 3.4016149044036865, "lm_loss": 3.4016149044036865, "ppl": 30.01252829748849, "gate_mean": 1.051928848028183e-06, "lr": 0.00013566578970161225, "steps_per_second": 4.852372256752721 }, { "step": 7670, "loss": 3.340402603149414, "lm_loss": 3.340402603149414, "ppl": 28.230490102010066, "gate_mean": 1.3331882655620575e-06, "lr": 0.00013554631583978947, "steps_per_second": 4.852787959709223 }, { "step": 7680, "loss": 3.4690463542938232, "lm_loss": 3.4690463542938232, "ppl": 32.106109986887695, "gate_mean": 1.0437797755002975e-06, "lr": 0.0001354267420617347, "steps_per_second": 4.853108713519699 }, { "step": 7690, "loss": 3.3942010402679443, "lm_loss": 3.3942010402679443, "ppl": 29.7908422807457, "gate_mean": 1.321546733379364e-06, "lr": 0.00013530706869435805, "steps_per_second": 4.853465215555152 }, { "step": 7700, "loss": 3.3470284938812256, "lm_loss": 3.3470284938812256, "ppl": 28.41816330920493, "gate_mean": 1.1455267667770386e-06, "lr": 0.00013518729606484199, "steps_per_second": 4.853830181223902 }, { "step": 7710, "loss": 3.3370425701141357, "lm_loss": 3.3370425701141357, "ppl": 28.135793902935735, "gate_mean": 1.023290678858757e-06, "lr": 0.00013506742450064031, "steps_per_second": 4.854192635775022 }, { "step": 7720, "loss": 3.3508095741271973, "lm_loss": 3.3508095741271973, "ppl": 28.525818062492924, "gate_mean": 9.667128324508667e-07, "lr": 0.00013494745432947723, "steps_per_second": 4.85450311966618 }, { "step": 7730, "loss": 3.3500890731811523, "lm_loss": 3.3500890731811523, "ppl": 28.50527258599862, "gate_mean": 7.974449545145035e-07, "lr": 0.00013482738587934672, "steps_per_second": 4.854923105620926 }, { "step": 7740, "loss": 3.3547325134277344, "lm_loss": 3.3547325134277344, "ppl": 28.637942900922916, "gate_mean": 9.927898645401e-07, "lr": 0.00013470721947851126, "steps_per_second": 4.855259667006448 }, { "step": 7750, "loss": 3.3347156047821045, "lm_loss": 3.3347156047821045, "ppl": 28.07039900127713, "gate_mean": 1.0193325579166412e-06, "lr": 0.00013458695545550122, "steps_per_second": 4.85565141102766 }, { "step": 7760, "loss": 3.2446179389953613, "lm_loss": 3.2446179389953613, "ppl": 25.651907594191158, "gate_mean": 1.3122335076332092e-06, "lr": 0.00013446659413911385, "steps_per_second": 4.855909811742001 }, { "step": 7770, "loss": 3.318081855773926, "lm_loss": 3.318081855773926, "ppl": 27.607344869331957, "gate_mean": 9.352806955575943e-07, "lr": 0.00013434613585841241, "steps_per_second": 4.856303832000801 }, { "step": 7780, "loss": 3.33646821975708, "lm_loss": 3.33646821975708, "ppl": 28.119638739467597, "gate_mean": 8.482020348310471e-07, "lr": 0.00013422558094272518, "steps_per_second": 4.856494557338532 }, { "step": 7790, "loss": 3.309168815612793, "lm_loss": 3.309168815612793, "ppl": 27.36237284029527, "gate_mean": 9.918585419654846e-07, "lr": 0.0001341049297216448, "steps_per_second": 4.85698982499177 }, { "step": 7800, "loss": 3.5114080905914307, "lm_loss": 3.5114080905914307, "ppl": 33.49539914994029, "gate_mean": 1.0181684046983719e-06, "lr": 0.000133984182525027, "steps_per_second": 4.85733174838265 }, { "step": 7810, "loss": 3.2575316429138184, "lm_loss": 3.2575316429138184, "ppl": 25.985316874803996, "gate_mean": 1.1222437024116516e-06, "lr": 0.00013386333968299006, "steps_per_second": 4.857646918030228 }, { "step": 7820, "loss": 3.2190704345703125, "lm_loss": 3.2190704345703125, "ppl": 25.00486571599521, "gate_mean": 1.160893589258194e-06, "lr": 0.00013374240152591363, "steps_per_second": 4.857938033127541 }, { "step": 7830, "loss": 3.2509641647338867, "lm_loss": 3.2509641647338867, "ppl": 25.815218044795557, "gate_mean": 1.1867377907037735e-06, "lr": 0.0001336213683844381, "steps_per_second": 4.858275804315611 }, { "step": 7840, "loss": 3.31343936920166, "lm_loss": 3.31343936920166, "ppl": 27.479475187781105, "gate_mean": 9.993091225624084e-07, "lr": 0.00013350024058946344, "steps_per_second": 4.858584651700038 }, { "step": 7850, "loss": 3.370910882949829, "lm_loss": 3.370910882949829, "ppl": 29.10502625930337, "gate_mean": 1.0775402188301086e-06, "lr": 0.00013337901847214838, "steps_per_second": 4.858830217402629 }, { "step": 7860, "loss": 3.375244379043579, "lm_loss": 3.375244379043579, "ppl": 29.23142645647921, "gate_mean": 1.1078082025051117e-06, "lr": 0.00013325770236390956, "steps_per_second": 4.85915681933917 }, { "step": 7870, "loss": 3.300715446472168, "lm_loss": 3.300715446472168, "ppl": 27.13204350316718, "gate_mean": 1.0665971785783768e-06, "lr": 0.00013313629259642062, "steps_per_second": 4.859410004997507 }, { "step": 7880, "loss": 3.2481706142425537, "lm_loss": 3.2481706142425537, "ppl": 25.74320256601432, "gate_mean": 1.3094395399093628e-06, "lr": 0.0001330147895016112, "steps_per_second": 4.859711084179565 }, { "step": 7890, "loss": 3.2325985431671143, "lm_loss": 3.2325985431671143, "ppl": 25.34543267434194, "gate_mean": 1.2917444109916687e-06, "lr": 0.0001328931934116662, "steps_per_second": 4.860003766115155 }, { "step": 7900, "loss": 3.3471944332122803, "lm_loss": 3.3471944332122803, "ppl": 28.42287939149521, "gate_mean": 1.1369120329618454e-06, "lr": 0.00013277150465902454, "steps_per_second": 4.860341191742985 }, { "step": 7910, "loss": 3.2368035316467285, "lm_loss": 3.2368035316467285, "ppl": 25.452234319220008, "gate_mean": 9.718351066112518e-07, "lr": 0.00013264972357637873, "steps_per_second": 4.860666480266821 }, { "step": 7920, "loss": 3.213216543197632, "lm_loss": 3.213216543197632, "ppl": 24.858917547447284, "gate_mean": 1.155305653810501e-06, "lr": 0.00013252785049667356, "steps_per_second": 4.861052711264434 }, { "step": 7930, "loss": 3.282179355621338, "lm_loss": 3.282179355621338, "ppl": 26.633753917396078, "gate_mean": 1.0083895176649094e-06, "lr": 0.00013240588575310543, "steps_per_second": 4.861381420934436 }, { "step": 7940, "loss": 3.1246848106384277, "lm_loss": 3.1246848106384277, "ppl": 22.752722547137285, "gate_mean": 1.176726073026657e-06, "lr": 0.00013228382967912122, "steps_per_second": 4.861780805013403 }, { "step": 7950, "loss": 3.2992377281188965, "lm_loss": 3.2992377281188965, "ppl": 27.09197959339864, "gate_mean": 1.0658986866474152e-06, "lr": 0.00013216168260841762, "steps_per_second": 4.861926557757148 }, { "step": 7960, "loss": 3.198404312133789, "lm_loss": 3.198404312133789, "ppl": 24.49341515234715, "gate_mean": 9.953510016202927e-07, "lr": 0.00013203944487494003, "steps_per_second": 4.862210264326912 }, { "step": 7970, "loss": 3.3523943424224854, "lm_loss": 3.3523943424224854, "ppl": 28.57106071464701, "gate_mean": 1.096632331609726e-06, "lr": 0.00013191711681288177, "steps_per_second": 4.862402368918081 }, { "step": 7980, "loss": 3.3682045936584473, "lm_loss": 3.3682045936584473, "ppl": 29.02636612496095, "gate_mean": 1.0209623724222183e-06, "lr": 0.0001317946987566831, "steps_per_second": 4.862643758478089 }, { "step": 7990, "loss": 3.1316757202148438, "lm_loss": 3.1316757202148438, "ppl": 22.912342065804996, "gate_mean": 1.0544899851083755e-06, "lr": 0.0001316721910410302, "steps_per_second": 4.862931883919382 }, { "step": 8000, "loss": 3.3515546321868896, "lm_loss": 3.3515546321868896, "ppl": 28.547079372621404, "gate_mean": 1.400243490934372e-06, "lr": 0.0001315495940008547, "steps_per_second": 4.863325756608795 }, { "step": 8010, "loss": 3.284175395965576, "lm_loss": 3.284175395965576, "ppl": 26.686969056848138, "gate_mean": 1.1781230568885803e-06, "lr": 0.00013142690797133202, "steps_per_second": 4.841775682362564 }, { "step": 8020, "loss": 3.496511459350586, "lm_loss": 3.496511459350586, "ppl": 33.00012863042618, "gate_mean": 1.3920944184064865e-06, "lr": 0.00013130413328788122, "steps_per_second": 4.841739821307976 }, { "step": 8030, "loss": 3.1821658611297607, "lm_loss": 3.1821658611297607, "ppl": 24.09889192273349, "gate_mean": 1.2968666851520538e-06, "lr": 0.00013118127028616353, "steps_per_second": 4.841636981952679 }, { "step": 8040, "loss": 3.1907522678375244, "lm_loss": 3.1907522678375244, "ppl": 24.306705720102016, "gate_mean": 9.653158485889435e-07, "lr": 0.0001310583193020817, "steps_per_second": 4.841603392872202 }, { "step": 8050, "loss": 3.2569451332092285, "lm_loss": 3.2569451332092285, "ppl": 25.970080702798214, "gate_mean": 1.1397060006856918e-06, "lr": 0.00013093528067177902, "steps_per_second": 4.841784108386246 }, { "step": 8060, "loss": 3.23099422454834, "lm_loss": 3.23099422454834, "ppl": 25.30480312488742, "gate_mean": 1.2647360563278198e-06, "lr": 0.00013081215473163848, "steps_per_second": 4.842033528667204 }, { "step": 8070, "loss": 3.3162074089050293, "lm_loss": 3.3162074089050293, "ppl": 27.55564483780176, "gate_mean": 9.830109775066376e-07, "lr": 0.00013068894181828163, "steps_per_second": 4.841999316812707 }, { "step": 8080, "loss": 3.3441264629364014, "lm_loss": 3.3441264629364014, "ppl": 28.335812470023438, "gate_mean": 1.2801028788089752e-06, "lr": 0.00013056564226856786, "steps_per_second": 4.842360563841509 }, { "step": 8090, "loss": 3.3524396419525146, "lm_loss": 3.3524396419525146, "ppl": 28.57235499958485, "gate_mean": 1.0400544852018356e-06, "lr": 0.0001304422564195935, "steps_per_second": 4.842274172334386 }, { "step": 8100, "loss": 3.313451051712036, "lm_loss": 3.313451051712036, "ppl": 27.479796218910334, "gate_mean": 1.2214295566082e-06, "lr": 0.0001303187846086907, "steps_per_second": 4.84267477006717 }, { "step": 8110, "loss": 3.210904598236084, "lm_loss": 3.210904598236084, "ppl": 24.801511483673927, "gate_mean": 1.319684088230133e-06, "lr": 0.00013019522717342666, "steps_per_second": 4.842987135467344 }, { "step": 8120, "loss": 3.305300712585449, "lm_loss": 3.305300712585449, "ppl": 27.25673680033185, "gate_mean": 1.0498333722352982e-06, "lr": 0.00013007158445160272, "steps_per_second": 4.843289461033972 }, { "step": 8130, "loss": 3.3033876419067383, "lm_loss": 3.3033876419067383, "ppl": 27.204642582210443, "gate_mean": 1.1650845408439636e-06, "lr": 0.00012994785678125336, "steps_per_second": 4.843636150682831 }, { "step": 8140, "loss": 3.2391960620880127, "lm_loss": 3.2391960620880127, "ppl": 25.513202469622502, "gate_mean": 1.218169927597046e-06, "lr": 0.0001298240445006453, "steps_per_second": 4.8440577648377605 }, { "step": 8150, "loss": 3.315856695175171, "lm_loss": 3.315856695175171, "ppl": 27.545982389297762, "gate_mean": 9.997747838497162e-07, "lr": 0.0001297001479482766, "steps_per_second": 4.844320474047138 }, { "step": 8160, "loss": 3.244688034057617, "lm_loss": 3.244688034057617, "ppl": 25.653705729270413, "gate_mean": 9.879004210233688e-07, "lr": 0.0001295761674628757, "steps_per_second": 4.8444693724110355 }, { "step": 8170, "loss": 3.177372932434082, "lm_loss": 3.177372932434082, "ppl": 23.983664012268402, "gate_mean": 1.0726507753133774e-06, "lr": 0.00012945210338340048, "steps_per_second": 4.844660900398377 }, { "step": 8180, "loss": 3.397787094116211, "lm_loss": 3.397787094116211, "ppl": 29.897865626398552, "gate_mean": 1.0866206139326096e-06, "lr": 0.00012932795604903743, "steps_per_second": 4.845001372319885 }, { "step": 8190, "loss": 3.231576919555664, "lm_loss": 3.231576919555664, "ppl": 25.319552404077964, "gate_mean": 1.1650845408439636e-06, "lr": 0.00012920372579920062, "steps_per_second": 4.845157641854984 }, { "step": 8200, "loss": 3.4072036743164062, "lm_loss": 3.4072036743164062, "ppl": 30.180730997929615, "gate_mean": 1.0598450899124146e-06, "lr": 0.00012907941297353077, "steps_per_second": 4.845499260571316 }, { "step": 8210, "loss": 3.175253391265869, "lm_loss": 3.175253391265869, "ppl": 23.93288348375095, "gate_mean": 1.0775402188301086e-06, "lr": 0.00012895501791189445, "steps_per_second": 4.845660554923 }, { "step": 8220, "loss": 3.292635440826416, "lm_loss": 3.292635440826416, "ppl": 26.913699735824135, "gate_mean": 1.0652001947164536e-06, "lr": 0.000128830540954383, "steps_per_second": 4.846055045587993 }, { "step": 8230, "loss": 3.3452699184417725, "lm_loss": 3.3452699184417725, "ppl": 28.368231742256786, "gate_mean": 1.1886004358530045e-06, "lr": 0.00012870598244131166, "steps_per_second": 4.846192866147952 }, { "step": 8240, "loss": 3.127393960952759, "lm_loss": 3.127393960952759, "ppl": 22.814446664782125, "gate_mean": 1.2624077498912811e-06, "lr": 0.00012858134271321862, "steps_per_second": 4.846369200200103 }, { "step": 8250, "loss": 3.2046284675598145, "lm_loss": 3.2046284675598145, "ppl": 24.64634139983808, "gate_mean": 1.3415701687335968e-06, "lr": 0.0001284566221108642, "steps_per_second": 4.8467432985315675 }, { "step": 8260, "loss": 3.162724733352661, "lm_loss": 3.162724733352661, "ppl": 23.634907093537453, "gate_mean": 9.776558727025986e-07, "lr": 0.0001283318209752297, "steps_per_second": 4.847104459221677 }, { "step": 8270, "loss": 3.35245418548584, "lm_loss": 3.35245418548584, "ppl": 28.572770545603714, "gate_mean": 1.0654330253601074e-06, "lr": 0.00012820693964751665, "steps_per_second": 4.847270468377854 }, { "step": 8280, "loss": 3.1371915340423584, "lm_loss": 3.1371915340423584, "ppl": 23.03907146557687, "gate_mean": 1.1860392987728119e-06, "lr": 0.00012808197846914595, "steps_per_second": 4.847659978042023 }, { "step": 8290, "loss": 3.102058172225952, "lm_loss": 3.102058172225952, "ppl": 22.243685536584522, "gate_mean": 1.2558884918689728e-06, "lr": 0.00012795693778175653, "steps_per_second": 4.84799856163375 }, { "step": 8300, "loss": 3.2753701210021973, "lm_loss": 3.2753701210021973, "ppl": 26.453014485125326, "gate_mean": 1.1264346539974213e-06, "lr": 0.00012783181792720496, "steps_per_second": 4.848389035035106 }, { "step": 8310, "loss": 3.2368721961975098, "lm_loss": 3.2368721961975098, "ppl": 25.45398204545865, "gate_mean": 1.1473894119262695e-06, "lr": 0.00012770661924756414, "steps_per_second": 4.848727045566235 }, { "step": 8320, "loss": 3.107048988342285, "lm_loss": 3.107048988342285, "ppl": 22.354977167874168, "gate_mean": 1.4631077647209167e-06, "lr": 0.00012758134208512245, "steps_per_second": 4.849058650814205 }, { "step": 8330, "loss": 3.2349936962127686, "lm_loss": 3.2349936962127686, "ppl": 25.406211622991126, "gate_mean": 1.2505333870649338e-06, "lr": 0.00012745598678238286, "steps_per_second": 4.84938879431151 }, { "step": 8340, "loss": 3.113729476928711, "lm_loss": 3.113729476928711, "ppl": 22.50481928971229, "gate_mean": 1.2731179594993591e-06, "lr": 0.000127330553682062, "steps_per_second": 4.849630162036267 }, { "step": 8350, "loss": 3.3279500007629395, "lm_loss": 3.3279500007629395, "ppl": 27.881126789362153, "gate_mean": 1.1562369763851166e-06, "lr": 0.0001272050431270892, "steps_per_second": 4.849679557543331 }, { "step": 8360, "loss": 3.151322364807129, "lm_loss": 3.151322364807129, "ppl": 23.366943783767244, "gate_mean": 1.3327226042747498e-06, "lr": 0.00012707945546060548, "steps_per_second": 4.850027244251703 }, { "step": 8370, "loss": 3.20723032951355, "lm_loss": 3.20723032951355, "ppl": 24.710551274216304, "gate_mean": 1.23586505651474e-06, "lr": 0.0001269537910259627, "steps_per_second": 4.850351638629187 }, { "step": 8380, "loss": 3.3249688148498535, "lm_loss": 3.3249688148498535, "ppl": 27.79813174024038, "gate_mean": 9.264331310987473e-07, "lr": 0.00012682805016672272, "steps_per_second": 4.8507638100463675 }, { "step": 8390, "loss": 3.2361555099487305, "lm_loss": 3.2361555099487305, "ppl": 25.435746062069814, "gate_mean": 1.3648532330989838e-06, "lr": 0.0001267022332266562, "steps_per_second": 4.851001720278432 }, { "step": 8400, "loss": 3.1930594444274902, "lm_loss": 3.1930594444274902, "ppl": 24.362850325372985, "gate_mean": 1.1781230568885803e-06, "lr": 0.00012657634054974177, "steps_per_second": 4.8512057858585464 }, { "step": 8410, "loss": 3.1955904960632324, "lm_loss": 3.1955904960632324, "ppl": 24.424592060339062, "gate_mean": 1.257285475730896e-06, "lr": 0.00012645037248016535, "steps_per_second": 4.851480438334593 }, { "step": 8420, "loss": 3.2472405433654785, "lm_loss": 3.2472405433654785, "ppl": 25.719270693918812, "gate_mean": 1.107342541217804e-06, "lr": 0.00012632432936231876, "steps_per_second": 4.851627557028706 }, { "step": 8430, "loss": 3.3660426139831543, "lm_loss": 3.3660426139831543, "ppl": 28.963679499373644, "gate_mean": 1.3522803783416748e-06, "lr": 0.00012619821154079904, "steps_per_second": 4.851940607981575 }, { "step": 8440, "loss": 3.1615030765533447, "lm_loss": 3.1615030765533447, "ppl": 23.606050978308957, "gate_mean": 1.2777745723724365e-06, "lr": 0.00012607201936040756, "steps_per_second": 4.852290562200662 }, { "step": 8450, "loss": 3.17075252532959, "lm_loss": 3.17075252532959, "ppl": 23.825406834154144, "gate_mean": 1.267995685338974e-06, "lr": 0.00012594575316614883, "steps_per_second": 4.852664563764731 }, { "step": 8460, "loss": 3.155104637145996, "lm_loss": 3.155104637145996, "ppl": 23.455491278694545, "gate_mean": 1.000240445137024e-06, "lr": 0.00012581941330322993, "steps_per_second": 4.852938065713938 }, { "step": 8470, "loss": 3.2076172828674316, "lm_loss": 3.2076172828674316, "ppl": 24.720114955138005, "gate_mean": 1.050066202878952e-06, "lr": 0.00012569300011705913, "steps_per_second": 4.853326143996675 }, { "step": 8480, "loss": 3.2299299240112305, "lm_loss": 3.2299299240112305, "ppl": 25.27788553607315, "gate_mean": 1.3192184269428253e-06, "lr": 0.00012556651395324528, "steps_per_second": 4.853706735351567 }, { "step": 8490, "loss": 3.136760711669922, "lm_loss": 3.136760711669922, "ppl": 23.029147855959348, "gate_mean": 1.1478550732135773e-06, "lr": 0.0001254399551575967, "steps_per_second": 4.8539679857914075 }, { "step": 8500, "loss": 3.185030698776245, "lm_loss": 3.185030698776245, "ppl": 24.16803032341224, "gate_mean": 1.1278316378593445e-06, "lr": 0.00012531332407612035, "steps_per_second": 4.854337647510597 }, { "step": 8510, "loss": 3.358008623123169, "lm_loss": 3.358008623123169, "ppl": 28.731917795297353, "gate_mean": 1.1443626135587692e-06, "lr": 0.00012518662105502078, "steps_per_second": 4.854685560759683 }, { "step": 8520, "loss": 3.2671704292297363, "lm_loss": 3.2671704292297363, "ppl": 26.236994777741593, "gate_mean": 1.3257376849651337e-06, "lr": 0.00012505984644069914, "steps_per_second": 4.855087350732722 }, { "step": 8530, "loss": 3.1040091514587402, "lm_loss": 3.1040091514587402, "ppl": 22.287124865963154, "gate_mean": 1.1981464922428131e-06, "lr": 0.0001249330005797524, "steps_per_second": 4.855478708749888 }, { "step": 8540, "loss": 3.168461799621582, "lm_loss": 3.168461799621582, "ppl": 23.77089182551989, "gate_mean": 1.1674128472805023e-06, "lr": 0.00012480608381897236, "steps_per_second": 4.8558930748686935 }, { "step": 8550, "loss": 3.183786392211914, "lm_loss": 3.183786392211914, "ppl": 24.13797658654258, "gate_mean": 1.3182871043682098e-06, "lr": 0.00012467909650534459, "steps_per_second": 4.856235636160324 }, { "step": 8560, "loss": 3.1684298515319824, "lm_loss": 3.1684298515319824, "ppl": 23.7701324030691, "gate_mean": 1.25030055642128e-06, "lr": 0.00012455203898604746, "steps_per_second": 4.856578818250515 }, { "step": 8570, "loss": 3.218564987182617, "lm_loss": 3.218564987182617, "ppl": 24.992230265486118, "gate_mean": 1.301988959312439e-06, "lr": 0.0001244249116084515, "steps_per_second": 4.856923628073101 }, { "step": 8580, "loss": 3.0808329582214355, "lm_loss": 3.0808329582214355, "ppl": 21.776533786655342, "gate_mean": 1.1192169040441513e-06, "lr": 0.00012429771472011802, "steps_per_second": 4.857298394567955 }, { "step": 8590, "loss": 3.3037948608398438, "lm_loss": 3.3037948608398438, "ppl": 27.215723083680157, "gate_mean": 1.1404044926166534e-06, "lr": 0.00012417044866879848, "steps_per_second": 4.857676062938947 }, { "step": 8600, "loss": 3.162250518798828, "lm_loss": 3.162250518798828, "ppl": 23.623701733697555, "gate_mean": 8.977949619293213e-07, "lr": 0.00012404311380243344, "steps_per_second": 4.857714067234923 }, { "step": 8610, "loss": 3.188164472579956, "lm_loss": 3.188164472579956, "ppl": 24.24388625932061, "gate_mean": 9.587965905666351e-07, "lr": 0.00012391571046915152, "steps_per_second": 4.858097926101407 }, { "step": 8620, "loss": 3.0828354358673096, "lm_loss": 3.0828354358673096, "ppl": 21.820184498969247, "gate_mean": 1.0831281542778015e-06, "lr": 0.00012378823901726857, "steps_per_second": 4.858468819739792 }, { "step": 8630, "loss": 3.157198905944824, "lm_loss": 3.157198905944824, "ppl": 23.504664855611846, "gate_mean": 1.1040829122066498e-06, "lr": 0.0001236606997952867, "steps_per_second": 4.858787709563503 }, { "step": 8640, "loss": 3.114051342010498, "lm_loss": 3.114051342010498, "ppl": 22.512063971056012, "gate_mean": 1.1299271136522293e-06, "lr": 0.00012353309315189323, "steps_per_second": 4.858904060597235 }, { "step": 8650, "loss": 3.0904154777526855, "lm_loss": 3.0904154777526855, "ppl": 21.98621085985509, "gate_mean": 1.1490192264318466e-06, "lr": 0.0001234054194359599, "steps_per_second": 4.859204669286983 }, { "step": 8660, "loss": 3.0356602668762207, "lm_loss": 3.0356602668762207, "ppl": 20.814716631291365, "gate_mean": 1.1599622666835785e-06, "lr": 0.00012327767899654175, "steps_per_second": 4.859435632281824 }, { "step": 8670, "loss": 3.1418135166168213, "lm_loss": 3.1418135166168213, "ppl": 23.145804120650233, "gate_mean": 1.3653188943862915e-06, "lr": 0.00012314987218287626, "steps_per_second": 4.859709630492923 }, { "step": 8680, "loss": 3.215101957321167, "lm_loss": 3.215101957321167, "ppl": 24.90583111353978, "gate_mean": 9.78820025920868e-07, "lr": 0.00012302199934438238, "steps_per_second": 4.860077702228023 }, { "step": 8690, "loss": 3.1442017555236816, "lm_loss": 3.1442017555236816, "ppl": 23.201147891349674, "gate_mean": 1.044943928718567e-06, "lr": 0.00012289406083065957, "steps_per_second": 4.860464801482539 }, { "step": 8700, "loss": 3.1295032501220703, "lm_loss": 3.1295032501220703, "ppl": 22.862619717624728, "gate_mean": 1.014210283756256e-06, "lr": 0.00012276605699148687, "steps_per_second": 4.860578314948297 }, { "step": 8710, "loss": 3.196085214614868, "lm_loss": 3.196085214614868, "ppl": 24.436678348556445, "gate_mean": 1.346692442893982e-06, "lr": 0.0001226379881768218, "steps_per_second": 4.860817001027103 }, { "step": 8720, "loss": 3.2134628295898438, "lm_loss": 3.2134628295898438, "ppl": 24.865040714559758, "gate_mean": 1.0705552995204926e-06, "lr": 0.00012250985473679973, "steps_per_second": 4.861130540778083 }, { "step": 8730, "loss": 3.211782693862915, "lm_loss": 3.211782693862915, "ppl": 24.823299146872476, "gate_mean": 1.200009137392044e-06, "lr": 0.00012238165702173248, "steps_per_second": 4.8615615694582575 }, { "step": 8740, "loss": 3.176105499267578, "lm_loss": 3.176105499267578, "ppl": 23.953285576430158, "gate_mean": 1.3266690075397491e-06, "lr": 0.0001222533953821078, "steps_per_second": 4.861814628310136 }, { "step": 8750, "loss": 3.295687675476074, "lm_loss": 3.295687675476074, "ppl": 26.99597215620266, "gate_mean": 1.2884847819805145e-06, "lr": 0.00012212507016858806, "steps_per_second": 4.862176775587202 }, { "step": 8760, "loss": 3.1276869773864746, "lm_loss": 3.1276869773864746, "ppl": 22.82113265208528, "gate_mean": 1.2151431292295456e-06, "lr": 0.0001219966817320095, "steps_per_second": 4.862451557378261 }, { "step": 8770, "loss": 3.0520553588867188, "lm_loss": 3.0520553588867188, "ppl": 21.158788664806135, "gate_mean": 1.0817311704158783e-06, "lr": 0.00012186823042338123, "steps_per_second": 4.862850751260037 }, { "step": 8780, "loss": 3.1735823154449463, "lm_loss": 3.1735823154449463, "ppl": 23.892923218451926, "gate_mean": 1.1676456779241562e-06, "lr": 0.00012173971659388417, "steps_per_second": 4.863113126287043 }, { "step": 8790, "loss": 3.170727252960205, "lm_loss": 3.170727252960205, "ppl": 23.82480471728038, "gate_mean": 1.612585037946701e-06, "lr": 0.00012161114059487027, "steps_per_second": 4.863410906773885 }, { "step": 8800, "loss": 3.1701040267944336, "lm_loss": 3.1701040267944336, "ppl": 23.809961101531492, "gate_mean": 1.5632249414920807e-06, "lr": 0.00012148250277786138, "steps_per_second": 4.863730928465133 }, { "step": 8810, "loss": 3.0558133125305176, "lm_loss": 3.0558133125305176, "ppl": 21.238452003582644, "gate_mean": 1.0728836059570312e-06, "lr": 0.00012135380349454843, "steps_per_second": 4.864063932147063 }, { "step": 8820, "loss": 3.194376230239868, "lm_loss": 3.194376230239868, "ppl": 24.394952111980594, "gate_mean": 1.1012889444828033e-06, "lr": 0.00012122504309679028, "steps_per_second": 4.864311356309774 }, { "step": 8830, "loss": 3.1391077041625977, "lm_loss": 3.1391077041625977, "ppl": 23.08326056931659, "gate_mean": 1.1711381375789642e-06, "lr": 0.00012109622193661293, "steps_per_second": 4.864611089995636 }, { "step": 8840, "loss": 3.208401679992676, "lm_loss": 3.208401679992676, "ppl": 24.739512949115277, "gate_mean": 1.1096708476543427e-06, "lr": 0.0001209673403662086, "steps_per_second": 4.86494932692988 }, { "step": 8850, "loss": 3.1207938194274902, "lm_loss": 3.1207938194274902, "ppl": 22.664363916486842, "gate_mean": 1.496635377407074e-06, "lr": 0.00012083839873793449, "steps_per_second": 4.865135565335166 }, { "step": 8860, "loss": 3.1709022521972656, "lm_loss": 3.1709022521972656, "ppl": 23.82897440476444, "gate_mean": 9.57399606704712e-07, "lr": 0.00012070939740431216, "steps_per_second": 4.865564743218447 }, { "step": 8870, "loss": 3.1847174167633057, "lm_loss": 3.1847174167633057, "ppl": 24.160460100093054, "gate_mean": 1.1541415005922318e-06, "lr": 0.00012058033671802627, "steps_per_second": 4.8657770806476455 }, { "step": 8880, "loss": 3.160726547241211, "lm_loss": 3.160726547241211, "ppl": 23.587727303136774, "gate_mean": 1.257285475730896e-06, "lr": 0.00012045121703192385, "steps_per_second": 4.866057421147214 }, { "step": 8890, "loss": 3.0692121982574463, "lm_loss": 3.0692121982574463, "ppl": 21.52493860957465, "gate_mean": 9.066425263881683e-07, "lr": 0.00012032203869901317, "steps_per_second": 4.86639886653076 }, { "step": 8900, "loss": 3.081967353820801, "lm_loss": 3.081967353820801, "ppl": 21.801251007655097, "gate_mean": 1.126900315284729e-06, "lr": 0.00012019280207246287, "steps_per_second": 4.866633113590457 }, { "step": 8910, "loss": 3.1750590801239014, "lm_loss": 3.1750590801239014, "ppl": 23.928233509616163, "gate_mean": 1.1401716619729996e-06, "lr": 0.00012006350750560096, "steps_per_second": 4.866762671424437 }, { "step": 8920, "loss": 3.0371053218841553, "lm_loss": 3.0371053218841553, "ppl": 20.84481678474901, "gate_mean": 1.077074557542801e-06, "lr": 0.00011993415535191382, "steps_per_second": 4.867096027468098 }, { "step": 8930, "loss": 3.1418886184692383, "lm_loss": 3.1418886184692383, "ppl": 23.147542478691513, "gate_mean": 1.1364463716745377e-06, "lr": 0.00011980474596504533, "steps_per_second": 4.867436465212574 }, { "step": 8940, "loss": 3.140904188156128, "lm_loss": 3.140904188156128, "ppl": 23.12476654869878, "gate_mean": 9.275972843170166e-07, "lr": 0.00011967527969879579, "steps_per_second": 4.867751643180305 }, { "step": 8950, "loss": 3.0013530254364014, "lm_loss": 3.0013530254364014, "ppl": 20.11273155891674, "gate_mean": 1.2624077498912811e-06, "lr": 0.00011954575690712104, "steps_per_second": 4.867919463034995 }, { "step": 8960, "loss": 3.0246524810791016, "lm_loss": 3.0246524810791016, "ppl": 20.586849148320574, "gate_mean": 1.0444782674312592e-06, "lr": 0.00011941617794413144, "steps_per_second": 4.868078744838768 }, { "step": 8970, "loss": 2.9370059967041016, "lm_loss": 2.9370059967041016, "ppl": 18.85929690285317, "gate_mean": 1.5459954738616943e-06, "lr": 0.00011928654316409094, "steps_per_second": 4.868280396449094 }, { "step": 8980, "loss": 2.998354434967041, "lm_loss": 2.998354434967041, "ppl": 20.052512045702763, "gate_mean": 1.334119588136673e-06, "lr": 0.00011915685292141607, "steps_per_second": 4.868576944200727 }, { "step": 8990, "loss": 3.116237163543701, "lm_loss": 3.116237163543701, "ppl": 22.56132514369334, "gate_mean": 1.2118835002183914e-06, "lr": 0.000119027107570675, "steps_per_second": 4.868925563801517 }, { "step": 9000, "loss": 3.196317672729492, "lm_loss": 3.196317672729492, "ppl": 24.44235951302383, "gate_mean": 9.883660823106766e-07, "lr": 0.00011889730746658656, "steps_per_second": 4.869107187066086 }, { "step": 9010, "loss": 3.0765879154205322, "lm_loss": 3.0765879154205322, "ppl": 21.68428740212721, "gate_mean": 1.1711381375789642e-06, "lr": 0.00011876745296401928, "steps_per_second": 4.865439635610843 }, { "step": 9020, "loss": 2.9275426864624023, "lm_loss": 2.9275426864624023, "ppl": 18.681667332807336, "gate_mean": 1.4472752809524536e-06, "lr": 0.00011863754441799041, "steps_per_second": 4.865605754287231 }, { "step": 9030, "loss": 3.1523308753967285, "lm_loss": 3.1523308753967285, "ppl": 23.39052148120013, "gate_mean": 1.055654138326645e-06, "lr": 0.00011850758218366497, "steps_per_second": 4.861682340724593 }, { "step": 9040, "loss": 3.0261964797973633, "lm_loss": 3.0261964797973633, "ppl": 20.618659768477475, "gate_mean": 1.326901838183403e-06, "lr": 0.00011837756661635472, "steps_per_second": 4.861430669813006 }, { "step": 9050, "loss": 3.247786283493042, "lm_loss": 3.247786283493042, "ppl": 25.73331056269958, "gate_mean": 1.269858330488205e-06, "lr": 0.00011824749807151729, "steps_per_second": 4.861494607223619 }, { "step": 9060, "loss": 3.1569266319274902, "lm_loss": 3.1569266319274902, "ppl": 23.498266017243765, "gate_mean": 1.3294629752635956e-06, "lr": 0.00011811737690475508, "steps_per_second": 4.857920944203905 }, { "step": 9070, "loss": 3.160221815109253, "lm_loss": 3.160221815109253, "ppl": 23.575824823281717, "gate_mean": 1.2493692338466644e-06, "lr": 0.00011798720347181442, "steps_per_second": 4.850817688937015 }, { "step": 9080, "loss": 3.0400142669677734, "lm_loss": 3.0400142669677734, "ppl": 20.90554149165189, "gate_mean": 1.062639057636261e-06, "lr": 0.0001178569781285845, "steps_per_second": 4.851021202153421 }, { "step": 9090, "loss": 3.0979599952697754, "lm_loss": 3.0979599952697754, "ppl": 22.152713514144363, "gate_mean": 9.865034371614456e-07, "lr": 0.00011772670123109641, "steps_per_second": 4.851211632357509 }, { "step": 9100, "loss": 2.986818552017212, "lm_loss": 2.986818552017212, "ppl": 19.822517758049983, "gate_mean": 1.0489020496606827e-06, "lr": 0.00011759637313552224, "steps_per_second": 4.851422575525958 }, { "step": 9110, "loss": 3.2296454906463623, "lm_loss": 3.2296454906463623, "ppl": 25.270696684456464, "gate_mean": 1.1022202670574188e-06, "lr": 0.00011746599419817401, "steps_per_second": 4.8516986739250605 }, { "step": 9120, "loss": 3.1551055908203125, "lm_loss": 3.1551055908203125, "ppl": 23.45551364760482, "gate_mean": 1.2973323464393616e-06, "lr": 0.00011733556477550277, "steps_per_second": 4.848386100887554 }, { "step": 9130, "loss": 2.9330499172210693, "lm_loss": 2.9330499172210693, "ppl": 18.784835410216704, "gate_mean": 1.2675300240516663e-06, "lr": 0.00011720508522409757, "steps_per_second": 4.848708170055888 }, { "step": 9140, "loss": 3.030071496963501, "lm_loss": 3.030071496963501, "ppl": 20.698712431574, "gate_mean": 1.0989606380462646e-06, "lr": 0.00011707455590068456, "steps_per_second": 4.848947522395474 }, { "step": 9150, "loss": 2.929727077484131, "lm_loss": 2.929727077484131, "ppl": 18.722520002057742, "gate_mean": 1.157168298959732e-06, "lr": 0.00011694397716212585, "steps_per_second": 4.848776593302323 }, { "step": 9160, "loss": 3.1234312057495117, "lm_loss": 3.1234312057495117, "ppl": 22.72421749368626, "gate_mean": 1.2745149433612823e-06, "lr": 0.00011681334936541878, "steps_per_second": 4.845517584987038 }, { "step": 9170, "loss": 3.0815539360046387, "lm_loss": 3.0815539360046387, "ppl": 21.79223984488987, "gate_mean": 1.1981464922428131e-06, "lr": 0.00011668267286769475, "steps_per_second": 4.845710745306489 }, { "step": 9180, "loss": 3.0765221118927, "lm_loss": 3.0765221118927, "ppl": 21.682860546464212, "gate_mean": 1.3257376849651337e-06, "lr": 0.00011655194802621832, "steps_per_second": 4.845949230259709 }, { "step": 9190, "loss": 3.140194892883301, "lm_loss": 3.140194892883301, "ppl": 23.108370076757996, "gate_mean": 1.2116506695747375e-06, "lr": 0.0001164211751983862, "steps_per_second": 4.846177474007849 }, { "step": 9200, "loss": 3.1409120559692383, "lm_loss": 3.1409120559692383, "ppl": 23.124948490755948, "gate_mean": 1.2158416211605072e-06, "lr": 0.0001162903547417263, "steps_per_second": 4.846492105561811 }, { "step": 9210, "loss": 3.018026828765869, "lm_loss": 3.018026828765869, "ppl": 20.450898721113422, "gate_mean": 1.5115365386009216e-06, "lr": 0.00011615948701389679, "steps_per_second": 4.84674818734427 }, { "step": 9220, "loss": 2.9971108436584473, "lm_loss": 2.9971108436584473, "ppl": 20.0275904153805, "gate_mean": 1.2316741049289703e-06, "lr": 0.00011602857237268501, "steps_per_second": 4.847091555230098 }, { "step": 9230, "loss": 3.1197171211242676, "lm_loss": 3.1197171211242676, "ppl": 22.639974366763933, "gate_mean": 1.1739321053028107e-06, "lr": 0.0001158976111760066, "steps_per_second": 4.847403924540203 }, { "step": 9240, "loss": 2.9403276443481445, "lm_loss": 2.9403276443481445, "ppl": 18.922044997816315, "gate_mean": 1.1478550732135773e-06, "lr": 0.00011576660378190444, "steps_per_second": 4.847372734337587 }, { "step": 9250, "loss": 2.902841806411743, "lm_loss": 2.902841806411743, "ppl": 18.225866227807813, "gate_mean": 1.0705552995204926e-06, "lr": 0.00011563555054854779, "steps_per_second": 4.847615107604572 }, { "step": 9260, "loss": 3.035348415374756, "lm_loss": 3.035348415374756, "ppl": 20.80822654268187, "gate_mean": 1.4281831681728363e-06, "lr": 0.00011550445183423115, "steps_per_second": 4.847924704922982 }, { "step": 9270, "loss": 3.0728933811187744, "lm_loss": 3.0728933811187744, "ppl": 21.6043218671748, "gate_mean": 1.1730007827281952e-06, "lr": 0.00011537330799737345, "steps_per_second": 4.84815924286691 }, { "step": 9280, "loss": 3.1612401008605957, "lm_loss": 3.1612401008605957, "ppl": 23.599843976880887, "gate_mean": 1.4868564903736115e-06, "lr": 0.0001152421193965169, "steps_per_second": 4.8484387140492675 }, { "step": 9290, "loss": 3.0586206912994385, "lm_loss": 3.0586206912994385, "ppl": 21.298160155305798, "gate_mean": 1.3373792171478271e-06, "lr": 0.0001151108863903261, "steps_per_second": 4.848775615186907 }, { "step": 9300, "loss": 2.988368272781372, "lm_loss": 2.988368272781372, "ppl": 19.853260940939183, "gate_mean": 1.2605451047420502e-06, "lr": 0.00011497960933758711, "steps_per_second": 4.849089485727271 }, { "step": 9310, "loss": 3.082705497741699, "lm_loss": 3.082705497741699, "ppl": 21.81734940929209, "gate_mean": 1.218169927597046e-06, "lr": 0.00011484828859720635, "steps_per_second": 4.8493274399263475 }, { "step": 9320, "loss": 3.1130685806274414, "lm_loss": 3.1130685806274414, "ppl": 22.489950851672024, "gate_mean": 1.2363307178020477e-06, "lr": 0.00011471692452820974, "steps_per_second": 4.849517446425025 }, { "step": 9330, "loss": 2.943758964538574, "lm_loss": 2.943758964538574, "ppl": 18.98708411406394, "gate_mean": 1.00722536444664e-06, "lr": 0.00011458551748974165, "steps_per_second": 4.8497520286521665 }, { "step": 9340, "loss": 2.9394149780273438, "lm_loss": 2.9394149780273438, "ppl": 18.90478336288074, "gate_mean": 1.293141394853592e-06, "lr": 0.00011445406784106386, "steps_per_second": 4.850108708200564 }, { "step": 9350, "loss": 3.005976438522339, "lm_loss": 3.005976438522339, "ppl": 20.20593632123077, "gate_mean": 1.2922100722789764e-06, "lr": 0.0001143225759415547, "steps_per_second": 4.850225196246549 }, { "step": 9360, "loss": 2.9607763290405273, "lm_loss": 2.9607763290405273, "ppl": 19.31295914822121, "gate_mean": 1.066131517291069e-06, "lr": 0.000114191042150708, "steps_per_second": 4.8504736201921315 }, { "step": 9370, "loss": 2.940321207046509, "lm_loss": 2.940321207046509, "ppl": 18.92192319129715, "gate_mean": 1.4221295714378357e-06, "lr": 0.00011405946682813214, "steps_per_second": 4.850859276543036 }, { "step": 9380, "loss": 2.9839227199554443, "lm_loss": 2.9839227199554443, "ppl": 19.765198109675158, "gate_mean": 1.3867393136024475e-06, "lr": 0.00011392785033354901, "steps_per_second": 4.851185209464662 }, { "step": 9390, "loss": 2.8854033946990967, "lm_loss": 2.8854033946990967, "ppl": 17.91079125667838, "gate_mean": 1.1068768799304962e-06, "lr": 0.0001137961930267931, "steps_per_second": 4.851440483516879 }, { "step": 9400, "loss": 2.97239351272583, "lm_loss": 2.97239351272583, "ppl": 19.538629631867902, "gate_mean": 9.937211871147156e-07, "lr": 0.00011366449526781042, "steps_per_second": 4.847789309913262 }, { "step": 9410, "loss": 3.115654468536377, "lm_loss": 3.115654468536377, "ppl": 22.54818260159219, "gate_mean": 1.4039687812328339e-06, "lr": 0.00011353275741665764, "steps_per_second": 4.84805048105457 }, { "step": 9420, "loss": 3.080660820007324, "lm_loss": 3.080660820007324, "ppl": 21.772785535637645, "gate_mean": 1.0686926543712616e-06, "lr": 0.00011340097983350104, "steps_per_second": 4.848353952582208 }, { "step": 9430, "loss": 2.9470980167388916, "lm_loss": 2.9470980167388916, "ppl": 19.05058894301935, "gate_mean": 1.2936070561408997e-06, "lr": 0.00011326916287861546, "steps_per_second": 4.848631294204713 }, { "step": 9440, "loss": 3.029540538787842, "lm_loss": 3.029540538787842, "ppl": 20.687725198121676, "gate_mean": 1.398380845785141e-06, "lr": 0.00011313730691238344, "steps_per_second": 4.84886556855286 }, { "step": 9450, "loss": 3.0138542652130127, "lm_loss": 3.0138542652130127, "ppl": 20.365743827137578, "gate_mean": 1.3886019587516785e-06, "lr": 0.00011300541229529414, "steps_per_second": 4.848917834316839 }, { "step": 9460, "loss": 3.1024515628814697, "lm_loss": 3.1024515628814697, "ppl": 22.252437716018818, "gate_mean": 1.0780058801174164e-06, "lr": 0.00011287347938794243, "steps_per_second": 4.849205805684594 }, { "step": 9470, "loss": 2.9799201488494873, "lm_loss": 2.9799201488494873, "ppl": 19.686244612717353, "gate_mean": 1.3345852494239807e-06, "lr": 0.00011274150855102785, "steps_per_second": 4.8494736364760405 }, { "step": 9480, "loss": 3.0141220092773438, "lm_loss": 3.0141220092773438, "ppl": 20.37119736420649, "gate_mean": 1.2721866369247437e-06, "lr": 0.00011260950014535359, "steps_per_second": 4.849739358868279 }, { "step": 9490, "loss": 3.0916099548339844, "lm_loss": 3.0916099548339844, "ppl": 22.012488575772128, "gate_mean": 1.227017492055893e-06, "lr": 0.00011247745453182563, "steps_per_second": 4.85001986447403 }, { "step": 9500, "loss": 3.0286591053009033, "lm_loss": 3.0286591053009033, "ppl": 20.66949837840817, "gate_mean": 1.0798685252666473e-06, "lr": 0.00011234537207145158, "steps_per_second": 4.850277087117595 }, { "step": 9510, "loss": 3.068025827407837, "lm_loss": 3.068025827407837, "ppl": 21.499417191795064, "gate_mean": 1.1986121535301208e-06, "lr": 0.0001122132531253399, "steps_per_second": 4.850572805788258 }, { "step": 9520, "loss": 3.0774803161621094, "lm_loss": 3.0774803161621094, "ppl": 21.703647113310947, "gate_mean": 9.443610906600952e-07, "lr": 0.00011208109805469872, "steps_per_second": 4.850885060953204 }, { "step": 9530, "loss": 2.9802310466766357, "lm_loss": 2.9802310466766357, "ppl": 19.692365974901954, "gate_mean": 1.1329539120197296e-06, "lr": 0.00011194890722083495, "steps_per_second": 4.851216215126643 }, { "step": 9540, "loss": 3.0094852447509766, "lm_loss": 3.0094852447509766, "ppl": 20.276959566985518, "gate_mean": 1.2926757335662842e-06, "lr": 0.0001118166809851533, "steps_per_second": 4.851565512015299 }, { "step": 9550, "loss": 3.0072736740112305, "lm_loss": 3.0072736740112305, "ppl": 20.232165187742986, "gate_mean": 1.0137446224689484e-06, "lr": 0.00011168441970915524, "steps_per_second": 4.8517207023215425 }, { "step": 9560, "loss": 3.0811705589294434, "lm_loss": 3.0811705589294434, "ppl": 21.78388680100126, "gate_mean": 1.3951212167739868e-06, "lr": 0.00011155212375443804, "steps_per_second": 4.852063122008144 }, { "step": 9570, "loss": 3.105297327041626, "lm_loss": 3.105297327041626, "ppl": 22.31585309555754, "gate_mean": 1.2782402336597443e-06, "lr": 0.00011141979348269381, "steps_per_second": 4.85237889329894 }, { "step": 9580, "loss": 3.1174585819244385, "lm_loss": 3.1174585819244385, "ppl": 22.5888987969731, "gate_mean": 1.5278346836566925e-06, "lr": 0.00011128742925570847, "steps_per_second": 4.852696755094587 }, { "step": 9590, "loss": 2.967336654663086, "lm_loss": 2.967336654663086, "ppl": 19.440074953607112, "gate_mean": 1.241220161318779e-06, "lr": 0.0001111550314353607, "steps_per_second": 4.852845392162965 }, { "step": 9600, "loss": 3.000309705734253, "lm_loss": 3.000309705734253, "ppl": 20.09175849252645, "gate_mean": 1.2833625078201294e-06, "lr": 0.00011102260038362114, "steps_per_second": 4.85319091124658 }, { "step": 9610, "loss": 2.939023494720459, "lm_loss": 2.939023494720459, "ppl": 18.89738390425067, "gate_mean": 1.0852236300706863e-06, "lr": 0.00011089013646255121, "steps_per_second": 4.853458766839599 }, { "step": 9620, "loss": 2.9642839431762695, "lm_loss": 2.9642839431762695, "ppl": 19.3808205028791, "gate_mean": 1.4884863048791885e-06, "lr": 0.00011075764003430219, "steps_per_second": 4.853708969046134 }, { "step": 9630, "loss": 2.9583518505096436, "lm_loss": 2.9583518505096436, "ppl": 19.266192009269158, "gate_mean": 1.2794043868780136e-06, "lr": 0.00011062511146111434, "steps_per_second": 4.854124930052127 }, { "step": 9640, "loss": 3.0856313705444336, "lm_loss": 3.0856313705444336, "ppl": 21.881277675938787, "gate_mean": 1.2693926692008972e-06, "lr": 0.00011049255110531558, "steps_per_second": 4.854286068844303 }, { "step": 9650, "loss": 3.0334548950195312, "lm_loss": 3.0334548950195312, "ppl": 20.76886302174248, "gate_mean": 1.480337232351303e-06, "lr": 0.00011035995932932101, "steps_per_second": 4.854497306293674 }, { "step": 9660, "loss": 2.8261029720306396, "lm_loss": 2.8261029720306396, "ppl": 16.879552396659154, "gate_mean": 1.0868534445762634e-06, "lr": 0.00011022733649563138, "steps_per_second": 4.854825368279896 }, { "step": 9670, "loss": 3.0034918785095215, "lm_loss": 3.0034918785095215, "ppl": 20.15579577421786, "gate_mean": 1.1227093636989594e-06, "lr": 0.0001100946829668325, "steps_per_second": 4.855162321126799 }, { "step": 9680, "loss": 2.9697303771972656, "lm_loss": 2.9697303771972656, "ppl": 19.486664838472027, "gate_mean": 9.932555258274078e-07, "lr": 0.00010996199910559403, "steps_per_second": 4.8554838428281375 }, { "step": 9690, "loss": 3.012345790863037, "lm_loss": 3.012345790863037, "ppl": 20.335045784382316, "gate_mean": 1.1832453310489655e-06, "lr": 0.00010982928527466856, "steps_per_second": 4.855752390223877 }, { "step": 9700, "loss": 2.939448118209839, "lm_loss": 2.939448118209839, "ppl": 18.905409881232824, "gate_mean": 1.1706724762916565e-06, "lr": 0.00010969654183689072, "steps_per_second": 4.856083623353872 }, { "step": 9710, "loss": 2.9398274421691895, "lm_loss": 2.9398274421691895, "ppl": 18.91258251645231, "gate_mean": 1.041218638420105e-06, "lr": 0.0001095637691551759, "steps_per_second": 4.8563452218586605 }, { "step": 9720, "loss": 2.8861382007598877, "lm_loss": 2.8861382007598877, "ppl": 17.92395705120776, "gate_mean": 1.2898817658424377e-06, "lr": 0.00010943096759251963, "steps_per_second": 4.856600133041801 }, { "step": 9730, "loss": 2.818648338317871, "lm_loss": 2.818648338317871, "ppl": 16.75418936460024, "gate_mean": 1.253560185432434e-06, "lr": 0.00010929813751199622, "steps_per_second": 4.856944827853214 }, { "step": 9740, "loss": 2.9667954444885254, "lm_loss": 2.9667954444885254, "ppl": 19.429556633815587, "gate_mean": 1.1939555406570435e-06, "lr": 0.0001091652792767581, "steps_per_second": 4.85707164978381 }, { "step": 9750, "loss": 2.9193270206451416, "lm_loss": 2.9193270206451416, "ppl": 18.528813753818028, "gate_mean": 1.425156369805336e-06, "lr": 0.00010903239325003461, "steps_per_second": 4.857279579821173 }, { "step": 9760, "loss": 2.971501588821411, "lm_loss": 2.971501588821411, "ppl": 19.52121043049549, "gate_mean": 1.0826624929904938e-06, "lr": 0.00010889947979513104, "steps_per_second": 4.857600649955503 }, { "step": 9770, "loss": 2.9687817096710205, "lm_loss": 2.9687817096710205, "ppl": 19.46818723828035, "gate_mean": 1.1471565812826157e-06, "lr": 0.00010876653927542776, "steps_per_second": 4.857842702352876 }, { "step": 9780, "loss": 2.9518632888793945, "lm_loss": 2.9518632888793945, "ppl": 19.141586826412095, "gate_mean": 1.526903361082077e-06, "lr": 0.00010863357205437899, "steps_per_second": 4.858124910110601 }, { "step": 9790, "loss": 3.002013683319092, "lm_loss": 3.002013683319092, "ppl": 20.126023583820626, "gate_mean": 1.2223608791828156e-06, "lr": 0.0001085005784955121, "steps_per_second": 4.858347004353142 }, { "step": 9800, "loss": 2.995115280151367, "lm_loss": 2.995115280151367, "ppl": 19.98766393797331, "gate_mean": 1.1194497346878052e-06, "lr": 0.0001083675589624264, "steps_per_second": 4.8586015513371 }, { "step": 9810, "loss": 3.027416467666626, "lm_loss": 3.027416467666626, "ppl": 20.643829633618704, "gate_mean": 1.4170072972774506e-06, "lr": 0.0001082345138187922, "steps_per_second": 4.858906009452241 }, { "step": 9820, "loss": 2.946082830429077, "lm_loss": 2.946082830429077, "ppl": 19.031258859408755, "gate_mean": 1.4207325875759125e-06, "lr": 0.00010810144342834978, "steps_per_second": 4.859104362976746 }, { "step": 9830, "loss": 2.9707205295562744, "lm_loss": 2.9707205295562744, "ppl": 19.505969161164213, "gate_mean": 1.4924444258213043e-06, "lr": 0.00010796834815490861, "steps_per_second": 4.8592478862023745 }, { "step": 9840, "loss": 2.991222381591797, "lm_loss": 2.991222381591797, "ppl": 19.910005246598118, "gate_mean": 1.2475065886974335e-06, "lr": 0.00010783522836234603, "steps_per_second": 4.859472155178607 }, { "step": 9850, "loss": 2.892914056777954, "lm_loss": 2.892914056777954, "ppl": 18.04581959875079, "gate_mean": 9.192153811454773e-07, "lr": 0.0001077020844146065, "steps_per_second": 4.859815422098702 }, { "step": 9860, "loss": 3.0125365257263184, "lm_loss": 3.0125365257263184, "ppl": 20.338924756475667, "gate_mean": 1.3886019587516785e-06, "lr": 0.00010756891667570043, "steps_per_second": 4.860063285572939 }, { "step": 9870, "loss": 2.9073972702026367, "lm_loss": 2.9073972702026367, "ppl": 18.309082902830134, "gate_mean": 1.2177042663097382e-06, "lr": 0.00010743572550970341, "steps_per_second": 4.860334612129435 }, { "step": 9880, "loss": 2.9207875728607178, "lm_loss": 2.9207875728607178, "ppl": 18.555895826374385, "gate_mean": 1.1273659765720367e-06, "lr": 0.00010730251128075494, "steps_per_second": 4.860588303658671 }, { "step": 9890, "loss": 3.011660575866699, "lm_loss": 3.011660575866699, "ppl": 20.321116678820648, "gate_mean": 1.1955853551626205e-06, "lr": 0.00010716927435305765, "steps_per_second": 4.860423383804657 }, { "step": 9900, "loss": 3.0898091793060303, "lm_loss": 3.0898091793060303, "ppl": 21.972884694587794, "gate_mean": 1.4957040548324585e-06, "lr": 0.00010703601509087624, "steps_per_second": 4.860562570909737 }, { "step": 9910, "loss": 2.9588732719421387, "lm_loss": 2.9588732719421387, "ppl": 19.276240434209758, "gate_mean": 1.0130461305379868e-06, "lr": 0.00010690273385853644, "steps_per_second": 4.860862869816168 }, { "step": 9920, "loss": 2.9275529384613037, "lm_loss": 2.9275529384613037, "ppl": 18.681858858222064, "gate_mean": 1.3345852494239807e-06, "lr": 0.00010676943102042403, "steps_per_second": 4.861168573396422 }, { "step": 9930, "loss": 2.950953960418701, "lm_loss": 2.950953960418701, "ppl": 19.12418874821068, "gate_mean": 1.2139789760112762e-06, "lr": 0.00010663610694098385, "steps_per_second": 4.861462877237129 }, { "step": 9940, "loss": 2.9008114337921143, "lm_loss": 2.9008114337921143, "ppl": 18.18889846991127, "gate_mean": 1.3438984751701355e-06, "lr": 0.0001065027619847189, "steps_per_second": 4.861692332150446 }, { "step": 9950, "loss": 2.9565651416778564, "lm_loss": 2.9565651416778564, "ppl": 19.23179966755094, "gate_mean": 1.1329539120197296e-06, "lr": 0.0001063693965161892, "steps_per_second": 4.862030819330369 }, { "step": 9960, "loss": 2.964840888977051, "lm_loss": 2.964840888977051, "ppl": 19.391617575887015, "gate_mean": 1.4780089259147644e-06, "lr": 0.00010623601090001077, "steps_per_second": 4.862369726475257 }, { "step": 9970, "loss": 2.8747739791870117, "lm_loss": 2.8747739791870117, "ppl": 17.7214182594123, "gate_mean": 1.3774260878562927e-06, "lr": 0.0001061026055008549, "steps_per_second": 4.862512193610522 }, { "step": 9980, "loss": 2.944895029067993, "lm_loss": 2.944895029067993, "ppl": 19.008666924254264, "gate_mean": 1.3271346688270569e-06, "lr": 0.00010596918068344678, "steps_per_second": 4.862856953526838 }, { "step": 9990, "loss": 2.9533798694610596, "lm_loss": 2.9533798694610596, "ppl": 19.170638609411967, "gate_mean": 1.4766119420528412e-06, "lr": 0.00010583573681256477, "steps_per_second": 4.863137664557592 }, { "step": 10000, "loss": 2.846428632736206, "lm_loss": 2.846428632736206, "ppl": 17.226150941233556, "gate_mean": 1.0691583156585693e-06, "lr": 0.00010570227425303936, "steps_per_second": 4.8634333962656235 }, { "step": 10010, "loss": 3.0289156436920166, "lm_loss": 3.0289156436920166, "ppl": 20.674801578475407, "gate_mean": 1.0691583156585693e-06, "lr": 0.00010556879336975207, "steps_per_second": 4.830438284020697 }, { "step": 10020, "loss": 3.0027084350585938, "lm_loss": 3.0027084350585938, "ppl": 20.140011032054115, "gate_mean": 1.0817311704158783e-06, "lr": 0.00010543529452763452, "steps_per_second": 4.830568372622517 }, { "step": 10030, "loss": 2.929828643798828, "lm_loss": 2.929828643798828, "ppl": 18.724421675987568, "gate_mean": 1.0943040251731873e-06, "lr": 0.00010530177809166746, "steps_per_second": 4.830828266962551 }, { "step": 10040, "loss": 2.923147678375244, "lm_loss": 2.923147678375244, "ppl": 18.599741418200697, "gate_mean": 1.2491364032030106e-06, "lr": 0.00010516824442687976, "steps_per_second": 4.831124092932372 }, { "step": 10050, "loss": 2.8888230323791504, "lm_loss": 2.8888230323791504, "ppl": 17.97214451651051, "gate_mean": 1.1753290891647339e-06, "lr": 0.00010503469389834732, "steps_per_second": 4.831513570901595 }, { "step": 10060, "loss": 2.903815269470215, "lm_loss": 2.903815269470215, "ppl": 18.243617073783867, "gate_mean": 1.169741153717041e-06, "lr": 0.0001049011268711922, "steps_per_second": 4.831781963183855 }, { "step": 10070, "loss": 2.791252851486206, "lm_loss": 2.791252851486206, "ppl": 16.301430284626175, "gate_mean": 1.371605321764946e-06, "lr": 0.00010476754371058152, "steps_per_second": 4.831941823381208 }, { "step": 10080, "loss": 3.080160140991211, "lm_loss": 3.080160140991211, "ppl": 21.76188708733747, "gate_mean": 1.1133961379528046e-06, "lr": 0.00010463394478172658, "steps_per_second": 4.8319960922298 }, { "step": 10090, "loss": 2.843280553817749, "lm_loss": 2.843280553817749, "ppl": 17.172006928138124, "gate_mean": 1.2973323464393616e-06, "lr": 0.00010450033044988175, "steps_per_second": 4.832307855803246 }, { "step": 10100, "loss": 2.916726589202881, "lm_loss": 2.916726589202881, "ppl": 18.480693437852903, "gate_mean": 1.2351665645837784e-06, "lr": 0.0001043667010803435, "steps_per_second": 4.8325612428829485 }, { "step": 10110, "loss": 2.9660086631774902, "lm_loss": 2.9660086631774902, "ppl": 19.414275833886567, "gate_mean": 1.3648532330989838e-06, "lr": 0.0001042330570384494, "steps_per_second": 4.832839294664558 }, { "step": 10120, "loss": 2.7541310787200928, "lm_loss": 2.7541310787200928, "ppl": 15.707386488985343, "gate_mean": 1.1979136615991592e-06, "lr": 0.0001040993986895772, "steps_per_second": 4.833118161712322 }, { "step": 10130, "loss": 2.799919605255127, "lm_loss": 2.799919605255127, "ppl": 16.4433247610574, "gate_mean": 1.346692442893982e-06, "lr": 0.0001039657263991437, "steps_per_second": 4.833368185485442 }, { "step": 10140, "loss": 2.8030076026916504, "lm_loss": 2.8030076026916504, "ppl": 16.494180186066547, "gate_mean": 1.416075974702835e-06, "lr": 0.00010383204053260385, "steps_per_second": 4.833701225413624 }, { "step": 10150, "loss": 2.867445945739746, "lm_loss": 2.867445945739746, "ppl": 17.592029774261533, "gate_mean": 1.0943040251731873e-06, "lr": 0.00010369834145544967, "steps_per_second": 4.834029826841862 }, { "step": 10160, "loss": 2.9693031311035156, "lm_loss": 2.9693031311035156, "ppl": 19.478341015326656, "gate_mean": 1.2596137821674347e-06, "lr": 0.00010356462953320938, "steps_per_second": 4.834305945261998 }, { "step": 10170, "loss": 2.82793927192688, "lm_loss": 2.82793927192688, "ppl": 16.910576793304042, "gate_mean": 1.3741664588451385e-06, "lr": 0.00010343090513144624, "steps_per_second": 4.834542063155175 }, { "step": 10180, "loss": 2.8726563453674316, "lm_loss": 2.8726563453674316, "ppl": 17.68393049146695, "gate_mean": 1.451931893825531e-06, "lr": 0.00010329716861575764, "steps_per_second": 4.834816983965032 }, { "step": 10190, "loss": 2.909128189086914, "lm_loss": 2.909128189086914, "ppl": 18.340801883752594, "gate_mean": 1.184176653623581e-06, "lr": 0.00010316342035177418, "steps_per_second": 4.835079359444162 }, { "step": 10200, "loss": 2.970524549484253, "lm_loss": 2.970524549484253, "ppl": 19.50214675449316, "gate_mean": 1.2130476534366608e-06, "lr": 0.00010302966070515844, "steps_per_second": 4.835381577986841 }, { "step": 10210, "loss": 2.8898510932922363, "lm_loss": 2.8898510932922363, "ppl": 17.99063047653057, "gate_mean": 1.2335367500782013e-06, "lr": 0.00010289589004160419, "steps_per_second": 4.835613236159413 }, { "step": 10220, "loss": 2.9176185131073, "lm_loss": 2.9176185131073, "ppl": 18.497184163243215, "gate_mean": 1.3457611203193665e-06, "lr": 0.00010276210872683532, "steps_per_second": 4.835790880428302 }, { "step": 10230, "loss": 2.9134984016418457, "lm_loss": 2.9134984016418457, "ppl": 18.42113048509554, "gate_mean": 1.3331882655620575e-06, "lr": 0.00010262831712660488, "steps_per_second": 4.835993694775774 }, { "step": 10240, "loss": 2.819575309753418, "lm_loss": 2.819575309753418, "ppl": 16.769727220028436, "gate_mean": 1.1874362826347351e-06, "lr": 0.00010249451560669395, "steps_per_second": 4.83631463081663 }, { "step": 10250, "loss": 2.873772621154785, "lm_loss": 2.873772621154785, "ppl": 17.70368165672264, "gate_mean": 1.0398216545581818e-06, "lr": 0.00010236070453291076, "steps_per_second": 4.836597628864373 }, { "step": 10260, "loss": 2.9849681854248047, "lm_loss": 2.9849681854248047, "ppl": 19.7858727472206, "gate_mean": 1.5897676348686218e-06, "lr": 0.00010222688427108974, "steps_per_second": 4.836795972613255 }, { "step": 10270, "loss": 2.8269784450531006, "lm_loss": 2.8269784450531006, "ppl": 16.894336459993674, "gate_mean": 1.2842938303947449e-06, "lr": 0.00010209305518709033, "steps_per_second": 4.837009802046191 }, { "step": 10280, "loss": 2.800687074661255, "lm_loss": 2.800687074661255, "ppl": 16.45594935362115, "gate_mean": 1.3560056686401367e-06, "lr": 0.00010195921764679617, "steps_per_second": 4.8372469802357525 }, { "step": 10290, "loss": 2.935751438140869, "lm_loss": 2.935751438140869, "ppl": 18.835651645708374, "gate_mean": 1.62515789270401e-06, "lr": 0.00010182537201611391, "steps_per_second": 4.837478291535045 }, { "step": 10300, "loss": 2.898916482925415, "lm_loss": 2.898916482925415, "ppl": 18.15446403707462, "gate_mean": 1.334119588136673e-06, "lr": 0.00010169151866097249, "steps_per_second": 4.837727692125743 }, { "step": 10310, "loss": 3.093865394592285, "lm_loss": 3.093865394592285, "ppl": 22.06219244866026, "gate_mean": 1.3271346688270569e-06, "lr": 0.00010155765794732178, "steps_per_second": 4.837983613079782 }, { "step": 10320, "loss": 2.801532030105591, "lm_loss": 2.801532030105591, "ppl": 16.46985977363416, "gate_mean": 1.3238750398159027e-06, "lr": 0.00010142379024113195, "steps_per_second": 4.838262459554847 }, { "step": 10330, "loss": 2.8758718967437744, "lm_loss": 2.8758718967437744, "ppl": 17.74088560046227, "gate_mean": 1.2759119272232056e-06, "lr": 0.00010128991590839215, "steps_per_second": 4.838428815493202 }, { "step": 10340, "loss": 2.821847915649414, "lm_loss": 2.821847915649414, "ppl": 16.80788153942711, "gate_mean": 1.096632331609726e-06, "lr": 0.00010115603531510969, "steps_per_second": 4.83873360979168 }, { "step": 10350, "loss": 2.9341647624969482, "lm_loss": 2.9341647624969482, "ppl": 18.805789273219276, "gate_mean": 1.3830140233039856e-06, "lr": 0.000101022148827309, "steps_per_second": 4.839029904696981 }, { "step": 10360, "loss": 2.7508022785186768, "lm_loss": 2.7508022785186768, "ppl": 15.655186667267401, "gate_mean": 1.2754462659358978e-06, "lr": 0.00010088825681103061, "steps_per_second": 4.839363938301787 }, { "step": 10370, "loss": 2.780731439590454, "lm_loss": 2.780731439590454, "ppl": 16.130815351029106, "gate_mean": 1.34296715259552e-06, "lr": 0.00010075435963233025, "steps_per_second": 4.839617365666766 }, { "step": 10380, "loss": 2.808685302734375, "lm_loss": 2.808685302734375, "ppl": 16.588095552964994, "gate_mean": 1.3220123946666718e-06, "lr": 0.00010062045765727762, "steps_per_second": 4.839913168278855 }, { "step": 10390, "loss": 2.882676839828491, "lm_loss": 2.882676839828491, "ppl": 17.862023016394954, "gate_mean": 1.3723038136959076e-06, "lr": 0.00010048655125195562, "steps_per_second": 4.840258670115926 }, { "step": 10400, "loss": 2.7990570068359375, "lm_loss": 2.7990570068359375, "ppl": 16.42914689089561, "gate_mean": 1.6773119568824768e-06, "lr": 0.0001003526407824593, "steps_per_second": 4.8405496369320025 }, { "step": 10410, "loss": 2.7884511947631836, "lm_loss": 2.7884511947631836, "ppl": 16.255823190417324, "gate_mean": 1.282431185245514e-06, "lr": 0.00010021872661489467, "steps_per_second": 4.84086508562959 }, { "step": 10420, "loss": 2.903438091278076, "lm_loss": 2.903438091278076, "ppl": 18.23673727681416, "gate_mean": 1.3010576367378235e-06, "lr": 0.00010008480911537806, "steps_per_second": 4.841148546339959 }, { "step": 10430, "loss": 2.77351975440979, "lm_loss": 2.77351975440979, "ppl": 16.014903451439956, "gate_mean": 1.2349337339401245e-06, "lr": 9.995088865003475e-05, "steps_per_second": 4.841486294109861 }, { "step": 10440, "loss": 2.743865966796875, "lm_loss": 2.743865966796875, "ppl": 15.54697314769987, "gate_mean": 1.112464815378189e-06, "lr": 9.981696558499816e-05, "steps_per_second": 4.841746563859032 }, { "step": 10450, "loss": 2.914339542388916, "lm_loss": 2.914339542388916, "ppl": 18.436631767019627, "gate_mean": 1.2845266610383987e-06, "lr": 9.968304028640888e-05, "steps_per_second": 4.842125629762047 }, { "step": 10460, "loss": 2.740243673324585, "lm_loss": 2.740243673324585, "ppl": 15.490759321306456, "gate_mean": 1.216307282447815e-06, "lr": 9.954911312041349e-05, "steps_per_second": 4.842457042550209 }, { "step": 10470, "loss": 2.7566280364990234, "lm_loss": 2.7566280364990234, "ppl": 15.746656176838753, "gate_mean": 1.223292201757431e-06, "lr": 9.941518445316385e-05, "steps_per_second": 4.842731687391803 }, { "step": 10480, "loss": 2.7788045406341553, "lm_loss": 2.7788045406341553, "ppl": 16.099762826911125, "gate_mean": 1.1636875569820404e-06, "lr": 9.928125465081575e-05, "steps_per_second": 4.843062931449989 }, { "step": 10490, "loss": 2.765369415283203, "lm_loss": 2.765369415283203, "ppl": 15.884907034290018, "gate_mean": 1.2293457984924316e-06, "lr": 9.914732407952819e-05, "steps_per_second": 4.843353497379283 }, { "step": 10500, "loss": 2.8606343269348145, "lm_loss": 2.8606343269348145, "ppl": 17.47260676718483, "gate_mean": 1.1776573956012726e-06, "lr": 9.901339310546219e-05, "steps_per_second": 4.843595190535598 }, { "step": 10510, "loss": 2.8468945026397705, "lm_loss": 2.8468945026397705, "ppl": 17.234177956138485, "gate_mean": 1.7890706658363342e-06, "lr": 9.887946209477996e-05, "steps_per_second": 4.843844573702926 }, { "step": 10520, "loss": 2.844062328338623, "lm_loss": 2.844062328338623, "ppl": 17.18543681451431, "gate_mean": 1.4062970876693726e-06, "lr": 9.874553141364377e-05, "steps_per_second": 4.844047199020025 }, { "step": 10530, "loss": 2.7832417488098145, "lm_loss": 2.7832417488098145, "ppl": 16.1713595534719, "gate_mean": 1.0724179446697235e-06, "lr": 9.861160142821498e-05, "steps_per_second": 4.844281408688351 }, { "step": 10540, "loss": 2.861440420150757, "lm_loss": 2.861440420150757, "ppl": 17.48669699522033, "gate_mean": 1.300126314163208e-06, "lr": 9.847767250465303e-05, "steps_per_second": 4.844559862908664 }, { "step": 10550, "loss": 2.7719314098358154, "lm_loss": 2.7719314098358154, "ppl": 15.989486457256115, "gate_mean": 1.471489667892456e-06, "lr": 9.834374500911454e-05, "steps_per_second": 4.8448298182595035 }, { "step": 10560, "loss": 2.783914089202881, "lm_loss": 2.783914089202881, "ppl": 16.18223586759344, "gate_mean": 1.3206154108047485e-06, "lr": 9.820981930775213e-05, "steps_per_second": 4.845095799060087 }, { "step": 10570, "loss": 2.749100923538208, "lm_loss": 2.749100923538208, "ppl": 15.628574282436984, "gate_mean": 1.5995465219020844e-06, "lr": 9.807589576671353e-05, "steps_per_second": 4.845374540048376 }, { "step": 10580, "loss": 2.7686307430267334, "lm_loss": 2.7686307430267334, "ppl": 15.936797492206008, "gate_mean": 1.6093254089355469e-06, "lr": 9.794197475214066e-05, "steps_per_second": 4.845623172691633 }, { "step": 10590, "loss": 2.817046642303467, "lm_loss": 2.817046642303467, "ppl": 16.72737572565208, "gate_mean": 1.4570541679859161e-06, "lr": 9.780805663016839e-05, "steps_per_second": 4.845929282370225 }, { "step": 10600, "loss": 2.787891387939453, "lm_loss": 2.787891387939453, "ppl": 16.24672561634958, "gate_mean": 1.5101395547389984e-06, "lr": 9.767414176692378e-05, "steps_per_second": 4.846047187369804 }, { "step": 10610, "loss": 2.847104549407959, "lm_loss": 2.847104549407959, "ppl": 17.237798319730082, "gate_mean": 1.3881362974643707e-06, "lr": 9.754023052852496e-05, "steps_per_second": 4.8462742194419555 }, { "step": 10620, "loss": 2.8198225498199463, "lm_loss": 2.8198225498199463, "ppl": 16.773873881091234, "gate_mean": 1.2600794434547424e-06, "lr": 9.740632328108012e-05, "steps_per_second": 4.8465130526020745 }, { "step": 10630, "loss": 2.6585826873779297, "lm_loss": 2.6585826873779297, "ppl": 14.276041139922503, "gate_mean": 1.548323780298233e-06, "lr": 9.727242039068659e-05, "steps_per_second": 4.846673241147214 }, { "step": 10640, "loss": 2.867310047149658, "lm_loss": 2.867310047149658, "ppl": 17.589639204659623, "gate_mean": 1.4347024261951447e-06, "lr": 9.713852222342975e-05, "steps_per_second": 4.846856926682244 }, { "step": 10650, "loss": 2.91276216506958, "lm_loss": 2.91276216506958, "ppl": 18.407573166439228, "gate_mean": 1.2265518307685852e-06, "lr": 9.700462914538205e-05, "steps_per_second": 4.847124455513728 }, { "step": 10660, "loss": 2.7718353271484375, "lm_loss": 2.7718353271484375, "ppl": 15.987950218231678, "gate_mean": 1.7350539565086365e-06, "lr": 9.68707415226021e-05, "steps_per_second": 4.847238137605742 }, { "step": 10670, "loss": 2.799379348754883, "lm_loss": 2.799379348754883, "ppl": 16.43444354725238, "gate_mean": 1.223292201757431e-06, "lr": 9.67368597211335e-05, "steps_per_second": 4.847522131106098 }, { "step": 10680, "loss": 2.9252805709838867, "lm_loss": 2.9252805709838867, "ppl": 18.639455006548243, "gate_mean": 1.4521647244691849e-06, "lr": 9.6602984107004e-05, "steps_per_second": 4.847768947273189 }, { "step": 10690, "loss": 2.7491912841796875, "lm_loss": 2.7491912841796875, "ppl": 15.629986554240487, "gate_mean": 1.3220123946666718e-06, "lr": 9.646911504622445e-05, "steps_per_second": 4.847940859071383 }, { "step": 10700, "loss": 2.7751617431640625, "lm_loss": 2.7751617431640625, "ppl": 16.04122134373654, "gate_mean": 1.330394297838211e-06, "lr": 9.63352529047877e-05, "steps_per_second": 4.848153173651283 }, { "step": 10710, "loss": 2.7837460041046143, "lm_loss": 2.7837460041046143, "ppl": 16.179516103469776, "gate_mean": 1.4351680874824524e-06, "lr": 9.620139804866775e-05, "steps_per_second": 4.848365974770233 }, { "step": 10720, "loss": 2.778122901916504, "lm_loss": 2.778122901916504, "ppl": 16.088792344600815, "gate_mean": 1.0170042514801025e-06, "lr": 9.606755084381868e-05, "steps_per_second": 4.848694988787064 }, { "step": 10730, "loss": 2.7860004901885986, "lm_loss": 2.7860004901885986, "ppl": 16.216033746161912, "gate_mean": 1.430511474609375e-06, "lr": 9.593371165617365e-05, "steps_per_second": 4.848888533663611 }, { "step": 10740, "loss": 2.7710800170898438, "lm_loss": 2.7710800170898438, "ppl": 15.975878917976685, "gate_mean": 1.0831281542778015e-06, "lr": 9.579988085164383e-05, "steps_per_second": 4.848988678603937 }, { "step": 10750, "loss": 2.7871079444885254, "lm_loss": 2.7871079444885254, "ppl": 16.234002210251806, "gate_mean": 1.3380777090787888e-06, "lr": 9.56660587961176e-05, "steps_per_second": 4.849074187061908 }, { "step": 10760, "loss": 2.8707990646362305, "lm_loss": 2.8707990646362305, "ppl": 17.651116949525335, "gate_mean": 1.2489035725593567e-06, "lr": 9.553224585545926e-05, "steps_per_second": 4.849323810583064 }, { "step": 10770, "loss": 2.7231130599975586, "lm_loss": 2.7231130599975586, "ppl": 15.227653132607019, "gate_mean": 1.2386590242385864e-06, "lr": 9.539844239550838e-05, "steps_per_second": 4.849668902775479 }, { "step": 10780, "loss": 2.6738033294677734, "lm_loss": 2.6738033294677734, "ppl": 14.49499372503113, "gate_mean": 9.171199053525925e-07, "lr": 9.526464878207848e-05, "steps_per_second": 4.849950141279899 }, { "step": 10790, "loss": 2.778989553451538, "lm_loss": 2.778989553451538, "ppl": 16.10274176495329, "gate_mean": 1.2782402336597443e-06, "lr": 9.513086538095614e-05, "steps_per_second": 4.850101845725402 }, { "step": 10800, "loss": 2.7542917728424072, "lm_loss": 2.7542917728424072, "ppl": 15.709910776484694, "gate_mean": 1.2461096048355103e-06, "lr": 9.499709255790009e-05, "steps_per_second": 4.85023421513303 }, { "step": 10810, "loss": 2.762556791305542, "lm_loss": 2.762556791305542, "ppl": 15.840291536603958, "gate_mean": 1.0826624929904938e-06, "lr": 9.486333067864018e-05, "steps_per_second": 4.847098332360966 }, { "step": 10820, "loss": 2.726341724395752, "lm_loss": 2.726341724395752, "ppl": 15.276897568244266, "gate_mean": 1.3401731848716736e-06, "lr": 9.472958010887621e-05, "steps_per_second": 4.847344609961929 }, { "step": 10830, "loss": 2.787034511566162, "lm_loss": 2.787034511566162, "ppl": 16.232810143796854, "gate_mean": 1.778826117515564e-06, "lr": 9.459584121427719e-05, "steps_per_second": 4.8475111882638835 }, { "step": 10840, "loss": 2.722226142883301, "lm_loss": 2.722226142883301, "ppl": 15.214153453866713, "gate_mean": 1.4030374586582184e-06, "lr": 9.446211436048009e-05, "steps_per_second": 4.844573400369526 }, { "step": 10850, "loss": 2.743995189666748, "lm_loss": 2.743995189666748, "ppl": 15.548982301999402, "gate_mean": 1.0265503078699112e-06, "lr": 9.432839991308905e-05, "steps_per_second": 4.844478225255976 }, { "step": 10860, "loss": 2.6064515113830566, "lm_loss": 2.6064515113830566, "ppl": 13.550880291570394, "gate_mean": 1.2670643627643585e-06, "lr": 9.419469823767426e-05, "steps_per_second": 4.844501649234207 }, { "step": 10870, "loss": 2.6340744495391846, "lm_loss": 2.6340744495391846, "ppl": 13.930413194270262, "gate_mean": 1.1741649359464645e-06, "lr": 9.406100969977102e-05, "steps_per_second": 4.8417769678929385 }, { "step": 10880, "loss": 2.805473804473877, "lm_loss": 2.805473804473877, "ppl": 16.53490836394732, "gate_mean": 1.2847594916820526e-06, "lr": 9.392733466487864e-05, "steps_per_second": 4.835813111259148 }, { "step": 10890, "loss": 2.697061777114868, "lm_loss": 2.697061777114868, "ppl": 14.836075923239171, "gate_mean": 1.253560185432434e-06, "lr": 9.37936734984596e-05, "steps_per_second": 4.8359631553353815 }, { "step": 10900, "loss": 2.834048271179199, "lm_loss": 2.834048271179199, "ppl": 17.0141996881182, "gate_mean": 1.28941610455513e-06, "lr": 9.36600265659384e-05, "steps_per_second": 4.83604994234454 }, { "step": 10910, "loss": 2.7606658935546875, "lm_loss": 2.7606658935546875, "ppl": 15.810367465559944, "gate_mean": 1.258915290236473e-06, "lr": 9.352639423270067e-05, "steps_per_second": 4.83618128854919 }, { "step": 10920, "loss": 2.688725471496582, "lm_loss": 2.688725471496582, "ppl": 14.71291193973303, "gate_mean": 1.0188668966293335e-06, "lr": 9.339277686409214e-05, "steps_per_second": 4.83640774093352 }, { "step": 10930, "loss": 2.8244030475616455, "lm_loss": 2.8244030475616455, "ppl": 16.850882807486457, "gate_mean": 1.4174729585647583e-06, "lr": 9.325917482541747e-05, "steps_per_second": 4.836540926290424 }, { "step": 10940, "loss": 2.7195417881011963, "lm_loss": 2.7195417881011963, "ppl": 15.17336803404635, "gate_mean": 1.332256942987442e-06, "lr": 9.312558848193965e-05, "steps_per_second": 4.836816137128476 }, { "step": 10950, "loss": 2.5056333541870117, "lm_loss": 2.5056333541870117, "ppl": 12.25131593132994, "gate_mean": 1.5243422240018845e-06, "lr": 9.299201819887862e-05, "steps_per_second": 4.833817869253771 }, { "step": 10960, "loss": 2.703925132751465, "lm_loss": 2.703925132751465, "ppl": 14.938251420854023, "gate_mean": 1.191161572933197e-06, "lr": 9.285846434141041e-05, "steps_per_second": 4.834018802181792 }, { "step": 10970, "loss": 2.7243595123291016, "lm_loss": 2.7243595123291016, "ppl": 15.246645510445957, "gate_mean": 1.369975507259369e-06, "lr": 9.272492727466622e-05, "steps_per_second": 4.8342571640281715 }, { "step": 10980, "loss": 2.7319490909576416, "lm_loss": 2.7319490909576416, "ppl": 15.362801354761753, "gate_mean": 1.3927929103374481e-06, "lr": 9.259140736373118e-05, "steps_per_second": 4.83446753039468 }, { "step": 10990, "loss": 2.732241153717041, "lm_loss": 2.732241153717041, "ppl": 15.367288912209837, "gate_mean": 1.2244563549757004e-06, "lr": 9.245790497364374e-05, "steps_per_second": 4.8317377570390185 }, { "step": 11000, "loss": 2.752194881439209, "lm_loss": 2.752194881439209, "ppl": 15.67700331337843, "gate_mean": 1.5031546354293823e-06, "lr": 9.232442046939427e-05, "steps_per_second": 4.831894344112133 }, { "step": 11010, "loss": 2.7290687561035156, "lm_loss": 2.7290687561035156, "ppl": 15.318615008847427, "gate_mean": 1.4207325875759125e-06, "lr": 9.219095421592436e-05, "steps_per_second": 4.832061976487358 }, { "step": 11020, "loss": 2.8287038803100586, "lm_loss": 2.8287038803100586, "ppl": 16.923511706525957, "gate_mean": 1.2223608791828156e-06, "lr": 9.20575065781256e-05, "steps_per_second": 4.832268342872142 }, { "step": 11030, "loss": 2.686706066131592, "lm_loss": 2.686706066131592, "ppl": 14.683230585856641, "gate_mean": 1.1881347745656967e-06, "lr": 9.192407792083874e-05, "steps_per_second": 4.831990755961568 }, { "step": 11040, "loss": 2.724451780319214, "lm_loss": 2.724451780319214, "ppl": 15.248052352685415, "gate_mean": 1.4265533536672592e-06, "lr": 9.179066860885265e-05, "steps_per_second": 4.832272994746106 }, { "step": 11050, "loss": 2.6180248260498047, "lm_loss": 2.6180248260498047, "ppl": 13.708619917706125, "gate_mean": 1.2721866369247437e-06, "lr": 9.165727900690331e-05, "steps_per_second": 4.832493742428821 }, { "step": 11060, "loss": 2.6171317100524902, "lm_loss": 2.6171317100524902, "ppl": 13.696381995711942, "gate_mean": 1.2228265404701233e-06, "lr": 9.152390947967282e-05, "steps_per_second": 4.83268180104705 }, { "step": 11070, "loss": 2.8173117637634277, "lm_loss": 2.8173117637634277, "ppl": 16.73181109985624, "gate_mean": 1.2456439435482025e-06, "lr": 9.139056039178829e-05, "steps_per_second": 4.83271266753914 }, { "step": 11080, "loss": 2.7653627395629883, "lm_loss": 2.7653627395629883, "ppl": 15.884800991448975, "gate_mean": 1.157168298959732e-06, "lr": 9.125723210782109e-05, "steps_per_second": 4.83299809700292 }, { "step": 11090, "loss": 2.762406587600708, "lm_loss": 2.762406587600708, "ppl": 15.837912444808184, "gate_mean": 1.3289973139762878e-06, "lr": 9.112392499228565e-05, "steps_per_second": 4.833200176635772 }, { "step": 11100, "loss": 2.621335029602051, "lm_loss": 2.621335029602051, "ppl": 13.754073428736413, "gate_mean": 1.3960525393486023e-06, "lr": 9.099063940963854e-05, "steps_per_second": 4.833373068745017 }, { "step": 11110, "loss": 2.731168031692505, "lm_loss": 2.731168031692505, "ppl": 15.350806781271316, "gate_mean": 7.89295881986618e-07, "lr": 9.08573757242775e-05, "steps_per_second": 4.8336281466235995 }, { "step": 11120, "loss": 2.609811782836914, "lm_loss": 2.609811782836914, "ppl": 13.596491517921772, "gate_mean": 1.2097880244255066e-06, "lr": 9.072413430054025e-05, "steps_per_second": 4.833773342653356 }, { "step": 11130, "loss": 2.6651859283447266, "lm_loss": 2.6651859283447266, "ppl": 14.370621203433679, "gate_mean": 1.3047829270362854e-06, "lr": 9.059091550270378e-05, "steps_per_second": 4.834077907687838 }, { "step": 11140, "loss": 2.707364797592163, "lm_loss": 2.707364797592163, "ppl": 14.98972246987925, "gate_mean": 1.3648532330989838e-06, "lr": 9.045771969498325e-05, "steps_per_second": 4.834390816621897 }, { "step": 11150, "loss": 2.760037422180176, "lm_loss": 2.760037422180176, "ppl": 15.8004342238933, "gate_mean": 1.1115334928035736e-06, "lr": 9.032454724153094e-05, "steps_per_second": 4.83452640139714 }, { "step": 11160, "loss": 2.655661106109619, "lm_loss": 2.655661106109619, "ppl": 14.234393393804105, "gate_mean": 1.32853165268898e-06, "lr": 9.019139850643514e-05, "steps_per_second": 4.834619291618794 }, { "step": 11170, "loss": 2.6735618114471436, "lm_loss": 2.6735618114471436, "ppl": 14.491493345556993, "gate_mean": 1.5934929251670837e-06, "lr": 9.005827385371948e-05, "steps_per_second": 4.834898858673774 }, { "step": 11180, "loss": 2.673099994659424, "lm_loss": 2.673099994659424, "ppl": 14.484802475747804, "gate_mean": 1.2568198144435883e-06, "lr": 8.992517364734165e-05, "steps_per_second": 4.835151425282519 }, { "step": 11190, "loss": 2.6373703479766846, "lm_loss": 2.6373703479766846, "ppl": 13.976402167211711, "gate_mean": 1.4500692486763e-06, "lr": 8.979209825119257e-05, "steps_per_second": 4.835430228372138 }, { "step": 11200, "loss": 2.573190927505493, "lm_loss": 2.573190927505493, "ppl": 13.107583128565254, "gate_mean": 1.262640580534935e-06, "lr": 8.965904802909528e-05, "steps_per_second": 4.835679786863171 }, { "step": 11210, "loss": 2.645803213119507, "lm_loss": 2.645803213119507, "ppl": 14.094761635176162, "gate_mean": 1.2409873306751251e-06, "lr": 8.952602334480399e-05, "steps_per_second": 4.835938418947741 }, { "step": 11220, "loss": 2.704986572265625, "lm_loss": 2.704986572265625, "ppl": 14.954115889281926, "gate_mean": 1.4347024261951447e-06, "lr": 8.93930245620031e-05, "steps_per_second": 4.836199803865428 }, { "step": 11230, "loss": 2.6435515880584717, "lm_loss": 2.6435515880584717, "ppl": 14.063061218768034, "gate_mean": 1.246575266122818e-06, "lr": 8.92600520443062e-05, "steps_per_second": 4.836491205275103 }, { "step": 11240, "loss": 2.6306521892547607, "lm_loss": 2.6306521892547607, "ppl": 13.882821177034385, "gate_mean": 1.3960525393486023e-06, "lr": 8.912710615525511e-05, "steps_per_second": 4.836711972063699 }, { "step": 11250, "loss": 2.6116843223571777, "lm_loss": 2.6116843223571777, "ppl": 13.621975337909657, "gate_mean": 1.4347024261951447e-06, "lr": 8.899418725831882e-05, "steps_per_second": 4.836943691623488 }, { "step": 11260, "loss": 2.763082981109619, "lm_loss": 2.763082981109619, "ppl": 15.848628729784789, "gate_mean": 1.335982233285904e-06, "lr": 8.886129571689248e-05, "steps_per_second": 4.837008861065009 }, { "step": 11270, "loss": 2.649855613708496, "lm_loss": 2.649855613708496, "ppl": 14.151995143756354, "gate_mean": 1.3010576367378235e-06, "lr": 8.87284318942965e-05, "steps_per_second": 4.837289958270005 }, { "step": 11280, "loss": 2.697627544403076, "lm_loss": 2.697627544403076, "ppl": 14.84447206458901, "gate_mean": 1.4426186680793762e-06, "lr": 8.859559615377546e-05, "steps_per_second": 4.834525121198339 }, { "step": 11290, "loss": 2.6549201011657715, "lm_loss": 2.6549201011657715, "ppl": 14.223849544931152, "gate_mean": 1.3909302651882172e-06, "lr": 8.846278885849733e-05, "steps_per_second": 4.834794002700796 }, { "step": 11300, "loss": 2.643510580062866, "lm_loss": 2.643510580062866, "ppl": 14.062484532639829, "gate_mean": 1.1827796697616577e-06, "lr": 8.833001037155205e-05, "steps_per_second": 4.8349688948651 }, { "step": 11310, "loss": 2.578892946243286, "lm_loss": 2.578892946243286, "ppl": 13.18253630229266, "gate_mean": 1.2326054275035858e-06, "lr": 8.819726105595095e-05, "steps_per_second": 4.835169662203166 }, { "step": 11320, "loss": 2.692317008972168, "lm_loss": 2.692317008972168, "ppl": 14.765848920010377, "gate_mean": 1.3525132089853287e-06, "lr": 8.806454127462567e-05, "steps_per_second": 4.835387714464293 }, { "step": 11330, "loss": 2.7391676902770996, "lm_loss": 2.7391676902770996, "ppl": 15.474100490800877, "gate_mean": 1.3154931366443634e-06, "lr": 8.793185139042695e-05, "steps_per_second": 4.83561631371425 }, { "step": 11340, "loss": 2.645709753036499, "lm_loss": 2.645709753036499, "ppl": 14.093444399139218, "gate_mean": 1.269858330488205e-06, "lr": 8.779919176612391e-05, "steps_per_second": 4.835786145036944 }, { "step": 11350, "loss": 2.7253971099853516, "lm_loss": 2.7253971099853516, "ppl": 15.262473604269742, "gate_mean": 1.06077641248703e-06, "lr": 8.766656276440288e-05, "steps_per_second": 4.836030634378511 }, { "step": 11360, "loss": 2.628927707672119, "lm_loss": 2.628927707672119, "ppl": 13.858901138360414, "gate_mean": 1.2847594916820526e-06, "lr": 8.753396474786646e-05, "steps_per_second": 4.836293049292848 }, { "step": 11370, "loss": 2.691596508026123, "lm_loss": 2.691596508026123, "ppl": 14.755213943609704, "gate_mean": 1.3327226042747498e-06, "lr": 8.740139807903263e-05, "steps_per_second": 4.83653248756711 }, { "step": 11380, "loss": 2.769639492034912, "lm_loss": 2.769639492034912, "ppl": 15.952881832036823, "gate_mean": 1.2293457984924316e-06, "lr": 8.72688631203335e-05, "steps_per_second": 4.836764272357959 }, { "step": 11390, "loss": 2.780388116836548, "lm_loss": 2.780388116836548, "ppl": 16.125278225644994, "gate_mean": 1.2302771210670471e-06, "lr": 8.713636023411473e-05, "steps_per_second": 4.836927857151856 }, { "step": 11400, "loss": 2.6605420112609863, "lm_loss": 2.6605420112609863, "ppl": 14.304039948693644, "gate_mean": 1.2363307178020477e-06, "lr": 8.700388978263401e-05, "steps_per_second": 4.837255821603369 }, { "step": 11410, "loss": 2.5829203128814697, "lm_loss": 2.5829203128814697, "ppl": 13.23573426114102, "gate_mean": 1.3113021850585938e-06, "lr": 8.687145212806057e-05, "steps_per_second": 4.837536924438244 }, { "step": 11420, "loss": 2.607062578201294, "lm_loss": 2.607062578201294, "ppl": 13.559163315357234, "gate_mean": 1.3748649507761002e-06, "lr": 8.673904763247385e-05, "steps_per_second": 4.8377429023943 }, { "step": 11430, "loss": 2.6233205795288086, "lm_loss": 2.6233205795288086, "ppl": 13.781409958266584, "gate_mean": 1.1613592505455017e-06, "lr": 8.660667665786279e-05, "steps_per_second": 4.838030670758603 }, { "step": 11440, "loss": 2.6389760971069336, "lm_loss": 2.6389760971069336, "ppl": 13.998862791073241, "gate_mean": 1.41095370054245e-06, "lr": 8.647433956612447e-05, "steps_per_second": 4.838196195382246 }, { "step": 11450, "loss": 2.6506783962249756, "lm_loss": 2.6506783962249756, "ppl": 14.16364394949363, "gate_mean": 1.1399388313293457e-06, "lr": 8.634203671906347e-05, "steps_per_second": 4.838400672574957 }, { "step": 11460, "loss": 2.6925573348999023, "lm_loss": 2.6925573348999023, "ppl": 14.769397962797283, "gate_mean": 1.2782402336597443e-06, "lr": 8.620976847839075e-05, "steps_per_second": 4.838605019108276 }, { "step": 11470, "loss": 2.641960620880127, "lm_loss": 2.641960620880127, "ppl": 14.04070513855245, "gate_mean": 1.1087395250797272e-06, "lr": 8.60775352057226e-05, "steps_per_second": 4.838937756914029 }, { "step": 11480, "loss": 2.6658246517181396, "lm_loss": 2.6658246517181396, "ppl": 14.379802987084535, "gate_mean": 1.2624077498912811e-06, "lr": 8.594533726257981e-05, "steps_per_second": 4.8392140785576565 }, { "step": 11490, "loss": 2.6517512798309326, "lm_loss": 2.6517512798309326, "ppl": 14.178848045541924, "gate_mean": 1.2042000889778137e-06, "lr": 8.581317501038642e-05, "steps_per_second": 4.83949444019737 }, { "step": 11500, "loss": 2.6434454917907715, "lm_loss": 2.6434454917907715, "ppl": 14.061569259607333, "gate_mean": 1.5990808606147766e-06, "lr": 8.5681048810469e-05, "steps_per_second": 4.839696022037459 }, { "step": 11510, "loss": 2.558220386505127, "lm_loss": 2.558220386505127, "ppl": 12.912817032358499, "gate_mean": 1.5189871191978455e-06, "lr": 8.554895902405555e-05, "steps_per_second": 4.839953840177129 }, { "step": 11520, "loss": 2.582737445831299, "lm_loss": 2.582737445831299, "ppl": 13.233314102750198, "gate_mean": 1.1851079761981964e-06, "lr": 8.541690601227449e-05, "steps_per_second": 4.840186578266829 }, { "step": 11530, "loss": 2.5487945079803467, "lm_loss": 2.5487945079803467, "ppl": 12.791674223232073, "gate_mean": 1.475214958190918e-06, "lr": 8.52848901361538e-05, "steps_per_second": 4.840448467173903 }, { "step": 11540, "loss": 2.4809937477111816, "lm_loss": 2.4809937477111816, "ppl": 11.953136920191929, "gate_mean": 1.5953555703163147e-06, "lr": 8.515291175661968e-05, "steps_per_second": 4.840663070109735 }, { "step": 11550, "loss": 2.5740444660186768, "lm_loss": 2.5740444660186768, "ppl": 13.11877573155959, "gate_mean": 1.5045516192913055e-06, "lr": 8.50209712344961e-05, "steps_per_second": 4.840947018245114 }, { "step": 11560, "loss": 2.658761739730835, "lm_loss": 2.658761739730835, "ppl": 14.278597527535558, "gate_mean": 1.2903474271297455e-06, "lr": 8.488906893050337e-05, "steps_per_second": 4.841213876071002 }, { "step": 11570, "loss": 2.5906221866607666, "lm_loss": 2.5906221866607666, "ppl": 13.338067789899066, "gate_mean": 1.437496393918991e-06, "lr": 8.475720520525737e-05, "steps_per_second": 4.841474389820882 }, { "step": 11580, "loss": 2.4858059883117676, "lm_loss": 2.4858059883117676, "ppl": 12.010796916599004, "gate_mean": 1.1834781616926193e-06, "lr": 8.462538041926848e-05, "steps_per_second": 4.841684644505348 }, { "step": 11590, "loss": 2.603461265563965, "lm_loss": 2.603461265563965, "ppl": 13.510420351165276, "gate_mean": 1.2922100722789764e-06, "lr": 8.449359493294063e-05, "steps_per_second": 4.841984955243986 }, { "step": 11600, "loss": 2.487370729446411, "lm_loss": 2.487370729446411, "ppl": 12.029605415972998, "gate_mean": 1.071486622095108e-06, "lr": 8.43618491065703e-05, "steps_per_second": 4.842148228463688 }, { "step": 11610, "loss": 2.6432044506073, "lm_loss": 2.6432044506073, "ppl": 14.058180250773287, "gate_mean": 1.053791493177414e-06, "lr": 8.423014330034555e-05, "steps_per_second": 4.84237738143788 }, { "step": 11620, "loss": 2.660284996032715, "lm_loss": 2.660284996032715, "ppl": 14.300364065000304, "gate_mean": 1.171603798866272e-06, "lr": 8.409847787434507e-05, "steps_per_second": 4.842430202852587 }, { "step": 11630, "loss": 2.6328213214874268, "lm_loss": 2.6328213214874268, "ppl": 13.912967535828885, "gate_mean": 1.4365650713443756e-06, "lr": 8.396685318853702e-05, "steps_per_second": 4.842680763343808 }, { "step": 11640, "loss": 2.557995557785034, "lm_loss": 2.557995557785034, "ppl": 12.9099141865661, "gate_mean": 1.2167729437351227e-06, "lr": 8.383526960277832e-05, "steps_per_second": 4.842892632027116 }, { "step": 11650, "loss": 2.5606131553649902, "lm_loss": 2.5606131553649902, "ppl": 12.943751413499424, "gate_mean": 1.244712620973587e-06, "lr": 8.370372747681348e-05, "steps_per_second": 4.843107755652123 }, { "step": 11660, "loss": 2.5738959312438965, "lm_loss": 2.5738959312438965, "ppl": 13.116827281870759, "gate_mean": 1.4700926840305328e-06, "lr": 8.357222717027363e-05, "steps_per_second": 4.843336197613424 }, { "step": 11670, "loss": 2.691270351409912, "lm_loss": 2.691270351409912, "ppl": 14.750402217689166, "gate_mean": 1.3452954590320587e-06, "lr": 8.34407690426756e-05, "steps_per_second": 4.843509879744355 }, { "step": 11680, "loss": 2.657463312149048, "lm_loss": 2.657463312149048, "ppl": 14.260069833713919, "gate_mean": 1.5720725059509277e-06, "lr": 8.330935345342085e-05, "steps_per_second": 4.843777781405591 }, { "step": 11690, "loss": 2.6266860961914062, "lm_loss": 2.6266860961914062, "ppl": 13.827869659711972, "gate_mean": 1.33574940264225e-06, "lr": 8.317798076179463e-05, "steps_per_second": 4.843664245109522 }, { "step": 11700, "loss": 2.583373785018921, "lm_loss": 2.583373785018921, "ppl": 13.241737658931093, "gate_mean": 1.098494976758957e-06, "lr": 8.304665132696483e-05, "steps_per_second": 4.843890149131717 }, { "step": 11710, "loss": 2.7002315521240234, "lm_loss": 2.7002315521240234, "ppl": 14.883177557288139, "gate_mean": 1.2293457984924316e-06, "lr": 8.291536550798116e-05, "steps_per_second": 4.843908909063478 }, { "step": 11720, "loss": 2.550795555114746, "lm_loss": 2.550795555114746, "ppl": 12.817296593516225, "gate_mean": 1.4868564903736115e-06, "lr": 8.278412366377398e-05, "steps_per_second": 4.844242331345222 }, { "step": 11730, "loss": 2.641904592514038, "lm_loss": 2.641904592514038, "ppl": 14.039918482822525, "gate_mean": 1.5506520867347717e-06, "lr": 8.265292615315347e-05, "steps_per_second": 4.844324950777011 }, { "step": 11740, "loss": 2.612410306930542, "lm_loss": 2.612410306930542, "ppl": 13.631868272488164, "gate_mean": 1.312699168920517e-06, "lr": 8.252177333480868e-05, "steps_per_second": 4.844593641975497 }, { "step": 11750, "loss": 2.6472766399383545, "lm_loss": 2.6472766399383545, "ppl": 14.11554454226108, "gate_mean": 1.4863908290863037e-06, "lr": 8.239066556730638e-05, "steps_per_second": 4.844846386404936 }, { "step": 11760, "loss": 2.553834915161133, "lm_loss": 2.553834915161133, "ppl": 12.856312233939999, "gate_mean": 1.55717134475708e-06, "lr": 8.22596032090902e-05, "steps_per_second": 4.8449416944502435 }, { "step": 11770, "loss": 2.6015431880950928, "lm_loss": 2.6015431880950928, "ppl": 13.484531154973908, "gate_mean": 1.1920928955078125e-06, "lr": 8.212858661847958e-05, "steps_per_second": 4.8450851616072645 }, { "step": 11780, "loss": 2.537522792816162, "lm_loss": 2.537522792816162, "ppl": 12.64829967146295, "gate_mean": 1.294771209359169e-06, "lr": 8.199761615366892e-05, "steps_per_second": 4.845333560522455 }, { "step": 11790, "loss": 2.8274850845336914, "lm_loss": 2.8274850845336914, "ppl": 16.902897966458156, "gate_mean": 1.5771947801113129e-06, "lr": 8.186669217272648e-05, "steps_per_second": 4.845608128655179 }, { "step": 11800, "loss": 2.6002585887908936, "lm_loss": 2.6002585887908936, "ppl": 13.467220056927586, "gate_mean": 1.4510005712509155e-06, "lr": 8.173581503359339e-05, "steps_per_second": 4.845781808525984 }, { "step": 11810, "loss": 2.664853811264038, "lm_loss": 2.664853811264038, "ppl": 14.365849267136554, "gate_mean": 1.4444813132286072e-06, "lr": 8.160498509408279e-05, "steps_per_second": 4.846030087381313 }, { "step": 11820, "loss": 2.546273708343506, "lm_loss": 2.546273708343506, "ppl": 12.759469583271434, "gate_mean": 1.2391246855258942e-06, "lr": 8.147420271187868e-05, "steps_per_second": 4.846237307984533 }, { "step": 11830, "loss": 2.595482349395752, "lm_loss": 2.595482349395752, "ppl": 13.403050755908659, "gate_mean": 1.400243490934372e-06, "lr": 8.134346824453517e-05, "steps_per_second": 4.846451820062476 }, { "step": 11840, "loss": 2.5980725288391113, "lm_loss": 2.5980725288391113, "ppl": 13.437812062030636, "gate_mean": 1.4360994100570679e-06, "lr": 8.121278204947522e-05, "steps_per_second": 4.846501463745531 }, { "step": 11850, "loss": 2.649890422821045, "lm_loss": 2.649890422821045, "ppl": 14.152487770722008, "gate_mean": 1.32853165268898e-06, "lr": 8.108214448399e-05, "steps_per_second": 4.846812193644604 }, { "step": 11860, "loss": 2.5943281650543213, "lm_loss": 2.5943281650543213, "ppl": 13.387590088545195, "gate_mean": 1.4277175068855286e-06, "lr": 8.095155590523754e-05, "steps_per_second": 4.847013576641448 }, { "step": 11870, "loss": 2.5881361961364746, "lm_loss": 2.5881361961364746, "ppl": 13.30495066125075, "gate_mean": 1.2672971934080124e-06, "lr": 8.082101667024205e-05, "steps_per_second": 4.847257829482777 }, { "step": 11880, "loss": 2.4914093017578125, "lm_loss": 2.4914093017578125, "ppl": 12.07828608135129, "gate_mean": 1.2477394193410873e-06, "lr": 8.069052713589284e-05, "steps_per_second": 4.847518351161486 }, { "step": 11890, "loss": 2.5779502391815186, "lm_loss": 2.5779502391815186, "ppl": 13.170114888025859, "gate_mean": 1.3457611203193665e-06, "lr": 8.056008765894327e-05, "steps_per_second": 4.8475928305693925 }, { "step": 11900, "loss": 2.5272397994995117, "lm_loss": 2.5272397994995117, "ppl": 12.518903719520285, "gate_mean": 1.232372596859932e-06, "lr": 8.042969859600995e-05, "steps_per_second": 4.847712385682413 }, { "step": 11910, "loss": 2.551863193511963, "lm_loss": 2.551863193511963, "ppl": 12.830988139017299, "gate_mean": 1.1310912668704987e-06, "lr": 8.029936030357154e-05, "steps_per_second": 4.847921412689298 }, { "step": 11920, "loss": 2.587794780731201, "lm_loss": 2.587794780731201, "ppl": 13.300408921482674, "gate_mean": 1.1958181858062744e-06, "lr": 8.0169073137968e-05, "steps_per_second": 4.848144228115193 }, { "step": 11930, "loss": 2.6321330070495605, "lm_loss": 2.6321330070495605, "ppl": 13.903394334464732, "gate_mean": 1.4570541679859161e-06, "lr": 8.00388374553994e-05, "steps_per_second": 4.848363798966334 }, { "step": 11940, "loss": 2.637106418609619, "lm_loss": 2.637106418609619, "ppl": 13.972713870980122, "gate_mean": 1.39651820063591e-06, "lr": 7.99086536119252e-05, "steps_per_second": 4.848608780715632 }, { "step": 11950, "loss": 2.536762237548828, "lm_loss": 2.536762237548828, "ppl": 12.638683597766283, "gate_mean": 1.3746321201324463e-06, "lr": 7.977852196346302e-05, "steps_per_second": 4.84888027643193 }, { "step": 11960, "loss": 2.4698424339294434, "lm_loss": 2.4698424339294434, "ppl": 11.820584181902213, "gate_mean": 1.2586824595928192e-06, "lr": 7.964844286578782e-05, "steps_per_second": 4.849144056154153 }, { "step": 11970, "loss": 2.45015811920166, "lm_loss": 2.45015811920166, "ppl": 11.590179204226908, "gate_mean": 1.0416842997074127e-06, "lr": 7.951841667453088e-05, "steps_per_second": 4.849282013874882 }, { "step": 11980, "loss": 2.5141866207122803, "lm_loss": 2.5141866207122803, "ppl": 12.35655412534982, "gate_mean": 1.3280659914016724e-06, "lr": 7.938844374517885e-05, "steps_per_second": 4.8496263421252 }, { "step": 11990, "loss": 2.5512213706970215, "lm_loss": 2.5512213706970215, "ppl": 12.822755560302465, "gate_mean": 1.5543773770332336e-06, "lr": 7.925852443307277e-05, "steps_per_second": 4.849821822894682 }, { "step": 12000, "loss": 2.6039180755615234, "lm_loss": 2.6039180755615234, "ppl": 13.516593456113588, "gate_mean": 1.0738149285316467e-06, "lr": 7.912865909340704e-05, "steps_per_second": 4.849990177462971 }, { "step": 12010, "loss": 2.643749952316284, "lm_loss": 2.643749952316284, "ppl": 14.065851104166892, "gate_mean": 1.3830140233039856e-06, "lr": 7.899884808122858e-05, "steps_per_second": 4.835508018920307 }, { "step": 12020, "loss": 2.4827184677124023, "lm_loss": 2.4827184677124023, "ppl": 11.97377052299441, "gate_mean": 1.432374119758606e-06, "lr": 7.886909175143572e-05, "steps_per_second": 4.835512205830686 }, { "step": 12030, "loss": 2.5288729667663574, "lm_loss": 2.5288729667663574, "ppl": 12.539365887815414, "gate_mean": 1.146690919995308e-06, "lr": 7.873939045877732e-05, "steps_per_second": 4.835440236199204 }, { "step": 12040, "loss": 2.6508960723876953, "lm_loss": 2.6508960723876953, "ppl": 14.166727372740379, "gate_mean": 1.380685716867447e-06, "lr": 7.860974455785175e-05, "steps_per_second": 4.835418972425974 }, { "step": 12050, "loss": 2.5062994956970215, "lm_loss": 2.5062994956970215, "ppl": 12.259479760254818, "gate_mean": 1.1865049600601196e-06, "lr": 7.848015440310595e-05, "steps_per_second": 4.835501062998831 }, { "step": 12060, "loss": 2.5109777450561523, "lm_loss": 2.5109777450561523, "ppl": 12.316967028627479, "gate_mean": 1.4845281839370728e-06, "lr": 7.835062034883448e-05, "steps_per_second": 4.835534263366192 }, { "step": 12070, "loss": 2.460850715637207, "lm_loss": 2.460850715637207, "ppl": 11.714773242875903, "gate_mean": 1.4686957001686096e-06, "lr": 7.822114274917848e-05, "steps_per_second": 4.835856833997718 }, { "step": 12080, "loss": 2.3666675090789795, "lm_loss": 2.3666675090789795, "ppl": 10.661802655439573, "gate_mean": 1.2405216693878174e-06, "lr": 7.809172195812476e-05, "steps_per_second": 4.835988828808754 }, { "step": 12090, "loss": 2.6330089569091797, "lm_loss": 2.6330089569091797, "ppl": 13.915578346292907, "gate_mean": 1.9203871488571167e-06, "lr": 7.796235832950483e-05, "steps_per_second": 4.836227152909295 }, { "step": 12100, "loss": 2.4491071701049805, "lm_loss": 2.4491071701049805, "ppl": 11.578004914261927, "gate_mean": 1.1664815247058868e-06, "lr": 7.78330522169939e-05, "steps_per_second": 4.83638024444804 }, { "step": 12110, "loss": 2.5559401512145996, "lm_loss": 2.5559401512145996, "ppl": 12.883406315681587, "gate_mean": 1.1692754924297333e-06, "lr": 7.770380397410994e-05, "steps_per_second": 4.836636783047613 }, { "step": 12120, "loss": 2.5416712760925293, "lm_loss": 2.5416712760925293, "ppl": 12.700879919856622, "gate_mean": 1.7005950212478638e-06, "lr": 7.757461395421275e-05, "steps_per_second": 4.836845407677611 }, { "step": 12130, "loss": 2.5047507286071777, "lm_loss": 2.5047507286071777, "ppl": 12.240507377157197, "gate_mean": 1.3327226042747498e-06, "lr": 7.744548251050288e-05, "steps_per_second": 4.837118272867596 }, { "step": 12140, "loss": 2.5081377029418945, "lm_loss": 2.5081377029418945, "ppl": 12.28203594989264, "gate_mean": 1.41095370054245e-06, "lr": 7.731640999602074e-05, "steps_per_second": 4.837312817907381 }, { "step": 12150, "loss": 2.4152116775512695, "lm_loss": 2.4152116775512695, "ppl": 11.192139231451128, "gate_mean": 1.2726522982120514e-06, "lr": 7.718739676364572e-05, "steps_per_second": 4.837560543947922 }, { "step": 12160, "loss": 2.516191005706787, "lm_loss": 2.516191005706787, "ppl": 12.3813462552087, "gate_mean": 1.314561814069748e-06, "lr": 7.705844316609499e-05, "steps_per_second": 4.837808647310504 }, { "step": 12170, "loss": 2.4459638595581055, "lm_loss": 2.4459638595581055, "ppl": 11.541668787087003, "gate_mean": 1.3979151844978333e-06, "lr": 7.692954955592286e-05, "steps_per_second": 4.838089899475546 }, { "step": 12180, "loss": 2.5238170623779297, "lm_loss": 2.5238170623779297, "ppl": 12.47612804973461, "gate_mean": 1.4831312000751495e-06, "lr": 7.680071628551946e-05, "steps_per_second": 4.83825626508581 }, { "step": 12190, "loss": 2.5434441566467285, "lm_loss": 2.5434441566467285, "ppl": 12.723417034791119, "gate_mean": 1.2866221368312836e-06, "lr": 7.667194370711007e-05, "steps_per_second": 4.8384351795281395 }, { "step": 12200, "loss": 2.4789204597473145, "lm_loss": 2.4789204597473145, "ppl": 11.928380297956366, "gate_mean": 1.2870877981185913e-06, "lr": 7.654323217275398e-05, "steps_per_second": 4.838634098210267 }, { "step": 12210, "loss": 2.44136643409729, "lm_loss": 2.44136643409729, "ppl": 11.488728612644568, "gate_mean": 1.3425014913082123e-06, "lr": 7.641458203434359e-05, "steps_per_second": 4.838888498236298 }, { "step": 12220, "loss": 2.521082639694214, "lm_loss": 2.521082639694214, "ppl": 12.442059642131392, "gate_mean": 1.2004747986793518e-06, "lr": 7.628599364360356e-05, "steps_per_second": 4.839165353334839 }, { "step": 12230, "loss": 2.482966423034668, "lm_loss": 2.482966423034668, "ppl": 11.976739851238332, "gate_mean": 1.2866221368312836e-06, "lr": 7.615746735208953e-05, "steps_per_second": 4.839415890512334 }, { "step": 12240, "loss": 2.3524365425109863, "lm_loss": 2.3524365425109863, "ppl": 10.511149411455857, "gate_mean": 1.323409378528595e-06, "lr": 7.602900351118753e-05, "steps_per_second": 4.83963055340763 }, { "step": 12250, "loss": 2.5357820987701416, "lm_loss": 2.5357820987701416, "ppl": 12.62630200269252, "gate_mean": 1.41095370054245e-06, "lr": 7.590060247211278e-05, "steps_per_second": 4.83985818338768 }, { "step": 12260, "loss": 2.4774508476257324, "lm_loss": 2.4774508476257324, "ppl": 11.910863080589888, "gate_mean": 1.0184012353420258e-06, "lr": 7.577226458590883e-05, "steps_per_second": 4.840133917909785 }, { "step": 12270, "loss": 2.4535908699035645, "lm_loss": 2.4535908699035645, "ppl": 11.630033766276908, "gate_mean": 1.2032687664031982e-06, "lr": 7.564399020344658e-05, "steps_per_second": 4.840367101274801 }, { "step": 12280, "loss": 2.4794363975524902, "lm_loss": 2.4794363975524902, "ppl": 11.934536188198292, "gate_mean": 1.4316756278276443e-06, "lr": 7.551577967542322e-05, "steps_per_second": 4.8406331880052145 }, { "step": 12290, "loss": 2.452977180480957, "lm_loss": 2.452977180480957, "ppl": 11.62289872714293, "gate_mean": 1.176726073026657e-06, "lr": 7.538763335236151e-05, "steps_per_second": 4.840890927042399 }, { "step": 12300, "loss": 2.374777317047119, "lm_loss": 2.374777317047119, "ppl": 10.748619385252828, "gate_mean": 1.4505349099636078e-06, "lr": 7.525955158460856e-05, "steps_per_second": 4.841177048723811 }, { "step": 12310, "loss": 2.4397780895233154, "lm_loss": 2.4397780895233154, "ppl": 11.470495037324381, "gate_mean": 1.2414529919624329e-06, "lr": 7.513153472233506e-05, "steps_per_second": 4.8413981921305504 }, { "step": 12320, "loss": 2.3686580657958984, "lm_loss": 2.3686580657958984, "ppl": 10.683046715067976, "gate_mean": 1.4025717973709106e-06, "lr": 7.500358311553424e-05, "steps_per_second": 4.841624173629993 }, { "step": 12330, "loss": 2.4747583866119385, "lm_loss": 2.4747583866119385, "ppl": 11.8788366803693, "gate_mean": 1.8584541976451874e-06, "lr": 7.487569711402086e-05, "steps_per_second": 4.841830438207182 }, { "step": 12340, "loss": 2.569976806640625, "lm_loss": 2.569976806640625, "ppl": 13.065521404087004, "gate_mean": 1.498498022556305e-06, "lr": 7.474787706743038e-05, "steps_per_second": 4.842113635642226 }, { "step": 12350, "loss": 2.392238140106201, "lm_loss": 2.392238140106201, "ppl": 10.937947226263873, "gate_mean": 1.287786290049553e-06, "lr": 7.462012332521794e-05, "steps_per_second": 4.842250707391411 }, { "step": 12360, "loss": 2.431593179702759, "lm_loss": 2.431593179702759, "ppl": 11.376993243715711, "gate_mean": 1.5506520867347717e-06, "lr": 7.449243623665742e-05, "steps_per_second": 4.842316984662746 }, { "step": 12370, "loss": 2.3604576587677, "lm_loss": 2.3604576587677, "ppl": 10.595799603539083, "gate_mean": 1.6656704246997833e-06, "lr": 7.436481615084036e-05, "steps_per_second": 4.842455971965174 }, { "step": 12380, "loss": 2.5037200450897217, "lm_loss": 2.5037200450897217, "ppl": 12.227897787322737, "gate_mean": 1.4766119420528412e-06, "lr": 7.423726341667525e-05, "steps_per_second": 4.8426938022727315 }, { "step": 12390, "loss": 2.4576539993286133, "lm_loss": 2.4576539993286133, "ppl": 11.677384229073857, "gate_mean": 1.4873221516609192e-06, "lr": 7.41097783828864e-05, "steps_per_second": 4.842913338811711 }, { "step": 12400, "loss": 2.473625659942627, "lm_loss": 2.473625659942627, "ppl": 11.865388823072232, "gate_mean": 1.5133991837501526e-06, "lr": 7.398236139801302e-05, "steps_per_second": 4.843181047851164 }, { "step": 12410, "loss": 2.517475128173828, "lm_loss": 2.517475128173828, "ppl": 12.397255632715598, "gate_mean": 1.58604234457016e-06, "lr": 7.385501281040832e-05, "steps_per_second": 4.843266919497378 }, { "step": 12420, "loss": 2.260929584503174, "lm_loss": 2.260929584503174, "ppl": 9.592001599734333, "gate_mean": 1.3257376849651337e-06, "lr": 7.372773296823841e-05, "steps_per_second": 4.843530824983372 }, { "step": 12430, "loss": 2.3951303958892822, "lm_loss": 2.3951303958892822, "ppl": 10.969628360270827, "gate_mean": 1.443084329366684e-06, "lr": 7.360052221948154e-05, "steps_per_second": 4.843690925513017 }, { "step": 12440, "loss": 2.4587149620056152, "lm_loss": 2.4587149620056152, "ppl": 11.689780072506757, "gate_mean": 1.2265518307685852e-06, "lr": 7.347338091192705e-05, "steps_per_second": 4.843865445933305 }, { "step": 12450, "loss": 2.4941294193267822, "lm_loss": 2.4941294193267822, "ppl": 12.111185163925342, "gate_mean": 1.4095567166805267e-06, "lr": 7.334630939317441e-05, "steps_per_second": 4.844062603444685 }, { "step": 12460, "loss": 2.5679984092712402, "lm_loss": 2.5679984092712402, "ppl": 13.039698163649794, "gate_mean": 1.118052750825882e-06, "lr": 7.321930801063236e-05, "steps_per_second": 4.844176018409563 }, { "step": 12470, "loss": 2.40098237991333, "lm_loss": 2.40098237991333, "ppl": 11.034010648511284, "gate_mean": 1.3024546205997467e-06, "lr": 7.30923771115177e-05, "steps_per_second": 4.844379283684107 }, { "step": 12480, "loss": 2.46179461479187, "lm_loss": 2.46179461479187, "ppl": 11.72583602769218, "gate_mean": 1.5730038285255432e-06, "lr": 7.296551704285477e-05, "steps_per_second": 4.844541161662664 }, { "step": 12490, "loss": 2.5260677337646484, "lm_loss": 2.5260677337646484, "ppl": 12.504239336921518, "gate_mean": 1.552049070596695e-06, "lr": 7.28387281514741e-05, "steps_per_second": 4.844777695766413 }, { "step": 12500, "loss": 2.4382073879241943, "lm_loss": 2.4382073879241943, "ppl": 11.45249245447057, "gate_mean": 1.200009137392044e-06, "lr": 7.271201078401174e-05, "steps_per_second": 4.844830459981371 }, { "step": 12510, "loss": 2.6104493141174316, "lm_loss": 2.6104493141174316, "ppl": 13.60516247027773, "gate_mean": 1.5883706510066986e-06, "lr": 7.258536528690806e-05, "steps_per_second": 4.84506537981718 }, { "step": 12520, "loss": 2.4311575889587402, "lm_loss": 2.4311575889587402, "ppl": 11.372038609938635, "gate_mean": 1.2025702744722366e-06, "lr": 7.245879200640705e-05, "steps_per_second": 4.84522925136537 }, { "step": 12530, "loss": 2.4220991134643555, "lm_loss": 2.4220991134643555, "ppl": 11.269490443120585, "gate_mean": 1.0970979928970337e-06, "lr": 7.23322912885552e-05, "steps_per_second": 4.84547691352503 }, { "step": 12540, "loss": 2.369373321533203, "lm_loss": 2.369373321533203, "ppl": 10.690690558848505, "gate_mean": 1.2209638953208923e-06, "lr": 7.220586347920069e-05, "steps_per_second": 4.845758280919789 }, { "step": 12550, "loss": 2.4468021392822266, "lm_loss": 2.4468021392822266, "ppl": 11.551347990386866, "gate_mean": 1.2461096048355103e-06, "lr": 7.207950892399233e-05, "steps_per_second": 4.845849766020239 }, { "step": 12560, "loss": 2.42452073097229, "lm_loss": 2.42452073097229, "ppl": 11.296813908621774, "gate_mean": 1.4086253941059113e-06, "lr": 7.195322796837857e-05, "steps_per_second": 4.846096347335894 }, { "step": 12570, "loss": 2.4152941703796387, "lm_loss": 2.4152941703796387, "ppl": 11.193062540754507, "gate_mean": 1.2898817658424377e-06, "lr": 7.182702095760676e-05, "steps_per_second": 4.846171245036615 }, { "step": 12580, "loss": 2.3391449451446533, "lm_loss": 2.3391449451446533, "ppl": 10.372363829881616, "gate_mean": 1.0547228157520294e-06, "lr": 7.170088823672206e-05, "steps_per_second": 4.846236940786409 }, { "step": 12590, "loss": 2.367849349975586, "lm_loss": 2.367849349975586, "ppl": 10.674410658708744, "gate_mean": 1.473352313041687e-06, "lr": 7.157483015056651e-05, "steps_per_second": 4.846448537189744 }, { "step": 12600, "loss": 2.4222373962402344, "lm_loss": 2.4222373962402344, "ppl": 11.271048927295077, "gate_mean": 1.2507662177085876e-06, "lr": 7.144884704377813e-05, "steps_per_second": 4.846634291444817 }, { "step": 12610, "loss": 2.2725906372070312, "lm_loss": 2.2725906372070312, "ppl": 9.70450913920152, "gate_mean": 1.4784745872020721e-06, "lr": 7.132293926078986e-05, "steps_per_second": 4.843957983730124 }, { "step": 12620, "loss": 2.4096717834472656, "lm_loss": 2.4096717834472656, "ppl": 11.130307394363523, "gate_mean": 1.2838281691074371e-06, "lr": 7.119710714582882e-05, "steps_per_second": 4.844010732574188 }, { "step": 12630, "loss": 2.456665515899658, "lm_loss": 2.456665515899658, "ppl": 11.665847031373662, "gate_mean": 1.480337232351303e-06, "lr": 7.107135104291518e-05, "steps_per_second": 4.844067026812139 }, { "step": 12640, "loss": 2.326691150665283, "lm_loss": 2.326691150665283, "ppl": 10.243989574973382, "gate_mean": 1.7075799405574799e-06, "lr": 7.094567129586137e-05, "steps_per_second": 4.840381623173636 }, { "step": 12650, "loss": 2.4340779781341553, "lm_loss": 2.4340779781341553, "ppl": 11.405297929838257, "gate_mean": 1.5208497643470764e-06, "lr": 7.082006824827094e-05, "steps_per_second": 4.840543741717083 }, { "step": 12660, "loss": 2.4918735027313232, "lm_loss": 2.4918735027313232, "ppl": 12.083894135039882, "gate_mean": 1.4901161193847656e-06, "lr": 7.06945422435379e-05, "steps_per_second": 4.840749928812316 }, { "step": 12670, "loss": 2.3833649158477783, "lm_loss": 2.3833649158477783, "ppl": 10.84132169159383, "gate_mean": 1.4617107808589935e-06, "lr": 7.056909362484549e-05, "steps_per_second": 4.840965253035701 }, { "step": 12680, "loss": 2.4404056072235107, "lm_loss": 2.4404056072235107, "ppl": 11.477695234879738, "gate_mean": 1.2302771210670471e-06, "lr": 7.044372273516551e-05, "steps_per_second": 4.838415272860784 }, { "step": 12690, "loss": 2.3683624267578125, "lm_loss": 2.3683624267578125, "ppl": 10.679888856229482, "gate_mean": 1.3676472008228302e-06, "lr": 7.031842991725718e-05, "steps_per_second": 4.83348685609444 }, { "step": 12700, "loss": 2.3794589042663574, "lm_loss": 2.3794589042663574, "ppl": 10.799057958558452, "gate_mean": 1.3746321201324463e-06, "lr": 7.019321551366621e-05, "steps_per_second": 4.833672129525477 }, { "step": 12710, "loss": 2.4411816596984863, "lm_loss": 2.4411816596984863, "ppl": 11.486605985831734, "gate_mean": 1.1171214282512665e-06, "lr": 7.006807986672404e-05, "steps_per_second": 4.833668425255292 }, { "step": 12720, "loss": 2.4502415657043457, "lm_loss": 2.4502415657043457, "ppl": 11.59114640450118, "gate_mean": 1.323176547884941e-06, "lr": 6.994302331854675e-05, "steps_per_second": 4.8339842039559064 }, { "step": 12730, "loss": 2.438704252243042, "lm_loss": 2.438704252243042, "ppl": 11.45818420322941, "gate_mean": 1.3713724911212921e-06, "lr": 6.981804621103419e-05, "steps_per_second": 4.8341500548105225 }, { "step": 12740, "loss": 2.4263672828674316, "lm_loss": 2.4263672828674316, "ppl": 11.31769333329159, "gate_mean": 1.314561814069748e-06, "lr": 6.969314888586899e-05, "steps_per_second": 4.834325027585176 }, { "step": 12750, "loss": 2.351233720779419, "lm_loss": 2.351233720779419, "ppl": 10.4985139731333, "gate_mean": 1.162756234407425e-06, "lr": 6.956833168451564e-05, "steps_per_second": 4.834580316942609 }, { "step": 12760, "loss": 2.338695764541626, "lm_loss": 2.338695764541626, "ppl": 10.367705811465768, "gate_mean": 1.3015232980251312e-06, "lr": 6.944359494821959e-05, "steps_per_second": 4.834834162974301 }, { "step": 12770, "loss": 2.3497533798217773, "lm_loss": 2.3497533798217773, "ppl": 10.482984090501189, "gate_mean": 1.1119991540908813e-06, "lr": 6.931893901800638e-05, "steps_per_second": 4.8314562465957085 }, { "step": 12780, "loss": 2.333061456680298, "lm_loss": 2.333061456680298, "ppl": 10.309455220089815, "gate_mean": 1.412816345691681e-06, "lr": 6.919436423468053e-05, "steps_per_second": 4.8316258342267115 }, { "step": 12790, "loss": 2.344515085220337, "lm_loss": 2.344515085220337, "ppl": 10.42821470585692, "gate_mean": 1.282431185245514e-06, "lr": 6.906987093882473e-05, "steps_per_second": 4.83176382798932 }, { "step": 12800, "loss": 2.340883731842041, "lm_loss": 2.340883731842041, "ppl": 10.39041484701579, "gate_mean": 1.2652017176151276e-06, "lr": 6.894545947079889e-05, "steps_per_second": 4.832015658798094 }, { "step": 12810, "loss": 2.4002509117126465, "lm_loss": 2.4002509117126465, "ppl": 11.025942571726432, "gate_mean": 1.1315569281578064e-06, "lr": 6.882113017073922e-05, "steps_per_second": 4.832075577586084 }, { "step": 12820, "loss": 2.3790132999420166, "lm_loss": 2.3790132999420166, "ppl": 10.794246923621905, "gate_mean": 1.4225952327251434e-06, "lr": 6.86968833785573e-05, "steps_per_second": 4.829521518792557 }, { "step": 12830, "loss": 2.3202767372131348, "lm_loss": 2.3202767372131348, "ppl": 10.178490683502812, "gate_mean": 1.3653188943862915e-06, "lr": 6.857271943393912e-05, "steps_per_second": 4.829633532773052 }, { "step": 12840, "loss": 2.3688604831695557, "lm_loss": 2.3688604831695557, "ppl": 10.685209368198597, "gate_mean": 1.5166588127613068e-06, "lr": 6.844863867634412e-05, "steps_per_second": 4.829808741216737 }, { "step": 12850, "loss": 2.3695931434631348, "lm_loss": 2.3695931434631348, "ppl": 10.69304086539445, "gate_mean": 1.246575266122818e-06, "lr": 6.832464144500438e-05, "steps_per_second": 4.8299946033208325 }, { "step": 12860, "loss": 2.347506523132324, "lm_loss": 2.347506523132324, "ppl": 10.459456768730043, "gate_mean": 1.0980293154716492e-06, "lr": 6.820072807892358e-05, "steps_per_second": 4.830168506913135 }, { "step": 12870, "loss": 2.381927967071533, "lm_loss": 2.381927967071533, "ppl": 10.825754454995634, "gate_mean": 1.3927929103374481e-06, "lr": 6.807689891687616e-05, "steps_per_second": 4.830364918541857 }, { "step": 12880, "loss": 2.359455108642578, "lm_loss": 2.359455108642578, "ppl": 10.58518210649659, "gate_mean": 1.239590346813202e-06, "lr": 6.795315429740635e-05, "steps_per_second": 4.830614410424027 }, { "step": 12890, "loss": 2.3823273181915283, "lm_loss": 2.3823273181915283, "ppl": 10.830078595529743, "gate_mean": 1.4277175068855286e-06, "lr": 6.782949455882711e-05, "steps_per_second": 4.83055479311645 }, { "step": 12900, "loss": 2.3687539100646973, "lm_loss": 2.3687539100646973, "ppl": 10.684070672938388, "gate_mean": 1.3527460396289825e-06, "lr": 6.770592003921954e-05, "steps_per_second": 4.8307229621502366 }, { "step": 12910, "loss": 2.34714412689209, "lm_loss": 2.34714412689209, "ppl": 10.455666987664841, "gate_mean": 1.3951212167739868e-06, "lr": 6.758243107643157e-05, "steps_per_second": 4.830579519093636 }, { "step": 12920, "loss": 2.2832820415496826, "lm_loss": 2.2832820415496826, "ppl": 9.808820594711916, "gate_mean": 1.419801265001297e-06, "lr": 6.745902800807743e-05, "steps_per_second": 4.830729898790105 }, { "step": 12930, "loss": 2.3516335487365723, "lm_loss": 2.3516335487365723, "ppl": 10.502712411798969, "gate_mean": 1.319684088230133e-06, "lr": 6.733571117153628e-05, "steps_per_second": 4.830855390551907 }, { "step": 12940, "loss": 2.342648506164551, "lm_loss": 2.342648506164551, "ppl": 10.408767773961896, "gate_mean": 1.2237578630447388e-06, "lr": 6.721248090395172e-05, "steps_per_second": 4.831071804929026 }, { "step": 12950, "loss": 2.332594633102417, "lm_loss": 2.332594633102417, "ppl": 10.304643646483353, "gate_mean": 1.57160684466362e-06, "lr": 6.708933754223052e-05, "steps_per_second": 4.8312882155463255 }, { "step": 12960, "loss": 2.304715394973755, "lm_loss": 2.304715394973755, "ppl": 10.021325726851147, "gate_mean": 1.4025717973709106e-06, "lr": 6.696628142304197e-05, "steps_per_second": 4.831372453388566 }, { "step": 12970, "loss": 2.361210346221924, "lm_loss": 2.361210346221924, "ppl": 10.603777931185029, "gate_mean": 1.3620592653751373e-06, "lr": 6.684331288281689e-05, "steps_per_second": 4.831452533752964 }, { "step": 12980, "loss": 2.299574613571167, "lm_loss": 2.299574613571167, "ppl": 9.969940475263972, "gate_mean": 1.703854650259018e-06, "lr": 6.672043225774643e-05, "steps_per_second": 4.831747226423074 }, { "step": 12990, "loss": 2.323758602142334, "lm_loss": 2.323758602142334, "ppl": 10.213992583789723, "gate_mean": 1.3909302651882172e-06, "lr": 6.659763988378157e-05, "steps_per_second": 4.831893069952217 }, { "step": 13000, "loss": 2.2855403423309326, "lm_loss": 2.2855403423309326, "ppl": 9.830996892875046, "gate_mean": 1.2516975402832031e-06, "lr": 6.6474936096632e-05, "steps_per_second": 4.832114085567099 }, { "step": 13010, "loss": 2.2854251861572266, "lm_loss": 2.2854251861572266, "ppl": 9.829864858070795, "gate_mean": 1.2756790965795517e-06, "lr": 6.635232123176516e-05, "steps_per_second": 4.832235769100475 }, { "step": 13020, "loss": 2.3053853511810303, "lm_loss": 2.3053853511810303, "ppl": 10.028041825721838, "gate_mean": 1.1012889444828033e-06, "lr": 6.622979562440549e-05, "steps_per_second": 4.832326670589551 }, { "step": 13030, "loss": 2.2386465072631836, "lm_loss": 2.2386465072631836, "ppl": 9.380626081912022, "gate_mean": 1.35740265250206e-06, "lr": 6.61073596095332e-05, "steps_per_second": 4.832614566961989 }, { "step": 13040, "loss": 2.2646548748016357, "lm_loss": 2.2646548748016357, "ppl": 9.627801230844254, "gate_mean": 1.4584511518478394e-06, "lr": 6.598501352188374e-05, "steps_per_second": 4.832749712935216 }, { "step": 13050, "loss": 2.299065589904785, "lm_loss": 2.299065589904785, "ppl": 9.964866831021691, "gate_mean": 1.4174729585647583e-06, "lr": 6.586275769594663e-05, "steps_per_second": 4.832929305692085 }, { "step": 13060, "loss": 2.3815488815307617, "lm_loss": 2.3815488815307617, "ppl": 10.821651345777628, "gate_mean": 1.2298114597797394e-06, "lr": 6.574059246596465e-05, "steps_per_second": 4.8331336697210245 }, { "step": 13070, "loss": 2.3269574642181396, "lm_loss": 2.3269574642181396, "ppl": 10.246718051531534, "gate_mean": 1.5641562640666962e-06, "lr": 6.561851816593287e-05, "steps_per_second": 4.833095960030021 }, { "step": 13080, "loss": 2.2947263717651367, "lm_loss": 2.2947263717651367, "ppl": 9.921720777876635, "gate_mean": 1.2596137821674347e-06, "lr": 6.549653512959774e-05, "steps_per_second": 4.833387554016566 }, { "step": 13090, "loss": 2.277398109436035, "lm_loss": 2.277398109436035, "ppl": 9.751275621595635, "gate_mean": 1.6042031347751617e-06, "lr": 6.537464369045624e-05, "steps_per_second": 4.833578437985224 }, { "step": 13100, "loss": 2.347397565841675, "lm_loss": 2.347397565841675, "ppl": 10.458317196742323, "gate_mean": 1.178588718175888e-06, "lr": 6.525284418175493e-05, "steps_per_second": 4.8338174633295194 }, { "step": 13110, "loss": 2.273052930831909, "lm_loss": 2.273052930831909, "ppl": 9.708996509070465, "gate_mean": 1.5888363122940063e-06, "lr": 6.513113693648896e-05, "steps_per_second": 4.833990873152042 }, { "step": 13120, "loss": 2.3419740200042725, "lm_loss": 2.3419740200042725, "ppl": 10.401749571259172, "gate_mean": 1.3280659914016724e-06, "lr": 6.500952228740134e-05, "steps_per_second": 4.83424167892182 }, { "step": 13130, "loss": 2.3878302574157715, "lm_loss": 2.3878302574157715, "ppl": 10.889840141101457, "gate_mean": 1.407228410243988e-06, "lr": 6.488800056698188e-05, "steps_per_second": 4.83449433254321 }, { "step": 13140, "loss": 2.3730757236480713, "lm_loss": 2.3730757236480713, "ppl": 10.730345157519944, "gate_mean": 1.4123506844043732e-06, "lr": 6.476657210746628e-05, "steps_per_second": 4.834752869789447 }, { "step": 13150, "loss": 2.2543575763702393, "lm_loss": 2.2543575763702393, "ppl": 9.529169579632823, "gate_mean": 1.1855736374855042e-06, "lr": 6.464523724083536e-05, "steps_per_second": 4.834991610642115 }, { "step": 13160, "loss": 2.332245349884033, "lm_loss": 2.332245349884033, "ppl": 10.301045035889942, "gate_mean": 1.432374119758606e-06, "lr": 6.452399629881399e-05, "steps_per_second": 4.832599098842077 }, { "step": 13170, "loss": 2.2972874641418457, "lm_loss": 2.2972874641418457, "ppl": 9.947163788367659, "gate_mean": 1.5888363122940063e-06, "lr": 6.440284961287025e-05, "steps_per_second": 4.832834877684581 }, { "step": 13180, "loss": 2.330824375152588, "lm_loss": 2.330824375152588, "ppl": 10.28641790603864, "gate_mean": 1.3546086847782135e-06, "lr": 6.428179751421462e-05, "steps_per_second": 4.833020473971125 }, { "step": 13190, "loss": 2.141972780227661, "lm_loss": 2.141972780227661, "ppl": 8.51622170158657, "gate_mean": 1.0719522833824158e-06, "lr": 6.416084033379883e-05, "steps_per_second": 4.833240870450406 }, { "step": 13200, "loss": 2.271425485610962, "lm_loss": 2.271425485610962, "ppl": 9.693208499646476, "gate_mean": 1.1832453310489655e-06, "lr": 6.40399784023153e-05, "steps_per_second": 4.833364545997316 }, { "step": 13210, "loss": 2.264051675796509, "lm_loss": 2.264051675796509, "ppl": 9.621995501901244, "gate_mean": 1.3762619346380234e-06, "lr": 6.391921205019583e-05, "steps_per_second": 4.833492716786719 }, { "step": 13220, "loss": 2.307311773300171, "lm_loss": 2.307311773300171, "ppl": 10.04737868680494, "gate_mean": 1.4719553291797638e-06, "lr": 6.379854160761103e-05, "steps_per_second": 4.83372028767894 }, { "step": 13230, "loss": 2.3127329349517822, "lm_loss": 2.3127329349517822, "ppl": 10.101995059172825, "gate_mean": 1.509208232164383e-06, "lr": 6.367796740446936e-05, "steps_per_second": 4.8339504966783515 }, { "step": 13240, "loss": 2.207491874694824, "lm_loss": 2.207491874694824, "ppl": 9.092881682208663, "gate_mean": 1.4659017324447632e-06, "lr": 6.3557489770416e-05, "steps_per_second": 4.834096410987056 }, { "step": 13250, "loss": 2.2581539154052734, "lm_loss": 2.2581539154052734, "ppl": 9.565414293159368, "gate_mean": 1.2719538062810898e-06, "lr": 6.34371090348323e-05, "steps_per_second": 4.834325424248325 }, { "step": 13260, "loss": 2.254570960998535, "lm_loss": 2.254570960998535, "ppl": 9.531203174902812, "gate_mean": 1.5140976756811142e-06, "lr": 6.331682552683449e-05, "steps_per_second": 4.834510417630247 }, { "step": 13270, "loss": 2.213477611541748, "lm_loss": 2.213477611541748, "ppl": 9.14747249927655, "gate_mean": 1.4300458133220673e-06, "lr": 6.319663957527308e-05, "steps_per_second": 4.834746341603748 }, { "step": 13280, "loss": 2.2737505435943604, "lm_loss": 2.2737505435943604, "ppl": 9.715771992002688, "gate_mean": 1.4165416359901428e-06, "lr": 6.3076551508732e-05, "steps_per_second": 4.83492618648587 }, { "step": 13290, "loss": 2.189260721206665, "lm_loss": 2.189260721206665, "ppl": 8.928609942222552, "gate_mean": 1.2814998626708984e-06, "lr": 6.295656165552732e-05, "steps_per_second": 4.835147930298463 }, { "step": 13300, "loss": 2.308985948562622, "lm_loss": 2.308985948562622, "ppl": 10.064213848228045, "gate_mean": 1.6619451344013214e-06, "lr": 6.283667034370683e-05, "steps_per_second": 4.8353897747229855 }, { "step": 13310, "loss": 2.135547637939453, "lm_loss": 2.135547637939453, "ppl": 8.461679174781008, "gate_mean": 9.541399776935577e-07, "lr": 6.271687790104865e-05, "steps_per_second": 4.8356468292569295 }, { "step": 13320, "loss": 2.2224433422088623, "lm_loss": 2.2224433422088623, "ppl": 9.229855031935017, "gate_mean": 1.3569369912147522e-06, "lr": 6.259718465506082e-05, "steps_per_second": 4.835784289116537 }, { "step": 13330, "loss": 2.2364444732666016, "lm_loss": 2.2364444732666016, "ppl": 9.359992350796789, "gate_mean": 1.759268343448639e-06, "lr": 6.247759093298015e-05, "steps_per_second": 4.836027130381576 }, { "step": 13340, "loss": 2.2372384071350098, "lm_loss": 2.2372384071350098, "ppl": 9.367426516459583, "gate_mean": 1.2614764273166656e-06, "lr": 6.235809706177126e-05, "steps_per_second": 4.8362318964000615 }, { "step": 13350, "loss": 2.285614013671875, "lm_loss": 2.285614013671875, "ppl": 9.831721182278303, "gate_mean": 1.4533288776874542e-06, "lr": 6.223870336812584e-05, "steps_per_second": 4.8363807936456045 }, { "step": 13360, "loss": 2.2167041301727295, "lm_loss": 2.2167041301727295, "ppl": 9.177034655499561, "gate_mean": 1.296401023864746e-06, "lr": 6.211941017846164e-05, "steps_per_second": 4.836616257990301 }, { "step": 13370, "loss": 2.2898926734924316, "lm_loss": 2.2898926734924316, "ppl": 9.873877895471974, "gate_mean": 1.5771947801113129e-06, "lr": 6.200021781892173e-05, "steps_per_second": 4.836807897117468 }, { "step": 13380, "loss": 2.272136926651001, "lm_loss": 2.272136926651001, "ppl": 9.70010709966539, "gate_mean": 1.6177073121070862e-06, "lr": 6.188112661537351e-05, "steps_per_second": 4.837024200930031 }, { "step": 13390, "loss": 2.2060251235961914, "lm_loss": 2.2060251235961914, "ppl": 9.079554464256635, "gate_mean": 1.57160684466362e-06, "lr": 6.176213689340774e-05, "steps_per_second": 4.837273055703236 }, { "step": 13400, "loss": 2.3091835975646973, "lm_loss": 2.3091835975646973, "ppl": 10.06620322664467, "gate_mean": 1.3918615877628326e-06, "lr": 6.164324897833778e-05, "steps_per_second": 4.837405680745194 }, { "step": 13410, "loss": 2.123276710510254, "lm_loss": 2.123276710510254, "ppl": 8.358480987357137, "gate_mean": 1.4558900147676468e-06, "lr": 6.152446319519863e-05, "steps_per_second": 4.837660680811055 }, { "step": 13420, "loss": 2.3381428718566895, "lm_loss": 2.3381428718566895, "ppl": 10.361975167124687, "gate_mean": 1.4328397810459137e-06, "lr": 6.140577986874608e-05, "steps_per_second": 4.837798606829016 }, { "step": 13430, "loss": 2.168813705444336, "lm_loss": 2.168813705444336, "ppl": 8.747900296373798, "gate_mean": 1.453794538974762e-06, "lr": 6.128719932345589e-05, "steps_per_second": 4.837988065110579 }, { "step": 13440, "loss": 2.232903480529785, "lm_loss": 2.232903480529785, "ppl": 9.326907297402938, "gate_mean": 1.526903361082077e-06, "lr": 6.116872188352269e-05, "steps_per_second": 4.838177642029416 }, { "step": 13450, "loss": 2.188899517059326, "lm_loss": 2.188899517059326, "ppl": 8.925385473662216, "gate_mean": 1.36462040245533e-06, "lr": 6.105034787285926e-05, "steps_per_second": 4.83833327075311 }, { "step": 13460, "loss": 2.1362624168395996, "lm_loss": 2.1362624168395996, "ppl": 8.467729566603555, "gate_mean": 1.3210810720920563e-06, "lr": 6.093207761509561e-05, "steps_per_second": 4.838545038218218 }, { "step": 13470, "loss": 2.3408546447753906, "lm_loss": 2.3408546447753906, "ppl": 10.390112624722011, "gate_mean": 1.600012183189392e-06, "lr": 6.081391143357814e-05, "steps_per_second": 4.838735325141553 }, { "step": 13480, "loss": 2.2021336555480957, "lm_loss": 2.2021336555480957, "ppl": 9.044290327301068, "gate_mean": 1.4924444258213043e-06, "lr": 6.0695849651368746e-05, "steps_per_second": 4.838890268028778 }, { "step": 13490, "loss": 2.324679136276245, "lm_loss": 2.324679136276245, "ppl": 10.223399241517125, "gate_mean": 1.3133976608514786e-06, "lr": 6.057789259124371e-05, "steps_per_second": 4.838862302435107 }, { "step": 13500, "loss": 2.2510595321655273, "lm_loss": 2.2510595321655273, "ppl": 9.497793725042541, "gate_mean": 1.346692442893982e-06, "lr": 6.046004057569318e-05, "steps_per_second": 4.839027259735525 }, { "step": 13510, "loss": 2.3112339973449707, "lm_loss": 2.3112339973449707, "ppl": 10.086864141858332, "gate_mean": 1.0854564607143402e-06, "lr": 6.0342293926920115e-05, "steps_per_second": 4.83915811295325 }, { "step": 13520, "loss": 2.157832622528076, "lm_loss": 2.157832622528076, "ppl": 8.65236438178402, "gate_mean": 1.1129304766654968e-06, "lr": 6.022465296683927e-05, "steps_per_second": 4.839372565179851 }, { "step": 13530, "loss": 2.222630262374878, "lm_loss": 2.222630262374878, "ppl": 9.23158043922157, "gate_mean": 1.3257376849651337e-06, "lr": 6.010711801707664e-05, "steps_per_second": 4.839577114908109 }, { "step": 13540, "loss": 2.2597227096557617, "lm_loss": 2.2597227096557617, "ppl": 9.58043223705796, "gate_mean": 1.3830140233039856e-06, "lr": 5.9989689398968174e-05, "steps_per_second": 4.839709426174655 }, { "step": 13550, "loss": 2.2582545280456543, "lm_loss": 2.2582545280456543, "ppl": 9.566376743164245, "gate_mean": 1.498498022556305e-06, "lr": 5.987236743355928e-05, "steps_per_second": 4.839872013347287 }, { "step": 13560, "loss": 2.1817026138305664, "lm_loss": 2.1817026138305664, "ppl": 8.86138093163805, "gate_mean": 1.155305653810501e-06, "lr": 5.975515244160377e-05, "steps_per_second": 4.839982710510424 }, { "step": 13570, "loss": 2.281168222427368, "lm_loss": 2.281168222427368, "ppl": 9.788108420778357, "gate_mean": 1.1748634278774261e-06, "lr": 5.9638044743562934e-05, "steps_per_second": 4.84016211578386 }, { "step": 13580, "loss": 2.3049259185791016, "lm_loss": 2.3049259185791016, "ppl": 10.023435674562624, "gate_mean": 1.25030055642128e-06, "lr": 5.9521044659604724e-05, "steps_per_second": 4.840375578303744 }, { "step": 13590, "loss": 2.2991700172424316, "lm_loss": 2.2991700172424316, "ppl": 9.965907489870528, "gate_mean": 1.4184042811393738e-06, "lr": 5.9404152509602876e-05, "steps_per_second": 4.840509514850332 }, { "step": 13600, "loss": 2.4636919498443604, "lm_loss": 2.4636919498443604, "ppl": 11.748104986564934, "gate_mean": 1.6349367797374725e-06, "lr": 5.928736861313613e-05, "steps_per_second": 4.8406811983092295 }, { "step": 13610, "loss": 2.2480242252349854, "lm_loss": 2.2480242252349854, "ppl": 9.469008713696178, "gate_mean": 1.509208232164383e-06, "lr": 5.917069328948719e-05, "steps_per_second": 4.840837059061056 }, { "step": 13620, "loss": 2.2813055515289307, "lm_loss": 2.2813055515289307, "ppl": 9.789452705216354, "gate_mean": 1.2977980077266693e-06, "lr": 5.905412685764191e-05, "steps_per_second": 4.84105010037404 }, { "step": 13630, "loss": 2.196453809738159, "lm_loss": 2.196453809738159, "ppl": 8.993065764302846, "gate_mean": 1.2745149433612823e-06, "lr": 5.893766963628848e-05, "steps_per_second": 4.841312160972712 }, { "step": 13640, "loss": 2.165337085723877, "lm_loss": 2.165337085723877, "ppl": 8.717539979908418, "gate_mean": 1.375097781419754e-06, "lr": 5.8821321943816466e-05, "steps_per_second": 4.841451995433538 }, { "step": 13650, "loss": 2.224902868270874, "lm_loss": 2.224902868270874, "ppl": 9.252584040771627, "gate_mean": 1.269858330488205e-06, "lr": 5.870508409831602e-05, "steps_per_second": 4.841641373652517 }, { "step": 13660, "loss": 2.1949782371520996, "lm_loss": 2.1949782371520996, "ppl": 8.979805628548771, "gate_mean": 1.1245720088481903e-06, "lr": 5.858895641757704e-05, "steps_per_second": 4.84178137502744 }, { "step": 13670, "loss": 2.1742234230041504, "lm_loss": 2.1742234230041504, "ppl": 8.795352201196529, "gate_mean": 1.3457611203193665e-06, "lr": 5.8472939219088115e-05, "steps_per_second": 4.841969961187331 }, { "step": 13680, "loss": 2.2070324420928955, "lm_loss": 2.2070324420928955, "ppl": 9.088705075426503, "gate_mean": 1.4728866517543793e-06, "lr": 5.835703282003583e-05, "steps_per_second": 4.842243746976319 }, { "step": 13690, "loss": 2.2505078315734863, "lm_loss": 2.2505078315734863, "ppl": 9.492555231794135, "gate_mean": 1.8794089555740356e-06, "lr": 5.824123753730385e-05, "steps_per_second": 4.8424416986116245 }, { "step": 13700, "loss": 2.1037304401397705, "lm_loss": 2.1037304401397705, "ppl": 8.196690212279407, "gate_mean": 1.3182871043682098e-06, "lr": 5.812555368747207e-05, "steps_per_second": 4.842572725619416 }, { "step": 13710, "loss": 2.0856778621673584, "lm_loss": 2.0856778621673584, "ppl": 8.050046454565974, "gate_mean": 1.4365650713443756e-06, "lr": 5.800998158681578e-05, "steps_per_second": 4.842840238052629 }, { "step": 13720, "loss": 2.1775646209716797, "lm_loss": 2.1775646209716797, "ppl": 8.824788362731953, "gate_mean": 1.1962838470935822e-06, "lr": 5.7894521551304584e-05, "steps_per_second": 4.843015317373365 }, { "step": 13730, "loss": 2.1618199348449707, "lm_loss": 2.1618199348449707, "ppl": 8.68693293285905, "gate_mean": 1.4035031199455261e-06, "lr": 5.777917389660189e-05, "steps_per_second": 4.84305728215384 }, { "step": 13740, "loss": 2.128854751586914, "lm_loss": 2.128854751586914, "ppl": 8.405235214916043, "gate_mean": 1.4309771358966827e-06, "lr": 5.7663938938063706e-05, "steps_per_second": 4.843253527912966 }, { "step": 13750, "loss": 2.1827313899993896, "lm_loss": 2.1827313899993896, "ppl": 8.87050200012787, "gate_mean": 1.5473924577236176e-06, "lr": 5.7548816990738054e-05, "steps_per_second": 4.843471293541837 }, { "step": 13760, "loss": 2.18678617477417, "lm_loss": 2.18678617477417, "ppl": 8.906542996444218, "gate_mean": 1.3280659914016724e-06, "lr": 5.7433808369363984e-05, "steps_per_second": 4.843722816813224 }, { "step": 13770, "loss": 2.2406809329986572, "lm_loss": 2.2406809329986572, "ppl": 9.399729694879873, "gate_mean": 1.507345587015152e-06, "lr": 5.73189133883706e-05, "steps_per_second": 4.843986518305249 }, { "step": 13780, "loss": 2.130528450012207, "lm_loss": 2.130528450012207, "ppl": 8.419314823081747, "gate_mean": 1.2950040400028229e-06, "lr": 5.720413236187644e-05, "steps_per_second": 4.844141028877857 }, { "step": 13790, "loss": 2.246246814727783, "lm_loss": 2.246246814727783, "ppl": 9.45219334644779, "gate_mean": 1.562759280204773e-06, "lr": 5.708946560368841e-05, "steps_per_second": 4.844346058198015 }, { "step": 13800, "loss": 2.1773924827575684, "lm_loss": 2.1773924827575684, "ppl": 8.823269410161924, "gate_mean": 1.4039687812328339e-06, "lr": 5.697491342730105e-05, "steps_per_second": 4.844542640149511 }, { "step": 13810, "loss": 2.197246551513672, "lm_loss": 2.197246551513672, "ppl": 9.00019776976998, "gate_mean": 1.1147931218147278e-06, "lr": 5.6860476145895736e-05, "steps_per_second": 4.844802732415113 }, { "step": 13820, "loss": 2.2288475036621094, "lm_loss": 2.2288475036621094, "ppl": 9.289154191775456, "gate_mean": 1.5711411833763123e-06, "lr": 5.674615407233948e-05, "steps_per_second": 4.845005854082293 }, { "step": 13830, "loss": 2.1609973907470703, "lm_loss": 2.1609973907470703, "ppl": 8.679790485337998, "gate_mean": 1.23586505651474e-06, "lr": 5.663194751918455e-05, "steps_per_second": 4.845104642454285 }, { "step": 13840, "loss": 2.171440601348877, "lm_loss": 2.171440601348877, "ppl": 8.770910329084137, "gate_mean": 1.4188699424266815e-06, "lr": 5.651785679866727e-05, "steps_per_second": 4.8452942735802775 }, { "step": 13850, "loss": 2.0563035011291504, "lm_loss": 2.0563035011291504, "ppl": 7.817020729859098, "gate_mean": 1.6028061509132385e-06, "lr": 5.640388222270735e-05, "steps_per_second": 4.845564660988645 }, { "step": 13860, "loss": 2.1858456134796143, "lm_loss": 2.1858456134796143, "ppl": 8.898169785209948, "gate_mean": 1.4277175068855286e-06, "lr": 5.62900241029069e-05, "steps_per_second": 4.845721388168682 }, { "step": 13870, "loss": 2.15881609916687, "lm_loss": 2.15881609916687, "ppl": 8.660877965793116, "gate_mean": 1.4980323612689972e-06, "lr": 5.617628275054965e-05, "steps_per_second": 4.845909221155825 }, { "step": 13880, "loss": 2.142850637435913, "lm_loss": 2.142850637435913, "ppl": 8.523701010596726, "gate_mean": 1.2088567018508911e-06, "lr": 5.606265847660018e-05, "steps_per_second": 4.846046866616355 }, { "step": 13890, "loss": 2.1323490142822266, "lm_loss": 2.1323490142822266, "ppl": 8.434656688014769, "gate_mean": 1.242849975824356e-06, "lr": 5.594915159170282e-05, "steps_per_second": 4.846264801312817 }, { "step": 13900, "loss": 2.1650655269622803, "lm_loss": 2.1650655269622803, "ppl": 8.715172976952047, "gate_mean": 1.8035061657428741e-06, "lr": 5.583576240618117e-05, "steps_per_second": 4.846415081351872 }, { "step": 13910, "loss": 2.1491966247558594, "lm_loss": 2.1491966247558594, "ppl": 8.577964304109793, "gate_mean": 1.434236764907837e-06, "lr": 5.572249123003687e-05, "steps_per_second": 4.846617338652626 }, { "step": 13920, "loss": 2.2518630027770996, "lm_loss": 2.2518630027770996, "ppl": 9.50542798971834, "gate_mean": 1.4936085790395737e-06, "lr": 5.560933837294896e-05, "steps_per_second": 4.846806220999756 }, { "step": 13930, "loss": 2.231457471847534, "lm_loss": 2.231457471847534, "ppl": 9.313430254780974, "gate_mean": 1.453794538974762e-06, "lr": 5.549630414427312e-05, "steps_per_second": 4.847031126596024 }, { "step": 13940, "loss": 2.1638453006744385, "lm_loss": 2.1638453006744385, "ppl": 8.704544979382208, "gate_mean": 1.3276003301143646e-06, "lr": 5.538338885304053e-05, "steps_per_second": 4.847206969368477 }, { "step": 13950, "loss": 2.1996164321899414, "lm_loss": 2.1996164321899414, "ppl": 9.021552458584758, "gate_mean": 1.4952383935451508e-06, "lr": 5.527059280795734e-05, "steps_per_second": 4.847391859503293 }, { "step": 13960, "loss": 2.14005708694458, "lm_loss": 2.14005708694458, "ppl": 8.49992284966076, "gate_mean": 1.291278749704361e-06, "lr": 5.5157916317403614e-05, "steps_per_second": 4.847592385213915 }, { "step": 13970, "loss": 2.146667003631592, "lm_loss": 2.146667003631592, "ppl": 8.556292726399628, "gate_mean": 1.4582183212041855e-06, "lr": 5.504535968943254e-05, "steps_per_second": 4.847791760027766 }, { "step": 13980, "loss": 2.103847026824951, "lm_loss": 2.103847026824951, "ppl": 8.197645892929446, "gate_mean": 1.389533281326294e-06, "lr": 5.4932923231769716e-05, "steps_per_second": 4.847971435186416 }, { "step": 13990, "loss": 2.1742608547210693, "lm_loss": 2.1742608547210693, "ppl": 8.795681432492135, "gate_mean": 1.3634562492370605e-06, "lr": 5.482060725181206e-05, "steps_per_second": 4.848094980378698 }, { "step": 14000, "loss": 2.0829219818115234, "lm_loss": 2.0829219818115234, "ppl": 8.027892031170067, "gate_mean": 1.4114193618297577e-06, "lr": 5.470841205662716e-05, "steps_per_second": 4.848281028615776 }, { "step": 14010, "loss": 2.1932780742645264, "lm_loss": 2.1932780742645264, "ppl": 8.964551467235378, "gate_mean": 1.6433186829090118e-06, "lr": 5.459633795295247e-05, "steps_per_second": 4.835072589566906 }, { "step": 14020, "loss": 2.2388429641723633, "lm_loss": 2.2388429641723633, "ppl": 9.382469151754218, "gate_mean": 1.4281831681728363e-06, "lr": 5.448438524719423e-05, "steps_per_second": 4.835189063404868 }, { "step": 14030, "loss": 2.1093995571136475, "lm_loss": 2.1093995571136475, "ppl": 8.243290173400897, "gate_mean": 1.125037670135498e-06, "lr": 5.437255424542693e-05, "steps_per_second": 4.835282319786389 }, { "step": 14040, "loss": 2.179786205291748, "lm_loss": 2.179786205291748, "ppl": 8.844415167415052, "gate_mean": 1.296401023864746e-06, "lr": 5.4260845253392234e-05, "steps_per_second": 4.835436804542431 }, { "step": 14050, "loss": 2.173017978668213, "lm_loss": 2.173017978668213, "ppl": 8.784756281381858, "gate_mean": 1.6386620700359344e-06, "lr": 5.414925857649822e-05, "steps_per_second": 4.835536236067478 }, { "step": 14060, "loss": 2.1108198165893555, "lm_loss": 2.1108198165893555, "ppl": 8.25500610224077, "gate_mean": 1.3704411685466766e-06, "lr": 5.403779451981867e-05, "steps_per_second": 4.835752032533467 }, { "step": 14070, "loss": 2.0455541610717773, "lm_loss": 2.0455541610717773, "ppl": 7.733442923699454, "gate_mean": 1.4360994100570679e-06, "lr": 5.392645338809199e-05, "steps_per_second": 4.835968293631032 }, { "step": 14080, "loss": 2.2193920612335205, "lm_loss": 2.2193920612335205, "ppl": 9.201735073634941, "gate_mean": 1.5553086996078491e-06, "lr": 5.381523548572065e-05, "steps_per_second": 4.836221407314431 }, { "step": 14090, "loss": 2.0153706073760986, "lm_loss": 2.0153706073760986, "ppl": 7.503507721271986, "gate_mean": 1.3853423297405243e-06, "lr": 5.370414111677012e-05, "steps_per_second": 4.836420801487163 }, { "step": 14100, "loss": 2.0922176837921143, "lm_loss": 2.0922176837921143, "ppl": 8.102864845627671, "gate_mean": 1.4561228454113007e-06, "lr": 5.359317058496812e-05, "steps_per_second": 4.836658476996663 }, { "step": 14110, "loss": 2.0946614742279053, "lm_loss": 2.0946614742279053, "ppl": 8.122690764568585, "gate_mean": 1.4472752809524536e-06, "lr": 5.348232419370392e-05, "steps_per_second": 4.836877666229708 }, { "step": 14120, "loss": 2.135230779647827, "lm_loss": 2.135230779647827, "ppl": 8.45899844630135, "gate_mean": 1.1655502021312714e-06, "lr": 5.337160224602725e-05, "steps_per_second": 4.837094211781259 }, { "step": 14130, "loss": 1.9686366319656372, "lm_loss": 1.9686366319656372, "ppl": 7.160906878537795, "gate_mean": 1.539476215839386e-06, "lr": 5.3261005044647775e-05, "steps_per_second": 4.83730760808448 }, { "step": 14140, "loss": 2.1178698539733887, "lm_loss": 2.1178698539733887, "ppl": 8.313409836217613, "gate_mean": 1.2461096048355103e-06, "lr": 5.3150532891933947e-05, "steps_per_second": 4.837488477568703 }, { "step": 14150, "loss": 2.1560447216033936, "lm_loss": 2.1560447216033936, "ppl": 8.636908632296677, "gate_mean": 1.7075799405574799e-06, "lr": 5.3040186089912414e-05, "steps_per_second": 4.837682990363695 }, { "step": 14160, "loss": 2.004776954650879, "lm_loss": 2.004776954650879, "ppl": 7.424437725759502, "gate_mean": 1.2291129678487778e-06, "lr": 5.292996494026717e-05, "steps_per_second": 4.837870686124462 }, { "step": 14170, "loss": 2.09236741065979, "lm_loss": 2.09236741065979, "ppl": 8.104078153030299, "gate_mean": 1.5771947801113129e-06, "lr": 5.281986974433856e-05, "steps_per_second": 4.838017341349604 }, { "step": 14180, "loss": 2.125668525695801, "lm_loss": 2.125668525695801, "ppl": 8.378496856698726, "gate_mean": 1.4468096196651459e-06, "lr": 5.2709900803122734e-05, "steps_per_second": 4.838250637258325 }, { "step": 14190, "loss": 1.9898592233657837, "lm_loss": 1.9898592233657837, "ppl": 7.314503978575457, "gate_mean": 1.5189871191978455e-06, "lr": 5.260005841727051e-05, "steps_per_second": 4.838463706200714 }, { "step": 14200, "loss": 2.041191339492798, "lm_loss": 2.041191339492798, "ppl": 7.699776785110775, "gate_mean": 1.0668300092220306e-06, "lr": 5.249034288708677e-05, "steps_per_second": 4.838701926801264 }, { "step": 14210, "loss": 2.0982987880706787, "lm_loss": 2.0982987880706787, "ppl": 8.152289337143447, "gate_mean": 1.505482941865921e-06, "lr": 5.238075451252965e-05, "steps_per_second": 4.838904538179971 }, { "step": 14220, "loss": 2.07486891746521, "lm_loss": 2.07486891746521, "ppl": 7.96350251451941, "gate_mean": 1.4477409422397614e-06, "lr": 5.22712935932095e-05, "steps_per_second": 4.839128015101249 }, { "step": 14230, "loss": 2.0141308307647705, "lm_loss": 2.0141308307647705, "ppl": 7.494210812132303, "gate_mean": 1.6023404896259308e-06, "lr": 5.2161960428388374e-05, "steps_per_second": 4.839354170042611 }, { "step": 14240, "loss": 1.9759595394134521, "lm_loss": 1.9759595394134521, "ppl": 7.213538008102052, "gate_mean": 1.4533288776874542e-06, "lr": 5.205275531697892e-05, "steps_per_second": 4.839582864431952 }, { "step": 14250, "loss": 2.2381105422973633, "lm_loss": 2.2381105422973633, "ppl": 9.37559974206601, "gate_mean": 1.492910087108612e-06, "lr": 5.194367855754374e-05, "steps_per_second": 4.839661184785251 }, { "step": 14260, "loss": 2.069526433944702, "lm_loss": 2.069526433944702, "ppl": 7.921071079115847, "gate_mean": 1.3494864106178284e-06, "lr": 5.183473044829456e-05, "steps_per_second": 4.839894966696876 }, { "step": 14270, "loss": 2.020839214324951, "lm_loss": 2.020839214324951, "ppl": 7.544653859223439, "gate_mean": 1.5362165868282318e-06, "lr": 5.1725911287091344e-05, "steps_per_second": 4.840075892427233 }, { "step": 14280, "loss": 2.0247907638549805, "lm_loss": 2.0247907638549805, "ppl": 7.574525914217159, "gate_mean": 1.1818483471870422e-06, "lr": 5.1617221371441466e-05, "steps_per_second": 4.840219281545869 }, { "step": 14290, "loss": 2.1536734104156494, "lm_loss": 2.1536734104156494, "ppl": 8.616452098219238, "gate_mean": 1.4137476682662964e-06, "lr": 5.150866099849909e-05, "steps_per_second": 4.840379814463344 }, { "step": 14300, "loss": 2.043280601501465, "lm_loss": 2.043280601501465, "ppl": 7.715880452755898, "gate_mean": 1.4705583453178406e-06, "lr": 5.1400230465064055e-05, "steps_per_second": 4.84046955202383 }, { "step": 14310, "loss": 2.2212774753570557, "lm_loss": 2.2212774753570557, "ppl": 9.219100520288789, "gate_mean": 1.6726553440093994e-06, "lr": 5.129193006758138e-05, "steps_per_second": 4.8406729794632515 }, { "step": 14320, "loss": 2.099118709564209, "lm_loss": 2.099118709564209, "ppl": 8.158976315416433, "gate_mean": 1.3825483620166779e-06, "lr": 5.118376010214017e-05, "steps_per_second": 4.840837330352041 }, { "step": 14330, "loss": 2.1355719566345215, "lm_loss": 2.1355719566345215, "ppl": 8.46188495427876, "gate_mean": 1.6666017472743988e-06, "lr": 5.107572086447295e-05, "steps_per_second": 4.841074387397104 }, { "step": 14340, "loss": 2.0638794898986816, "lm_loss": 2.0638794898986816, "ppl": 7.876467290025133, "gate_mean": 1.1846423149108887e-06, "lr": 5.096781264995491e-05, "steps_per_second": 4.841209888444857 }, { "step": 14350, "loss": 2.08375883102417, "lm_loss": 2.08375883102417, "ppl": 8.034612978112909, "gate_mean": 1.332256942987442e-06, "lr": 5.0860035753602984e-05, "steps_per_second": 4.841357535000717 }, { "step": 14360, "loss": 2.0923380851745605, "lm_loss": 2.0923380851745605, "ppl": 8.103840500490778, "gate_mean": 1.5827827155590057e-06, "lr": 5.075239047007511e-05, "steps_per_second": 4.841567806177689 }, { "step": 14370, "loss": 2.0797548294067383, "lm_loss": 2.0797548294067383, "ppl": 8.00250669445302, "gate_mean": 1.3024546205997467e-06, "lr": 5.064487709366934e-05, "steps_per_second": 4.841776195402371 }, { "step": 14380, "loss": 2.0722272396087646, "lm_loss": 2.0722272396087646, "ppl": 7.942493268314981, "gate_mean": 1.6167759895324707e-06, "lr": 5.053749591832315e-05, "steps_per_second": 4.842004583336289 }, { "step": 14390, "loss": 2.0561163425445557, "lm_loss": 2.0561163425445557, "ppl": 7.815557844223623, "gate_mean": 1.3173557817935944e-06, "lr": 5.043024723761261e-05, "steps_per_second": 4.842197206835192 }, { "step": 14400, "loss": 1.9953553676605225, "lm_loss": 1.9953553676605225, "ppl": 7.354816227376297, "gate_mean": 1.58604234457016e-06, "lr": 5.032313134475146e-05, "steps_per_second": 4.842266185507295 }, { "step": 14410, "loss": 2.0788822174072266, "lm_loss": 2.0788822174072266, "ppl": 7.9955266569604175, "gate_mean": 1.1478550732135773e-06, "lr": 5.0216148532590505e-05, "steps_per_second": 4.840241937578197 }, { "step": 14420, "loss": 1.9965615272521973, "lm_loss": 1.9965615272521973, "ppl": 7.3636926616359775, "gate_mean": 1.077074557542801e-06, "lr": 5.010929909361667e-05, "steps_per_second": 4.84041558498143 }, { "step": 14430, "loss": 2.011209487915039, "lm_loss": 2.011209487915039, "ppl": 7.4723496005563685, "gate_mean": 1.3327226042747498e-06, "lr": 5.000258331995221e-05, "steps_per_second": 4.840586212851608 }, { "step": 14440, "loss": 2.086223840713501, "lm_loss": 2.086223840713501, "ppl": 8.054442807273539, "gate_mean": 1.2032687664031982e-06, "lr": 4.9896001503354005e-05, "steps_per_second": 4.840776847903601 }, { "step": 14450, "loss": 1.9691112041473389, "lm_loss": 1.9691112041473389, "ppl": 7.164306052250952, "gate_mean": 1.5511177480220795e-06, "lr": 4.978955393521264e-05, "steps_per_second": 4.838363630127468 }, { "step": 14460, "loss": 2.109005928039551, "lm_loss": 2.109005928039551, "ppl": 8.240046013262196, "gate_mean": 1.3872049748897552e-06, "lr": 4.968324090655173e-05, "steps_per_second": 4.838440217130122 }, { "step": 14470, "loss": 2.132918119430542, "lm_loss": 2.132918119430542, "ppl": 8.439458260730472, "gate_mean": 1.1967495083808899e-06, "lr": 4.957706270802704e-05, "steps_per_second": 4.838656483852586 }, { "step": 14480, "loss": 1.9930051565170288, "lm_loss": 1.9930051565170288, "ppl": 7.337551152552948, "gate_mean": 1.169741153717041e-06, "lr": 4.947101962992567e-05, "steps_per_second": 4.838794910399861 }, { "step": 14490, "loss": 2.1218783855438232, "lm_loss": 2.1218783855438232, "ppl": 8.346801282625565, "gate_mean": 1.3243407011032104e-06, "lr": 4.93651119621654e-05, "steps_per_second": 4.83680842379566 }, { "step": 14500, "loss": 2.094906806945801, "lm_loss": 2.094906806945801, "ppl": 8.124683770835308, "gate_mean": 1.4249235391616821e-06, "lr": 4.925933999429369e-05, "steps_per_second": 4.834588799874707 }, { "step": 14510, "loss": 2.1384029388427734, "lm_loss": 2.1384029388427734, "ppl": 8.485874340773945, "gate_mean": 1.3336539268493652e-06, "lr": 4.915370401548711e-05, "steps_per_second": 4.832082489445842 }, { "step": 14520, "loss": 2.028787136077881, "lm_loss": 2.028787136077881, "ppl": 7.6048571061806465, "gate_mean": 1.2326054275035858e-06, "lr": 4.9048204314550394e-05, "steps_per_second": 4.832260323463187 }, { "step": 14530, "loss": 2.017171621322632, "lm_loss": 2.017171621322632, "ppl": 7.517033820015994, "gate_mean": 1.4291144907474518e-06, "lr": 4.894284117991564e-05, "steps_per_second": 4.832387897813155 }, { "step": 14540, "loss": 2.1008830070495605, "lm_loss": 2.1008830070495605, "ppl": 8.173383882692866, "gate_mean": 1.492910087108612e-06, "lr": 4.88376148996417e-05, "steps_per_second": 4.832564453667099 }, { "step": 14550, "loss": 2.073005437850952, "lm_loss": 2.073005437850952, "ppl": 7.9486765081961295, "gate_mean": 1.2172386050224304e-06, "lr": 4.873252576141318e-05, "steps_per_second": 4.832710501810908 }, { "step": 14560, "loss": 2.0208098888397217, "lm_loss": 2.0208098888397217, "ppl": 7.544432611832239, "gate_mean": 1.4500692486763e-06, "lr": 4.862757405253975e-05, "steps_per_second": 4.8329137642271185 }, { "step": 14570, "loss": 1.9473373889923096, "lm_loss": 1.9473373889923096, "ppl": 7.009997812500475, "gate_mean": 1.3569369912147522e-06, "lr": 4.852276005995541e-05, "steps_per_second": 4.832860114698998 }, { "step": 14580, "loss": 1.9643654823303223, "lm_loss": 1.9643654823303223, "ppl": 7.130386798047756, "gate_mean": 1.1487863957881927e-06, "lr": 4.841808407021758e-05, "steps_per_second": 4.833052952923113 }, { "step": 14590, "loss": 2.0067927837371826, "lm_loss": 2.0067927837371826, "ppl": 7.439419218267524, "gate_mean": 1.498498022556305e-06, "lr": 4.831354636950645e-05, "steps_per_second": 4.831087265840415 }, { "step": 14600, "loss": 2.0321764945983887, "lm_loss": 2.0321764945983887, "ppl": 7.630676424156257, "gate_mean": 1.2884847819805145e-06, "lr": 4.82091472436241e-05, "steps_per_second": 4.831261844529483 }, { "step": 14610, "loss": 2.0599563121795654, "lm_loss": 2.0599563121795654, "ppl": 7.845627044485641, "gate_mean": 1.5059486031532288e-06, "lr": 4.810488697799371e-05, "steps_per_second": 4.831474297498669 }, { "step": 14620, "loss": 1.9463447332382202, "lm_loss": 1.9463447332382202, "ppl": 7.003042750398057, "gate_mean": 1.3336539268493652e-06, "lr": 4.800076585765892e-05, "steps_per_second": 4.83165371967714 }, { "step": 14630, "loss": 2.0248684883117676, "lm_loss": 2.0248684883117676, "ppl": 7.575114663009056, "gate_mean": 1.4621764421463013e-06, "lr": 4.7896784167282856e-05, "steps_per_second": 4.831836196751321 }, { "step": 14640, "loss": 2.0568227767944336, "lm_loss": 2.0568227767944336, "ppl": 7.821080972600517, "gate_mean": 1.366250216960907e-06, "lr": 4.779294219114753e-05, "steps_per_second": 4.832078114436287 }, { "step": 14650, "loss": 2.0390543937683105, "lm_loss": 2.0390543937683105, "ppl": 7.683340348171946, "gate_mean": 1.4551915228366852e-06, "lr": 4.768924021315292e-05, "steps_per_second": 4.829881566469457 }, { "step": 14660, "loss": 1.8701982498168945, "lm_loss": 1.8701982498168945, "ppl": 6.489582830372977, "gate_mean": 1.419801265001297e-06, "lr": 4.7585678516816245e-05, "steps_per_second": 4.83001722935348 }, { "step": 14670, "loss": 2.029668092727661, "lm_loss": 2.029668092727661, "ppl": 7.6115596074920395, "gate_mean": 1.477077603340149e-06, "lr": 4.748225738527127e-05, "steps_per_second": 4.830148840933797 }, { "step": 14680, "loss": 2.113959312438965, "lm_loss": 2.113959312438965, "ppl": 8.280963384707023, "gate_mean": 1.6433186829090118e-06, "lr": 4.737897710126739e-05, "steps_per_second": 4.830289418273018 }, { "step": 14690, "loss": 1.9514447450637817, "lm_loss": 1.9514447450637817, "ppl": 7.0388495812587815, "gate_mean": 1.627020537853241e-06, "lr": 4.727583794716902e-05, "steps_per_second": 4.8304111541440165 }, { "step": 14700, "loss": 1.9299347400665283, "lm_loss": 1.9299347400665283, "ppl": 6.889060647271682, "gate_mean": 1.366250216960907e-06, "lr": 4.717284020495456e-05, "steps_per_second": 4.830596480374305 }, { "step": 14710, "loss": 1.9366179704666138, "lm_loss": 1.9366179704666138, "ppl": 6.935256022038959, "gate_mean": 1.6335397958755493e-06, "lr": 4.706998415621597e-05, "steps_per_second": 4.830574787658517 }, { "step": 14720, "loss": 1.9989700317382812, "lm_loss": 1.9989700317382812, "ppl": 7.381449523602719, "gate_mean": 1.3220123946666718e-06, "lr": 4.696727008215779e-05, "steps_per_second": 4.830716577149914 }, { "step": 14730, "loss": 1.960966944694519, "lm_loss": 1.960966944694519, "ppl": 7.106195041738581, "gate_mean": 1.4794059097766876e-06, "lr": 4.68646982635963e-05, "steps_per_second": 4.830857735948996 }, { "step": 14740, "loss": 1.9915826320648193, "lm_loss": 1.9915826320648193, "ppl": 7.327120727145595, "gate_mean": 1.5227124094963074e-06, "lr": 4.6762268980959004e-05, "steps_per_second": 4.831083321153807 }, { "step": 14750, "loss": 1.8929873704910278, "lm_loss": 1.8929873704910278, "ppl": 6.639172754033428, "gate_mean": 1.287553459405899e-06, "lr": 4.665998251428363e-05, "steps_per_second": 4.831309556436544 }, { "step": 14760, "loss": 1.9581862688064575, "lm_loss": 1.9581862688064575, "ppl": 7.086462464196063, "gate_mean": 1.2521632015705109e-06, "lr": 4.6557839143217425e-05, "steps_per_second": 4.831397820196741 }, { "step": 14770, "loss": 1.9255943298339844, "lm_loss": 1.9255943298339844, "ppl": 6.859224096222671, "gate_mean": 1.3499520719051361e-06, "lr": 4.645583914701654e-05, "steps_per_second": 4.831592179583939 }, { "step": 14780, "loss": 1.9288674592971802, "lm_loss": 1.9288674592971802, "ppl": 6.881712007552465, "gate_mean": 1.8202699720859528e-06, "lr": 4.635398280454501e-05, "steps_per_second": 4.831548279620437 }, { "step": 14790, "loss": 1.8576607704162598, "lm_loss": 1.8576607704162598, "ppl": 6.408727737966909, "gate_mean": 1.5951227396726608e-06, "lr": 4.625227039427422e-05, "steps_per_second": 4.8316876919863425 }, { "step": 14800, "loss": 1.979626178741455, "lm_loss": 1.979626178741455, "ppl": 7.240035999858832, "gate_mean": 1.2549571692943573e-06, "lr": 4.615070219428203e-05, "steps_per_second": 4.831933552402386 }, { "step": 14810, "loss": 1.9851770401000977, "lm_loss": 1.9851770401000977, "ppl": 7.2803361828121895, "gate_mean": 1.1427327990531921e-06, "lr": 4.604927848225194e-05, "steps_per_second": 4.83212457961577 }, { "step": 14820, "loss": 2.0135068893432617, "lm_loss": 2.0135068893432617, "ppl": 7.489536322040739, "gate_mean": 1.4193356037139893e-06, "lr": 4.594799953547261e-05, "steps_per_second": 4.832289467677834 }, { "step": 14830, "loss": 1.950475811958313, "lm_loss": 1.950475811958313, "ppl": 7.03203270995455, "gate_mean": 1.4975666999816895e-06, "lr": 4.584686563083675e-05, "steps_per_second": 4.832384906795204 }, { "step": 14840, "loss": 2.0461087226867676, "lm_loss": 2.0461087226867676, "ppl": 7.737732783682554, "gate_mean": 1.5366822481155396e-06, "lr": 4.574587704484058e-05, "steps_per_second": 4.832626428242439 }, { "step": 14850, "loss": 1.9599061012268066, "lm_loss": 1.9599061012268066, "ppl": 7.098660478351047, "gate_mean": 1.666136085987091e-06, "lr": 4.564503405358311e-05, "steps_per_second": 4.832832327397428 }, { "step": 14860, "loss": 1.9068608283996582, "lm_loss": 1.9068608283996582, "ppl": 6.731922932983766, "gate_mean": 1.3816170394420624e-06, "lr": 4.5544336932765145e-05, "steps_per_second": 4.832978759681551 }, { "step": 14870, "loss": 1.985358715057373, "lm_loss": 1.985358715057373, "ppl": 7.281658957731052, "gate_mean": 1.691281795501709e-06, "lr": 4.544378595768883e-05, "steps_per_second": 4.8331924825384895 }, { "step": 14880, "loss": 1.9630274772644043, "lm_loss": 1.9630274772644043, "ppl": 7.120852684158732, "gate_mean": 1.507345587015152e-06, "lr": 4.534338140325668e-05, "steps_per_second": 4.833422439352156 }, { "step": 14890, "loss": 2.0311617851257324, "lm_loss": 2.0311617851257324, "ppl": 7.622937431584456, "gate_mean": 1.7383135855197906e-06, "lr": 4.524312354397086e-05, "steps_per_second": 4.833506195229633 }, { "step": 14900, "loss": 1.993132472038269, "lm_loss": 1.993132472038269, "ppl": 7.338485396173158, "gate_mean": 1.2973323464393616e-06, "lr": 4.51430126539326e-05, "steps_per_second": 4.833670453594237 }, { "step": 14910, "loss": 1.9787453413009644, "lm_loss": 1.9787453413009644, "ppl": 7.2336615129351785, "gate_mean": 1.768115907907486e-06, "lr": 4.504304900684119e-05, "steps_per_second": 4.833821464482287 }, { "step": 14920, "loss": 1.9563241004943848, "lm_loss": 1.9563241004943848, "ppl": 7.073278557486983, "gate_mean": 1.7993152141571045e-06, "lr": 4.49432328759935e-05, "steps_per_second": 4.833954553551076 }, { "step": 14930, "loss": 2.0197014808654785, "lm_loss": 2.0197014808654785, "ppl": 7.536074935277478, "gate_mean": 1.3550743460655212e-06, "lr": 4.484356453428295e-05, "steps_per_second": 4.834180924361359 }, { "step": 14940, "loss": 1.9491868019104004, "lm_loss": 1.9491868019104004, "ppl": 7.022974188650922, "gate_mean": 1.4901161193847656e-06, "lr": 4.4744044254198996e-05, "steps_per_second": 4.834412180447688 }, { "step": 14950, "loss": 1.9838258028030396, "lm_loss": 1.9838258028030396, "ppl": 7.27050536440689, "gate_mean": 1.4486722648143768e-06, "lr": 4.4644672307826315e-05, "steps_per_second": 4.834631928115402 }, { "step": 14960, "loss": 1.9508308172225952, "lm_loss": 1.9508308172225952, "ppl": 7.034529561756733, "gate_mean": 1.4784745872020721e-06, "lr": 4.454544896684397e-05, "steps_per_second": 4.834854133044821 }, { "step": 14970, "loss": 1.9240503311157227, "lm_loss": 1.9240503311157227, "ppl": 6.8486416347656185, "gate_mean": 1.2996606528759003e-06, "lr": 4.4446374502524845e-05, "steps_per_second": 4.8350982664521265 }, { "step": 14980, "loss": 1.8836803436279297, "lm_loss": 1.8836803436279297, "ppl": 6.5776684499299005, "gate_mean": 1.6605481505393982e-06, "lr": 4.434744918573467e-05, "steps_per_second": 4.835259030544409 }, { "step": 14990, "loss": 1.953027606010437, "lm_loss": 1.953027606010437, "ppl": 7.0499999237631314, "gate_mean": 1.3867393136024475e-06, "lr": 4.42486732869315e-05, "steps_per_second": 4.8354671830578075 }, { "step": 15000, "loss": 1.9461950063705444, "lm_loss": 1.9461950063705444, "ppl": 7.001994285236502, "gate_mean": 1.5194527804851532e-06, "lr": 4.415004707616493e-05, "steps_per_second": 4.835593604329736 }, { "step": 15010, "loss": 1.838313341140747, "lm_loss": 1.838313341140747, "ppl": 6.285927100205842, "gate_mean": 1.5106052160263062e-06, "lr": 4.4051570823075184e-05, "steps_per_second": 4.827558270052728 }, { "step": 15020, "loss": 1.9474053382873535, "lm_loss": 1.9474053382873535, "ppl": 7.010474153093415, "gate_mean": 1.4754477888345718e-06, "lr": 4.3953244796892654e-05, "steps_per_second": 4.827785129709886 }, { "step": 15030, "loss": 1.9620301723480225, "lm_loss": 1.9620301723480225, "ppl": 7.113754562851954, "gate_mean": 1.5655532479286194e-06, "lr": 4.3855069266436844e-05, "steps_per_second": 4.827988434826972 }, { "step": 15040, "loss": 1.9296786785125732, "lm_loss": 1.9296786785125732, "ppl": 6.887296849527083, "gate_mean": 1.4682300388813019e-06, "lr": 4.375704450011596e-05, "steps_per_second": 4.825941250987592 }, { "step": 15050, "loss": 1.8507622480392456, "lm_loss": 1.8507622480392456, "ppl": 6.364669130630185, "gate_mean": 1.18231400847435e-06, "lr": 4.3659170765925993e-05, "steps_per_second": 4.826059811438119 }, { "step": 15060, "loss": 1.9524863958358765, "lm_loss": 1.9524863958358765, "ppl": 7.046185424389784, "gate_mean": 1.5129335224628448e-06, "lr": 4.356144833144993e-05, "steps_per_second": 4.8262858269063305 }, { "step": 15070, "loss": 1.9544742107391357, "lm_loss": 1.9544742107391357, "ppl": 7.0602058671933845, "gate_mean": 1.6526319086551666e-06, "lr": 4.346387746385728e-05, "steps_per_second": 4.826405673680305 }, { "step": 15080, "loss": 1.8330304622650146, "lm_loss": 1.8330304622650146, "ppl": 6.252806870822666, "gate_mean": 1.4933757483959198e-06, "lr": 4.3366458429902944e-05, "steps_per_second": 4.826597561148587 }, { "step": 15090, "loss": 1.980940341949463, "lm_loss": 1.980940341949463, "ppl": 7.249556843395413, "gate_mean": 1.5147961676120758e-06, "lr": 4.326919149592689e-05, "steps_per_second": 4.826747859199654 }, { "step": 15100, "loss": 1.8876830339431763, "lm_loss": 1.8876830339431763, "ppl": 6.604049582260615, "gate_mean": 1.1944212019443512e-06, "lr": 4.3172076927853254e-05, "steps_per_second": 4.826894050032911 }, { "step": 15110, "loss": 1.9421319961547852, "lm_loss": 1.9421319961547852, "ppl": 6.973602827373861, "gate_mean": 1.552049070596695e-06, "lr": 4.30751149911895e-05, "steps_per_second": 4.827075623895428 }, { "step": 15120, "loss": 1.9251282215118408, "lm_loss": 1.9251282215118408, "ppl": 6.856027699779335, "gate_mean": 1.417938619852066e-06, "lr": 4.297830595102588e-05, "steps_per_second": 4.827264158029354 }, { "step": 15130, "loss": 1.8352457284927368, "lm_loss": 1.8352457284927368, "ppl": 6.266673856573767, "gate_mean": 1.6158446669578552e-06, "lr": 4.2881650072034585e-05, "steps_per_second": 4.827444211526048 }, { "step": 15140, "loss": 1.83998441696167, "lm_loss": 1.83998441696167, "ppl": 6.296440142594082, "gate_mean": 1.8058344721794128e-06, "lr": 4.2785147618469116e-05, "steps_per_second": 4.827635364780675 }, { "step": 15150, "loss": 1.9259393215179443, "lm_loss": 1.9259393215179443, "ppl": 6.861590879731125, "gate_mean": 1.4170072972774506e-06, "lr": 4.268879885416351e-05, "steps_per_second": 4.8278002879373485 }, { "step": 15160, "loss": 1.8376818895339966, "lm_loss": 1.8376818895339966, "ppl": 6.281959094372178, "gate_mean": 1.1757947504520416e-06, "lr": 4.2592604042531625e-05, "steps_per_second": 4.8280200169962555 }, { "step": 15170, "loss": 1.906483769416809, "lm_loss": 1.906483769416809, "ppl": 6.7293850794603385, "gate_mean": 1.3601966202259064e-06, "lr": 4.2496563446566355e-05, "steps_per_second": 4.8282454600593345 }, { "step": 15180, "loss": 1.9589335918426514, "lm_loss": 1.9589335918426514, "ppl": 7.091760320199029, "gate_mean": 1.4118850231170654e-06, "lr": 4.2400677328839026e-05, "steps_per_second": 4.828486899937047 }, { "step": 15190, "loss": 1.8715746402740479, "lm_loss": 1.8715746402740479, "ppl": 6.498521180170186, "gate_mean": 1.1865049600601196e-06, "lr": 4.2304945951498644e-05, "steps_per_second": 4.828631899050415 }, { "step": 15200, "loss": 1.9300415515899658, "lm_loss": 1.9300415515899658, "ppl": 6.889796517633488, "gate_mean": 1.239590346813202e-06, "lr": 4.220936957627118e-05, "steps_per_second": 4.828848976116589 }, { "step": 15210, "loss": 1.9297837018966675, "lm_loss": 1.9297837018966675, "ppl": 6.888020214733949, "gate_mean": 1.321546733379364e-06, "lr": 4.211394846445871e-05, "steps_per_second": 4.828997935564176 }, { "step": 15220, "loss": 1.9624311923980713, "lm_loss": 1.9624311923980713, "ppl": 7.116607893145873, "gate_mean": 1.63959339261055e-06, "lr": 4.201868287693896e-05, "steps_per_second": 4.829057336406908 }, { "step": 15230, "loss": 1.8711018562316895, "lm_loss": 1.8711018562316895, "ppl": 6.495449509232991, "gate_mean": 1.618172973394394e-06, "lr": 4.192357307416438e-05, "steps_per_second": 4.829242919285142 }, { "step": 15240, "loss": 1.8989543914794922, "lm_loss": 1.8989543914794922, "ppl": 6.678907267644461, "gate_mean": 1.5283003449440002e-06, "lr": 4.182861931616156e-05, "steps_per_second": 4.829430597850635 }, { "step": 15250, "loss": 1.9187281131744385, "lm_loss": 1.9187281131744385, "ppl": 6.812288496856596, "gate_mean": 1.1012889444828033e-06, "lr": 4.1733821862530487e-05, "steps_per_second": 4.829622115250137 }, { "step": 15260, "loss": 1.902948260307312, "lm_loss": 1.902948260307312, "ppl": 6.705635285755605, "gate_mean": 1.7266720533370972e-06, "lr": 4.16391809724437e-05, "steps_per_second": 4.82978360882621 }, { "step": 15270, "loss": 1.879467248916626, "lm_loss": 1.879467248916626, "ppl": 6.550014405230839, "gate_mean": 1.6861595213413239e-06, "lr": 4.154469690464584e-05, "steps_per_second": 4.829988507554329 }, { "step": 15280, "loss": 1.9889018535614014, "lm_loss": 1.9889018535614014, "ppl": 7.307504644342565, "gate_mean": 1.285690814256668e-06, "lr": 4.145036991745268e-05, "steps_per_second": 4.83018603488558 }, { "step": 15290, "loss": 1.8701848983764648, "lm_loss": 1.8701848983764648, "ppl": 6.489496185672821, "gate_mean": 1.3527460396289825e-06, "lr": 4.1356200268750654e-05, "steps_per_second": 4.830114237778307 }, { "step": 15300, "loss": 1.9083441495895386, "lm_loss": 1.9083441495895386, "ppl": 6.741915946511006, "gate_mean": 1.4281831681728363e-06, "lr": 4.126218821599601e-05, "steps_per_second": 4.830295281575958 }, { "step": 15310, "loss": 1.9847739934921265, "lm_loss": 1.9847739934921265, "ppl": 7.277402459262194, "gate_mean": 1.1865049600601196e-06, "lr": 4.116833401621402e-05, "steps_per_second": 4.830474218724683 }, { "step": 15320, "loss": 1.8847185373306274, "lm_loss": 1.8847185373306274, "ppl": 6.584500889977473, "gate_mean": 1.4370307326316833e-06, "lr": 4.107463792599857e-05, "steps_per_second": 4.830680497373727 }, { "step": 15330, "loss": 1.8925155401229858, "lm_loss": 1.8925155401229858, "ppl": 6.6360409296124345, "gate_mean": 1.7755664885044098e-06, "lr": 4.098110020151112e-05, "steps_per_second": 4.830900365780406 }, { "step": 15340, "loss": 1.9538389444351196, "lm_loss": 1.9538389444351196, "ppl": 7.055722180624825, "gate_mean": 1.3653188943862915e-06, "lr": 4.088772109848031e-05, "steps_per_second": 4.831099168454331 }, { "step": 15350, "loss": 1.8977073431015015, "lm_loss": 1.8977073431015015, "ppl": 6.670583538294893, "gate_mean": 1.6237609088420868e-06, "lr": 4.079450087220099e-05, "steps_per_second": 4.831302157985395 }, { "step": 15360, "loss": 1.8710347414016724, "lm_loss": 1.8710347414016724, "ppl": 6.495013582872019, "gate_mean": 1.6959384083747864e-06, "lr": 4.07014397775337e-05, "steps_per_second": 4.83133983597056 }, { "step": 15370, "loss": 1.946516990661621, "lm_loss": 1.946516990661621, "ppl": 7.0042491804034865, "gate_mean": 1.300126314163208e-06, "lr": 4.060853806890395e-05, "steps_per_second": 4.83150920396343 }, { "step": 15380, "loss": 1.924712061882019, "lm_loss": 1.924712061882019, "ppl": 6.853175091441132, "gate_mean": 1.5040859580039978e-06, "lr": 4.051579600030143e-05, "steps_per_second": 4.83169572615112 }, { "step": 15390, "loss": 1.9170863628387451, "lm_loss": 1.9170863628387451, "ppl": 6.801113595638972, "gate_mean": 1.50362029671669e-06, "lr": 4.0423213825279445e-05, "steps_per_second": 4.831747687337567 }, { "step": 15400, "loss": 1.9271061420440674, "lm_loss": 1.9271061420440674, "ppl": 6.869601797554243, "gate_mean": 1.3085082173347473e-06, "lr": 4.033079179695413e-05, "steps_per_second": 4.831936250049813 }, { "step": 15410, "loss": 1.90041184425354, "lm_loss": 1.90041184425354, "ppl": 6.688648556579243, "gate_mean": 1.689419150352478e-06, "lr": 4.023853016800372e-05, "steps_per_second": 4.832077621131829 }, { "step": 15420, "loss": 1.9295375347137451, "lm_loss": 1.9295375347137451, "ppl": 6.886324818885747, "gate_mean": 1.3997778296470642e-06, "lr": 4.014642919066807e-05, "steps_per_second": 4.832166878859639 }, { "step": 15430, "loss": 1.9203506708145142, "lm_loss": 1.9203506708145142, "ppl": 6.82335079978867, "gate_mean": 1.4025717973709106e-06, "lr": 4.005448911674766e-05, "steps_per_second": 4.8323576044621435 }, { "step": 15440, "loss": 1.825430154800415, "lm_loss": 1.825430154800415, "ppl": 6.205463755101973, "gate_mean": 1.8696300685405731e-06, "lr": 3.9962710197603195e-05, "steps_per_second": 4.832560910042454 }, { "step": 15450, "loss": 2.080059051513672, "lm_loss": 2.080059051513672, "ppl": 8.004941604258272, "gate_mean": 1.423526555299759e-06, "lr": 3.987109268415474e-05, "steps_per_second": 4.832702860136471 }, { "step": 15460, "loss": 1.852264642715454, "lm_loss": 1.852264642715454, "ppl": 6.374238562379528, "gate_mean": 1.3527460396289825e-06, "lr": 3.977963682688101e-05, "steps_per_second": 4.83291327262917 }, { "step": 15470, "loss": 1.8345458507537842, "lm_loss": 1.8345458507537842, "ppl": 6.262289485485103, "gate_mean": 1.5622936189174652e-06, "lr": 3.96883428758189e-05, "steps_per_second": 4.832999368695301 }, { "step": 15480, "loss": 1.94538414478302, "lm_loss": 1.94538414478302, "ppl": 6.996318938305909, "gate_mean": 1.2950040400028229e-06, "lr": 3.959721108056256e-05, "steps_per_second": 4.833109208719103 }, { "step": 15490, "loss": 1.7729941606521606, "lm_loss": 1.7729941606521606, "ppl": 5.888457981890755, "gate_mean": 1.7718411982059479e-06, "lr": 3.950624169026281e-05, "steps_per_second": 4.833277249548782 }, { "step": 15500, "loss": 1.8737645149230957, "lm_loss": 1.8737645149230957, "ppl": 6.512767720334177, "gate_mean": 1.5255063772201538e-06, "lr": 3.941543495362655e-05, "steps_per_second": 4.833375165003317 }, { "step": 15510, "loss": 1.9108216762542725, "lm_loss": 1.9108216762542725, "ppl": 6.758639931544803, "gate_mean": 1.5334226191043854e-06, "lr": 3.932479111891586e-05, "steps_per_second": 4.833438726505767 }, { "step": 15520, "loss": 1.9162895679473877, "lm_loss": 1.9162895679473877, "ppl": 6.795696661449763, "gate_mean": 1.2493692338466644e-06, "lr": 3.923431043394759e-05, "steps_per_second": 4.833623209997979 }, { "step": 15530, "loss": 1.8576923608779907, "lm_loss": 1.8576923608779907, "ppl": 6.40893019583311, "gate_mean": 1.475214958190918e-06, "lr": 3.914399314609244e-05, "steps_per_second": 4.83382596815512 }, { "step": 15540, "loss": 1.7940547466278076, "lm_loss": 1.7940547466278076, "ppl": 6.013787481392689, "gate_mean": 1.9073486328125e-06, "lr": 3.9053839502274406e-05, "steps_per_second": 4.833996216911023 }, { "step": 15550, "loss": 1.9240024089813232, "lm_loss": 1.9240024089813232, "ppl": 6.848313441104677, "gate_mean": 1.7061829566955566e-06, "lr": 3.896384974897016e-05, "steps_per_second": 4.834175916797729 }, { "step": 15560, "loss": 1.8354707956314087, "lm_loss": 1.8354707956314087, "ppl": 6.268084437659426, "gate_mean": 1.4156103134155273e-06, "lr": 3.887402413220821e-05, "steps_per_second": 4.834315564021589 }, { "step": 15570, "loss": 1.8469302654266357, "lm_loss": 1.8469302654266357, "ppl": 6.3403264992441475, "gate_mean": 1.6703270375728607e-06, "lr": 3.8784362897568393e-05, "steps_per_second": 4.834487492301389 }, { "step": 15580, "loss": 1.8177592754364014, "lm_loss": 1.8177592754364014, "ppl": 6.158044497462911, "gate_mean": 1.3676472008228302e-06, "lr": 3.8694866290181095e-05, "steps_per_second": 4.834669546069444 }, { "step": 15590, "loss": 1.9496536254882812, "lm_loss": 1.9496536254882812, "ppl": 7.026253443946325, "gate_mean": 1.4943070709705353e-06, "lr": 3.8605534554726574e-05, "steps_per_second": 4.83485161126506 }, { "step": 15600, "loss": 1.873728632926941, "lm_loss": 1.873728632926941, "ppl": 6.512534033420481, "gate_mean": 1.3147946447134018e-06, "lr": 3.851636793543447e-05, "steps_per_second": 4.835021060410963 }, { "step": 15610, "loss": 1.9071708917617798, "lm_loss": 1.9071708917617798, "ppl": 6.734010579276499, "gate_mean": 1.5692785382270813e-06, "lr": 3.8427366676082824e-05, "steps_per_second": 4.835179746933357 }, { "step": 15620, "loss": 1.8264821767807007, "lm_loss": 1.8264821767807007, "ppl": 6.211995474523992, "gate_mean": 1.5399418771266937e-06, "lr": 3.833853101999777e-05, "steps_per_second": 4.8353448838547255 }, { "step": 15630, "loss": 1.823121190071106, "lm_loss": 1.823121190071106, "ppl": 6.191152087089104, "gate_mean": 1.4011748135089874e-06, "lr": 3.824986121005253e-05, "steps_per_second": 4.835563984054613 }, { "step": 15640, "loss": 1.8521339893341064, "lm_loss": 1.8521339893341064, "ppl": 6.373405800960569, "gate_mean": 1.5953555703163147e-06, "lr": 3.8161357488666995e-05, "steps_per_second": 4.83569886327811 }, { "step": 15650, "loss": 1.9335975646972656, "lm_loss": 1.9335975646972656, "ppl": 6.914340337565999, "gate_mean": 1.2279488146305084e-06, "lr": 3.8073020097807e-05, "steps_per_second": 4.83582801641229 }, { "step": 15660, "loss": 1.8131943941116333, "lm_loss": 1.8131943941116333, "ppl": 6.129997818722725, "gate_mean": 1.441221684217453e-06, "lr": 3.798484927898352e-05, "steps_per_second": 4.835996791701801 }, { "step": 15670, "loss": 1.8608293533325195, "lm_loss": 1.8608293533325195, "ppl": 6.4290665287482485, "gate_mean": 1.3862736523151398e-06, "lr": 3.789684527325229e-05, "steps_per_second": 4.83620274693327 }, { "step": 15680, "loss": 1.7778701782226562, "lm_loss": 1.7778701782226562, "ppl": 5.917240321042993, "gate_mean": 1.242849975824356e-06, "lr": 3.780900832121286e-05, "steps_per_second": 4.836421090552411 }, { "step": 15690, "loss": 1.8555407524108887, "lm_loss": 1.8555407524108887, "ppl": 6.395155511536437, "gate_mean": 1.582317054271698e-06, "lr": 3.772133866300809e-05, "steps_per_second": 4.836599880629157 }, { "step": 15700, "loss": 1.818112850189209, "lm_loss": 1.818112850189209, "ppl": 6.160222211493542, "gate_mean": 1.6130506992340088e-06, "lr": 3.763383653832352e-05, "steps_per_second": 4.836831051970413 }, { "step": 15710, "loss": 1.8246314525604248, "lm_loss": 1.8246314525604248, "ppl": 6.200509416084805, "gate_mean": 1.5259720385074615e-06, "lr": 3.7546502186386574e-05, "steps_per_second": 4.837029911630452 }, { "step": 15720, "loss": 1.893065094947815, "lm_loss": 1.893065094947815, "ppl": 6.639688800183695, "gate_mean": 1.521781086921692e-06, "lr": 3.745933584596606e-05, "steps_per_second": 4.837222563416462 }, { "step": 15730, "loss": 1.802348256111145, "lm_loss": 1.802348256111145, "ppl": 6.063870278911312, "gate_mean": 1.407228410243988e-06, "lr": 3.737233775537143e-05, "steps_per_second": 4.8374285145659215 }, { "step": 15740, "loss": 1.8642168045043945, "lm_loss": 1.8642168045043945, "ppl": 6.4508816055885765, "gate_mean": 1.5208497643470764e-06, "lr": 3.728550815245211e-05, "steps_per_second": 4.837638962454478 }, { "step": 15750, "loss": 2.017054557800293, "lm_loss": 2.017054557800293, "ppl": 7.516153901063694, "gate_mean": 1.843087375164032e-06, "lr": 3.7198847274596964e-05, "steps_per_second": 4.837784871283547 }, { "step": 15760, "loss": 1.8614813089370728, "lm_loss": 1.8614813089370728, "ppl": 6.433259361325548, "gate_mean": 2.239830791950226e-06, "lr": 3.711235535873349e-05, "steps_per_second": 4.837972576557041 }, { "step": 15770, "loss": 1.8933486938476562, "lm_loss": 1.8933486938476562, "ppl": 6.641572075657519, "gate_mean": 1.6316771507263184e-06, "lr": 3.702603264132727e-05, "steps_per_second": 4.838193863584967 }, { "step": 15780, "loss": 1.8420170545578003, "lm_loss": 1.8420170545578003, "ppl": 6.309251539602332, "gate_mean": 1.405365765094757e-06, "lr": 3.6939879358381366e-05, "steps_per_second": 4.8383995930807435 }, { "step": 15790, "loss": 1.839241623878479, "lm_loss": 1.839241623878479, "ppl": 6.2917649269813065, "gate_mean": 1.6759149730205536e-06, "lr": 3.6853895745435515e-05, "steps_per_second": 4.8385969219386284 }, { "step": 15800, "loss": 1.8180028200149536, "lm_loss": 1.8180028200149536, "ppl": 6.159544438458586, "gate_mean": 1.5757977962493896e-06, "lr": 3.676808203756567e-05, "steps_per_second": 4.838765493382274 }, { "step": 15810, "loss": 1.7784559726715088, "lm_loss": 1.7784559726715088, "ppl": 5.920707623039566, "gate_mean": 1.4584511518478394e-06, "lr": 3.6682438469383217e-05, "steps_per_second": 4.838926277495141 }, { "step": 15820, "loss": 1.872136116027832, "lm_loss": 1.872136116027832, "ppl": 6.502170966785765, "gate_mean": 1.6293488442897797e-06, "lr": 3.6596965275034376e-05, "steps_per_second": 4.839097344371896 }, { "step": 15830, "loss": 1.8463892936706543, "lm_loss": 1.8463892936706543, "ppl": 6.336897489266756, "gate_mean": 1.3066455721855164e-06, "lr": 3.651166268819964e-05, "steps_per_second": 4.839328252338323 }, { "step": 15840, "loss": 1.8309518098831177, "lm_loss": 1.8309518098831177, "ppl": 6.239822958122688, "gate_mean": 1.4808028936386108e-06, "lr": 3.6426530942092985e-05, "steps_per_second": 4.839551894796057 }, { "step": 15850, "loss": 1.782643437385559, "lm_loss": 1.782643437385559, "ppl": 5.945552359217217, "gate_mean": 1.650303602218628e-06, "lr": 3.634157026946139e-05, "steps_per_second": 4.839772294898 }, { "step": 15860, "loss": 1.7884379625320435, "lm_loss": 1.7884379625320435, "ppl": 5.980104020430321, "gate_mean": 1.498498022556305e-06, "lr": 3.625678090258407e-05, "steps_per_second": 4.839958762401444 }, { "step": 15870, "loss": 1.8381831645965576, "lm_loss": 1.8381831645965576, "ppl": 6.285108873197048, "gate_mean": 1.5166588127613068e-06, "lr": 3.6172163073271905e-05, "steps_per_second": 4.840158657527479 }, { "step": 15880, "loss": 1.8206713199615479, "lm_loss": 1.8206713199615479, "ppl": 6.176003132709308, "gate_mean": 1.3951212167739868e-06, "lr": 3.608771701286685e-05, "steps_per_second": 4.840379514790507 }, { "step": 15890, "loss": 1.698811411857605, "lm_loss": 1.698811411857605, "ppl": 5.467444987870877, "gate_mean": 1.3862736523151398e-06, "lr": 3.600344295224113e-05, "steps_per_second": 4.840589435556677 }, { "step": 15900, "loss": 1.7125389575958252, "lm_loss": 1.7125389575958252, "ppl": 5.543017112144081, "gate_mean": 1.3159587979316711e-06, "lr": 3.591934112179691e-05, "steps_per_second": 4.8407487480198 }, { "step": 15910, "loss": 1.765621542930603, "lm_loss": 1.765621542930603, "ppl": 5.845204274667885, "gate_mean": 1.405365765094757e-06, "lr": 3.5835411751465326e-05, "steps_per_second": 4.840971948671529 }, { "step": 15920, "loss": 1.7512792348861694, "lm_loss": 1.7512792348861694, "ppl": 5.7619688750497895, "gate_mean": 1.2987293303012848e-06, "lr": 3.5751655070706064e-05, "steps_per_second": 4.841130734214343 }, { "step": 15930, "loss": 1.8095377683639526, "lm_loss": 1.8095377683639526, "ppl": 6.107623642789712, "gate_mean": 1.5897676348686218e-06, "lr": 3.5668071308506725e-05, "steps_per_second": 4.841348135735904 }, { "step": 15940, "loss": 1.7085412740707397, "lm_loss": 1.7085412740707397, "ppl": 5.520902117782414, "gate_mean": 1.4263205230236053e-06, "lr": 3.558466069338208e-05, "steps_per_second": 4.841509739340262 }, { "step": 15950, "loss": 1.7812426090240479, "lm_loss": 1.7812426090240479, "ppl": 5.937229491663047, "gate_mean": 1.5050172805786133e-06, "lr": 3.5501423453373614e-05, "steps_per_second": 4.841745066932438 }, { "step": 15960, "loss": 1.8872932195663452, "lm_loss": 1.8872932195663452, "ppl": 6.60147573048295, "gate_mean": 1.646578311920166e-06, "lr": 3.541835981604874e-05, "steps_per_second": 4.841879593510513 }, { "step": 15970, "loss": 1.7915349006652832, "lm_loss": 1.7915349006652832, "ppl": 5.998652739905163, "gate_mean": 1.6544945538043976e-06, "lr": 3.5335470008500245e-05, "steps_per_second": 4.8420868309459735 }, { "step": 15980, "loss": 1.875974178314209, "lm_loss": 1.875974178314209, "ppl": 6.527174656117958, "gate_mean": 1.46450474858284e-06, "lr": 3.525275425734575e-05, "steps_per_second": 4.842246245170971 }, { "step": 15990, "loss": 1.8004881143569946, "lm_loss": 1.8004881143569946, "ppl": 6.0526011049936255, "gate_mean": 1.5622936189174652e-06, "lr": 3.517021278872691e-05, "steps_per_second": 4.8424169070433125 }, { "step": 16000, "loss": 1.8477652072906494, "lm_loss": 1.8477652072906494, "ppl": 6.345622513894387, "gate_mean": 1.62515789270401e-06, "lr": 3.508784582830899e-05, "steps_per_second": 4.8426140717476915 }, { "step": 16010, "loss": 1.8059216737747192, "lm_loss": 1.8059216737747192, "ppl": 6.0855777819638615, "gate_mean": 1.4291144907474518e-06, "lr": 3.50056536012801e-05, "steps_per_second": 4.830016368448227 }, { "step": 16020, "loss": 1.8646116256713867, "lm_loss": 1.8646116256713867, "ppl": 6.453429053052221, "gate_mean": 1.3937242329120636e-06, "lr": 3.492363633235061e-05, "steps_per_second": 4.830038253281849 }, { "step": 16030, "loss": 1.6352097988128662, "lm_loss": 1.6352097988128662, "ppl": 5.130534264121416, "gate_mean": 1.671724021434784e-06, "lr": 3.484179424575268e-05, "steps_per_second": 4.830142596135659 }, { "step": 16040, "loss": 1.813406229019165, "lm_loss": 1.813406229019165, "ppl": 6.131296503792383, "gate_mean": 1.4230608940124512e-06, "lr": 3.47601275652394e-05, "steps_per_second": 4.830175929275387 }, { "step": 16050, "loss": 1.6933045387268066, "lm_loss": 1.6933045387268066, "ppl": 5.437419211921686, "gate_mean": 1.3848766684532166e-06, "lr": 3.4678636514084335e-05, "steps_per_second": 4.830132802116763 }, { "step": 16060, "loss": 1.896049976348877, "lm_loss": 1.896049976348877, "ppl": 6.659537091453374, "gate_mean": 1.3317912817001343e-06, "lr": 3.459732131508097e-05, "steps_per_second": 4.830273191247502 }, { "step": 16070, "loss": 1.8360891342163086, "lm_loss": 1.8360891342163086, "ppl": 6.271961434645529, "gate_mean": 1.3010576367378235e-06, "lr": 3.451618219054189e-05, "steps_per_second": 4.830432837630741 }, { "step": 16080, "loss": 1.6337462663650513, "lm_loss": 1.6337462663650513, "ppl": 5.123031052687224, "gate_mean": 1.5343539416790009e-06, "lr": 3.443521936229841e-05, "steps_per_second": 4.830543388660057 }, { "step": 16090, "loss": 1.8525612354278564, "lm_loss": 1.8525612354278564, "ppl": 6.376129395474041, "gate_mean": 1.307111233472824e-06, "lr": 3.43544330516998e-05, "steps_per_second": 4.830719666259254 }, { "step": 16100, "loss": 1.7865092754364014, "lm_loss": 1.7865092754364014, "ppl": 5.96858138632523, "gate_mean": 1.612585037946701e-06, "lr": 3.427382347961271e-05, "steps_per_second": 4.8307428882619625 }, { "step": 16110, "loss": 1.7844172716140747, "lm_loss": 1.7844172716140747, "ppl": 5.956108142836772, "gate_mean": 1.7452985048294067e-06, "lr": 3.4193390866420694e-05, "steps_per_second": 4.830905345604083 }, { "step": 16120, "loss": 1.8596265316009521, "lm_loss": 1.8596265316009521, "ppl": 6.42133815667249, "gate_mean": 1.1050142347812653e-06, "lr": 3.41131354320234e-05, "steps_per_second": 4.8311393067223305 }, { "step": 16130, "loss": 1.7467010021209717, "lm_loss": 1.7467010021209717, "ppl": 5.735649534359511, "gate_mean": 1.4766119420528412e-06, "lr": 3.403305739583615e-05, "steps_per_second": 4.831269365553367 }, { "step": 16140, "loss": 1.808880090713501, "lm_loss": 1.808880090713501, "ppl": 6.103608115828387, "gate_mean": 1.5604309737682343e-06, "lr": 3.395315697678922e-05, "steps_per_second": 4.8315058136193105 }, { "step": 16150, "loss": 1.8251368999481201, "lm_loss": 1.8251368999481201, "ppl": 6.2036442395489715, "gate_mean": 1.3718381524085999e-06, "lr": 3.387343439332729e-05, "steps_per_second": 4.831639325064391 }, { "step": 16160, "loss": 1.8332232236862183, "lm_loss": 1.8332232236862183, "ppl": 6.254012286936726, "gate_mean": 1.4351680874824524e-06, "lr": 3.3793889863408905e-05, "steps_per_second": 4.831826514979833 }, { "step": 16170, "loss": 1.8535151481628418, "lm_loss": 1.8535151481628418, "ppl": 6.382214568404842, "gate_mean": 1.4007091522216797e-06, "lr": 3.371452360450572e-05, "steps_per_second": 4.831909253725807 }, { "step": 16180, "loss": 1.8390578031539917, "lm_loss": 1.8390578031539917, "ppl": 6.290608476487165, "gate_mean": 1.3727694749832153e-06, "lr": 3.363533583360208e-05, "steps_per_second": 4.832172288217072 }, { "step": 16190, "loss": 1.8137686252593994, "lm_loss": 1.8137686252593994, "ppl": 6.133518865256517, "gate_mean": 1.5203841030597687e-06, "lr": 3.355632676719435e-05, "steps_per_second": 4.832363776930247 }, { "step": 16200, "loss": 1.7865668535232544, "lm_loss": 1.7865668535232544, "ppl": 5.9689250557164995, "gate_mean": 1.3727694749832153e-06, "lr": 3.347749662129025e-05, "steps_per_second": 4.83251551962941 }, { "step": 16210, "loss": 1.7014899253845215, "lm_loss": 1.7014899253845215, "ppl": 5.482109243665003, "gate_mean": 1.037493348121643e-06, "lr": 3.339884561140845e-05, "steps_per_second": 4.830696347982817 }, { "step": 16220, "loss": 1.7898030281066895, "lm_loss": 1.7898030281066895, "ppl": 5.988272828772457, "gate_mean": 1.1320225894451141e-06, "lr": 3.3320373952577745e-05, "steps_per_second": 4.830856937405208 }, { "step": 16230, "loss": 1.8108713626861572, "lm_loss": 1.8108713626861572, "ppl": 6.115774168541374, "gate_mean": 1.437496393918991e-06, "lr": 3.32420818593367e-05, "steps_per_second": 4.830941064132641 }, { "step": 16240, "loss": 1.8248209953308105, "lm_loss": 1.8248209953308105, "ppl": 6.201684789205553, "gate_mean": 1.3955868780612946e-06, "lr": 3.31639695457329e-05, "steps_per_second": 4.831108246221108 }, { "step": 16250, "loss": 1.7947458028793335, "lm_loss": 1.7947458028793335, "ppl": 6.017944783126284, "gate_mean": 1.4561228454113007e-06, "lr": 3.3086037225322395e-05, "steps_per_second": 4.829286035227824 }, { "step": 16260, "loss": 1.8498077392578125, "lm_loss": 1.8498077392578125, "ppl": 6.358596896515503, "gate_mean": 1.5213154256343842e-06, "lr": 3.300828511116922e-05, "steps_per_second": 4.829409438557754 }, { "step": 16270, "loss": 1.8543504476547241, "lm_loss": 1.8543504476547241, "ppl": 6.387547856127079, "gate_mean": 1.4514662325382233e-06, "lr": 3.2930713415844634e-05, "steps_per_second": 4.829609638665684 }, { "step": 16280, "loss": 1.8643313646316528, "lm_loss": 1.8643313646316528, "ppl": 6.451620661738616, "gate_mean": 1.2558884918689728e-06, "lr": 3.285332235142675e-05, "steps_per_second": 4.82964683750735 }, { "step": 16290, "loss": 1.9228112697601318, "lm_loss": 1.9228112697601318, "ppl": 6.840161002674445, "gate_mean": 1.618172973394394e-06, "lr": 3.277611212949975e-05, "steps_per_second": 4.829632126139888 }, { "step": 16300, "loss": 1.940332293510437, "lm_loss": 1.940332293510437, "ppl": 6.961063702657202, "gate_mean": 1.1944212019443512e-06, "lr": 3.2699082961153416e-05, "steps_per_second": 4.827425610675098 }, { "step": 16310, "loss": 1.9723169803619385, "lm_loss": 1.9723169803619385, "ppl": 7.1873100673485695, "gate_mean": 1.314328983426094e-06, "lr": 3.262223505698259e-05, "steps_per_second": 4.827603447052502 }, { "step": 16320, "loss": 1.985082745552063, "lm_loss": 1.985082745552063, "ppl": 7.279649719167689, "gate_mean": 1.4477409422397614e-06, "lr": 3.254556862708649e-05, "steps_per_second": 4.823726130741017 }, { "step": 16330, "loss": 2.0167267322540283, "lm_loss": 2.0167267322540283, "ppl": 7.513690317640123, "gate_mean": 1.4523975551128387e-06, "lr": 3.246908388106819e-05, "steps_per_second": 4.823652662189324 }, { "step": 16340, "loss": 2.048942804336548, "lm_loss": 2.048942804336548, "ppl": 7.759693254360268, "gate_mean": 7.534399628639221e-07, "lr": 3.23927810280341e-05, "steps_per_second": 4.823641018754676 }, { "step": 16350, "loss": 2.0056092739105225, "lm_loss": 2.0056092739105225, "ppl": 7.4306198006437825, "gate_mean": 1.4025717973709106e-06, "lr": 3.231666027659327e-05, "steps_per_second": 4.82376871771766 }, { "step": 16360, "loss": 2.023585796356201, "lm_loss": 2.023585796356201, "ppl": 7.5654043533677156, "gate_mean": 1.114560291171074e-06, "lr": 3.2240721834856966e-05, "steps_per_second": 4.823936092141493 }, { "step": 16370, "loss": 1.9587764739990234, "lm_loss": 1.9587764739990234, "ppl": 7.0906461656390665, "gate_mean": 1.3117678463459015e-06, "lr": 3.216496591043797e-05, "steps_per_second": 4.824098225482488 }, { "step": 16380, "loss": 2.000910758972168, "lm_loss": 2.000910758972168, "ppl": 7.395788813543335, "gate_mean": 1.0351650416851044e-06, "lr": 3.2089392710450067e-05, "steps_per_second": 4.824306541173301 }, { "step": 16390, "loss": 1.9379905462265015, "lm_loss": 1.9379905462265015, "ppl": 6.944781722220501, "gate_mean": 1.180451363325119e-06, "lr": 3.2014002441507536e-05, "steps_per_second": 4.824490083117311 }, { "step": 16400, "loss": 2.073004722595215, "lm_loss": 2.073004722595215, "ppl": 7.948670822861686, "gate_mean": 1.2158416211605072e-06, "lr": 3.193879530972445e-05, "steps_per_second": 4.8246814163453084 }, { "step": 16410, "loss": 1.9138144254684448, "lm_loss": 1.9138144254684448, "ppl": 6.778897143146303, "gate_mean": 1.037726178765297e-06, "lr": 3.186377152071429e-05, "steps_per_second": 4.824872843372902 }, { "step": 16420, "loss": 1.961251974105835, "lm_loss": 1.961251974105835, "ppl": 7.108220805015366, "gate_mean": 9.431969374418259e-07, "lr": 3.1788931279589206e-05, "steps_per_second": 4.822921010882547 }, { "step": 16430, "loss": 1.9727463722229004, "lm_loss": 1.9727463722229004, "ppl": 7.19039690247721, "gate_mean": 1.189298927783966e-06, "lr": 3.1714274790959527e-05, "steps_per_second": 4.823068811950909 }, { "step": 16440, "loss": 2.0195162296295166, "lm_loss": 2.0195162296295166, "ppl": 7.534678997385018, "gate_mean": 1.1059455573558807e-06, "lr": 3.163980225893329e-05, "steps_per_second": 4.823241843287707 }, { "step": 16450, "loss": 2.0052919387817383, "lm_loss": 2.0052919387817383, "ppl": 7.428262178050418, "gate_mean": 1.150183379650116e-06, "lr": 3.156551388711552e-05, "steps_per_second": 4.82341524751932 }, { "step": 16460, "loss": 1.9800221920013428, "lm_loss": 1.9800221920013428, "ppl": 7.2429037179065485, "gate_mean": 1.0135117918252945e-06, "lr": 3.149140987860783e-05, "steps_per_second": 4.823577781231402 }, { "step": 16470, "loss": 1.9943163394927979, "lm_loss": 1.9943163394927979, "ppl": 7.3471783348275075, "gate_mean": 1.2703239917755127e-06, "lr": 3.1417490436007704e-05, "steps_per_second": 4.82370899155363 }, { "step": 16480, "loss": 2.108079195022583, "lm_loss": 2.108079195022583, "ppl": 8.232413227883757, "gate_mean": 1.2142118066549301e-06, "lr": 3.1343755761408074e-05, "steps_per_second": 4.82190312886939 }, { "step": 16490, "loss": 2.074796438217163, "lm_loss": 2.074796438217163, "ppl": 7.962925346761933, "gate_mean": 1.3294629752635956e-06, "lr": 3.1270206056396776e-05, "steps_per_second": 4.822003111691993 }, { "step": 16500, "loss": 1.946871280670166, "lm_loss": 1.946871280670166, "ppl": 7.006731155548998, "gate_mean": 1.2293457984924316e-06, "lr": 3.1196841522055846e-05, "steps_per_second": 4.822134129174275 }, { "step": 16510, "loss": 2.0829358100891113, "lm_loss": 2.0829358100891113, "ppl": 8.028003043857074, "gate_mean": 1.3257376849651337e-06, "lr": 3.1123662358961176e-05, "steps_per_second": 4.822281650671665 }, { "step": 16520, "loss": 2.0624334812164307, "lm_loss": 2.0624334812164307, "ppl": 7.865086080585243, "gate_mean": 1.13598071038723e-06, "lr": 3.1050668767181725e-05, "steps_per_second": 4.822384328193848 }, { "step": 16530, "loss": 2.091130256652832, "lm_loss": 2.091130256652832, "ppl": 8.094058359562379, "gate_mean": 1.317821443080902e-06, "lr": 3.097786094627922e-05, "steps_per_second": 4.822540986540624 }, { "step": 16540, "loss": 1.979164481163025, "lm_loss": 1.979164481163025, "ppl": 7.23669406431109, "gate_mean": 1.1194497346878052e-06, "lr": 3.090523909530749e-05, "steps_per_second": 4.822544045115409 }, { "step": 16550, "loss": 2.0154364109039307, "lm_loss": 2.0154364109039307, "ppl": 7.504001494797002, "gate_mean": 1.0165385901927948e-06, "lr": 3.083280341281187e-05, "steps_per_second": 4.822725688971236 }, { "step": 16560, "loss": 2.080467700958252, "lm_loss": 2.080467700958252, "ppl": 8.008213487679889, "gate_mean": 8.698552846908569e-07, "lr": 3.076055409682879e-05, "steps_per_second": 4.822787408179667 }, { "step": 16570, "loss": 2.022292137145996, "lm_loss": 2.022292137145996, "ppl": 7.555623626175011, "gate_mean": 1.042848452925682e-06, "lr": 3.0688491344885055e-05, "steps_per_second": 4.822901332474652 }, { "step": 16580, "loss": 2.0256922245025635, "lm_loss": 2.0256922245025635, "ppl": 7.581357129826342, "gate_mean": 1.082196831703186e-06, "lr": 3.061661535399752e-05, "steps_per_second": 4.822930033746896 }, { "step": 16590, "loss": 1.9458543062210083, "lm_loss": 1.9458543062210083, "ppl": 6.99960911107414, "gate_mean": 1.1937227100133896e-06, "lr": 3.0544926320672414e-05, "steps_per_second": 4.823004616322841 }, { "step": 16600, "loss": 1.9714269638061523, "lm_loss": 1.9714269638061523, "ppl": 7.180916088192785, "gate_mean": 1.171370968222618e-06, "lr": 3.0473424440904794e-05, "steps_per_second": 4.823125009221851 }, { "step": 16610, "loss": 2.015712261199951, "lm_loss": 2.015712261199951, "ppl": 7.506071761359374, "gate_mean": 9.133946150541306e-07, "lr": 3.040210991017807e-05, "steps_per_second": 4.823296082427728 }, { "step": 16620, "loss": 2.0553839206695557, "lm_loss": 2.0553839206695557, "ppl": 7.8098356544774585, "gate_mean": 9.87434759736061e-07, "lr": 3.033098292346344e-05, "steps_per_second": 4.823474907600347 }, { "step": 16630, "loss": 2.082200527191162, "lm_loss": 2.082200527191162, "ppl": 8.022102360116008, "gate_mean": 1.1138617992401123e-06, "lr": 3.0260043675219367e-05, "steps_per_second": 4.823652216974768 }, { "step": 16640, "loss": 2.0622363090515137, "lm_loss": 2.0622363090515137, "ppl": 7.8635354574103635, "gate_mean": 1.1853408068418503e-06, "lr": 3.018929235939108e-05, "steps_per_second": 4.823854826112823 }, { "step": 16650, "loss": 2.078045606613159, "lm_loss": 2.078045606613159, "ppl": 7.9888403103798025, "gate_mean": 9.266659617424011e-07, "lr": 3.011872916940995e-05, "steps_per_second": 4.823988483769748 }, { "step": 16660, "loss": 2.1105220317840576, "lm_loss": 2.1105220317840576, "ppl": 8.252548252829149, "gate_mean": 1.074979081749916e-06, "lr": 3.0048354298193042e-05, "steps_per_second": 4.823962543475957 }, { "step": 16670, "loss": 2.1762218475341797, "lm_loss": 2.1762218475341797, "ppl": 8.812946623492632, "gate_mean": 1.0652001947164536e-06, "lr": 2.997816793814256e-05, "steps_per_second": 4.824144772052726 }, { "step": 16680, "loss": 2.263598680496216, "lm_loss": 2.263598680496216, "ppl": 9.61763777024994, "gate_mean": 1.0919757187366486e-06, "lr": 2.9908170281145337e-05, "steps_per_second": 4.824296321560487 }, { "step": 16690, "loss": 2.2452940940856934, "lm_loss": 2.2452940940856934, "ppl": 9.443192335139075, "gate_mean": 6.663613021373749e-07, "lr": 2.9838361518572343e-05, "steps_per_second": 4.824497055993803 }, { "step": 16700, "loss": 2.256615400314331, "lm_loss": 2.256615400314331, "ppl": 9.550709073919723, "gate_mean": 8.663628250360489e-07, "lr": 2.9768741841277982e-05, "steps_per_second": 4.8245544771865125 }, { "step": 16710, "loss": 2.3768250942230225, "lm_loss": 2.3768250942230225, "ppl": 10.770652714677443, "gate_mean": 1.0584481060504913e-06, "lr": 2.969931143959986e-05, "steps_per_second": 4.824696057497946 }, { "step": 16720, "loss": 2.276219129562378, "lm_loss": 2.276219129562378, "ppl": 9.739785838337792, "gate_mean": 1.1373776942491531e-06, "lr": 2.963007050335801e-05, "steps_per_second": 4.824860444627962 }, { "step": 16730, "loss": 2.2919235229492188, "lm_loss": 2.2919235229492188, "ppl": 9.89395063048496, "gate_mean": 7.8580342233181e-07, "lr": 2.9561019221854506e-05, "steps_per_second": 4.825051215084408 }, { "step": 16740, "loss": 2.3964507579803467, "lm_loss": 2.3964507579803467, "ppl": 10.984121807904604, "gate_mean": 1.0901130735874176e-06, "lr": 2.9492157783872978e-05, "steps_per_second": 4.825238519622693 }, { "step": 16750, "loss": 2.356924295425415, "lm_loss": 2.356924295425415, "ppl": 10.558426858263966, "gate_mean": 1.1937227100133896e-06, "lr": 2.9423486377677875e-05, "steps_per_second": 4.825387989655648 }, { "step": 16760, "loss": 2.1868107318878174, "lm_loss": 2.1868107318878174, "ppl": 8.906761718118362, "gate_mean": 9.380746632814407e-07, "lr": 2.9355005191014264e-05, "steps_per_second": 4.82560858051807 }, { "step": 16770, "loss": 2.408177137374878, "lm_loss": 2.408177137374878, "ppl": 11.113683950309438, "gate_mean": 1.0039657354354858e-06, "lr": 2.9286714411107055e-05, "steps_per_second": 4.825772377968619 }, { "step": 16780, "loss": 2.337902784347534, "lm_loss": 2.337902784347534, "ppl": 10.359487684935534, "gate_mean": 9.853392839431763e-07, "lr": 2.9218614224660656e-05, "steps_per_second": 4.825970450626028 }, { "step": 16790, "loss": 2.208923578262329, "lm_loss": 2.208923578262329, "ppl": 9.105909316982261, "gate_mean": 9.834766387939453e-07, "lr": 2.9150704817858398e-05, "steps_per_second": 4.8261296076185545 }, { "step": 16800, "loss": 2.354234218597412, "lm_loss": 2.354234218597412, "ppl": 10.530062047697395, "gate_mean": 1.1415686458349228e-06, "lr": 2.908298637636195e-05, "steps_per_second": 4.826260488714571 }, { "step": 16810, "loss": 2.222334384918213, "lm_loss": 2.222334384918213, "ppl": 9.228849426722654, "gate_mean": 1.00000761449337e-06, "lr": 2.9015459085310985e-05, "steps_per_second": 4.826376216890656 }, { "step": 16820, "loss": 2.355726718902588, "lm_loss": 2.355726718902588, "ppl": 10.5457899025116, "gate_mean": 9.366776794195175e-07, "lr": 2.8948123129322515e-05, "steps_per_second": 4.826568693401354 }, { "step": 16830, "loss": 2.381314516067505, "lm_loss": 2.381314516067505, "ppl": 10.819115421624899, "gate_mean": 1.005595549941063e-06, "lr": 2.8880978692490502e-05, "steps_per_second": 4.826756784795591 }, { "step": 16840, "loss": 2.329453706741333, "lm_loss": 2.329453706741333, "ppl": 10.272328296247283, "gate_mean": 1.1813826858997345e-06, "lr": 2.881402595838526e-05, "steps_per_second": 4.826886344510452 }, { "step": 16850, "loss": 2.2539377212524414, "lm_loss": 2.2539377212524414, "ppl": 9.52516954879192, "gate_mean": 1.1431984603404999e-06, "lr": 2.874726511005298e-05, "steps_per_second": 4.82710224390327 }, { "step": 16860, "loss": 2.332843065261841, "lm_loss": 2.332843065261841, "ppl": 10.307203969376646, "gate_mean": 9.522773325443268e-07, "lr": 2.868069633001532e-05, "steps_per_second": 4.8272828563149615 }, { "step": 16870, "loss": 2.2497775554656982, "lm_loss": 2.2497775554656982, "ppl": 9.485625576095462, "gate_mean": 1.2293457984924316e-06, "lr": 2.8614319800268736e-05, "steps_per_second": 4.827349405522054 }, { "step": 16880, "loss": 2.2060258388519287, "lm_loss": 2.2060258388519287, "ppl": 9.07956095846238, "gate_mean": 9.592622518539429e-07, "lr": 2.8548135702284155e-05, "steps_per_second": 4.827563164098288 }, { "step": 16890, "loss": 2.3247132301330566, "lm_loss": 2.3247132301330566, "ppl": 10.223747802568854, "gate_mean": 8.868519216775894e-07, "lr": 2.848214421700638e-05, "steps_per_second": 4.82776077017349 }, { "step": 16900, "loss": 2.2263545989990234, "lm_loss": 2.2263545989990234, "ppl": 9.26602605607095, "gate_mean": 9.085051715373993e-07, "lr": 2.8416345524853575e-05, "steps_per_second": 4.827921890422023 }, { "step": 16910, "loss": 2.31330943107605, "lm_loss": 2.31330943107605, "ppl": 10.107820499182264, "gate_mean": 8.889473974704742e-07, "lr": 2.83507398057169e-05, "steps_per_second": 4.826077674888412 }, { "step": 16920, "loss": 2.331699848175049, "lm_loss": 2.331699848175049, "ppl": 10.295427330591766, "gate_mean": 9.385403245687485e-07, "lr": 2.8285327238959825e-05, "steps_per_second": 4.826272615810307 }, { "step": 16930, "loss": 2.254000663757324, "lm_loss": 2.254000663757324, "ppl": 9.525769105691364, "gate_mean": 1.041218638420105e-06, "lr": 2.822010800341788e-05, "steps_per_second": 4.826397281244855 }, { "step": 16940, "loss": 2.2485361099243164, "lm_loss": 2.2485361099243164, "ppl": 9.473856995054495, "gate_mean": 1.0847579687833786e-06, "lr": 2.815508227739792e-05, "steps_per_second": 4.826523534429439 }, { "step": 16950, "loss": 2.2371597290039062, "lm_loss": 2.2371597290039062, "ppl": 9.366689533840605, "gate_mean": 1.0044313967227936e-06, "lr": 2.8090250238677802e-05, "steps_per_second": 4.8266186739036145 }, { "step": 16960, "loss": 2.175693988800049, "lm_loss": 2.175693988800049, "ppl": 8.808295860224986, "gate_mean": 1.050299033522606e-06, "lr": 2.8025612064505863e-05, "steps_per_second": 4.826759775105252 }, { "step": 16970, "loss": 2.2284016609191895, "lm_loss": 2.2284016609191895, "ppl": 9.2850136128833, "gate_mean": 8.838251233100891e-07, "lr": 2.7961167931600377e-05, "steps_per_second": 4.82687389935578 }, { "step": 16980, "loss": 2.2520885467529297, "lm_loss": 2.2520885467529297, "ppl": 9.50757212352825, "gate_mean": 8.475035429000854e-07, "lr": 2.7896918016149154e-05, "steps_per_second": 4.8270604091239475 }, { "step": 16990, "loss": 2.264878988265991, "lm_loss": 2.264878988265991, "ppl": 9.629959192537328, "gate_mean": 1.0724179446697235e-06, "lr": 2.7832862493809012e-05, "steps_per_second": 4.827227082128532 }, { "step": 17000, "loss": 2.2927370071411133, "lm_loss": 2.2927370071411133, "ppl": 9.902002477499344, "gate_mean": 1.0058283805847168e-06, "lr": 2.77690015397053e-05, "steps_per_second": 4.827422636001596 }, { "step": 17010, "loss": 2.3127431869506836, "lm_loss": 2.3127431869506836, "ppl": 10.102098625345954, "gate_mean": 8.051283657550812e-07, "lr": 2.7705335328431424e-05, "steps_per_second": 4.82756855589207 }, { "step": 17020, "loss": 2.2909889221191406, "lm_loss": 2.2909889221191406, "ppl": 9.884708055744735, "gate_mean": 1.0260846465826035e-06, "lr": 2.7641864034048412e-05, "steps_per_second": 4.827687360274604 }, { "step": 17030, "loss": 2.270191192626953, "lm_loss": 2.270191192626953, "ppl": 9.681251621066503, "gate_mean": 1.0703224688768387e-06, "lr": 2.7578587830084294e-05, "steps_per_second": 4.827885922772269 }, { "step": 17040, "loss": 2.3072195053100586, "lm_loss": 2.3072195053100586, "ppl": 10.046451678134883, "gate_mean": 9.227078408002853e-07, "lr": 2.751550688953385e-05, "steps_per_second": 4.82804457809282 }, { "step": 17050, "loss": 2.250194549560547, "lm_loss": 2.250194549560547, "ppl": 9.489581850760898, "gate_mean": 9.362120181322098e-07, "lr": 2.7452621384857916e-05, "steps_per_second": 4.828180066352889 }, { "step": 17060, "loss": 2.3333473205566406, "lm_loss": 2.3333473205566406, "ppl": 10.312402742196987, "gate_mean": 9.292270988225937e-07, "lr": 2.738993148798309e-05, "steps_per_second": 4.828319673643497 }, { "step": 17070, "loss": 2.232332468032837, "lm_loss": 2.232332468032837, "ppl": 9.321583037032514, "gate_mean": 8.891802281141281e-07, "lr": 2.7327437370301185e-05, "steps_per_second": 4.82853273085226 }, { "step": 17080, "loss": 2.254547119140625, "lm_loss": 2.254547119140625, "ppl": 9.530975936019912, "gate_mean": 1.0260846465826035e-06, "lr": 2.7265139202668666e-05, "steps_per_second": 4.828703433778392 }, { "step": 17090, "loss": 2.366262912750244, "lm_loss": 2.366262912750244, "ppl": 10.657489801768696, "gate_mean": 1.0242220014333725e-06, "lr": 2.7203037155406397e-05, "steps_per_second": 4.828714813425495 }, { "step": 17100, "loss": 2.2514286041259766, "lm_loss": 2.2514286041259766, "ppl": 9.50129974133894, "gate_mean": 9.436625987291336e-07, "lr": 2.7141131398298973e-05, "steps_per_second": 4.82890337920571 }, { "step": 17110, "loss": 2.3244681358337402, "lm_loss": 2.3244681358337402, "ppl": 10.221242327316189, "gate_mean": 1.0444782674312592e-06, "lr": 2.7079422100594392e-05, "steps_per_second": 4.829108943662773 }, { "step": 17120, "loss": 2.214834690093994, "lm_loss": 2.214834690093994, "ppl": 9.159894765101372, "gate_mean": 8.274801075458527e-07, "lr": 2.7017909431003502e-05, "steps_per_second": 4.829248753083974 }, { "step": 17130, "loss": 2.3009696006774902, "lm_loss": 2.3009696006774902, "ppl": 9.983858118887513, "gate_mean": 8.135102689266205e-07, "lr": 2.6956593557699594e-05, "steps_per_second": 4.829448311681007 }, { "step": 17140, "loss": 2.3378543853759766, "lm_loss": 2.3378543853759766, "ppl": 10.358986308518869, "gate_mean": 9.206123650074005e-07, "lr": 2.6895474648317946e-05, "steps_per_second": 4.829636365974089 }, { "step": 17150, "loss": 2.4102377891540527, "lm_loss": 2.4102377891540527, "ppl": 11.136608995069775, "gate_mean": 9.436625987291336e-07, "lr": 2.6834552869955276e-05, "steps_per_second": 4.829809751474253 }, { "step": 17160, "loss": 2.2301011085510254, "lm_loss": 2.2301011085510254, "ppl": 9.300806423005197, "gate_mean": 1.010717824101448e-06, "lr": 2.677382838916945e-05, "steps_per_second": 4.829997696400492 }, { "step": 17170, "loss": 2.394270420074463, "lm_loss": 2.394270420074463, "ppl": 10.960198800361313, "gate_mean": 1.028645783662796e-06, "lr": 2.6713301371978862e-05, "steps_per_second": 4.830182762681118 }, { "step": 17180, "loss": 2.195497512817383, "lm_loss": 2.195497512817383, "ppl": 8.984469833990033, "gate_mean": 9.324867278337479e-07, "lr": 2.665297198386204e-05, "steps_per_second": 4.830196975057256 }, { "step": 17190, "loss": 2.4524011611938477, "lm_loss": 2.4524011611938477, "ppl": 11.616205641162347, "gate_mean": 1.0237563401460648e-06, "lr": 2.6592840389757274e-05, "steps_per_second": 4.830444875836455 }, { "step": 17200, "loss": 2.266876697540283, "lm_loss": 2.266876697540283, "ppl": 9.649216279954045, "gate_mean": 1.0530930012464523e-06, "lr": 2.6532906754062002e-05, "steps_per_second": 4.830665101354309 }, { "step": 17210, "loss": 2.3456995487213135, "lm_loss": 2.3456995487213135, "ppl": 10.440573863597034, "gate_mean": 8.074566721916199e-07, "lr": 2.647317124063253e-05, "steps_per_second": 4.8308525605682515 }, { "step": 17220, "loss": 2.3793978691101074, "lm_loss": 2.3793978691101074, "ppl": 10.798398856483002, "gate_mean": 8.922070264816284e-07, "lr": 2.6413634012783466e-05, "steps_per_second": 4.830917890603924 }, { "step": 17230, "loss": 2.337606191635132, "lm_loss": 2.337606191635132, "ppl": 10.35641559198667, "gate_mean": 6.963964551687241e-07, "lr": 2.635429523328728e-05, "steps_per_second": 4.831089691042396 }, { "step": 17240, "loss": 2.3188042640686035, "lm_loss": 2.3188042640686035, "ppl": 10.163514158291058, "gate_mean": 9.404029697179794e-07, "lr": 2.6295155064373974e-05, "steps_per_second": 4.831260769830085 }, { "step": 17250, "loss": 2.390415906906128, "lm_loss": 2.390415906906128, "ppl": 10.918033884573946, "gate_mean": 7.105991244316101e-07, "lr": 2.6236213667730496e-05, "steps_per_second": 4.831368952590655 }, { "step": 17260, "loss": 2.3660082817077637, "lm_loss": 2.3660082817077637, "ppl": 10.654776419500584, "gate_mean": 1.046573743224144e-06, "lr": 2.617747120450036e-05, "steps_per_second": 4.831464459399452 }, { "step": 17270, "loss": 2.304649591445923, "lm_loss": 2.304649591445923, "ppl": 10.020666309960983, "gate_mean": 7.427297532558441e-07, "lr": 2.6118927835283247e-05, "steps_per_second": 4.8316510940251165 }, { "step": 17280, "loss": 2.303762674331665, "lm_loss": 2.303762674331665, "ppl": 10.011782749587612, "gate_mean": 9.57166776061058e-07, "lr": 2.606058372013445e-05, "steps_per_second": 4.8318589643776155 }, { "step": 17290, "loss": 2.3632848262786865, "lm_loss": 2.3632848262786865, "ppl": 10.625798089322144, "gate_mean": 8.67992639541626e-07, "lr": 2.6002439018564594e-05, "steps_per_second": 4.832006501703486 }, { "step": 17300, "loss": 2.3846054077148438, "lm_loss": 2.3846054077148438, "ppl": 10.854778607852431, "gate_mean": 1.0095536708831787e-06, "lr": 2.594449388953907e-05, "steps_per_second": 4.8321534588089134 }, { "step": 17310, "loss": 2.2241759300231934, "lm_loss": 2.2241759300231934, "ppl": 9.245860427664361, "gate_mean": 9.511131793260574e-07, "lr": 2.588674849147761e-05, "steps_per_second": 4.8323057069282775 }, { "step": 17320, "loss": 2.2835874557495117, "lm_loss": 2.2835874557495117, "ppl": 9.811816805324462, "gate_mean": 8.887145668268204e-07, "lr": 2.5829202982253957e-05, "steps_per_second": 4.832358840957853 }, { "step": 17330, "loss": 2.273894786834717, "lm_loss": 2.273894786834717, "ppl": 9.71717352751596, "gate_mean": 8.491333574056625e-07, "lr": 2.5771857519195335e-05, "steps_per_second": 4.832530332513164 }, { "step": 17340, "loss": 2.334841728210449, "lm_loss": 2.334841728210449, "ppl": 10.327825196630869, "gate_mean": 9.324867278337479e-07, "lr": 2.5714712259082055e-05, "steps_per_second": 4.832647698763199 }, { "step": 17350, "loss": 2.2762484550476074, "lm_loss": 2.2762484550476074, "ppl": 9.740071466471603, "gate_mean": 1.1459924280643463e-06, "lr": 2.5657767358147055e-05, "steps_per_second": 4.8328407729309255 }, { "step": 17360, "loss": 2.1988348960876465, "lm_loss": 2.1988348960876465, "ppl": 9.014504544098171, "gate_mean": 8.274801075458527e-07, "lr": 2.560102297207553e-05, "steps_per_second": 4.833029041767578 }, { "step": 17370, "loss": 2.2122418880462646, "lm_loss": 2.2122418880462646, "ppl": 9.136175733861654, "gate_mean": 1.0652001947164536e-06, "lr": 2.5544479256004475e-05, "steps_per_second": 4.833183888200726 }, { "step": 17380, "loss": 2.283198118209839, "lm_loss": 2.283198118209839, "ppl": 9.807997440269098, "gate_mean": 9.278301149606705e-07, "lr": 2.548813636452223e-05, "steps_per_second": 4.833339585438828 }, { "step": 17390, "loss": 2.4085030555725098, "lm_loss": 2.4085030555725098, "ppl": 11.117306692478303, "gate_mean": 8.386559784412384e-07, "lr": 2.5431994451668132e-05, "steps_per_second": 4.833397609448801 }, { "step": 17400, "loss": 2.3211541175842285, "lm_loss": 2.3211541175842285, "ppl": 10.187425010264208, "gate_mean": 8.130446076393127e-07, "lr": 2.5376053670932016e-05, "steps_per_second": 4.8335520582474265 }, { "step": 17410, "loss": 2.371262311935425, "lm_loss": 2.371262311935425, "ppl": 10.710904256436976, "gate_mean": 1.0281801223754883e-06, "lr": 2.5320314175253828e-05, "steps_per_second": 4.833711933101188 }, { "step": 17420, "loss": 2.343190908432007, "lm_loss": 2.343190908432007, "ppl": 10.414415044611667, "gate_mean": 1.0570511221885681e-06, "lr": 2.5264776117023253e-05, "steps_per_second": 4.833910852047699 }, { "step": 17430, "loss": 2.222069501876831, "lm_loss": 2.222069501876831, "ppl": 9.226405184751464, "gate_mean": 8.884817361831665e-07, "lr": 2.5209439648079185e-05, "steps_per_second": 4.834121687307794 }, { "step": 17440, "loss": 2.405782461166382, "lm_loss": 2.405782461166382, "ppl": 11.08710211591083, "gate_mean": 1.0284129530191422e-06, "lr": 2.515430491970944e-05, "steps_per_second": 4.83427120572484 }, { "step": 17450, "loss": 2.2294933795928955, "lm_loss": 2.2294933795928955, "ppl": 9.295155770814375, "gate_mean": 9.881332516670227e-07, "lr": 2.5099372082650252e-05, "steps_per_second": 4.834430763199684 }, { "step": 17460, "loss": 2.2026312351226807, "lm_loss": 2.2026312351226807, "ppl": 9.04879170123754, "gate_mean": 1.0463409125804901e-06, "lr": 2.50446412870859e-05, "steps_per_second": 4.834587916181595 }, { "step": 17470, "loss": 2.281109094619751, "lm_loss": 2.281109094619751, "ppl": 9.787529688496472, "gate_mean": 1.0505318641662598e-06, "lr": 2.4990112682648304e-05, "steps_per_second": 4.834699067496473 }, { "step": 17480, "loss": 2.3762195110321045, "lm_loss": 2.3762195110321045, "ppl": 10.764132163005737, "gate_mean": 1.0507646948099136e-06, "lr": 2.493578641841655e-05, "steps_per_second": 4.83483927676129 }, { "step": 17490, "loss": 2.2121706008911133, "lm_loss": 2.2121706008911133, "ppl": 9.135524465098449, "gate_mean": 1.0544899851083755e-06, "lr": 2.488166264291661e-05, "steps_per_second": 4.835000265345294 }, { "step": 17500, "loss": 2.2347307205200195, "lm_loss": 2.2347307205200195, "ppl": 9.343965375256834, "gate_mean": 1.2028031051158905e-06, "lr": 2.4827741504120805e-05, "steps_per_second": 4.835199557355036 }, { "step": 17510, "loss": 2.2740068435668945, "lm_loss": 2.2740068435668945, "ppl": 9.718262463237608, "gate_mean": 1.4095567166805267e-06, "lr": 2.477402314944745e-05, "steps_per_second": 4.8353222046046564 }, { "step": 17520, "loss": 2.2435269355773926, "lm_loss": 2.2435269355773926, "ppl": 9.426519453609648, "gate_mean": 1.055654138326645e-06, "lr": 2.4720507725760517e-05, "steps_per_second": 4.835466975474657 }, { "step": 17530, "loss": 2.2621946334838867, "lm_loss": 2.2621946334838867, "ppl": 9.604143630093347, "gate_mean": 1.0139774531126022e-06, "lr": 2.4667195379369092e-05, "steps_per_second": 4.83565361240294 }, { "step": 17540, "loss": 2.2269203662872314, "lm_loss": 2.2269203662872314, "ppl": 9.271269953778173, "gate_mean": 8.684583008289337e-07, "lr": 2.4614086256027096e-05, "steps_per_second": 4.835847408731411 }, { "step": 17550, "loss": 2.2186992168426514, "lm_loss": 2.2186992168426514, "ppl": 9.19536191116279, "gate_mean": 1.1578667908906937e-06, "lr": 2.4561180500932886e-05, "steps_per_second": 4.83598960416254 }, { "step": 17560, "loss": 2.29787278175354, "lm_loss": 2.29787278175354, "ppl": 9.952987742784671, "gate_mean": 1.1171214282512665e-06, "lr": 2.450847825872872e-05, "steps_per_second": 4.836158400657608 }, { "step": 17570, "loss": 2.2636706829071045, "lm_loss": 2.2636706829071045, "ppl": 9.618330288287638, "gate_mean": 9.59029421210289e-07, "lr": 2.4455979673500562e-05, "steps_per_second": 4.836305628593557 }, { "step": 17580, "loss": 2.3333611488342285, "lm_loss": 2.3333611488342285, "ppl": 10.312545345950685, "gate_mean": 1.2649688869714737e-06, "lr": 2.440368488877751e-05, "steps_per_second": 4.83644168058916 }, { "step": 17590, "loss": 2.121093511581421, "lm_loss": 2.121093511581421, "ppl": 8.340252665885048, "gate_mean": 8.954666554927826e-07, "lr": 2.4351594047531517e-05, "steps_per_second": 4.836555114490444 }, { "step": 17600, "loss": 2.2047436237335205, "lm_loss": 2.2047436237335205, "ppl": 9.067926468686272, "gate_mean": 9.478535503149033e-07, "lr": 2.4299707292176967e-05, "steps_per_second": 4.836734354512868 }, { "step": 17610, "loss": 2.273749351501465, "lm_loss": 2.273749351501465, "ppl": 9.715760409906826, "gate_mean": 9.017530828714371e-07, "lr": 2.4248024764570242e-05, "steps_per_second": 4.836927167398921 }, { "step": 17620, "loss": 2.2517099380493164, "lm_loss": 2.2517099380493164, "ppl": 9.503973155315387, "gate_mean": 9.515788406133652e-07, "lr": 2.4196546606009428e-05, "steps_per_second": 4.83707706462842 }, { "step": 17630, "loss": 2.1898250579833984, "lm_loss": 2.1898250579833984, "ppl": 8.933650107219526, "gate_mean": 1.0812655091285706e-06, "lr": 2.4145272957233832e-05, "steps_per_second": 4.837248017325031 }, { "step": 17640, "loss": 2.0551505088806152, "lm_loss": 2.0551505088806152, "ppl": 7.8080129594935395, "gate_mean": 8.956994861364365e-07, "lr": 2.4094203958423637e-05, "steps_per_second": 4.837403495322841 }, { "step": 17650, "loss": 2.2313132286071777, "lm_loss": 2.2313132286071777, "ppl": 9.31208695230567, "gate_mean": 9.497161954641342e-07, "lr": 2.404333974919956e-05, "steps_per_second": 4.837643334498497 }, { "step": 17660, "loss": 2.2167584896087646, "lm_loss": 2.2167584896087646, "ppl": 9.177533527486988, "gate_mean": 1.0973308235406876e-06, "lr": 2.3992680468622378e-05, "steps_per_second": 4.837793277193269 }, { "step": 17670, "loss": 2.145184278488159, "lm_loss": 2.145184278488159, "ppl": 8.54361549678572, "gate_mean": 9.422656148672104e-07, "lr": 2.3942226255192667e-05, "steps_per_second": 4.837989460194034 }, { "step": 17680, "loss": 2.088068962097168, "lm_loss": 2.088068962097168, "ppl": 8.069317950933419, "gate_mean": 9.792856872081757e-07, "lr": 2.3891977246850298e-05, "steps_per_second": 4.838201553350803 }, { "step": 17690, "loss": 2.060544013977051, "lm_loss": 2.060544013977051, "ppl": 7.850239288781463, "gate_mean": 9.085051715373993e-07, "lr": 2.3841933580974132e-05, "steps_per_second": 4.838357544352799 }, { "step": 17700, "loss": 2.2238283157348633, "lm_loss": 2.2238283157348633, "ppl": 9.242646993022056, "gate_mean": 1.0724179446697235e-06, "lr": 2.3792095394381656e-05, "steps_per_second": 4.838576270619546 }, { "step": 17710, "loss": 2.187896966934204, "lm_loss": 2.187896966934204, "ppl": 8.916441811322843, "gate_mean": 9.096693247556686e-07, "lr": 2.3742462823328565e-05, "steps_per_second": 4.838739151973878 }, { "step": 17720, "loss": 2.1230685710906982, "lm_loss": 2.1230685710906982, "ppl": 8.356741439016632, "gate_mean": 1.0486692190170288e-06, "lr": 2.3693036003508428e-05, "steps_per_second": 4.838931679137638 }, { "step": 17730, "loss": 2.1127867698669434, "lm_loss": 2.1127867698669434, "ppl": 8.271259292943272, "gate_mean": 1.103617250919342e-06, "lr": 2.364381507005226e-05, "steps_per_second": 4.839101692999095 }, { "step": 17740, "loss": 2.1953768730163574, "lm_loss": 2.1953768730163574, "ppl": 8.983386014714128, "gate_mean": 1.0665971785783768e-06, "lr": 2.3594800157528206e-05, "steps_per_second": 4.839266177552235 }, { "step": 17750, "loss": 2.1386823654174805, "lm_loss": 2.1386823654174805, "ppl": 8.488245850890426, "gate_mean": 8.342321962118149e-07, "lr": 2.35459913999412e-05, "steps_per_second": 4.839440884490273 }, { "step": 17760, "loss": 2.156449556350708, "lm_loss": 2.156449556350708, "ppl": 8.640405860872471, "gate_mean": 1.0242220014333725e-06, "lr": 2.3497388930732484e-05, "steps_per_second": 4.839525924473363 }, { "step": 17770, "loss": 2.236933708190918, "lm_loss": 2.236933708190918, "ppl": 9.364572706289712, "gate_mean": 1.0945368558168411e-06, "lr": 2.344899288277939e-05, "steps_per_second": 4.839760044549949 }, { "step": 17780, "loss": 2.1799919605255127, "lm_loss": 2.1799919605255127, "ppl": 8.84623513935329, "gate_mean": 1.0505318641662598e-06, "lr": 2.3400803388394848e-05, "steps_per_second": 4.839901752907125 }, { "step": 17790, "loss": 2.1071739196777344, "lm_loss": 2.1071739196777344, "ppl": 8.224963999470095, "gate_mean": 9.76957380771637e-07, "lr": 2.335282057932708e-05, "steps_per_second": 4.840076643331043 }, { "step": 17800, "loss": 2.1436593532562256, "lm_loss": 2.1436593532562256, "ppl": 8.530597050544063, "gate_mean": 1.1275988072156906e-06, "lr": 2.3305044586759274e-05, "steps_per_second": 4.840235043507121 }, { "step": 17810, "loss": 2.2025599479675293, "lm_loss": 2.2025599479675293, "ppl": 9.048146661611394, "gate_mean": 1.0975636541843414e-06, "lr": 2.3257475541309165e-05, "steps_per_second": 4.840428405206013 }, { "step": 17820, "loss": 2.111971616744995, "lm_loss": 2.111971616744995, "ppl": 8.264519697382559, "gate_mean": 1.1245720088481903e-06, "lr": 2.3210113573028704e-05, "steps_per_second": 4.840565877437007 }, { "step": 17830, "loss": 2.2482898235321045, "lm_loss": 2.2482898235321045, "ppl": 9.471524000299077, "gate_mean": 1.1345837265253067e-06, "lr": 2.31629588114037e-05, "steps_per_second": 4.840736450903873 }, { "step": 17840, "loss": 2.1745049953460693, "lm_loss": 2.1745049953460693, "ppl": 8.797829077807432, "gate_mean": 1.1527445167303085e-06, "lr": 2.311601138535348e-05, "steps_per_second": 4.8409210427300025 }, { "step": 17850, "loss": 1.998955249786377, "lm_loss": 1.998955249786377, "ppl": 7.38134041217732, "gate_mean": 9.511131793260574e-07, "lr": 2.306927142323051e-05, "steps_per_second": 4.841114395502204 }, { "step": 17860, "loss": 2.109663486480713, "lm_loss": 2.109663486480713, "ppl": 8.245466106892644, "gate_mean": 1.2042000889778137e-06, "lr": 2.3022739052820066e-05, "steps_per_second": 4.841252048991648 }, { "step": 17870, "loss": 2.189972400665283, "lm_loss": 2.189972400665283, "ppl": 8.934966512164278, "gate_mean": 1.1220108717679977e-06, "lr": 2.2976414401339844e-05, "steps_per_second": 4.841425456440157 }, { "step": 17880, "loss": 2.1641740798950195, "lm_loss": 2.1641740798950195, "ppl": 8.707407323409877, "gate_mean": 1.0319054126739502e-06, "lr": 2.2930297595439728e-05, "steps_per_second": 4.841514412173643 }, { "step": 17890, "loss": 2.2942404747009277, "lm_loss": 2.2942404747009277, "ppl": 9.916901013928166, "gate_mean": 1.216307282447815e-06, "lr": 2.288438876120124e-05, "steps_per_second": 4.841640833805115 }, { "step": 17900, "loss": 2.146759510040283, "lm_loss": 2.146759510040283, "ppl": 8.55708427492255, "gate_mean": 1.125037670135498e-06, "lr": 2.2838688024137436e-05, "steps_per_second": 4.841737857705182 }, { "step": 17910, "loss": 2.1939797401428223, "lm_loss": 2.1939797401428223, "ppl": 8.97084379441164, "gate_mean": 1.4274846762418747e-06, "lr": 2.2793195509192362e-05, "steps_per_second": 4.841842560090172 }, { "step": 17920, "loss": 2.1129980087280273, "lm_loss": 2.1129980087280273, "ppl": 8.273006688888511, "gate_mean": 1.144595444202423e-06, "lr": 2.2747911340740792e-05, "steps_per_second": 4.842069097107453 }, { "step": 17930, "loss": 2.166297435760498, "lm_loss": 2.166297435760498, "ppl": 8.725915891006895, "gate_mean": 8.60774889588356e-07, "lr": 2.2702835642587967e-05, "steps_per_second": 4.842227080054976 }, { "step": 17940, "loss": 2.1799540519714355, "lm_loss": 2.1799540519714355, "ppl": 8.845899797726329, "gate_mean": 1.1129304766654968e-06, "lr": 2.2657968537969078e-05, "steps_per_second": 4.842332715960705 }, { "step": 17950, "loss": 2.1242411136627197, "lm_loss": 2.1242411136627197, "ppl": 8.366545821021601, "gate_mean": 7.66245648264885e-07, "lr": 2.2613310149549105e-05, "steps_per_second": 4.842407607384459 }, { "step": 17960, "loss": 2.0435643196105957, "lm_loss": 2.0435643196105957, "ppl": 7.718069898346228, "gate_mean": 9.050127118825912e-07, "lr": 2.2568860599422348e-05, "steps_per_second": 4.842586749942958 }, { "step": 17970, "loss": 2.1207587718963623, "lm_loss": 2.1207587718963623, "ppl": 8.337461319547621, "gate_mean": 1.1953525245189667e-06, "lr": 2.25246200091122e-05, "steps_per_second": 4.842774085814975 }, { "step": 17980, "loss": 2.206234931945801, "lm_loss": 2.206234931945801, "ppl": 9.081459630446867, "gate_mean": 1.0419171303510666e-06, "lr": 2.248058849957075e-05, "steps_per_second": 4.842888545535064 }, { "step": 17990, "loss": 2.0730865001678467, "lm_loss": 2.0730865001678467, "ppl": 7.949320872446606, "gate_mean": 1.1741649359464645e-06, "lr": 2.243676619117845e-05, "steps_per_second": 4.843081523123053 }, { "step": 18000, "loss": 2.1944258213043213, "lm_loss": 2.1944258213043213, "ppl": 8.974846411510748, "gate_mean": 1.1427327990531921e-06, "lr": 2.239315320374384e-05, "steps_per_second": 4.843238885158079 }, { "step": 18010, "loss": 2.0172481536865234, "lm_loss": 2.0172481536865234, "ppl": 7.51760913839865, "gate_mean": 9.099021553993225e-07, "lr": 2.234974965650313e-05, "steps_per_second": 4.8317691103599945 }, { "step": 18020, "loss": 2.106924057006836, "lm_loss": 2.106924057006836, "ppl": 8.222909144723582, "gate_mean": 9.476207196712494e-07, "lr": 2.2306555668119973e-05, "steps_per_second": 4.831581659503736 }, { "step": 18030, "loss": 2.0667707920074463, "lm_loss": 2.0667707920074463, "ppl": 7.899273490430451, "gate_mean": 1.0277144610881805e-06, "lr": 2.2263571356685095e-05, "steps_per_second": 4.83142872880996 }, { "step": 18040, "loss": 2.1257455348968506, "lm_loss": 2.1257455348968506, "ppl": 8.379142102892287, "gate_mean": 1.2461096048355103e-06, "lr": 2.2220796839715943e-05, "steps_per_second": 4.8313793231719515 }, { "step": 18050, "loss": 2.1969852447509766, "lm_loss": 2.1969852447509766, "ppl": 8.99784626447264, "gate_mean": 1.0381918400526047e-06, "lr": 2.2178232234156452e-05, "steps_per_second": 4.831359224958648 }, { "step": 18060, "loss": 2.0710630416870117, "lm_loss": 2.0710630416870117, "ppl": 7.933252014526466, "gate_mean": 1.2307427823543549e-06, "lr": 2.213587765637658e-05, "steps_per_second": 4.829797564627762 }, { "step": 18070, "loss": 2.1282293796539307, "lm_loss": 2.1282293796539307, "ppl": 8.399980459981894, "gate_mean": 1.0062940418720245e-06, "lr": 2.209373322217214e-05, "steps_per_second": 4.829925958041423 }, { "step": 18080, "loss": 2.0974795818328857, "lm_loss": 2.0974795818328857, "ppl": 8.14561366561538, "gate_mean": 1.35740265250206e-06, "lr": 2.2051799046764427e-05, "steps_per_second": 4.830078930484132 }, { "step": 18090, "loss": 2.158642292022705, "lm_loss": 2.158642292022705, "ppl": 8.659372774138241, "gate_mean": 1.037726178765297e-06, "lr": 2.2010075244799853e-05, "steps_per_second": 4.830221687810353 }, { "step": 18100, "loss": 2.1314046382904053, "lm_loss": 2.1314046382904053, "ppl": 8.426694960763108, "gate_mean": 1.1487863957881927e-06, "lr": 2.1968561930349722e-05, "steps_per_second": 4.83043430954814 }, { "step": 18110, "loss": 2.1171658039093018, "lm_loss": 2.1171658039093018, "ppl": 8.307558839428662, "gate_mean": 1.07521191239357e-06, "lr": 2.192725921690981e-05, "steps_per_second": 4.828688904277543 }, { "step": 18120, "loss": 2.1622395515441895, "lm_loss": 2.1622395515441895, "ppl": 8.6905788798793, "gate_mean": 1.0458752512931824e-06, "lr": 2.1886167217400195e-05, "steps_per_second": 4.828748249144229 }, { "step": 18130, "loss": 2.187345027923584, "lm_loss": 2.187345027923584, "ppl": 8.911521837138991, "gate_mean": 1.1646188795566559e-06, "lr": 2.184528604416483e-05, "steps_per_second": 4.824475738314665 }, { "step": 18140, "loss": 2.118380546569824, "lm_loss": 2.118380546569824, "ppl": 8.317656517354113, "gate_mean": 9.08970832824707e-07, "lr": 2.180461580897127e-05, "steps_per_second": 4.8245816112381625 }, { "step": 18150, "loss": 2.165602684020996, "lm_loss": 2.165602684020996, "ppl": 8.71985565118771, "gate_mean": 1.105479896068573e-06, "lr": 2.176415662301039e-05, "steps_per_second": 4.8247073066013275 }, { "step": 18160, "loss": 2.184488534927368, "lm_loss": 2.184488534927368, "ppl": 8.886102459846407, "gate_mean": 1.1450611054897308e-06, "lr": 2.1723908596896053e-05, "steps_per_second": 4.8246427070294 }, { "step": 18170, "loss": 2.139986038208008, "lm_loss": 2.139986038208008, "ppl": 8.499318962334298, "gate_mean": 1.1837109923362732e-06, "lr": 2.1683871840664837e-05, "steps_per_second": 4.824717229715396 }, { "step": 18180, "loss": 2.080793619155884, "lm_loss": 2.080793619155884, "ppl": 8.01082393555917, "gate_mean": 1.1299271136522293e-06, "lr": 2.1644046463775706e-05, "steps_per_second": 4.82485227775004 }, { "step": 18190, "loss": 2.150973081588745, "lm_loss": 2.150973081588745, "ppl": 8.593216230593587, "gate_mean": 1.1075753718614578e-06, "lr": 2.160443257510971e-05, "steps_per_second": 4.825046052642369 }, { "step": 18200, "loss": 2.1422359943389893, "lm_loss": 2.1422359943389893, "ppl": 8.518463586348535, "gate_mean": 1.1546071618795395e-06, "lr": 2.1565030282969713e-05, "steps_per_second": 4.825170819319142 }, { "step": 18210, "loss": 2.0818915367126465, "lm_loss": 2.0818915367126465, "ppl": 8.01962398978519, "gate_mean": 8.877832442522049e-07, "lr": 2.152583969508006e-05, "steps_per_second": 4.825370914372932 }, { "step": 18220, "loss": 2.139465093612671, "lm_loss": 2.139465093612671, "ppl": 8.494892441143078, "gate_mean": 9.634532034397125e-07, "lr": 2.148686091858635e-05, "steps_per_second": 4.825565413875622 }, { "step": 18230, "loss": 2.1361591815948486, "lm_loss": 2.1361591815948486, "ppl": 8.466855443590191, "gate_mean": 1.1541415005922318e-06, "lr": 2.144809406005507e-05, "steps_per_second": 4.825567023538024 }, { "step": 18240, "loss": 2.1772050857543945, "lm_loss": 2.1772050857543945, "ppl": 8.821616110832773, "gate_mean": 8.931383490562439e-07, "lr": 2.140953922547329e-05, "steps_per_second": 4.823955739102571 }, { "step": 18250, "loss": 2.1616580486297607, "lm_loss": 2.1616580486297607, "ppl": 8.685526751988487, "gate_mean": 1.2693926692008972e-06, "lr": 2.1371196520248492e-05, "steps_per_second": 4.824103432691207 }, { "step": 18260, "loss": 2.202390670776367, "lm_loss": 2.202390670776367, "ppl": 9.04661514638825, "gate_mean": 9.960494935512543e-07, "lr": 2.1333066049208167e-05, "steps_per_second": 4.824208982844229 }, { "step": 18270, "loss": 2.035432815551758, "lm_loss": 2.035432815551758, "ppl": 7.655564856053461, "gate_mean": 1.1171214282512665e-06, "lr": 2.1295147916599554e-05, "steps_per_second": 4.824323894487962 }, { "step": 18280, "loss": 2.1473941802978516, "lm_loss": 2.1473941802978516, "ppl": 8.56251692559189, "gate_mean": 1.2249220162630081e-06, "lr": 2.1257442226089393e-05, "steps_per_second": 4.8244874350012115 }, { "step": 18290, "loss": 2.2401750087738037, "lm_loss": 2.2401750087738037, "ppl": 9.39497534669153, "gate_mean": 1.2218952178955078e-06, "lr": 2.1219949080763586e-05, "steps_per_second": 4.824637855586287 }, { "step": 18300, "loss": 2.080427646636963, "lm_loss": 2.080427646636963, "ppl": 8.007892730547798, "gate_mean": 1.2977980077266693e-06, "lr": 2.1182668583126967e-05, "steps_per_second": 4.824746764251593 }, { "step": 18310, "loss": 1.9898769855499268, "lm_loss": 1.9898769855499268, "ppl": 7.314633901295893, "gate_mean": 9.972136467695236e-07, "lr": 2.1145600835103e-05, "steps_per_second": 4.82307598858432 }, { "step": 18320, "loss": 2.0703117847442627, "lm_loss": 2.0703117847442627, "ppl": 7.927294342023593, "gate_mean": 9.42964106798172e-07, "lr": 2.1108745938033488e-05, "steps_per_second": 4.8231769300906935 }, { "step": 18330, "loss": 2.133031129837036, "lm_loss": 2.133031129837036, "ppl": 8.440412061232784, "gate_mean": 1.1925585567951202e-06, "lr": 2.107210399267831e-05, "steps_per_second": 4.823330809098447 }, { "step": 18340, "loss": 2.187582015991211, "lm_loss": 2.187582015991211, "ppl": 8.913634011748995, "gate_mean": 9.923242032527924e-07, "lr": 2.1035675099215142e-05, "steps_per_second": 4.82340862325015 }, { "step": 18350, "loss": 1.961052656173706, "lm_loss": 1.961052656173706, "ppl": 7.106804150330426, "gate_mean": 1.159030944108963e-06, "lr": 2.0999459357239187e-05, "steps_per_second": 4.823584403267557 }, { "step": 18360, "loss": 2.130650520324707, "lm_loss": 2.130650520324707, "ppl": 8.420342634204571, "gate_mean": 1.0638032108545303e-06, "lr": 2.0963456865762916e-05, "steps_per_second": 4.823498152554759 }, { "step": 18370, "loss": 2.1458959579467773, "lm_loss": 2.1458959579467773, "ppl": 8.549697976568348, "gate_mean": 1.149950549006462e-06, "lr": 2.0927667723215757e-05, "steps_per_second": 4.823648900004013 }, { "step": 18380, "loss": 2.0225298404693604, "lm_loss": 2.0225298404693604, "ppl": 7.557419836495167, "gate_mean": 1.0868534445762634e-06, "lr": 2.0892092027443855e-05, "steps_per_second": 4.823793126479992 }, { "step": 18390, "loss": 2.036255121231079, "lm_loss": 2.036255121231079, "ppl": 7.66186265951791, "gate_mean": 1.1718366295099258e-06, "lr": 2.0856729875709802e-05, "steps_per_second": 4.823987646085264 }, { "step": 18400, "loss": 2.205129623413086, "lm_loss": 2.205129623413086, "ppl": 9.0714273610257, "gate_mean": 1.0998919606208801e-06, "lr": 2.082158136469238e-05, "steps_per_second": 4.824117370607835 }, { "step": 18410, "loss": 1.9218307733535767, "lm_loss": 1.9218307733535767, "ppl": 6.833457536290458, "gate_mean": 1.2405216693878174e-06, "lr": 2.0786646590486276e-05, "steps_per_second": 4.8242976960086335 }, { "step": 18420, "loss": 2.0848443508148193, "lm_loss": 2.0848443508148193, "ppl": 8.043339445030224, "gate_mean": 1.112697646021843e-06, "lr": 2.0751925648601817e-05, "steps_per_second": 4.824373278004299 }, { "step": 18430, "loss": 2.059807300567627, "lm_loss": 2.059807300567627, "ppl": 7.844458042052706, "gate_mean": 1.441221684217453e-06, "lr": 2.071741863396475e-05, "steps_per_second": 4.824591153463059 }, { "step": 18440, "loss": 2.111280918121338, "lm_loss": 2.111280918121338, "ppl": 8.258813375903442, "gate_mean": 1.1119991540908813e-06, "lr": 2.0683125640915913e-05, "steps_per_second": 4.824695876820401 }, { "step": 18450, "loss": 2.091003894805908, "lm_loss": 2.091003894805908, "ppl": 8.093035644016428, "gate_mean": 1.3129319995641708e-06, "lr": 2.0649046763211045e-05, "steps_per_second": 4.824782281304246 }, { "step": 18460, "loss": 2.110652446746826, "lm_loss": 2.110652446746826, "ppl": 8.253624578785267, "gate_mean": 1.4116521924734116e-06, "lr": 2.0615182094020528e-05, "steps_per_second": 4.824934104216745 }, { "step": 18470, "loss": 2.1256608963012695, "lm_loss": 2.1256608963012695, "ppl": 8.378432934084474, "gate_mean": 1.2121163308620453e-06, "lr": 2.0581531725929036e-05, "steps_per_second": 4.825087055218929 }, { "step": 18480, "loss": 2.1677663326263428, "lm_loss": 2.1677663326263428, "ppl": 8.738742779892926, "gate_mean": 1.4710240066051483e-06, "lr": 2.05480957509354e-05, "steps_per_second": 4.825256377222756 }, { "step": 18490, "loss": 2.074531078338623, "lm_loss": 2.074531078338623, "ppl": 7.960812586192435, "gate_mean": 8.079223334789276e-07, "lr": 2.05148742604523e-05, "steps_per_second": 4.825418239828389 }, { "step": 18500, "loss": 2.070171356201172, "lm_loss": 2.070171356201172, "ppl": 7.92618120178865, "gate_mean": 1.1909287422895432e-06, "lr": 2.0481867345306023e-05, "steps_per_second": 4.825595248889242 }, { "step": 18510, "loss": 2.207301616668701, "lm_loss": 2.207301616668701, "ppl": 9.091151853050194, "gate_mean": 1.074979081749916e-06, "lr": 2.0449075095736243e-05, "steps_per_second": 4.825635562685224 }, { "step": 18520, "loss": 1.9971498250961304, "lm_loss": 1.9971498250961304, "ppl": 7.368025980668349, "gate_mean": 1.1562369763851166e-06, "lr": 2.041649760139566e-05, "steps_per_second": 4.825796331338638 }, { "step": 18530, "loss": 2.136011838912964, "lm_loss": 2.136011838912964, "ppl": 8.465608006304636, "gate_mean": 1.2121163308620453e-06, "lr": 2.0384134951349954e-05, "steps_per_second": 4.825890996560788 }, { "step": 18540, "loss": 2.1296846866607666, "lm_loss": 2.1296846866607666, "ppl": 8.412213909956202, "gate_mean": 1.2079253792762756e-06, "lr": 2.035198723407733e-05, "steps_per_second": 4.825749388293815 }, { "step": 18550, "loss": 2.191617488861084, "lm_loss": 2.191617488861084, "ppl": 8.949677417153694, "gate_mean": 1.2062955647706985e-06, "lr": 2.0320054537468446e-05, "steps_per_second": 4.825909525847645 }, { "step": 18560, "loss": 2.136335849761963, "lm_loss": 2.136335849761963, "ppl": 8.468351399562641, "gate_mean": 1.1515803635120392e-06, "lr": 2.028833694882609e-05, "steps_per_second": 4.826063907174242 }, { "step": 18570, "loss": 2.089203119277954, "lm_loss": 2.089203119277954, "ppl": 8.078475017626419, "gate_mean": 1.1508818715810776e-06, "lr": 2.0256834554864902e-05, "steps_per_second": 4.826257427427158 }, { "step": 18580, "loss": 2.113471508026123, "lm_loss": 2.113471508026123, "ppl": 8.276924879305842, "gate_mean": 1.3192184269428253e-06, "lr": 2.022554744171125e-05, "steps_per_second": 4.82642774018447 }, { "step": 18590, "loss": 2.079047679901123, "lm_loss": 2.079047679901123, "ppl": 7.996849726197243, "gate_mean": 1.244712620973587e-06, "lr": 2.0194475694902882e-05, "steps_per_second": 4.826553639052382 }, { "step": 18600, "loss": 2.1585326194763184, "lm_loss": 2.1585326194763184, "ppl": 8.658423130751846, "gate_mean": 1.1238735169172287e-06, "lr": 2.0163619399388794e-05, "steps_per_second": 4.826721716069815 }, { "step": 18610, "loss": 2.1160123348236084, "lm_loss": 2.1160123348236084, "ppl": 8.297981851571357, "gate_mean": 1.2968666851520538e-06, "lr": 2.0132978639528908e-05, "steps_per_second": 4.826846806264193 }, { "step": 18620, "loss": 2.0653820037841797, "lm_loss": 2.0653820037841797, "ppl": 7.888310686702961, "gate_mean": 1.3080425560474396e-06, "lr": 2.0102553499093887e-05, "steps_per_second": 4.827042621528105 }, { "step": 18630, "loss": 2.1190922260284424, "lm_loss": 2.1190922260284424, "ppl": 8.323578129536296, "gate_mean": 1.2833625078201294e-06, "lr": 2.0072344061264922e-05, "steps_per_second": 4.8272203524867585 }, { "step": 18640, "loss": 2.0539937019348145, "lm_loss": 2.0539937019348145, "ppl": 7.798985818205645, "gate_mean": 1.0533258318901062e-06, "lr": 2.0042350408633423e-05, "steps_per_second": 4.827357394667258 }, { "step": 18650, "loss": 2.2167091369628906, "lm_loss": 2.2167091369628906, "ppl": 9.177080603101409, "gate_mean": 1.1487863957881927e-06, "lr": 2.001257262320094e-05, "steps_per_second": 4.827516326816275 }, { "step": 18660, "loss": 2.050445079803467, "lm_loss": 2.050445079803467, "ppl": 7.771359211713867, "gate_mean": 1.0603107511997223e-06, "lr": 1.998301078637878e-05, "steps_per_second": 4.827696688960361 }, { "step": 18670, "loss": 2.025820016860962, "lm_loss": 2.025820016860962, "ppl": 7.582326031241604, "gate_mean": 1.2086238712072372e-06, "lr": 1.9953664978987853e-05, "steps_per_second": 4.827845134461884 }, { "step": 18680, "loss": 2.07753849029541, "lm_loss": 2.07753849029541, "ppl": 7.984790066157785, "gate_mean": 1.1671800166368484e-06, "lr": 1.9924535281258504e-05, "steps_per_second": 4.827924779394884 }, { "step": 18690, "loss": 2.0910654067993164, "lm_loss": 2.0910654067993164, "ppl": 8.093533478082842, "gate_mean": 1.146458089351654e-06, "lr": 1.9895621772830212e-05, "steps_per_second": 4.828159626986515 }, { "step": 18700, "loss": 2.1279289722442627, "lm_loss": 2.1279289722442627, "ppl": 8.397457422599185, "gate_mean": 1.3280659914016724e-06, "lr": 1.98669245327514e-05, "steps_per_second": 4.82824591487855 }, { "step": 18710, "loss": 2.0344245433807373, "lm_loss": 2.0344245433807373, "ppl": 7.647849853120635, "gate_mean": 8.940696716308594e-07, "lr": 1.9838443639479227e-05, "steps_per_second": 4.828412092451073 }, { "step": 18720, "loss": 2.0691635608673096, "lm_loss": 2.0691635608673096, "ppl": 7.918197257124972, "gate_mean": 1.2898817658424377e-06, "lr": 1.9810179170879354e-05, "steps_per_second": 4.828565960459944 }, { "step": 18730, "loss": 2.0719261169433594, "lm_loss": 2.0719261169433594, "ppl": 7.940101963628152, "gate_mean": 9.452924132347107e-07, "lr": 1.9782131204225786e-05, "steps_per_second": 4.828757118448217 }, { "step": 18740, "loss": 2.010806083679199, "lm_loss": 2.010806083679199, "ppl": 7.469335831000401, "gate_mean": 1.0575167834758759e-06, "lr": 1.975429981620058e-05, "steps_per_second": 4.828901920283013 }, { "step": 18750, "loss": 2.0090463161468506, "lm_loss": 2.0090463161468506, "ppl": 7.4562030949871225, "gate_mean": 1.1431984603404999e-06, "lr": 1.9726685082893684e-05, "steps_per_second": 4.8290352072873075 }, { "step": 18760, "loss": 2.0552871227264404, "lm_loss": 2.0552871227264404, "ppl": 7.809079715037319, "gate_mean": 1.434236764907837e-06, "lr": 1.9699287079802725e-05, "steps_per_second": 4.8291726873161185 }, { "step": 18770, "loss": 1.9074628353118896, "lm_loss": 1.9074628353118896, "ppl": 6.735976817232769, "gate_mean": 1.0761432349681854e-06, "lr": 1.9672105881832803e-05, "steps_per_second": 4.82934607887315 }, { "step": 18780, "loss": 2.0034892559051514, "lm_loss": 2.0034892559051514, "ppl": 7.414883439453773, "gate_mean": 1.2440141290426254e-06, "lr": 1.9645141563296268e-05, "steps_per_second": 4.82951093105523 }, { "step": 18790, "loss": 1.9529615640640259, "lm_loss": 1.9529615640640259, "ppl": 7.049534343420053, "gate_mean": 1.4263205230236053e-06, "lr": 1.9618394197912543e-05, "steps_per_second": 4.827529124348445 }, { "step": 18800, "loss": 1.9765143394470215, "lm_loss": 1.9765143394470215, "ppl": 7.217541189611036, "gate_mean": 1.155305653810501e-06, "lr": 1.95918638588079e-05, "steps_per_second": 4.827685403597759 }, { "step": 18810, "loss": 2.1986305713653564, "lm_loss": 2.1986305713653564, "ppl": 9.012662846119238, "gate_mean": 1.1725351214408875e-06, "lr": 1.9565550618515284e-05, "steps_per_second": 4.827840011218531 }, { "step": 18820, "loss": 2.0142078399658203, "lm_loss": 2.0142078399658203, "ppl": 7.494787957541911, "gate_mean": 9.019859135150909e-07, "lr": 1.953945454897407e-05, "steps_per_second": 4.827878069589555 }, { "step": 18830, "loss": 2.0789990425109863, "lm_loss": 2.0789990425109863, "ppl": 7.996460789755749, "gate_mean": 1.271720975637436e-06, "lr": 1.9513575721529923e-05, "steps_per_second": 4.8280703581065305 }, { "step": 18840, "loss": 2.0589122772216797, "lm_loss": 2.0589122772216797, "ppl": 7.8374402099990075, "gate_mean": 1.1385418474674225e-06, "lr": 1.9487914206934586e-05, "steps_per_second": 4.828229078200611 }, { "step": 18850, "loss": 2.070409059524536, "lm_loss": 2.070409059524536, "ppl": 7.928065505345639, "gate_mean": 1.1632218956947327e-06, "lr": 1.946247007534563e-05, "steps_per_second": 4.8283725137381275 }, { "step": 18860, "loss": 2.024597644805908, "lm_loss": 2.024597644805908, "ppl": 7.573063270212188, "gate_mean": 1.1329539120197296e-06, "lr": 1.9437243396326372e-05, "steps_per_second": 4.828487331007176 }, { "step": 18870, "loss": 2.058945894241333, "lm_loss": 2.058945894241333, "ppl": 7.837703685809189, "gate_mean": 1.3932585716247559e-06, "lr": 1.9412234238845558e-05, "steps_per_second": 4.82861963739882 }, { "step": 18880, "loss": 1.9789255857467651, "lm_loss": 1.9789255857467651, "ppl": 7.234965457756563, "gate_mean": 1.296401023864746e-06, "lr": 1.9387442671277292e-05, "steps_per_second": 4.828805916209409 }, { "step": 18890, "loss": 2.0302929878234863, "lm_loss": 2.0302929878234863, "ppl": 7.6163175202056745, "gate_mean": 1.3650860637426376e-06, "lr": 1.9362868761400762e-05, "steps_per_second": 4.8287789426296 }, { "step": 18900, "loss": 2.0658249855041504, "lm_loss": 2.0658249855041504, "ppl": 7.891805838225581, "gate_mean": 1.3578683137893677e-06, "lr": 1.93385125764001e-05, "steps_per_second": 4.828948549882283 }, { "step": 18910, "loss": 2.0289835929870605, "lm_loss": 2.0289835929870605, "ppl": 7.606351279668027, "gate_mean": 1.133885234594345e-06, "lr": 1.9314374182864183e-05, "steps_per_second": 4.829091643684661 }, { "step": 18920, "loss": 2.0100462436676025, "lm_loss": 2.0100462436676025, "ppl": 7.463662486466005, "gate_mean": 1.2256205081939697e-06, "lr": 1.9290453646786467e-05, "steps_per_second": 4.829262439456347 }, { "step": 18930, "loss": 2.0699715614318848, "lm_loss": 2.0699715614318848, "ppl": 7.924597750432028, "gate_mean": 1.3415701687335968e-06, "lr": 1.9266751033564788e-05, "steps_per_second": 4.829407198240405 }, { "step": 18940, "loss": 2.1494498252868652, "lm_loss": 2.1494498252868652, "ppl": 8.580136524218581, "gate_mean": 1.2889504432678223e-06, "lr": 1.9243266408001167e-05, "steps_per_second": 4.829591473087241 }, { "step": 18950, "loss": 2.0422277450561523, "lm_loss": 2.0422277450561523, "ppl": 7.707761013342051, "gate_mean": 1.0961666703224182e-06, "lr": 1.9219999834301686e-05, "steps_per_second": 4.829730918677959 }, { "step": 18960, "loss": 2.0133650302886963, "lm_loss": 2.0133650302886963, "ppl": 7.48847393885508, "gate_mean": 1.0114163160324097e-06, "lr": 1.9196951376076268e-05, "steps_per_second": 4.829846564156132 }, { "step": 18970, "loss": 2.00223970413208, "lm_loss": 2.00223970413208, "ppl": 7.4056239450185295, "gate_mean": 1.2665987014770508e-06, "lr": 1.9174121096338513e-05, "steps_per_second": 4.830013108670763 }, { "step": 18980, "loss": 2.1129603385925293, "lm_loss": 2.1129603385925293, "ppl": 8.27269504947535, "gate_mean": 1.293141394853592e-06, "lr": 1.9151509057505533e-05, "steps_per_second": 4.830130738733725 }, { "step": 18990, "loss": 2.0481138229370117, "lm_loss": 2.0481138229370117, "ppl": 7.753263278519713, "gate_mean": 1.2344680726528168e-06, "lr": 1.9129115321397796e-05, "steps_per_second": 4.830308043184057 }, { "step": 19000, "loss": 2.064133882522583, "lm_loss": 2.064133882522583, "ppl": 7.87847126009291, "gate_mean": 1.3369135558605194e-06, "lr": 1.9106939949238895e-05, "steps_per_second": 4.830450783073477 }, { "step": 19010, "loss": 2.1345198154449463, "lm_loss": 2.1345198154449463, "ppl": 8.45298653859263, "gate_mean": 1.1604279279708862e-06, "lr": 1.908498300165547e-05, "steps_per_second": 4.830537794870928 }, { "step": 19020, "loss": 2.0537514686584473, "lm_loss": 2.0537514686584473, "ppl": 7.797096873110475, "gate_mean": 1.2419186532497406e-06, "lr": 1.9063244538676974e-05, "steps_per_second": 4.830691053353953 }, { "step": 19030, "loss": 2.05954647064209, "lm_loss": 2.05954647064209, "ppl": 7.842412239460585, "gate_mean": 1.5674158930778503e-06, "lr": 1.904172461973553e-05, "steps_per_second": 4.830858727174292 }, { "step": 19040, "loss": 2.0067083835601807, "lm_loss": 2.0067083835601807, "ppl": 7.438791356464907, "gate_mean": 9.897630661725998e-07, "lr": 1.9020423303665775e-05, "steps_per_second": 4.831030651779535 }, { "step": 19050, "loss": 2.0458321571350098, "lm_loss": 2.0458321571350098, "ppl": 7.735593089242405, "gate_mean": 1.330394297838211e-06, "lr": 1.8999340648704688e-05, "steps_per_second": 4.831123500347767 }, { "step": 19060, "loss": 2.1045191287994385, "lm_loss": 2.1045191287994385, "ppl": 8.20315739885977, "gate_mean": 1.0491348803043365e-06, "lr": 1.8978476712491463e-05, "steps_per_second": 4.831270229393231 }, { "step": 19070, "loss": 1.9738134145736694, "lm_loss": 1.9738134145736694, "ppl": 7.198073455366148, "gate_mean": 1.3406388461589813e-06, "lr": 1.8957831552067284e-05, "steps_per_second": 4.831417581272906 }, { "step": 19080, "loss": 2.012755870819092, "lm_loss": 2.012755870819092, "ppl": 7.483913653153967, "gate_mean": 1.2200325727462769e-06, "lr": 1.8937405223875236e-05, "steps_per_second": 4.831583175379925 }, { "step": 19090, "loss": 1.9326598644256592, "lm_loss": 1.9326598644256592, "ppl": 6.907859797630365, "gate_mean": 1.0468065738677979e-06, "lr": 1.8917197783760132e-05, "steps_per_second": 4.831726130598311 }, { "step": 19100, "loss": 1.9750250577926636, "lm_loss": 1.9750250577926636, "ppl": 7.20680023806407, "gate_mean": 1.203734427690506e-06, "lr": 1.889720928696833e-05, "steps_per_second": 4.831886513022154 }, { "step": 19110, "loss": 1.9662861824035645, "lm_loss": 1.9662861824035645, "ppl": 7.144095293232543, "gate_mean": 1.2903474271297455e-06, "lr": 1.887743978814762e-05, "steps_per_second": 4.83203514273377 }, { "step": 19120, "loss": 1.9339386224746704, "lm_loss": 1.9339386224746704, "ppl": 6.9166989292989145, "gate_mean": 1.4225952327251434e-06, "lr": 1.8857889341347062e-05, "steps_per_second": 4.832157215842752 }, { "step": 19130, "loss": 2.032921075820923, "lm_loss": 2.032921075820923, "ppl": 7.636360198290074, "gate_mean": 1.2670643627643585e-06, "lr": 1.8838558000016813e-05, "steps_per_second": 4.832186950940464 }, { "step": 19140, "loss": 1.9765739440917969, "lm_loss": 1.9765739440917969, "ppl": 7.217971401410978, "gate_mean": 1.0719522833824158e-06, "lr": 1.8819445817008046e-05, "steps_per_second": 4.832341166357802 }, { "step": 19150, "loss": 1.9137383699417114, "lm_loss": 1.9137383699417114, "ppl": 6.778381590158985, "gate_mean": 9.487848728895187e-07, "lr": 1.880055284457272e-05, "steps_per_second": 4.832484443007272 }, { "step": 19160, "loss": 1.9601880311965942, "lm_loss": 1.9601880311965942, "ppl": 7.100662085628523, "gate_mean": 1.362524926662445e-06, "lr": 1.8781879134363515e-05, "steps_per_second": 4.832640745015291 }, { "step": 19170, "loss": 1.952374815940857, "lm_loss": 1.952374815940857, "ppl": 7.045399255620972, "gate_mean": 1.2461096048355103e-06, "lr": 1.8763424737433633e-05, "steps_per_second": 4.8328095363189885 }, { "step": 19180, "loss": 2.0631604194641113, "lm_loss": 2.0631604194641113, "ppl": 7.870805591092126, "gate_mean": 1.4612451195716858e-06, "lr": 1.8745189704236692e-05, "steps_per_second": 4.832956122182987 }, { "step": 19190, "loss": 1.924660325050354, "lm_loss": 1.924660325050354, "ppl": 6.852820539046843, "gate_mean": 1.253560185432434e-06, "lr": 1.872717408462659e-05, "steps_per_second": 4.833090851466827 }, { "step": 19200, "loss": 2.056654453277588, "lm_loss": 2.056654453277588, "ppl": 7.819764611536044, "gate_mean": 1.2507662177085876e-06, "lr": 1.870937792785731e-05, "steps_per_second": 4.833264194064773 }, { "step": 19210, "loss": 1.947178602218628, "lm_loss": 1.947178602218628, "ppl": 7.008884805932013, "gate_mean": 1.30385160446167e-06, "lr": 1.8691801282582896e-05, "steps_per_second": 4.83337670113736 }, { "step": 19220, "loss": 1.958137035369873, "lm_loss": 1.958137035369873, "ppl": 7.086113581884132, "gate_mean": 1.35018490254879e-06, "lr": 1.8674444196857212e-05, "steps_per_second": 4.833442924538315 }, { "step": 19230, "loss": 1.9939639568328857, "lm_loss": 1.9939639568328857, "ppl": 7.344589772692023, "gate_mean": 1.2191012501716614e-06, "lr": 1.8657306718133877e-05, "steps_per_second": 4.833598851687375 }, { "step": 19240, "loss": 1.9725091457366943, "lm_loss": 1.9725091457366943, "ppl": 7.188691352194457, "gate_mean": 1.4244578778743744e-06, "lr": 1.8640388893266092e-05, "steps_per_second": 4.833773209647523 }, { "step": 19250, "loss": 1.9328399896621704, "lm_loss": 1.9328399896621704, "ppl": 6.909104189580032, "gate_mean": 1.2423843145370483e-06, "lr": 1.8623690768506544e-05, "steps_per_second": 4.833910519520083 }, { "step": 19260, "loss": 2.053938865661621, "lm_loss": 2.053938865661621, "ppl": 7.798558162614313, "gate_mean": 1.375097781419754e-06, "lr": 1.860721238950728e-05, "steps_per_second": 4.834036625936105 }, { "step": 19270, "loss": 2.048518657684326, "lm_loss": 2.048518657684326, "ppl": 7.756402704331385, "gate_mean": 1.428648829460144e-06, "lr": 1.8590953801319548e-05, "steps_per_second": 4.834161845010546 }, { "step": 19280, "loss": 1.9824392795562744, "lm_loss": 1.9824392795562744, "ppl": 7.260431625054159, "gate_mean": 8.216593414545059e-07, "lr": 1.857491504839371e-05, "steps_per_second": 4.8343319820331425 }, { "step": 19290, "loss": 2.0512876510620117, "lm_loss": 2.0512876510620117, "ppl": 7.777909894946652, "gate_mean": 1.6004778444766998e-06, "lr": 1.8559096174579114e-05, "steps_per_second": 4.834433051466408 }, { "step": 19300, "loss": 1.9133797883987427, "lm_loss": 1.9133797883987427, "ppl": 6.775951423362074, "gate_mean": 1.2391246855258942e-06, "lr": 1.8543497223123943e-05, "steps_per_second": 4.834613913028078 }, { "step": 19310, "loss": 1.987577199935913, "lm_loss": 1.987577199935913, "ppl": 7.297831140257807, "gate_mean": 1.4421530067920685e-06, "lr": 1.8528118236675143e-05, "steps_per_second": 4.834769895168055 }, { "step": 19320, "loss": 2.031275510787964, "lm_loss": 2.031275510787964, "ppl": 7.6238044044896105, "gate_mean": 1.3343524187803268e-06, "lr": 1.8512959257278263e-05, "steps_per_second": 4.834894028427415 }, { "step": 19330, "loss": 1.8199464082717896, "lm_loss": 1.8199464082717896, "ppl": 6.171527698185811, "gate_mean": 1.4076940715312958e-06, "lr": 1.8498020326377393e-05, "steps_per_second": 4.835069103096846 }, { "step": 19340, "loss": 1.9623335599899292, "lm_loss": 1.9623335599899292, "ppl": 7.115913115496422, "gate_mean": 1.4975666999816895e-06, "lr": 1.8483301484814994e-05, "steps_per_second": 4.835250920267118 }, { "step": 19350, "loss": 1.972165584564209, "lm_loss": 1.972165584564209, "ppl": 7.186222021172282, "gate_mean": 1.443084329366684e-06, "lr": 1.84688027728318e-05, "steps_per_second": 4.835435838763653 }, { "step": 19360, "loss": 1.978476643562317, "lm_loss": 1.978476643562317, "ppl": 7.231718105550789, "gate_mean": 1.3266690075397491e-06, "lr": 1.8454524230066756e-05, "steps_per_second": 4.835600308300891 }, { "step": 19370, "loss": 1.9710538387298584, "lm_loss": 1.9710538387298584, "ppl": 7.17823720813926, "gate_mean": 1.351814717054367e-06, "lr": 1.8440465895556844e-05, "steps_per_second": 4.835758811012918 }, { "step": 19380, "loss": 1.9571150541305542, "lm_loss": 1.9571150541305542, "ppl": 7.0788754060137276, "gate_mean": 1.4659017324447632e-06, "lr": 1.8426627807737023e-05, "steps_per_second": 4.835921361853665 }, { "step": 19390, "loss": 1.9374709129333496, "lm_loss": 1.9374709129333496, "ppl": 6.941173919872251, "gate_mean": 1.1275988072156906e-06, "lr": 1.841301000444008e-05, "steps_per_second": 4.836056099801524 }, { "step": 19400, "loss": 1.8671156167984009, "lm_loss": 1.8671156167984009, "ppl": 6.469608630445205, "gate_mean": 1.4370307326316833e-06, "lr": 1.8399612522896588e-05, "steps_per_second": 4.836208595440521 }, { "step": 19410, "loss": 2.1415905952453613, "lm_loss": 2.1415905952453613, "ppl": 8.512967551429558, "gate_mean": 1.492910087108612e-06, "lr": 1.8386435399734733e-05, "steps_per_second": 4.836351474954658 }, { "step": 19420, "loss": 1.949983835220337, "lm_loss": 1.949983835220337, "ppl": 7.0285739643215255, "gate_mean": 1.4039687812328339e-06, "lr": 1.837347867098027e-05, "steps_per_second": 4.836449502155258 }, { "step": 19430, "loss": 1.9318119287490845, "lm_loss": 1.9318119287490845, "ppl": 6.90200485951544, "gate_mean": 1.3890676200389862e-06, "lr": 1.836074237205641e-05, "steps_per_second": 4.836626143609665 }, { "step": 19440, "loss": 1.9108672142028809, "lm_loss": 1.9108672142028809, "ppl": 6.758947713150486, "gate_mean": 1.218169927597046e-06, "lr": 1.8348226537783704e-05, "steps_per_second": 4.8367672868894385 }, { "step": 19450, "loss": 1.9806783199310303, "lm_loss": 1.9806783199310303, "ppl": 7.247657548717943, "gate_mean": 1.5189871191978455e-06, "lr": 1.8335931202379966e-05, "steps_per_second": 4.836949409092274 }, { "step": 19460, "loss": 1.856504201889038, "lm_loss": 1.856504201889038, "ppl": 6.401319889833406, "gate_mean": 1.367880031466484e-06, "lr": 1.832385639946016e-05, "steps_per_second": 4.837100648850492 }, { "step": 19470, "loss": 1.945592999458313, "lm_loss": 1.945592999458313, "ppl": 6.997780304827318, "gate_mean": 1.105479896068573e-06, "lr": 1.831200216203635e-05, "steps_per_second": 4.83724421725865 }, { "step": 19480, "loss": 1.8739278316497803, "lm_loss": 1.8739278316497803, "ppl": 6.513831451100069, "gate_mean": 1.3026874512434006e-06, "lr": 1.8300368522517577e-05, "steps_per_second": 4.837412802845227 }, { "step": 19490, "loss": 1.9389958381652832, "lm_loss": 1.9389958381652832, "ppl": 6.951766765718144, "gate_mean": 1.423526555299759e-06, "lr": 1.8288955512709742e-05, "steps_per_second": 4.837538538654314 }, { "step": 19500, "loss": 1.8685015439987183, "lm_loss": 1.8685015439987183, "ppl": 6.478581253286361, "gate_mean": 1.2237578630447388e-06, "lr": 1.82777631638156e-05, "steps_per_second": 4.837695405181783 }, { "step": 19510, "loss": 1.897975206375122, "lm_loss": 1.897975206375122, "ppl": 6.672370581969419, "gate_mean": 1.3927929103374481e-06, "lr": 1.82667915064346e-05, "steps_per_second": 4.837872437040137 }, { "step": 19520, "loss": 1.8786077499389648, "lm_loss": 1.8786077499389648, "ppl": 6.544387093226761, "gate_mean": 1.3438984751701355e-06, "lr": 1.8256040570562825e-05, "steps_per_second": 4.838075247221847 }, { "step": 19530, "loss": 1.876861333847046, "lm_loss": 1.876861333847046, "ppl": 6.532967844577565, "gate_mean": 1.3005919754505157e-06, "lr": 1.824551038559293e-05, "steps_per_second": 4.838209411867582 }, { "step": 19540, "loss": 1.9279354810714722, "lm_loss": 1.9279354810714722, "ppl": 6.875301389548042, "gate_mean": 1.2633390724658966e-06, "lr": 1.8235200980314033e-05, "steps_per_second": 4.838379952552191 }, { "step": 19550, "loss": 1.867335319519043, "lm_loss": 1.867335319519043, "ppl": 6.471030177215932, "gate_mean": 1.4326069504022598e-06, "lr": 1.8225112382911675e-05, "steps_per_second": 4.83853452718512 }, { "step": 19560, "loss": 1.8631118535995483, "lm_loss": 1.8631118535995483, "ppl": 6.443757634665299, "gate_mean": 1.2735836207866669e-06, "lr": 1.8215244620967665e-05, "steps_per_second": 4.8387259545500605 }, { "step": 19570, "loss": 2.031182050704956, "lm_loss": 2.031182050704956, "ppl": 7.623091916392249, "gate_mean": 1.4989636838436127e-06, "lr": 1.820559772146011e-05, "steps_per_second": 4.838857780398041 }, { "step": 19580, "loss": 1.934253215789795, "lm_loss": 1.934253215789795, "ppl": 6.918875218849924, "gate_mean": 1.3122335076332092e-06, "lr": 1.8196171710763264e-05, "steps_per_second": 4.839037529524179 }, { "step": 19590, "loss": 1.9815561771392822, "lm_loss": 1.9815561771392822, "ppl": 7.254022750600434, "gate_mean": 1.216307282447815e-06, "lr": 1.8186966614647467e-05, "steps_per_second": 4.839164675122034 }, { "step": 19600, "loss": 1.8834370374679565, "lm_loss": 1.8834370374679565, "ppl": 6.576068257354021, "gate_mean": 1.2861564755439758e-06, "lr": 1.8177982458279107e-05, "steps_per_second": 4.839315186770016 }, { "step": 19610, "loss": 1.8466370105743408, "lm_loss": 1.8466370105743408, "ppl": 6.338467440335456, "gate_mean": 1.1469237506389618e-06, "lr": 1.8169219266220528e-05, "steps_per_second": 4.839436936625001 }, { "step": 19620, "loss": 1.8939321041107178, "lm_loss": 1.8939321041107178, "ppl": 6.645447967476918, "gate_mean": 1.2479722499847412e-06, "lr": 1.8160677062429958e-05, "steps_per_second": 4.8395873724924705 }, { "step": 19630, "loss": 2.017576217651367, "lm_loss": 2.017576217651367, "ppl": 7.520075799647951, "gate_mean": 1.4919787645339966e-06, "lr": 1.8152355870261456e-05, "steps_per_second": 4.839706706393237 }, { "step": 19640, "loss": 1.7285974025726318, "lm_loss": 1.7285974025726318, "ppl": 5.632747887546073, "gate_mean": 1.2451782822608948e-06, "lr": 1.8144255712464845e-05, "steps_per_second": 4.839901157140144 }, { "step": 19650, "loss": 1.9110509157180786, "lm_loss": 1.9110509157180786, "ppl": 6.760189456138076, "gate_mean": 1.6228295862674713e-06, "lr": 1.813637661118565e-05, "steps_per_second": 4.84001581690951 }, { "step": 19660, "loss": 1.904197335243225, "lm_loss": 1.904197335243225, "ppl": 6.714016359925586, "gate_mean": 1.884065568447113e-06, "lr": 1.8128718587965028e-05, "steps_per_second": 4.840149988587276 }, { "step": 19670, "loss": 1.940110206604004, "lm_loss": 1.940110206604004, "ppl": 6.959517913210147, "gate_mean": 1.2153759598731995e-06, "lr": 1.812128166373973e-05, "steps_per_second": 4.840324882302562 }, { "step": 19680, "loss": 1.9532995223999023, "lm_loss": 1.9532995223999023, "ppl": 7.051917194945047, "gate_mean": 1.2991949915885925e-06, "lr": 1.8114065858842017e-05, "steps_per_second": 4.840450966437514 }, { "step": 19690, "loss": 1.9437466859817505, "lm_loss": 1.9437466859817505, "ppl": 6.9848721286811415, "gate_mean": 1.4505349099636078e-06, "lr": 1.8107071192999614e-05, "steps_per_second": 4.840559837881523 }, { "step": 19700, "loss": 1.852339744567871, "lm_loss": 1.852339744567871, "ppl": 6.374717297480032, "gate_mean": 1.439359039068222e-06, "lr": 1.810029768533569e-05, "steps_per_second": 4.84067167095288 }, { "step": 19710, "loss": 1.8971751928329468, "lm_loss": 1.8971751928329468, "ppl": 6.667034729807015, "gate_mean": 1.439591869711876e-06, "lr": 1.8093745354368748e-05, "steps_per_second": 4.840758369555422 }, { "step": 19720, "loss": 1.9507544040679932, "lm_loss": 1.9507544040679932, "ppl": 7.033992051698458, "gate_mean": 1.8067657947540283e-06, "lr": 1.8087414218012603e-05, "steps_per_second": 4.840868870063023 }, { "step": 19730, "loss": 1.9580270051956177, "lm_loss": 1.9580270051956177, "ppl": 7.085333938464861, "gate_mean": 1.3513490557670593e-06, "lr": 1.808130429357635e-05, "steps_per_second": 4.841032200219068 }, { "step": 19740, "loss": 1.899785041809082, "lm_loss": 1.899785041809082, "ppl": 6.684457408961863, "gate_mean": 1.5450641512870789e-06, "lr": 1.8075415597764287e-05, "steps_per_second": 4.841165071728075 }, { "step": 19750, "loss": 1.9517008066177368, "lm_loss": 1.9517008066177368, "ppl": 7.040652190800265, "gate_mean": 1.5581026673316956e-06, "lr": 1.8069748146675873e-05, "steps_per_second": 4.841243526616382 }, { "step": 19760, "loss": 1.858685851097107, "lm_loss": 1.858685851097107, "ppl": 6.415300569225523, "gate_mean": 1.312699168920517e-06, "lr": 1.8064301955805722e-05, "steps_per_second": 4.841391538900954 }, { "step": 19770, "loss": 1.9075095653533936, "lm_loss": 1.9075095653533936, "ppl": 6.736291597063788, "gate_mean": 1.3425014913082123e-06, "lr": 1.8059077040043498e-05, "steps_per_second": 4.841564808597918 }, { "step": 19780, "loss": 1.9217407703399658, "lm_loss": 1.9217407703399658, "ppl": 6.832842532195337, "gate_mean": 1.5920959413051605e-06, "lr": 1.805407341367393e-05, "steps_per_second": 4.841719269740669 }, { "step": 19790, "loss": 1.7882574796676636, "lm_loss": 1.7882574796676636, "ppl": 5.979024811519712, "gate_mean": 1.3408716768026352e-06, "lr": 1.804929109037673e-05, "steps_per_second": 4.841882424345502 }, { "step": 19800, "loss": 1.8685585260391235, "lm_loss": 1.8685585260391235, "ppl": 6.478950426583128, "gate_mean": 1.5711411833763123e-06, "lr": 1.8044730083226597e-05, "steps_per_second": 4.841973488884193 }, { "step": 19810, "loss": 1.855586051940918, "lm_loss": 1.855586051940918, "ppl": 6.395445215637254, "gate_mean": 1.4365650713443756e-06, "lr": 1.8040390404693152e-05, "steps_per_second": 4.840254515680023 }, { "step": 19820, "loss": 1.8441557884216309, "lm_loss": 1.8441557884216309, "ppl": 6.322759789652316, "gate_mean": 1.1874362826347351e-06, "lr": 1.8036272066640895e-05, "steps_per_second": 4.840281532195755 }, { "step": 19830, "loss": 1.7652573585510254, "lm_loss": 1.7652573585510254, "ppl": 5.843075930154047, "gate_mean": 1.6330741345882416e-06, "lr": 1.8032375080329222e-05, "steps_per_second": 4.840422311029511 }, { "step": 19840, "loss": 1.923425555229187, "lm_loss": 1.923425555229187, "ppl": 6.844364105004559, "gate_mean": 1.5352852642536163e-06, "lr": 1.802869945641234e-05, "steps_per_second": 4.840570416681408 }, { "step": 19850, "loss": 1.9070067405700684, "lm_loss": 1.9070067405700684, "ppl": 6.732905274135973, "gate_mean": 1.5385448932647705e-06, "lr": 1.802524520493926e-05, "steps_per_second": 4.840723682405811 }, { "step": 19860, "loss": 1.825492262840271, "lm_loss": 1.825492262840271, "ppl": 6.205849176260952, "gate_mean": 1.3629905879497528e-06, "lr": 1.8022012335353775e-05, "steps_per_second": 4.839141244365275 }, { "step": 19870, "loss": 1.8351795673370361, "lm_loss": 1.8351795673370361, "ppl": 6.266259259904266, "gate_mean": 1.375097781419754e-06, "lr": 1.8019000856494432e-05, "steps_per_second": 4.839254724280574 }, { "step": 19880, "loss": 1.9708278179168701, "lm_loss": 1.9708278179168701, "ppl": 7.176614960467429, "gate_mean": 1.3168901205062866e-06, "lr": 1.801621077659449e-05, "steps_per_second": 4.839417693350449 }, { "step": 19890, "loss": 1.88783860206604, "lm_loss": 1.88783860206604, "ppl": 6.605077041775329, "gate_mean": 1.317821443080902e-06, "lr": 1.801364210328193e-05, "steps_per_second": 4.839385013332595 }, { "step": 19900, "loss": 1.8993223905563354, "lm_loss": 1.8993223905563354, "ppl": 6.681365551648655, "gate_mean": 1.546461135149002e-06, "lr": 1.8011294843579406e-05, "steps_per_second": 4.839527198633859 }, { "step": 19910, "loss": 1.8426252603530884, "lm_loss": 1.8426252603530884, "ppl": 6.3130900301304145, "gate_mean": 1.2316741049289703e-06, "lr": 1.800916900390423e-05, "steps_per_second": 4.83965280671047 }, { "step": 19920, "loss": 1.826015830039978, "lm_loss": 1.826015830039978, "ppl": 6.209099206066231, "gate_mean": 1.5259720385074615e-06, "lr": 1.800726459006837e-05, "steps_per_second": 4.838031676247101 }, { "step": 19930, "loss": 1.977007508277893, "lm_loss": 1.977007508277893, "ppl": 7.221101533814517, "gate_mean": 1.5050172805786133e-06, "lr": 1.800558160727843e-05, "steps_per_second": 4.838173221846537 }, { "step": 19940, "loss": 1.990035057067871, "lm_loss": 1.990035057067871, "ppl": 7.315790227968626, "gate_mean": 1.332256942987442e-06, "lr": 1.8004120060135603e-05, "steps_per_second": 4.835001555353316 }, { "step": 19950, "loss": 1.878553867340088, "lm_loss": 1.878553867340088, "ppl": 6.544034474142222, "gate_mean": 1.269858330488205e-06, "lr": 1.800287995263571e-05, "steps_per_second": 4.835018943612757 }, { "step": 19960, "loss": 1.9677555561065674, "lm_loss": 1.9677555561065674, "ppl": 7.154600355028855, "gate_mean": 1.4700926840305328e-06, "lr": 1.8001861288169158e-05, "steps_per_second": 4.835094270322575 }, { "step": 19970, "loss": 1.8693695068359375, "lm_loss": 1.8693695068359375, "ppl": 6.48420686210864, "gate_mean": 1.491047441959381e-06, "lr": 1.800106406952095e-05, "steps_per_second": 4.8352172963059195 }, { "step": 19980, "loss": 1.873852014541626, "lm_loss": 1.873852014541626, "ppl": 6.513337609957482, "gate_mean": 1.7373822629451752e-06, "lr": 1.800048829887063e-05, "steps_per_second": 4.835214720786281 }, { "step": 19990, "loss": 1.9126499891281128, "lm_loss": 1.9126499891281128, "ppl": 6.771008142976093, "gate_mean": 1.4961697161197662e-06, "lr": 1.8000133977792338e-05, "steps_per_second": 4.83529742066312 }, { "step": 20000, "loss": 1.9907056093215942, "lm_loss": 1.9907056093215942, "ppl": 7.320697492698593, "gate_mean": 1.426786184310913e-06, "lr": 1.8000001107254788e-05, "steps_per_second": 4.835381646383091 } ], "eval": { "step": 20000, "retrieval_on": { "loss": 1.7580267190933228, "lm_loss": 1.7580267190933228, "ppl": 5.800979131574639, "gate_mean": 1.749867806211114e-06 }, "retrieval_off": { "loss": 1.7650717496871948, "lm_loss": 1.7650717496871948, "ppl": 5.841991504112031, "gate_mean": 0.0 }, "random_retrieval": { "loss": 1.7536429166793823, "lm_loss": 1.7536429166793823, "ppl": 5.775604444698179, "gate_mean": 1.7668644431978464e-06 }, "delta_lm_loss_off_minus_on": 0.00704503059387207, "delta_lm_loss_random_minus_on": -0.00438380241394043 } }