diff --git "a/checkpoint-4500/trainer_state.json" "b/checkpoint-4500/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-4500/trainer_state.json" @@ -0,0 +1,150310 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.2939104177151486, + "eval_steps": 500, + "global_step": 4500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "ce_ib": 65.99971008300781, + "ce_orig": 0.8247115612030029, + "epoch": 0, + "kl_loss": 3969.01025390625, + "loss_ib": 39.69670104980469, + "step": 0 + }, + { + "ce_ib": 61.875301361083984, + "ce_orig": 0.3094598948955536, + "epoch": 0, + "kl_loss": 1816.435302734375, + "loss_ib": 18.17053985595703, + "step": 0 + }, + { + "ce_ib": 65.33805084228516, + "ce_orig": 1.0820972919464111, + "epoch": 0, + "kl_loss": 4051.13818359375, + "loss_ib": 40.517913818359375, + "step": 0 + }, + { + "ce_ib": 65.36083221435547, + "ce_orig": 0.8601827025413513, + "epoch": 0, + "kl_loss": 3727.80126953125, + "loss_ib": 37.2845458984375, + "step": 0 + }, + { + "ce_ib": 64.40461730957031, + "ce_orig": 1.3601988554000854, + "epoch": 0.00028758357897764035, + "kl_loss": 3548.660888671875, + "loss_ib": 35.49304962158203, + "step": 1 + }, + { + "ce_ib": 66.136474609375, + "ce_orig": 0.9451982975006104, + "epoch": 0.00028758357897764035, + "kl_loss": 4003.119140625, + "loss_ib": 40.037803649902344, + "step": 1 + }, + { + "ce_ib": 65.30732727050781, + "ce_orig": 1.3611608743667603, + "epoch": 0.00028758357897764035, + "kl_loss": 3076.302490234375, + "loss_ib": 30.769554138183594, + "step": 1 + }, + { + "ce_ib": 63.613216400146484, + "ce_orig": 0.5681392550468445, + "epoch": 0.00028758357897764035, + "kl_loss": 3922.22265625, + "loss_ib": 39.22858810424805, + "step": 1 + }, + { + "ce_ib": 65.20169067382812, + "ce_orig": 0.9869711399078369, + "epoch": 0.0005751671579552807, + "kl_loss": 4010.333251953125, + "loss_ib": 40.1098518371582, + "step": 2 + }, + { + "ce_ib": 64.6613540649414, + "ce_orig": 1.0124142169952393, + "epoch": 0.0005751671579552807, + "kl_loss": 3416.4658203125, + "loss_ib": 34.17112350463867, + "step": 2 + }, + { + "ce_ib": 64.3924560546875, + "ce_orig": 0.825140118598938, + "epoch": 0.0005751671579552807, + "kl_loss": 3954.5244140625, + "loss_ib": 39.55168533325195, + "step": 2 + }, + { + "ce_ib": 66.31563568115234, + "ce_orig": 1.6114795207977295, + "epoch": 0.0005751671579552807, + "kl_loss": 3360.53955078125, + "loss_ib": 33.61202621459961, + "step": 2 + }, + { + "ce_ib": 63.97846603393555, + "ce_orig": 1.0248628854751587, + "epoch": 0.0008627507369329212, + "kl_loss": 3866.74462890625, + "loss_ib": 38.67384338378906, + "step": 3 + }, + { + "ce_ib": 64.94669342041016, + "ce_orig": 0.7158174514770508, + "epoch": 0.0008627507369329212, + "kl_loss": 3586.52783203125, + "loss_ib": 35.87177276611328, + "step": 3 + }, + { + "ce_ib": 66.78568267822266, + "ce_orig": 1.1728931665420532, + "epoch": 0.0008627507369329212, + "kl_loss": 3981.269775390625, + "loss_ib": 39.81937789916992, + "step": 3 + }, + { + "ce_ib": 66.30445861816406, + "ce_orig": 0.9273799657821655, + "epoch": 0.0008627507369329212, + "kl_loss": 3999.728271484375, + "loss_ib": 40.00391387939453, + "step": 3 + }, + { + "ce_ib": 63.22294616699219, + "ce_orig": 0.6721798181533813, + "epoch": 0.0011503343159105614, + "kl_loss": 3434.2626953125, + "loss_ib": 34.34894943237305, + "step": 4 + }, + { + "ce_ib": 65.629150390625, + "ce_orig": 0.851636528968811, + "epoch": 0.0011503343159105614, + "kl_loss": 3777.80029296875, + "loss_ib": 37.78456497192383, + "step": 4 + }, + { + "ce_ib": 65.70416259765625, + "ce_orig": 0.8407150506973267, + "epoch": 0.0011503343159105614, + "kl_loss": 3663.44775390625, + "loss_ib": 36.641048431396484, + "step": 4 + }, + { + "ce_ib": 65.25149536132812, + "ce_orig": 0.8431562781333923, + "epoch": 0.0011503343159105614, + "kl_loss": 4073.102783203125, + "loss_ib": 40.737552642822266, + "step": 4 + }, + { + "epoch": 0.0014379178948882019, + "grad_norm": Infinity, + "learning_rate": 0.0, + "loss": 37.6069, + "step": 5 + }, + { + "ce_ib": 63.31033706665039, + "ce_orig": 0.5193647146224976, + "epoch": 0.0014379178948882019, + "kl_loss": 3829.75732421875, + "loss_ib": 38.30390167236328, + "step": 5 + }, + { + "ce_ib": 64.82113647460938, + "ce_orig": 0.9080048203468323, + "epoch": 0.0014379178948882019, + "kl_loss": 4034.60400390625, + "loss_ib": 40.35251998901367, + "step": 5 + }, + { + "ce_ib": 67.75746154785156, + "ce_orig": 1.7583141326904297, + "epoch": 0.0014379178948882019, + "kl_loss": 3362.895751953125, + "loss_ib": 33.6357307434082, + "step": 5 + }, + { + "ce_ib": 65.55052947998047, + "ce_orig": 1.0019645690917969, + "epoch": 0.0014379178948882019, + "kl_loss": 3561.7119140625, + "loss_ib": 35.62367248535156, + "step": 5 + }, + { + "ce_ib": 65.5093765258789, + "ce_orig": 1.2022827863693237, + "epoch": 0.0017255014738658423, + "kl_loss": 3854.793212890625, + "loss_ib": 38.554481506347656, + "step": 6 + }, + { + "ce_ib": 63.95633316040039, + "ce_orig": 0.5561846494674683, + "epoch": 0.0017255014738658423, + "kl_loss": 3231.163818359375, + "loss_ib": 32.31803512573242, + "step": 6 + }, + { + "ce_ib": 66.91143798828125, + "ce_orig": 1.007911205291748, + "epoch": 0.0017255014738658423, + "kl_loss": 3694.936767578125, + "loss_ib": 36.956058502197266, + "step": 6 + }, + { + "ce_ib": 65.86326599121094, + "ce_orig": 1.1325939893722534, + "epoch": 0.0017255014738658423, + "kl_loss": 3653.87255859375, + "loss_ib": 36.545310974121094, + "step": 6 + }, + { + "ce_ib": 61.93317794799805, + "ce_orig": 0.3588999807834625, + "epoch": 0.0020130850528434826, + "kl_loss": 2617.40478515625, + "loss_ib": 26.180240631103516, + "step": 7 + }, + { + "ce_ib": 66.48375701904297, + "ce_orig": 0.9551417231559753, + "epoch": 0.0020130850528434826, + "kl_loss": 4009.37158203125, + "loss_ib": 40.100364685058594, + "step": 7 + }, + { + "ce_ib": 64.68529510498047, + "ce_orig": 1.3479645252227783, + "epoch": 0.0020130850528434826, + "kl_loss": 3682.17919921875, + "loss_ib": 36.82826232910156, + "step": 7 + }, + { + "ce_ib": 65.71565246582031, + "ce_orig": 1.4119635820388794, + "epoch": 0.0020130850528434826, + "kl_loss": 3543.86865234375, + "loss_ib": 35.44525909423828, + "step": 7 + }, + { + "ce_ib": 64.79589080810547, + "ce_orig": 1.1264829635620117, + "epoch": 0.002300668631821123, + "kl_loss": 3802.966552734375, + "loss_ib": 38.0361442565918, + "step": 8 + }, + { + "ce_ib": 64.57544708251953, + "ce_orig": 0.8281353712081909, + "epoch": 0.002300668631821123, + "kl_loss": 4064.0234375, + "loss_ib": 40.646690368652344, + "step": 8 + }, + { + "ce_ib": 64.70269012451172, + "ce_orig": 0.8244958519935608, + "epoch": 0.002300668631821123, + "kl_loss": 3695.80029296875, + "loss_ib": 36.964473724365234, + "step": 8 + }, + { + "ce_ib": 66.23006439208984, + "ce_orig": 0.7464499473571777, + "epoch": 0.002300668631821123, + "kl_loss": 3909.509765625, + "loss_ib": 39.10171890258789, + "step": 8 + }, + { + "ce_ib": 66.00849151611328, + "ce_orig": 1.2234286069869995, + "epoch": 0.0025882522107987635, + "kl_loss": 3269.261962890625, + "loss_ib": 32.69921875, + "step": 9 + }, + { + "ce_ib": 61.78355407714844, + "ce_orig": 0.6015470623970032, + "epoch": 0.0025882522107987635, + "kl_loss": 3815.06103515625, + "loss_ib": 38.15678787231445, + "step": 9 + }, + { + "ce_ib": 61.84153747558594, + "ce_orig": 0.6827983856201172, + "epoch": 0.0025882522107987635, + "kl_loss": 3885.240966796875, + "loss_ib": 38.85859298706055, + "step": 9 + }, + { + "ce_ib": 66.06260681152344, + "ce_orig": 1.3108824491500854, + "epoch": 0.0025882522107987635, + "kl_loss": 3949.405029296875, + "loss_ib": 39.50065612792969, + "step": 9 + }, + { + "epoch": 0.0028758357897764038, + "grad_norm": 518.9179077148438, + "learning_rate": 6.369426751592357e-07, + "loss": 37.6918, + "step": 10 + }, + { + "ce_ib": 64.02448272705078, + "ce_orig": 0.762144923210144, + "epoch": 0.0028758357897764038, + "kl_loss": 3554.281005859375, + "loss_ib": 35.54920959472656, + "step": 10 + }, + { + "ce_ib": 68.01136016845703, + "ce_orig": 1.6496213674545288, + "epoch": 0.0028758357897764038, + "kl_loss": 3769.318359375, + "loss_ib": 37.69998550415039, + "step": 10 + }, + { + "ce_ib": 68.6688003540039, + "ce_orig": 1.7943211793899536, + "epoch": 0.0028758357897764038, + "kl_loss": 3359.124267578125, + "loss_ib": 33.59811019897461, + "step": 10 + }, + { + "ce_ib": 66.47740936279297, + "ce_orig": 0.9888946413993835, + "epoch": 0.0028758357897764038, + "kl_loss": 3657.010009765625, + "loss_ib": 36.57674789428711, + "step": 10 + }, + { + "ce_ib": 68.97245788574219, + "ce_orig": 1.853747844696045, + "epoch": 0.003163419368754044, + "kl_loss": 3813.30908203125, + "loss_ib": 38.13998794555664, + "step": 11 + }, + { + "ce_ib": 66.31513214111328, + "ce_orig": 1.50633704662323, + "epoch": 0.003163419368754044, + "kl_loss": 3285.7900390625, + "loss_ib": 32.86452865600586, + "step": 11 + }, + { + "ce_ib": 63.428436279296875, + "ce_orig": 0.9150334000587463, + "epoch": 0.003163419368754044, + "kl_loss": 3867.107421875, + "loss_ib": 38.67741775512695, + "step": 11 + }, + { + "ce_ib": 64.99458312988281, + "ce_orig": 0.8206988573074341, + "epoch": 0.003163419368754044, + "kl_loss": 1840.817626953125, + "loss_ib": 18.414674758911133, + "step": 11 + }, + { + "ce_ib": 64.90898132324219, + "ce_orig": 1.1344208717346191, + "epoch": 0.0034510029477316847, + "kl_loss": 3756.06201171875, + "loss_ib": 37.56711196899414, + "step": 12 + }, + { + "ce_ib": 65.14974975585938, + "ce_orig": 0.8449010848999023, + "epoch": 0.0034510029477316847, + "kl_loss": 3669.84326171875, + "loss_ib": 36.70494842529297, + "step": 12 + }, + { + "ce_ib": 61.738800048828125, + "ce_orig": 0.8912803530693054, + "epoch": 0.0034510029477316847, + "kl_loss": 3856.634765625, + "loss_ib": 38.5725212097168, + "step": 12 + }, + { + "ce_ib": 62.223426818847656, + "ce_orig": 0.6894405484199524, + "epoch": 0.0034510029477316847, + "kl_loss": 3847.307373046875, + "loss_ib": 38.47929763793945, + "step": 12 + }, + { + "ce_ib": 62.734622955322266, + "ce_orig": 0.8210228085517883, + "epoch": 0.003738586526709325, + "kl_loss": 4047.759765625, + "loss_ib": 40.48387145996094, + "step": 13 + }, + { + "ce_ib": 63.48801040649414, + "ce_orig": 0.6192799806594849, + "epoch": 0.003738586526709325, + "kl_loss": 3207.78173828125, + "loss_ib": 32.084163665771484, + "step": 13 + }, + { + "ce_ib": 63.36425018310547, + "ce_orig": 0.8307191729545593, + "epoch": 0.003738586526709325, + "kl_loss": 4154.55859375, + "loss_ib": 41.55192184448242, + "step": 13 + }, + { + "ce_ib": 63.72712326049805, + "ce_orig": 0.6353437304496765, + "epoch": 0.003738586526709325, + "kl_loss": 3861.010009765625, + "loss_ib": 38.61647033691406, + "step": 13 + }, + { + "ce_ib": 63.29721450805664, + "ce_orig": 1.0746912956237793, + "epoch": 0.004026170105686965, + "kl_loss": 3788.18603515625, + "loss_ib": 37.88819122314453, + "step": 14 + }, + { + "ce_ib": 64.876708984375, + "ce_orig": 0.2551986575126648, + "epoch": 0.004026170105686965, + "kl_loss": 3221.554931640625, + "loss_ib": 32.2220344543457, + "step": 14 + }, + { + "ce_ib": 63.66843032836914, + "ce_orig": 0.9092416763305664, + "epoch": 0.004026170105686965, + "kl_loss": 4153.7578125, + "loss_ib": 41.5439453125, + "step": 14 + }, + { + "ce_ib": 62.37735366821289, + "ce_orig": 0.4772454798221588, + "epoch": 0.004026170105686965, + "kl_loss": 3842.333984375, + "loss_ib": 38.4295768737793, + "step": 14 + }, + { + "epoch": 0.004313753684664605, + "grad_norm": 522.8844604492188, + "learning_rate": 1.4331210191082802e-06, + "loss": 37.6292, + "step": 15 + }, + { + "ce_ib": 65.9225845336914, + "ce_orig": 1.2483989000320435, + "epoch": 0.004313753684664605, + "kl_loss": 3718.763427734375, + "loss_ib": 37.1942253112793, + "step": 15 + }, + { + "ce_ib": 62.323360443115234, + "ce_orig": 0.6228176951408386, + "epoch": 0.004313753684664605, + "kl_loss": 3593.427490234375, + "loss_ib": 35.94050598144531, + "step": 15 + }, + { + "ce_ib": 63.397438049316406, + "ce_orig": 1.2859151363372803, + "epoch": 0.004313753684664605, + "kl_loss": 4189.4609375, + "loss_ib": 41.90094757080078, + "step": 15 + }, + { + "ce_ib": 63.35916519165039, + "ce_orig": 0.7082123160362244, + "epoch": 0.004313753684664605, + "kl_loss": 3782.929443359375, + "loss_ib": 37.835628509521484, + "step": 15 + }, + { + "ce_ib": 63.69921112060547, + "ce_orig": 0.7915922999382019, + "epoch": 0.004601337263642246, + "kl_loss": 3295.76953125, + "loss_ib": 32.96406555175781, + "step": 16 + }, + { + "ce_ib": 64.55254364013672, + "ce_orig": 1.4573107957839966, + "epoch": 0.004601337263642246, + "kl_loss": 3830.550537109375, + "loss_ib": 38.31195831298828, + "step": 16 + }, + { + "ce_ib": 63.20068359375, + "ce_orig": 0.9544379115104675, + "epoch": 0.004601337263642246, + "kl_loss": 3457.2080078125, + "loss_ib": 34.578399658203125, + "step": 16 + }, + { + "ce_ib": 67.24832153320312, + "ce_orig": 0.8406115174293518, + "epoch": 0.004601337263642246, + "kl_loss": 4083.5341796875, + "loss_ib": 40.842063903808594, + "step": 16 + }, + { + "ce_ib": 66.60543060302734, + "ce_orig": 1.3419269323349, + "epoch": 0.004888920842619887, + "kl_loss": 3605.4677734375, + "loss_ib": 36.061336517333984, + "step": 17 + }, + { + "ce_ib": 62.604434967041016, + "ce_orig": 0.6389816999435425, + "epoch": 0.004888920842619887, + "kl_loss": 4083.78759765625, + "loss_ib": 40.84413528442383, + "step": 17 + }, + { + "ce_ib": 64.72972106933594, + "ce_orig": 1.176672101020813, + "epoch": 0.004888920842619887, + "kl_loss": 1900.025634765625, + "loss_ib": 19.006729125976562, + "step": 17 + }, + { + "ce_ib": 65.18509674072266, + "ce_orig": 1.2524960041046143, + "epoch": 0.004888920842619887, + "kl_loss": 3951.653076171875, + "loss_ib": 39.523048400878906, + "step": 17 + }, + { + "ce_ib": 64.49193572998047, + "ce_orig": 1.1009666919708252, + "epoch": 0.005176504421597527, + "kl_loss": 3485.996826171875, + "loss_ib": 34.866416931152344, + "step": 18 + }, + { + "ce_ib": 61.90851974487305, + "ce_orig": 0.4805839955806732, + "epoch": 0.005176504421597527, + "kl_loss": 3180.7490234375, + "loss_ib": 31.81368064880371, + "step": 18 + }, + { + "ce_ib": 65.22083282470703, + "ce_orig": 0.80530846118927, + "epoch": 0.005176504421597527, + "kl_loss": 3836.34423828125, + "loss_ib": 38.36996078491211, + "step": 18 + }, + { + "ce_ib": 65.64443969726562, + "ce_orig": 1.2098023891448975, + "epoch": 0.005176504421597527, + "kl_loss": 3608.783203125, + "loss_ib": 36.09439468383789, + "step": 18 + }, + { + "ce_ib": 64.61073303222656, + "ce_orig": 1.073931097984314, + "epoch": 0.005464088000575167, + "kl_loss": 3759.63671875, + "loss_ib": 37.60282516479492, + "step": 19 + }, + { + "ce_ib": 66.11485290527344, + "ce_orig": 1.3443665504455566, + "epoch": 0.005464088000575167, + "kl_loss": 3318.16650390625, + "loss_ib": 33.18827438354492, + "step": 19 + }, + { + "ce_ib": 66.71015167236328, + "ce_orig": 0.8358739018440247, + "epoch": 0.005464088000575167, + "kl_loss": 4202.9921875, + "loss_ib": 42.036590576171875, + "step": 19 + }, + { + "ce_ib": 67.69292449951172, + "ce_orig": 1.7301944494247437, + "epoch": 0.005464088000575167, + "kl_loss": 3555.893310546875, + "loss_ib": 35.56570053100586, + "step": 19 + }, + { + "epoch": 0.0057516715795528075, + "grad_norm": 523.6597900390625, + "learning_rate": 2.229299363057325e-06, + "loss": 38.0114, + "step": 20 + }, + { + "ce_ib": 63.84000015258789, + "ce_orig": 0.7589442729949951, + "epoch": 0.0057516715795528075, + "kl_loss": 4097.67724609375, + "loss_ib": 40.983154296875, + "step": 20 + }, + { + "ce_ib": 62.5760612487793, + "ce_orig": 0.6316663026809692, + "epoch": 0.0057516715795528075, + "kl_loss": 3378.1162109375, + "loss_ib": 33.787418365478516, + "step": 20 + }, + { + "ce_ib": 64.90914154052734, + "ce_orig": 0.8841529488563538, + "epoch": 0.0057516715795528075, + "kl_loss": 4190.35009765625, + "loss_ib": 41.90998840332031, + "step": 20 + }, + { + "ce_ib": 63.49992370605469, + "ce_orig": 1.1218868494033813, + "epoch": 0.0057516715795528075, + "kl_loss": 3893.951171875, + "loss_ib": 38.94586181640625, + "step": 20 + }, + { + "ce_ib": 62.81148147583008, + "ce_orig": 0.7255597710609436, + "epoch": 0.006039255158530448, + "kl_loss": 4084.71142578125, + "loss_ib": 40.8533935546875, + "step": 21 + }, + { + "ce_ib": 62.18263244628906, + "ce_orig": 0.6901943683624268, + "epoch": 0.006039255158530448, + "kl_loss": 3732.931396484375, + "loss_ib": 37.335533142089844, + "step": 21 + }, + { + "ce_ib": 64.20355987548828, + "ce_orig": 1.0124316215515137, + "epoch": 0.006039255158530448, + "kl_loss": 3761.3251953125, + "loss_ib": 37.61967086791992, + "step": 21 + }, + { + "ce_ib": 61.63228988647461, + "ce_orig": 0.5279907584190369, + "epoch": 0.006039255158530448, + "kl_loss": 3691.64111328125, + "loss_ib": 36.92257308959961, + "step": 21 + }, + { + "ce_ib": 62.56758499145508, + "ce_orig": 0.7798469066619873, + "epoch": 0.006326838737508088, + "kl_loss": 3670.70166015625, + "loss_ib": 36.71327209472656, + "step": 22 + }, + { + "ce_ib": 63.57075881958008, + "ce_orig": 0.8365420699119568, + "epoch": 0.006326838737508088, + "kl_loss": 3658.580322265625, + "loss_ib": 36.592159271240234, + "step": 22 + }, + { + "ce_ib": 61.62807083129883, + "ce_orig": 0.5540810823440552, + "epoch": 0.006326838737508088, + "kl_loss": 3681.03662109375, + "loss_ib": 36.8165283203125, + "step": 22 + }, + { + "ce_ib": 64.64292907714844, + "ce_orig": 1.0211745500564575, + "epoch": 0.006326838737508088, + "kl_loss": 3909.70458984375, + "loss_ib": 39.10350799560547, + "step": 22 + }, + { + "ce_ib": 63.90407180786133, + "ce_orig": 1.0038657188415527, + "epoch": 0.006614422316485728, + "kl_loss": 3516.5341796875, + "loss_ib": 35.17173385620117, + "step": 23 + }, + { + "ce_ib": 64.3149642944336, + "ce_orig": 1.43198823928833, + "epoch": 0.006614422316485728, + "kl_loss": 3473.03955078125, + "loss_ib": 34.73682403564453, + "step": 23 + }, + { + "ce_ib": 65.7113037109375, + "ce_orig": 1.3454030752182007, + "epoch": 0.006614422316485728, + "kl_loss": 3446.73095703125, + "loss_ib": 34.473880767822266, + "step": 23 + }, + { + "ce_ib": 64.66767120361328, + "ce_orig": 1.1042531728744507, + "epoch": 0.006614422316485728, + "kl_loss": 3285.74560546875, + "loss_ib": 32.863922119140625, + "step": 23 + }, + { + "ce_ib": 62.60567092895508, + "ce_orig": 0.8803403973579407, + "epoch": 0.006902005895463369, + "kl_loss": 4045.211669921875, + "loss_ib": 40.4583740234375, + "step": 24 + }, + { + "ce_ib": 62.218082427978516, + "ce_orig": 0.5355222225189209, + "epoch": 0.006902005895463369, + "kl_loss": 2061.3427734375, + "loss_ib": 20.619647979736328, + "step": 24 + }, + { + "ce_ib": 62.739349365234375, + "ce_orig": 0.7724053263664246, + "epoch": 0.006902005895463369, + "kl_loss": 3955.72021484375, + "loss_ib": 39.5634765625, + "step": 24 + }, + { + "ce_ib": 64.84529876708984, + "ce_orig": 0.9221442341804504, + "epoch": 0.006902005895463369, + "kl_loss": 4072.906982421875, + "loss_ib": 40.73555374145508, + "step": 24 + }, + { + "epoch": 0.00718958947444101, + "grad_norm": 500.6357116699219, + "learning_rate": 3.0254777070063695e-06, + "loss": 37.5291, + "step": 25 + }, + { + "ce_ib": 64.90079498291016, + "ce_orig": 1.3044438362121582, + "epoch": 0.00718958947444101, + "kl_loss": 3491.833984375, + "loss_ib": 34.924827575683594, + "step": 25 + }, + { + "ce_ib": 60.71520233154297, + "ce_orig": 0.4326849579811096, + "epoch": 0.00718958947444101, + "kl_loss": 3640.559326171875, + "loss_ib": 36.41166305541992, + "step": 25 + }, + { + "ce_ib": 64.01187896728516, + "ce_orig": 1.0306893587112427, + "epoch": 0.00718958947444101, + "kl_loss": 3960.2236328125, + "loss_ib": 39.60863494873047, + "step": 25 + }, + { + "ce_ib": 64.18307495117188, + "ce_orig": 0.9839837551116943, + "epoch": 0.00718958947444101, + "kl_loss": 3976.920654296875, + "loss_ib": 39.7756233215332, + "step": 25 + }, + { + "ce_ib": 64.72349548339844, + "ce_orig": 1.4616881608963013, + "epoch": 0.00747717305341865, + "kl_loss": 3761.33935546875, + "loss_ib": 37.61986541748047, + "step": 26 + }, + { + "ce_ib": 64.97052764892578, + "ce_orig": 0.7502491474151611, + "epoch": 0.00747717305341865, + "kl_loss": 3893.336669921875, + "loss_ib": 38.93986129760742, + "step": 26 + }, + { + "ce_ib": 65.82380676269531, + "ce_orig": 1.34544038772583, + "epoch": 0.00747717305341865, + "kl_loss": 3513.64404296875, + "loss_ib": 35.14302062988281, + "step": 26 + }, + { + "ce_ib": 65.6162109375, + "ce_orig": 1.0590736865997314, + "epoch": 0.00747717305341865, + "kl_loss": 3592.0341796875, + "loss_ib": 35.926902770996094, + "step": 26 + }, + { + "ce_ib": 60.9577522277832, + "ce_orig": 0.7530563473701477, + "epoch": 0.00776475663239629, + "kl_loss": 3584.533203125, + "loss_ib": 35.8514289855957, + "step": 27 + }, + { + "ce_ib": 62.96725845336914, + "ce_orig": 0.7575862407684326, + "epoch": 0.00776475663239629, + "kl_loss": 4020.78271484375, + "loss_ib": 40.2141227722168, + "step": 27 + }, + { + "ce_ib": 63.01191329956055, + "ce_orig": 0.8695152997970581, + "epoch": 0.00776475663239629, + "kl_loss": 3717.216064453125, + "loss_ib": 37.178462982177734, + "step": 27 + }, + { + "ce_ib": 61.84136199951172, + "ce_orig": 0.5044524669647217, + "epoch": 0.00776475663239629, + "kl_loss": 3559.84326171875, + "loss_ib": 35.6046142578125, + "step": 27 + }, + { + "ce_ib": 62.60879898071289, + "ce_orig": 0.7411525249481201, + "epoch": 0.00805234021137393, + "kl_loss": 3949.431640625, + "loss_ib": 39.50057601928711, + "step": 28 + }, + { + "ce_ib": 63.889503479003906, + "ce_orig": 0.7678407430648804, + "epoch": 0.00805234021137393, + "kl_loss": 3446.158447265625, + "loss_ib": 34.46797180175781, + "step": 28 + }, + { + "ce_ib": 64.12403869628906, + "ce_orig": 1.3409479856491089, + "epoch": 0.00805234021137393, + "kl_loss": 3590.536376953125, + "loss_ib": 35.91177749633789, + "step": 28 + }, + { + "ce_ib": 67.9863510131836, + "ce_orig": 1.4907015562057495, + "epoch": 0.00805234021137393, + "kl_loss": 4102.9951171875, + "loss_ib": 41.036746978759766, + "step": 28 + }, + { + "ce_ib": 63.33028793334961, + "ce_orig": 0.7299618721008301, + "epoch": 0.008339923790351571, + "kl_loss": 3933.06591796875, + "loss_ib": 39.33699035644531, + "step": 29 + }, + { + "ce_ib": 65.11859893798828, + "ce_orig": 1.07808256149292, + "epoch": 0.008339923790351571, + "kl_loss": 3267.3017578125, + "loss_ib": 32.679527282714844, + "step": 29 + }, + { + "ce_ib": 61.750606536865234, + "ce_orig": 1.5331333875656128, + "epoch": 0.008339923790351571, + "kl_loss": 3922.66748046875, + "loss_ib": 39.23284912109375, + "step": 29 + }, + { + "ce_ib": 63.025821685791016, + "ce_orig": 0.9248456954956055, + "epoch": 0.008339923790351571, + "kl_loss": 3479.8994140625, + "loss_ib": 34.8052978515625, + "step": 29 + }, + { + "epoch": 0.00862750736932921, + "grad_norm": 531.7625732421875, + "learning_rate": 3.821656050955414e-06, + "loss": 37.9283, + "step": 30 + }, + { + "ce_ib": 63.19704055786133, + "ce_orig": 0.813347339630127, + "epoch": 0.00862750736932921, + "kl_loss": 3683.62451171875, + "loss_ib": 36.84256362915039, + "step": 30 + }, + { + "ce_ib": 64.78852844238281, + "ce_orig": 1.25947105884552, + "epoch": 0.00862750736932921, + "kl_loss": 3672.6279296875, + "loss_ib": 36.732757568359375, + "step": 30 + }, + { + "ce_ib": 65.48835754394531, + "ce_orig": 1.5832844972610474, + "epoch": 0.00862750736932921, + "kl_loss": 3535.34765625, + "loss_ib": 35.360023498535156, + "step": 30 + }, + { + "ce_ib": 67.59868621826172, + "ce_orig": 1.2523659467697144, + "epoch": 0.00862750736932921, + "kl_loss": 3592.551513671875, + "loss_ib": 35.932273864746094, + "step": 30 + }, + { + "ce_ib": 65.58070373535156, + "ce_orig": 1.3204323053359985, + "epoch": 0.008915090948306852, + "kl_loss": 3685.355224609375, + "loss_ib": 36.860107421875, + "step": 31 + }, + { + "ce_ib": 65.22953033447266, + "ce_orig": 1.7766001224517822, + "epoch": 0.008915090948306852, + "kl_loss": 3778.63427734375, + "loss_ib": 37.79286575317383, + "step": 31 + }, + { + "ce_ib": 63.3836555480957, + "ce_orig": 1.204979419708252, + "epoch": 0.008915090948306852, + "kl_loss": 3684.710693359375, + "loss_ib": 36.85344314575195, + "step": 31 + }, + { + "ce_ib": 62.92445755004883, + "ce_orig": 0.7449155449867249, + "epoch": 0.008915090948306852, + "kl_loss": 3957.828857421875, + "loss_ib": 39.58457946777344, + "step": 31 + }, + { + "ce_ib": 63.693058013916016, + "ce_orig": 0.963614821434021, + "epoch": 0.009202674527284491, + "kl_loss": 3871.184814453125, + "loss_ib": 38.71821594238281, + "step": 32 + }, + { + "ce_ib": 62.92957305908203, + "ce_orig": 0.699960470199585, + "epoch": 0.009202674527284491, + "kl_loss": 3611.31103515625, + "loss_ib": 36.11940383911133, + "step": 32 + }, + { + "ce_ib": 61.641639709472656, + "ce_orig": 0.5809459686279297, + "epoch": 0.009202674527284491, + "kl_loss": 3945.29052734375, + "loss_ib": 39.459068298339844, + "step": 32 + }, + { + "ce_ib": 61.629180908203125, + "ce_orig": 0.4764775037765503, + "epoch": 0.009202674527284491, + "kl_loss": 3752.23681640625, + "loss_ib": 37.52853012084961, + "step": 32 + }, + { + "ce_ib": 65.62612915039062, + "ce_orig": 1.0748307704925537, + "epoch": 0.009490258106262132, + "kl_loss": 3731.794921875, + "loss_ib": 37.32451248168945, + "step": 33 + }, + { + "ce_ib": 61.40937423706055, + "ce_orig": 1.1108014583587646, + "epoch": 0.009490258106262132, + "kl_loss": 3651.0771484375, + "loss_ib": 36.516910552978516, + "step": 33 + }, + { + "ce_ib": 59.90447235107422, + "ce_orig": 0.32240188121795654, + "epoch": 0.009490258106262132, + "kl_loss": 3386.24365234375, + "loss_ib": 33.86842727661133, + "step": 33 + }, + { + "ce_ib": 62.98430633544922, + "ce_orig": 1.2453433275222778, + "epoch": 0.009490258106262132, + "kl_loss": 3598.07275390625, + "loss_ib": 35.98702621459961, + "step": 33 + }, + { + "ce_ib": 63.15117263793945, + "ce_orig": 0.7339913249015808, + "epoch": 0.009777841685239774, + "kl_loss": 2625.630859375, + "loss_ib": 26.262624740600586, + "step": 34 + }, + { + "ce_ib": 61.62659454345703, + "ce_orig": 1.0284781455993652, + "epoch": 0.009777841685239774, + "kl_loss": 3689.47705078125, + "loss_ib": 36.90093231201172, + "step": 34 + }, + { + "ce_ib": 62.00101852416992, + "ce_orig": 0.7457196116447449, + "epoch": 0.009777841685239774, + "kl_loss": 3870.28466796875, + "loss_ib": 38.70904541015625, + "step": 34 + }, + { + "ce_ib": 60.758033752441406, + "ce_orig": 0.4431888163089752, + "epoch": 0.009777841685239774, + "kl_loss": 2776.04248046875, + "loss_ib": 27.76650047302246, + "step": 34 + }, + { + "epoch": 0.010065425264217413, + "grad_norm": 514.6280517578125, + "learning_rate": 4.6178343949044585e-06, + "loss": 36.6953, + "step": 35 + }, + { + "ce_ib": 61.88233947753906, + "ce_orig": 0.742504894733429, + "epoch": 0.010065425264217413, + "kl_loss": 3676.15625, + "loss_ib": 36.76774978637695, + "step": 35 + }, + { + "ce_ib": 65.21971130371094, + "ce_orig": 1.4801323413848877, + "epoch": 0.010065425264217413, + "kl_loss": 3239.951171875, + "loss_ib": 32.40603256225586, + "step": 35 + }, + { + "ce_ib": 61.43843460083008, + "ce_orig": 0.8911157846450806, + "epoch": 0.010065425264217413, + "kl_loss": 3907.4306640625, + "loss_ib": 39.08045196533203, + "step": 35 + }, + { + "ce_ib": 60.8431510925293, + "ce_orig": 0.6813702583312988, + "epoch": 0.010065425264217413, + "kl_loss": 3710.92919921875, + "loss_ib": 37.11537551879883, + "step": 35 + }, + { + "ce_ib": 62.07615280151367, + "ce_orig": 0.9490892887115479, + "epoch": 0.010353008843195054, + "kl_loss": 3473.35009765625, + "loss_ib": 34.739707946777344, + "step": 36 + }, + { + "ce_ib": 63.5875358581543, + "ce_orig": 1.1264761686325073, + "epoch": 0.010353008843195054, + "kl_loss": 3691.65673828125, + "loss_ib": 36.92292785644531, + "step": 36 + }, + { + "ce_ib": 61.989559173583984, + "ce_orig": 0.7521851062774658, + "epoch": 0.010353008843195054, + "kl_loss": 3681.02392578125, + "loss_ib": 36.816436767578125, + "step": 36 + }, + { + "ce_ib": 66.18658447265625, + "ce_orig": 1.4330860376358032, + "epoch": 0.010353008843195054, + "kl_loss": 3735.92529296875, + "loss_ib": 37.36587142944336, + "step": 36 + }, + { + "ce_ib": 61.510005950927734, + "ce_orig": 0.6856619119644165, + "epoch": 0.010640592422172693, + "kl_loss": 3193.4365234375, + "loss_ib": 31.940513610839844, + "step": 37 + }, + { + "ce_ib": 65.34510040283203, + "ce_orig": 1.6867130994796753, + "epoch": 0.010640592422172693, + "kl_loss": 3602.250244140625, + "loss_ib": 36.02903747558594, + "step": 37 + }, + { + "ce_ib": 63.198848724365234, + "ce_orig": 1.003406286239624, + "epoch": 0.010640592422172693, + "kl_loss": 3923.912109375, + "loss_ib": 39.24544143676758, + "step": 37 + }, + { + "ce_ib": 63.12528610229492, + "ce_orig": 1.0664888620376587, + "epoch": 0.010640592422172693, + "kl_loss": 3344.81298828125, + "loss_ib": 33.45444107055664, + "step": 37 + }, + { + "ce_ib": 61.980186462402344, + "ce_orig": 0.6754278540611267, + "epoch": 0.010928176001150335, + "kl_loss": 3765.134033203125, + "loss_ib": 37.65753936767578, + "step": 38 + }, + { + "ce_ib": 64.89917755126953, + "ce_orig": 1.3892360925674438, + "epoch": 0.010928176001150335, + "kl_loss": 3431.3056640625, + "loss_ib": 34.31954574584961, + "step": 38 + }, + { + "ce_ib": 63.881900787353516, + "ce_orig": 0.9926798343658447, + "epoch": 0.010928176001150335, + "kl_loss": 3779.434814453125, + "loss_ib": 37.80073547363281, + "step": 38 + }, + { + "ce_ib": 61.26738357543945, + "ce_orig": 0.5064423084259033, + "epoch": 0.010928176001150335, + "kl_loss": 3226.73974609375, + "loss_ib": 32.27352523803711, + "step": 38 + }, + { + "ce_ib": 61.48027420043945, + "ce_orig": 0.6444438099861145, + "epoch": 0.011215759580127974, + "kl_loss": 3812.853271484375, + "loss_ib": 38.13467788696289, + "step": 39 + }, + { + "ce_ib": 65.14291381835938, + "ce_orig": 1.157513976097107, + "epoch": 0.011215759580127974, + "kl_loss": 3730.44921875, + "loss_ib": 37.311004638671875, + "step": 39 + }, + { + "ce_ib": 65.04698944091797, + "ce_orig": 1.4464482069015503, + "epoch": 0.011215759580127974, + "kl_loss": 3499.406005859375, + "loss_ib": 35.00056457519531, + "step": 39 + }, + { + "ce_ib": 61.870948791503906, + "ce_orig": 0.7682390213012695, + "epoch": 0.011215759580127974, + "kl_loss": 3903.7294921875, + "loss_ib": 39.043479919433594, + "step": 39 + }, + { + "epoch": 0.011503343159105615, + "grad_norm": 530.5418701171875, + "learning_rate": 5.414012738853504e-06, + "loss": 36.8859, + "step": 40 + }, + { + "ce_ib": 66.74645233154297, + "ce_orig": 1.7853225469589233, + "epoch": 0.011503343159105615, + "kl_loss": 3331.3544921875, + "loss_ib": 33.32021713256836, + "step": 40 + }, + { + "ce_ib": 62.318843841552734, + "ce_orig": 0.7977343797683716, + "epoch": 0.011503343159105615, + "kl_loss": 3502.8125, + "loss_ib": 35.034358978271484, + "step": 40 + }, + { + "ce_ib": 63.681610107421875, + "ce_orig": 1.3478271961212158, + "epoch": 0.011503343159105615, + "kl_loss": 3501.63134765625, + "loss_ib": 35.02267837524414, + "step": 40 + }, + { + "ce_ib": 62.9583740234375, + "ce_orig": 1.0469328165054321, + "epoch": 0.011503343159105615, + "kl_loss": 3721.776611328125, + "loss_ib": 37.22406005859375, + "step": 40 + }, + { + "ce_ib": 61.38816833496094, + "ce_orig": 0.7066026926040649, + "epoch": 0.011790926738083256, + "kl_loss": 3805.5771484375, + "loss_ib": 38.06190872192383, + "step": 41 + }, + { + "ce_ib": 61.03729248046875, + "ce_orig": 0.7563859820365906, + "epoch": 0.011790926738083256, + "kl_loss": 3660.0556640625, + "loss_ib": 36.606658935546875, + "step": 41 + }, + { + "ce_ib": 63.25761795043945, + "ce_orig": 0.8998059034347534, + "epoch": 0.011790926738083256, + "kl_loss": 3492.48876953125, + "loss_ib": 34.93121337890625, + "step": 41 + }, + { + "ce_ib": 62.87449264526367, + "ce_orig": 0.9313430190086365, + "epoch": 0.011790926738083256, + "kl_loss": 3241.703125, + "loss_ib": 32.423316955566406, + "step": 41 + }, + { + "ce_ib": 64.49095916748047, + "ce_orig": 1.71138596534729, + "epoch": 0.012078510317060896, + "kl_loss": 3664.8984375, + "loss_ib": 36.655433654785156, + "step": 42 + }, + { + "ce_ib": 61.09724807739258, + "ce_orig": 0.7595378756523132, + "epoch": 0.012078510317060896, + "kl_loss": 3622.4169921875, + "loss_ib": 36.230281829833984, + "step": 42 + }, + { + "ce_ib": 60.89824676513672, + "ce_orig": 0.9210549592971802, + "epoch": 0.012078510317060896, + "kl_loss": 3331.73388671875, + "loss_ib": 33.32342529296875, + "step": 42 + }, + { + "ce_ib": 62.15813446044922, + "ce_orig": 0.6488374471664429, + "epoch": 0.012078510317060896, + "kl_loss": 3685.21826171875, + "loss_ib": 36.8583984375, + "step": 42 + }, + { + "ce_ib": 62.060028076171875, + "ce_orig": 0.8468491435050964, + "epoch": 0.012366093896038537, + "kl_loss": 3631.91064453125, + "loss_ib": 36.325313568115234, + "step": 43 + }, + { + "ce_ib": 64.66139221191406, + "ce_orig": 1.069366693496704, + "epoch": 0.012366093896038537, + "kl_loss": 3382.873046875, + "loss_ib": 33.83519744873047, + "step": 43 + }, + { + "ce_ib": 59.3759651184082, + "ce_orig": 0.6913302540779114, + "epoch": 0.012366093896038537, + "kl_loss": 3886.6796875, + "loss_ib": 38.87273406982422, + "step": 43 + }, + { + "ce_ib": 63.80333709716797, + "ce_orig": 1.2420175075531006, + "epoch": 0.012366093896038537, + "kl_loss": 3315.640625, + "loss_ib": 33.162784576416016, + "step": 43 + }, + { + "ce_ib": 61.69696044921875, + "ce_orig": 0.9505507349967957, + "epoch": 0.012653677475016176, + "kl_loss": 3327.21044921875, + "loss_ib": 33.27827453613281, + "step": 44 + }, + { + "ce_ib": 60.231571197509766, + "ce_orig": 1.0403425693511963, + "epoch": 0.012653677475016176, + "kl_loss": 3099.7314453125, + "loss_ib": 31.00333595275879, + "step": 44 + }, + { + "ce_ib": 60.59477996826172, + "ce_orig": 0.9732199907302856, + "epoch": 0.012653677475016176, + "kl_loss": 3785.45654296875, + "loss_ib": 37.86062240600586, + "step": 44 + }, + { + "ce_ib": 60.5795783996582, + "ce_orig": 0.9181808233261108, + "epoch": 0.012653677475016176, + "kl_loss": 3470.03466796875, + "loss_ib": 34.70640182495117, + "step": 44 + }, + { + "epoch": 0.012941261053993817, + "grad_norm": 476.64434814453125, + "learning_rate": 6.210191082802548e-06, + "loss": 35.3798, + "step": 45 + }, + { + "ce_ib": 61.55487060546875, + "ce_orig": 1.3028727769851685, + "epoch": 0.012941261053993817, + "kl_loss": 3647.279541015625, + "loss_ib": 36.47895050048828, + "step": 45 + }, + { + "ce_ib": 60.31386947631836, + "ce_orig": 0.6792593598365784, + "epoch": 0.012941261053993817, + "kl_loss": 3385.7255859375, + "loss_ib": 33.863285064697266, + "step": 45 + }, + { + "ce_ib": 61.40266036987305, + "ce_orig": 0.9505258798599243, + "epoch": 0.012941261053993817, + "kl_loss": 3687.73779296875, + "loss_ib": 36.88351821899414, + "step": 45 + }, + { + "ce_ib": 62.62788009643555, + "ce_orig": 0.7607139945030212, + "epoch": 0.012941261053993817, + "kl_loss": 3367.47412109375, + "loss_ib": 33.68100357055664, + "step": 45 + }, + { + "ce_ib": 60.83255386352539, + "ce_orig": 1.0399366617202759, + "epoch": 0.013228844632971457, + "kl_loss": 3394.5810546875, + "loss_ib": 33.9518928527832, + "step": 46 + }, + { + "ce_ib": 61.447139739990234, + "ce_orig": 0.8051762580871582, + "epoch": 0.013228844632971457, + "kl_loss": 3173.857421875, + "loss_ib": 31.744718551635742, + "step": 46 + }, + { + "ce_ib": 61.36168670654297, + "ce_orig": 1.1020334959030151, + "epoch": 0.013228844632971457, + "kl_loss": 3414.359130859375, + "loss_ib": 34.14972686767578, + "step": 46 + }, + { + "ce_ib": 60.41455841064453, + "ce_orig": 0.706063449382782, + "epoch": 0.013228844632971457, + "kl_loss": 3687.080078125, + "loss_ib": 36.8768424987793, + "step": 46 + }, + { + "ce_ib": 60.49672317504883, + "ce_orig": 0.8405731320381165, + "epoch": 0.013516428211949098, + "kl_loss": 3867.85693359375, + "loss_ib": 38.68461990356445, + "step": 47 + }, + { + "ce_ib": 63.74329376220703, + "ce_orig": 0.8466535806655884, + "epoch": 0.013516428211949098, + "kl_loss": 3611.955078125, + "loss_ib": 36.12592315673828, + "step": 47 + }, + { + "ce_ib": 61.18013381958008, + "ce_orig": 0.9858855605125427, + "epoch": 0.013516428211949098, + "kl_loss": 3885.5107421875, + "loss_ib": 38.86122512817383, + "step": 47 + }, + { + "ce_ib": 65.59097290039062, + "ce_orig": 1.7553099393844604, + "epoch": 0.013516428211949098, + "kl_loss": 3490.478515625, + "loss_ib": 34.91134262084961, + "step": 47 + }, + { + "ce_ib": 61.62842559814453, + "ce_orig": 0.838309109210968, + "epoch": 0.013804011790926739, + "kl_loss": 3487.0517578125, + "loss_ib": 34.876678466796875, + "step": 48 + }, + { + "ce_ib": 62.534908294677734, + "ce_orig": 1.5411649942398071, + "epoch": 0.013804011790926739, + "kl_loss": 3555.408203125, + "loss_ib": 35.560333251953125, + "step": 48 + }, + { + "ce_ib": 61.39102554321289, + "ce_orig": 0.5941852927207947, + "epoch": 0.013804011790926739, + "kl_loss": 3320.55078125, + "loss_ib": 33.211647033691406, + "step": 48 + }, + { + "ce_ib": 62.17927551269531, + "ce_orig": 1.398880958557129, + "epoch": 0.013804011790926739, + "kl_loss": 2767.198974609375, + "loss_ib": 27.678207397460938, + "step": 48 + }, + { + "ce_ib": 61.87635803222656, + "ce_orig": 1.1401021480560303, + "epoch": 0.014091595369904378, + "kl_loss": 3533.70703125, + "loss_ib": 35.343257904052734, + "step": 49 + }, + { + "ce_ib": 59.036781311035156, + "ce_orig": 0.7694017291069031, + "epoch": 0.014091595369904378, + "kl_loss": 3748.14453125, + "loss_ib": 37.48734664916992, + "step": 49 + }, + { + "ce_ib": 60.804603576660156, + "ce_orig": 1.3716927766799927, + "epoch": 0.014091595369904378, + "kl_loss": 3407.55224609375, + "loss_ib": 34.08160400390625, + "step": 49 + }, + { + "ce_ib": 60.6622200012207, + "ce_orig": 0.6214744448661804, + "epoch": 0.014091595369904378, + "kl_loss": 3320.9990234375, + "loss_ib": 33.21605682373047, + "step": 49 + }, + { + "epoch": 0.01437917894888202, + "grad_norm": 510.91943359375, + "learning_rate": 7.006369426751593e-06, + "loss": 35.2805, + "step": 50 + }, + { + "ce_ib": 58.74787139892578, + "ce_orig": 0.8769705891609192, + "epoch": 0.01437917894888202, + "kl_loss": 3581.33544921875, + "loss_ib": 35.81922912597656, + "step": 50 + }, + { + "ce_ib": 57.18757247924805, + "ce_orig": 0.699286699295044, + "epoch": 0.01437917894888202, + "kl_loss": 3327.9033203125, + "loss_ib": 33.284751892089844, + "step": 50 + }, + { + "ce_ib": 60.32874298095703, + "ce_orig": 0.722357451915741, + "epoch": 0.01437917894888202, + "kl_loss": 3368.121337890625, + "loss_ib": 33.6872444152832, + "step": 50 + }, + { + "ce_ib": 63.51592254638672, + "ce_orig": 1.0477139949798584, + "epoch": 0.01437917894888202, + "kl_loss": 3434.626708984375, + "loss_ib": 34.35261917114258, + "step": 50 + }, + { + "ce_ib": 58.714073181152344, + "ce_orig": 0.6123059391975403, + "epoch": 0.014666762527859659, + "kl_loss": 3747.56982421875, + "loss_ib": 37.4815673828125, + "step": 51 + }, + { + "ce_ib": 61.6097526550293, + "ce_orig": 1.2942873239517212, + "epoch": 0.014666762527859659, + "kl_loss": 3235.971923828125, + "loss_ib": 32.36587905883789, + "step": 51 + }, + { + "ce_ib": 58.607505798339844, + "ce_orig": 0.6153243184089661, + "epoch": 0.014666762527859659, + "kl_loss": 3752.208984375, + "loss_ib": 37.527950286865234, + "step": 51 + }, + { + "ce_ib": 60.741729736328125, + "ce_orig": 1.3440642356872559, + "epoch": 0.014666762527859659, + "kl_loss": 3612.9765625, + "loss_ib": 36.13583755493164, + "step": 51 + }, + { + "ce_ib": 60.87339782714844, + "ce_orig": 0.8129587769508362, + "epoch": 0.0149543461068373, + "kl_loss": 3125.83642578125, + "loss_ib": 31.264450073242188, + "step": 52 + }, + { + "ce_ib": 58.96531295776367, + "ce_orig": 0.9034717082977295, + "epoch": 0.0149543461068373, + "kl_loss": 3257.168212890625, + "loss_ib": 32.57757568359375, + "step": 52 + }, + { + "ce_ib": 60.43812942504883, + "ce_orig": 0.8096925020217896, + "epoch": 0.0149543461068373, + "kl_loss": 3082.474853515625, + "loss_ib": 30.830793380737305, + "step": 52 + }, + { + "ce_ib": 61.23511505126953, + "ce_orig": 1.078736424446106, + "epoch": 0.0149543461068373, + "kl_loss": 3490.707275390625, + "loss_ib": 34.9131965637207, + "step": 52 + }, + { + "ce_ib": 60.389286041259766, + "ce_orig": 1.2757381200790405, + "epoch": 0.015241929685814939, + "kl_loss": 3353.767578125, + "loss_ib": 33.54371643066406, + "step": 53 + }, + { + "ce_ib": 57.41773223876953, + "ce_orig": 0.552437424659729, + "epoch": 0.015241929685814939, + "kl_loss": 2717.25927734375, + "loss_ib": 27.178335189819336, + "step": 53 + }, + { + "ce_ib": 60.19536590576172, + "ce_orig": 0.7778604626655579, + "epoch": 0.015241929685814939, + "kl_loss": 3204.783203125, + "loss_ib": 32.05385208129883, + "step": 53 + }, + { + "ce_ib": 61.59130859375, + "ce_orig": 1.6185189485549927, + "epoch": 0.015241929685814939, + "kl_loss": 3477.7470703125, + "loss_ib": 34.78363037109375, + "step": 53 + }, + { + "ce_ib": 63.59449005126953, + "ce_orig": 1.4870353937149048, + "epoch": 0.01552951326479258, + "kl_loss": 3315.589599609375, + "loss_ib": 33.162254333496094, + "step": 54 + }, + { + "ce_ib": 59.9765625, + "ce_orig": 1.258398175239563, + "epoch": 0.01552951326479258, + "kl_loss": 3485.625, + "loss_ib": 34.862247467041016, + "step": 54 + }, + { + "ce_ib": 58.29402160644531, + "ce_orig": 0.9382989406585693, + "epoch": 0.01552951326479258, + "kl_loss": 3394.168701171875, + "loss_ib": 33.94751739501953, + "step": 54 + }, + { + "ce_ib": 58.97043228149414, + "ce_orig": 0.6211685538291931, + "epoch": 0.01552951326479258, + "kl_loss": 3460.42333984375, + "loss_ib": 34.610130310058594, + "step": 54 + }, + { + "epoch": 0.01581709684377022, + "grad_norm": 469.67340087890625, + "learning_rate": 7.802547770700637e-06, + "loss": 34.1513, + "step": 55 + }, + { + "ce_ib": 61.76213073730469, + "ce_orig": 0.8650195002555847, + "epoch": 0.01581709684377022, + "kl_loss": 3174.44775390625, + "loss_ib": 31.750654220581055, + "step": 55 + }, + { + "ce_ib": 55.92485809326172, + "ce_orig": 0.08637077361345291, + "epoch": 0.01581709684377022, + "kl_loss": 468.1816101074219, + "loss_ib": 4.687408447265625, + "step": 55 + }, + { + "ce_ib": 59.152156829833984, + "ce_orig": 0.9888356328010559, + "epoch": 0.01581709684377022, + "kl_loss": 3260.65771484375, + "loss_ib": 32.612491607666016, + "step": 55 + }, + { + "ce_ib": 60.753597259521484, + "ce_orig": 0.837508499622345, + "epoch": 0.01581709684377022, + "kl_loss": 3544.2236328125, + "loss_ib": 35.44831085205078, + "step": 55 + }, + { + "ce_ib": 56.82107925415039, + "ce_orig": 0.8040409684181213, + "epoch": 0.01610468042274786, + "kl_loss": 3288.75830078125, + "loss_ib": 32.89326477050781, + "step": 56 + }, + { + "ce_ib": 58.140541076660156, + "ce_orig": 0.666872501373291, + "epoch": 0.01610468042274786, + "kl_loss": 3718.909912109375, + "loss_ib": 37.19491195678711, + "step": 56 + }, + { + "ce_ib": 59.2910041809082, + "ce_orig": 0.9221104979515076, + "epoch": 0.01610468042274786, + "kl_loss": 3336.389404296875, + "loss_ib": 33.36982345581055, + "step": 56 + }, + { + "ce_ib": 61.521507263183594, + "ce_orig": 1.3518534898757935, + "epoch": 0.01610468042274786, + "kl_loss": 3092.3486328125, + "loss_ib": 30.929637908935547, + "step": 56 + }, + { + "ce_ib": 57.95586013793945, + "ce_orig": 0.8081279397010803, + "epoch": 0.016392264001725502, + "kl_loss": 3491.455810546875, + "loss_ib": 34.920352935791016, + "step": 57 + }, + { + "ce_ib": 58.949745178222656, + "ce_orig": 0.5393152236938477, + "epoch": 0.016392264001725502, + "kl_loss": 3235.858154296875, + "loss_ib": 32.36447525024414, + "step": 57 + }, + { + "ce_ib": 60.55893325805664, + "ce_orig": 0.7738803029060364, + "epoch": 0.016392264001725502, + "kl_loss": 2924.27783203125, + "loss_ib": 29.24883270263672, + "step": 57 + }, + { + "ce_ib": 61.02143478393555, + "ce_orig": 1.209029197692871, + "epoch": 0.016392264001725502, + "kl_loss": 3489.797607421875, + "loss_ib": 34.904075622558594, + "step": 57 + }, + { + "ce_ib": 60.51230239868164, + "ce_orig": 1.1623197793960571, + "epoch": 0.016679847580703143, + "kl_loss": 3124.18798828125, + "loss_ib": 31.2479305267334, + "step": 58 + }, + { + "ce_ib": 60.99052429199219, + "ce_orig": 1.069433569908142, + "epoch": 0.016679847580703143, + "kl_loss": 3169.855712890625, + "loss_ib": 31.704654693603516, + "step": 58 + }, + { + "ce_ib": 58.70066452026367, + "ce_orig": 1.0279523134231567, + "epoch": 0.016679847580703143, + "kl_loss": 3483.8056640625, + "loss_ib": 34.84392547607422, + "step": 58 + }, + { + "ce_ib": 59.15578842163086, + "ce_orig": 1.0242782831192017, + "epoch": 0.016679847580703143, + "kl_loss": 3071.107421875, + "loss_ib": 30.716989517211914, + "step": 58 + }, + { + "ce_ib": 61.244327545166016, + "ce_orig": 1.7360433340072632, + "epoch": 0.01696743115968078, + "kl_loss": 2984.33984375, + "loss_ib": 29.849523544311523, + "step": 59 + }, + { + "ce_ib": 60.01206588745117, + "ce_orig": 1.2617676258087158, + "epoch": 0.01696743115968078, + "kl_loss": 3349.006591796875, + "loss_ib": 33.49606704711914, + "step": 59 + }, + { + "ce_ib": 56.53895568847656, + "ce_orig": 0.5960240960121155, + "epoch": 0.01696743115968078, + "kl_loss": 3249.41357421875, + "loss_ib": 32.49979019165039, + "step": 59 + }, + { + "ce_ib": 59.71278762817383, + "ce_orig": 0.8869993090629578, + "epoch": 0.01696743115968078, + "kl_loss": 3232.68505859375, + "loss_ib": 32.332820892333984, + "step": 59 + }, + { + "epoch": 0.01725501473865842, + "grad_norm": 486.4937438964844, + "learning_rate": 8.598726114649681e-06, + "loss": 33.3977, + "step": 60 + }, + { + "ce_ib": 58.857635498046875, + "ce_orig": 0.7359964847564697, + "epoch": 0.01725501473865842, + "kl_loss": 3203.08740234375, + "loss_ib": 32.03675842285156, + "step": 60 + }, + { + "ce_ib": 59.75052261352539, + "ce_orig": 1.0773297548294067, + "epoch": 0.01725501473865842, + "kl_loss": 3154.876953125, + "loss_ib": 31.55474281311035, + "step": 60 + }, + { + "ce_ib": 57.949344635009766, + "ce_orig": 0.8577583432197571, + "epoch": 0.01725501473865842, + "kl_loss": 3265.8623046875, + "loss_ib": 32.6644172668457, + "step": 60 + }, + { + "ce_ib": 59.86404037475586, + "ce_orig": 1.3723738193511963, + "epoch": 0.01725501473865842, + "kl_loss": 2843.7080078125, + "loss_ib": 28.443065643310547, + "step": 60 + }, + { + "ce_ib": 56.99949645996094, + "ce_orig": 0.5773953199386597, + "epoch": 0.017542598317636063, + "kl_loss": 3343.131103515625, + "loss_ib": 33.43701171875, + "step": 61 + }, + { + "ce_ib": 60.948787689208984, + "ce_orig": 1.6039363145828247, + "epoch": 0.017542598317636063, + "kl_loss": 3304.048583984375, + "loss_ib": 33.04657745361328, + "step": 61 + }, + { + "ce_ib": 58.39208221435547, + "ce_orig": 0.984937310218811, + "epoch": 0.017542598317636063, + "kl_loss": 3363.2353515625, + "loss_ib": 33.63819122314453, + "step": 61 + }, + { + "ce_ib": 59.0418586730957, + "ce_orig": 0.783109188079834, + "epoch": 0.017542598317636063, + "kl_loss": 3394.3154296875, + "loss_ib": 33.949058532714844, + "step": 61 + }, + { + "ce_ib": 62.10023880004883, + "ce_orig": 1.8530871868133545, + "epoch": 0.017830181896613704, + "kl_loss": 3175.18505859375, + "loss_ib": 31.758060455322266, + "step": 62 + }, + { + "ce_ib": 56.835514068603516, + "ce_orig": 0.7488876581192017, + "epoch": 0.017830181896613704, + "kl_loss": 2187.16943359375, + "loss_ib": 21.877376556396484, + "step": 62 + }, + { + "ce_ib": 60.125152587890625, + "ce_orig": 1.4274426698684692, + "epoch": 0.017830181896613704, + "kl_loss": 3222.76611328125, + "loss_ib": 32.233673095703125, + "step": 62 + }, + { + "ce_ib": 55.86289978027344, + "ce_orig": 0.7154338955879211, + "epoch": 0.017830181896613704, + "kl_loss": 3421.8095703125, + "loss_ib": 34.22368240356445, + "step": 62 + }, + { + "ce_ib": 59.97455596923828, + "ce_orig": 1.2463781833648682, + "epoch": 0.018117765475591345, + "kl_loss": 2927.573486328125, + "loss_ib": 29.2817325592041, + "step": 63 + }, + { + "ce_ib": 58.33196258544922, + "ce_orig": 0.5972486734390259, + "epoch": 0.018117765475591345, + "kl_loss": 3288.707763671875, + "loss_ib": 32.89291000366211, + "step": 63 + }, + { + "ce_ib": 60.974822998046875, + "ce_orig": 1.4676904678344727, + "epoch": 0.018117765475591345, + "kl_loss": 2948.23388671875, + "loss_ib": 29.48843765258789, + "step": 63 + }, + { + "ce_ib": 57.45879364013672, + "ce_orig": 0.7307599782943726, + "epoch": 0.018117765475591345, + "kl_loss": 2998.276123046875, + "loss_ib": 29.988508224487305, + "step": 63 + }, + { + "ce_ib": 57.19486618041992, + "ce_orig": 0.7821041345596313, + "epoch": 0.018405349054568983, + "kl_loss": 3225.396728515625, + "loss_ib": 32.25968551635742, + "step": 64 + }, + { + "ce_ib": 57.73394775390625, + "ce_orig": 0.6754387617111206, + "epoch": 0.018405349054568983, + "kl_loss": 3085.6650390625, + "loss_ib": 30.862422943115234, + "step": 64 + }, + { + "ce_ib": 55.39207077026367, + "ce_orig": 0.5158528685569763, + "epoch": 0.018405349054568983, + "kl_loss": 3063.913818359375, + "loss_ib": 30.644676208496094, + "step": 64 + }, + { + "ce_ib": 57.978118896484375, + "ce_orig": 0.9754984974861145, + "epoch": 0.018405349054568983, + "kl_loss": 3214.08251953125, + "loss_ib": 32.14662170410156, + "step": 64 + }, + { + "epoch": 0.018692932633546624, + "grad_norm": 475.1830139160156, + "learning_rate": 9.394904458598726e-06, + "loss": 32.2213, + "step": 65 + }, + { + "ce_ib": 57.8340950012207, + "ce_orig": 0.7973042726516724, + "epoch": 0.018692932633546624, + "kl_loss": 3016.814208984375, + "loss_ib": 30.173925399780273, + "step": 65 + }, + { + "ce_ib": 56.048274993896484, + "ce_orig": 0.7601141929626465, + "epoch": 0.018692932633546624, + "kl_loss": 3293.418212890625, + "loss_ib": 32.93978500366211, + "step": 65 + }, + { + "ce_ib": 55.681297302246094, + "ce_orig": 0.9179377555847168, + "epoch": 0.018692932633546624, + "kl_loss": 3359.49755859375, + "loss_ib": 33.60054397583008, + "step": 65 + }, + { + "ce_ib": 58.35697937011719, + "ce_orig": 1.3632832765579224, + "epoch": 0.018692932633546624, + "kl_loss": 3379.584228515625, + "loss_ib": 33.80167770385742, + "step": 65 + }, + { + "ce_ib": 60.778778076171875, + "ce_orig": 1.0116430521011353, + "epoch": 0.018980516212524265, + "kl_loss": 3229.637451171875, + "loss_ib": 32.302452087402344, + "step": 66 + }, + { + "ce_ib": 56.4047737121582, + "ce_orig": 1.053054928779602, + "epoch": 0.018980516212524265, + "kl_loss": 3030.66552734375, + "loss_ib": 30.312294006347656, + "step": 66 + }, + { + "ce_ib": 57.94568634033203, + "ce_orig": 0.946856677532196, + "epoch": 0.018980516212524265, + "kl_loss": 3040.84765625, + "loss_ib": 30.414268493652344, + "step": 66 + }, + { + "ce_ib": 56.41437911987305, + "ce_orig": 0.7833185195922852, + "epoch": 0.018980516212524265, + "kl_loss": 3050.6826171875, + "loss_ib": 30.512468338012695, + "step": 66 + }, + { + "ce_ib": 59.401546478271484, + "ce_orig": 1.328580617904663, + "epoch": 0.019268099791501906, + "kl_loss": 2916.177978515625, + "loss_ib": 29.1677188873291, + "step": 67 + }, + { + "ce_ib": 57.880680084228516, + "ce_orig": 0.7882740497589111, + "epoch": 0.019268099791501906, + "kl_loss": 3422.02783203125, + "loss_ib": 34.22606658935547, + "step": 67 + }, + { + "ce_ib": 59.44600296020508, + "ce_orig": 0.7883732914924622, + "epoch": 0.019268099791501906, + "kl_loss": 3327.45703125, + "loss_ib": 33.280513763427734, + "step": 67 + }, + { + "ce_ib": 59.17395782470703, + "ce_orig": 1.088062047958374, + "epoch": 0.019268099791501906, + "kl_loss": 1573.11181640625, + "loss_ib": 15.737035751342773, + "step": 67 + }, + { + "ce_ib": 56.406436920166016, + "ce_orig": 0.592961847782135, + "epoch": 0.019555683370479547, + "kl_loss": 3268.48779296875, + "loss_ib": 32.69051742553711, + "step": 68 + }, + { + "ce_ib": 56.2723274230957, + "ce_orig": 0.5646532773971558, + "epoch": 0.019555683370479547, + "kl_loss": 3152.529296875, + "loss_ib": 31.530920028686523, + "step": 68 + }, + { + "ce_ib": 55.98052215576172, + "ce_orig": 0.5173469185829163, + "epoch": 0.019555683370479547, + "kl_loss": 2985.48681640625, + "loss_ib": 29.86046600341797, + "step": 68 + }, + { + "ce_ib": 55.83637237548828, + "ce_orig": 0.4308261573314667, + "epoch": 0.019555683370479547, + "kl_loss": 2723.078369140625, + "loss_ib": 27.236366271972656, + "step": 68 + }, + { + "ce_ib": 59.32357406616211, + "ce_orig": 1.509739637374878, + "epoch": 0.019843266949457185, + "kl_loss": 2924.435546875, + "loss_ib": 29.250288009643555, + "step": 69 + }, + { + "ce_ib": 59.09616470336914, + "ce_orig": 2.3940815925598145, + "epoch": 0.019843266949457185, + "kl_loss": 2945.3994140625, + "loss_ib": 29.459903717041016, + "step": 69 + }, + { + "ce_ib": 55.772178649902344, + "ce_orig": 1.207844614982605, + "epoch": 0.019843266949457185, + "kl_loss": 3110.099365234375, + "loss_ib": 31.106569290161133, + "step": 69 + }, + { + "ce_ib": 59.807865142822266, + "ce_orig": 1.4729925394058228, + "epoch": 0.019843266949457185, + "kl_loss": 2798.36474609375, + "loss_ib": 27.989627838134766, + "step": 69 + }, + { + "epoch": 0.020130850528434826, + "grad_norm": 451.3029479980469, + "learning_rate": 1.0191082802547772e-05, + "loss": 31.1615, + "step": 70 + }, + { + "ce_ib": 57.67570495605469, + "ce_orig": 0.8390839695930481, + "epoch": 0.020130850528434826, + "kl_loss": 3100.12451171875, + "loss_ib": 31.00701141357422, + "step": 70 + }, + { + "ce_ib": 56.19687271118164, + "ce_orig": 0.4426974654197693, + "epoch": 0.020130850528434826, + "kl_loss": 2944.1025390625, + "loss_ib": 29.446645736694336, + "step": 70 + }, + { + "ce_ib": 57.55405044555664, + "ce_orig": 1.0506970882415771, + "epoch": 0.020130850528434826, + "kl_loss": 2788.48095703125, + "loss_ib": 27.89056396484375, + "step": 70 + }, + { + "ce_ib": 57.250022888183594, + "ce_orig": 0.8879465460777283, + "epoch": 0.020130850528434826, + "kl_loss": 3008.960693359375, + "loss_ib": 30.0953311920166, + "step": 70 + }, + { + "ce_ib": 56.017364501953125, + "ce_orig": 0.8262448310852051, + "epoch": 0.020418434107412467, + "kl_loss": 3022.1181640625, + "loss_ib": 30.226781845092773, + "step": 71 + }, + { + "ce_ib": 57.61652755737305, + "ce_orig": 0.628873348236084, + "epoch": 0.020418434107412467, + "kl_loss": 2915.463134765625, + "loss_ib": 29.16039276123047, + "step": 71 + }, + { + "ce_ib": 56.01335525512695, + "ce_orig": 0.9199650883674622, + "epoch": 0.020418434107412467, + "kl_loss": 2920.94091796875, + "loss_ib": 29.215009689331055, + "step": 71 + }, + { + "ce_ib": 55.02300262451172, + "ce_orig": 0.636806845664978, + "epoch": 0.020418434107412467, + "kl_loss": 2990.93994140625, + "loss_ib": 29.914901733398438, + "step": 71 + }, + { + "ce_ib": 58.92692947387695, + "ce_orig": 1.6168797016143799, + "epoch": 0.020706017686390108, + "kl_loss": 2569.22216796875, + "loss_ib": 25.6981143951416, + "step": 72 + }, + { + "ce_ib": 54.50556182861328, + "ce_orig": 0.7561197280883789, + "epoch": 0.020706017686390108, + "kl_loss": 3147.120361328125, + "loss_ib": 31.476654052734375, + "step": 72 + }, + { + "ce_ib": 54.70288848876953, + "ce_orig": 0.7983661890029907, + "epoch": 0.020706017686390108, + "kl_loss": 2991.32568359375, + "loss_ib": 29.91872787475586, + "step": 72 + }, + { + "ce_ib": 55.79477310180664, + "ce_orig": 1.1335042715072632, + "epoch": 0.020706017686390108, + "kl_loss": 2651.6884765625, + "loss_ib": 26.522462844848633, + "step": 72 + }, + { + "ce_ib": 55.5018424987793, + "ce_orig": 0.6628856062889099, + "epoch": 0.02099360126536775, + "kl_loss": 2991.87646484375, + "loss_ib": 29.924312591552734, + "step": 73 + }, + { + "ce_ib": 54.66994094848633, + "ce_orig": 0.9854854941368103, + "epoch": 0.02099360126536775, + "kl_loss": 2946.215087890625, + "loss_ib": 29.46761703491211, + "step": 73 + }, + { + "ce_ib": 58.076210021972656, + "ce_orig": 1.3572182655334473, + "epoch": 0.02099360126536775, + "kl_loss": 3050.9150390625, + "loss_ib": 30.514957427978516, + "step": 73 + }, + { + "ce_ib": 55.1069221496582, + "ce_orig": 0.8574339747428894, + "epoch": 0.02099360126536775, + "kl_loss": 3119.42724609375, + "loss_ib": 31.19978141784668, + "step": 73 + }, + { + "ce_ib": 54.855194091796875, + "ce_orig": 0.8055992126464844, + "epoch": 0.021281184844345387, + "kl_loss": 3110.638671875, + "loss_ib": 31.11187171936035, + "step": 74 + }, + { + "ce_ib": 52.890716552734375, + "ce_orig": 0.522036075592041, + "epoch": 0.021281184844345387, + "kl_loss": 2826.3251953125, + "loss_ib": 28.268539428710938, + "step": 74 + }, + { + "ce_ib": 54.693538665771484, + "ce_orig": 0.729824960231781, + "epoch": 0.021281184844345387, + "kl_loss": 3347.115966796875, + "loss_ib": 33.476627349853516, + "step": 74 + }, + { + "ce_ib": 55.074676513671875, + "ce_orig": 0.9839091897010803, + "epoch": 0.021281184844345387, + "kl_loss": 2667.3046875, + "loss_ib": 26.67855453491211, + "step": 74 + }, + { + "epoch": 0.021568768423323028, + "grad_norm": 460.4914245605469, + "learning_rate": 1.0987261146496815e-05, + "loss": 30.2003, + "step": 75 + }, + { + "ce_ib": 55.64740753173828, + "ce_orig": 0.6976457238197327, + "epoch": 0.021568768423323028, + "kl_loss": 2705.426025390625, + "loss_ib": 27.059823989868164, + "step": 75 + }, + { + "ce_ib": 56.64052963256836, + "ce_orig": 1.448681116104126, + "epoch": 0.021568768423323028, + "kl_loss": 2853.453369140625, + "loss_ib": 28.540197372436523, + "step": 75 + }, + { + "ce_ib": 55.26200485229492, + "ce_orig": 0.7230492234230042, + "epoch": 0.021568768423323028, + "kl_loss": 3141.7001953125, + "loss_ib": 31.422529220581055, + "step": 75 + }, + { + "ce_ib": 54.74195098876953, + "ce_orig": 0.611904501914978, + "epoch": 0.021568768423323028, + "kl_loss": 3029.38916015625, + "loss_ib": 30.29936408996582, + "step": 75 + }, + { + "ce_ib": 54.31443786621094, + "ce_orig": 1.1379636526107788, + "epoch": 0.02185635200230067, + "kl_loss": 3160.546875, + "loss_ib": 31.61090087890625, + "step": 76 + }, + { + "ce_ib": 53.96803665161133, + "ce_orig": 0.7516291737556458, + "epoch": 0.02185635200230067, + "kl_loss": 3008.7666015625, + "loss_ib": 30.093063354492188, + "step": 76 + }, + { + "ce_ib": 54.48766326904297, + "ce_orig": 1.210386872291565, + "epoch": 0.02185635200230067, + "kl_loss": 2852.64794921875, + "loss_ib": 28.53192901611328, + "step": 76 + }, + { + "ce_ib": 54.85871124267578, + "ce_orig": 1.4512004852294922, + "epoch": 0.02185635200230067, + "kl_loss": 3227.62255859375, + "loss_ib": 32.28171157836914, + "step": 76 + }, + { + "ce_ib": 54.70118713378906, + "ce_orig": 1.2653746604919434, + "epoch": 0.02214393558127831, + "kl_loss": 2925.536865234375, + "loss_ib": 29.260839462280273, + "step": 77 + }, + { + "ce_ib": 54.4128532409668, + "ce_orig": 0.7473430633544922, + "epoch": 0.02214393558127831, + "kl_loss": 2963.836669921875, + "loss_ib": 29.643808364868164, + "step": 77 + }, + { + "ce_ib": 56.661434173583984, + "ce_orig": 1.4954595565795898, + "epoch": 0.02214393558127831, + "kl_loss": 2720.67724609375, + "loss_ib": 27.212438583374023, + "step": 77 + }, + { + "ce_ib": 54.728179931640625, + "ce_orig": 0.827836275100708, + "epoch": 0.02214393558127831, + "kl_loss": 2690.066650390625, + "loss_ib": 26.906139373779297, + "step": 77 + }, + { + "ce_ib": 53.935279846191406, + "ce_orig": 0.7729896903038025, + "epoch": 0.022431519160255948, + "kl_loss": 3118.779296875, + "loss_ib": 31.193185806274414, + "step": 78 + }, + { + "ce_ib": 53.90089797973633, + "ce_orig": 1.05341637134552, + "epoch": 0.022431519160255948, + "kl_loss": 2867.553466796875, + "loss_ib": 28.680925369262695, + "step": 78 + }, + { + "ce_ib": 57.06119155883789, + "ce_orig": 1.1991482973098755, + "epoch": 0.022431519160255948, + "kl_loss": 2615.50390625, + "loss_ib": 26.160743713378906, + "step": 78 + }, + { + "ce_ib": 55.66145324707031, + "ce_orig": 1.2269896268844604, + "epoch": 0.022431519160255948, + "kl_loss": 2716.58154296875, + "loss_ib": 27.1713809967041, + "step": 78 + }, + { + "ce_ib": 56.3227424621582, + "ce_orig": 1.6050187349319458, + "epoch": 0.02271910273923359, + "kl_loss": 2608.66943359375, + "loss_ib": 26.09232521057129, + "step": 79 + }, + { + "ce_ib": 55.02303695678711, + "ce_orig": 1.3928401470184326, + "epoch": 0.02271910273923359, + "kl_loss": 2699.94677734375, + "loss_ib": 27.00497055053711, + "step": 79 + }, + { + "ce_ib": 53.92034912109375, + "ce_orig": 0.8707427978515625, + "epoch": 0.02271910273923359, + "kl_loss": 2800.906494140625, + "loss_ib": 28.01445770263672, + "step": 79 + }, + { + "ce_ib": 55.906776428222656, + "ce_orig": 1.184428095817566, + "epoch": 0.02271910273923359, + "kl_loss": 2736.019775390625, + "loss_ib": 27.365787506103516, + "step": 79 + }, + { + "epoch": 0.02300668631821123, + "grad_norm": 423.0335998535156, + "learning_rate": 1.178343949044586e-05, + "loss": 28.5021, + "step": 80 + }, + { + "ce_ib": 54.60192108154297, + "ce_orig": 1.0805023908615112, + "epoch": 0.02300668631821123, + "kl_loss": 2762.3125, + "loss_ib": 27.628583908081055, + "step": 80 + }, + { + "ce_ib": 54.54481506347656, + "ce_orig": 0.9205932021141052, + "epoch": 0.02300668631821123, + "kl_loss": 2967.17919921875, + "loss_ib": 29.67724609375, + "step": 80 + }, + { + "ce_ib": 59.29964065551758, + "ce_orig": 1.6950358152389526, + "epoch": 0.02300668631821123, + "kl_loss": 2693.053955078125, + "loss_ib": 26.93647003173828, + "step": 80 + }, + { + "ce_ib": 52.242156982421875, + "ce_orig": 0.9504690766334534, + "epoch": 0.02300668631821123, + "kl_loss": 2905.36669921875, + "loss_ib": 29.05889129638672, + "step": 80 + }, + { + "ce_ib": 55.506492614746094, + "ce_orig": 1.2976820468902588, + "epoch": 0.02329426989718887, + "kl_loss": 2749.579345703125, + "loss_ib": 27.501344680786133, + "step": 81 + }, + { + "ce_ib": 55.9608039855957, + "ce_orig": 1.092388391494751, + "epoch": 0.02329426989718887, + "kl_loss": 2680.87744140625, + "loss_ib": 26.814369201660156, + "step": 81 + }, + { + "ce_ib": 55.16973114013672, + "ce_orig": 1.686485767364502, + "epoch": 0.02329426989718887, + "kl_loss": 2655.03369140625, + "loss_ib": 26.55585479736328, + "step": 81 + }, + { + "ce_ib": 54.82407760620117, + "ce_orig": 0.9071881175041199, + "epoch": 0.02329426989718887, + "kl_loss": 2584.10107421875, + "loss_ib": 25.846492767333984, + "step": 81 + }, + { + "ce_ib": 52.7196044921875, + "ce_orig": 0.6356145739555359, + "epoch": 0.023581853476166512, + "kl_loss": 2775.622802734375, + "loss_ib": 27.761497497558594, + "step": 82 + }, + { + "ce_ib": 52.03306198120117, + "ce_orig": 0.935957133769989, + "epoch": 0.023581853476166512, + "kl_loss": 2809.512451171875, + "loss_ib": 28.100326538085938, + "step": 82 + }, + { + "ce_ib": 53.90233612060547, + "ce_orig": 1.145911455154419, + "epoch": 0.023581853476166512, + "kl_loss": 2701.700927734375, + "loss_ib": 27.02239990234375, + "step": 82 + }, + { + "ce_ib": 53.358924865722656, + "ce_orig": 0.881079375743866, + "epoch": 0.023581853476166512, + "kl_loss": 2774.904296875, + "loss_ib": 27.754379272460938, + "step": 82 + }, + { + "ce_ib": 51.51953125, + "ce_orig": 1.0080299377441406, + "epoch": 0.02386943705514415, + "kl_loss": 2673.2802734375, + "loss_ib": 26.737953186035156, + "step": 83 + }, + { + "ce_ib": 54.799827575683594, + "ce_orig": 1.211903691291809, + "epoch": 0.02386943705514415, + "kl_loss": 2386.9873046875, + "loss_ib": 23.87535285949707, + "step": 83 + }, + { + "ce_ib": 53.07735824584961, + "ce_orig": 0.9792759418487549, + "epoch": 0.02386943705514415, + "kl_loss": 2844.404296875, + "loss_ib": 28.449350357055664, + "step": 83 + }, + { + "ce_ib": 57.56839370727539, + "ce_orig": 1.708152413368225, + "epoch": 0.02386943705514415, + "kl_loss": 2686.004150390625, + "loss_ib": 26.86579704284668, + "step": 83 + }, + { + "ce_ib": 56.169593811035156, + "ce_orig": 1.0889211893081665, + "epoch": 0.02415702063412179, + "kl_loss": 2531.922607421875, + "loss_ib": 25.32484245300293, + "step": 84 + }, + { + "ce_ib": 52.437904357910156, + "ce_orig": 0.9348316788673401, + "epoch": 0.02415702063412179, + "kl_loss": 2593.02294921875, + "loss_ib": 25.93547248840332, + "step": 84 + }, + { + "ce_ib": 50.439369201660156, + "ce_orig": 0.710110068321228, + "epoch": 0.02415702063412179, + "kl_loss": 2809.076171875, + "loss_ib": 28.09580421447754, + "step": 84 + }, + { + "ce_ib": 51.97590637207031, + "ce_orig": 0.7616681456565857, + "epoch": 0.02415702063412179, + "kl_loss": 2728.056640625, + "loss_ib": 27.285762786865234, + "step": 84 + }, + { + "epoch": 0.024444604213099432, + "grad_norm": 432.98297119140625, + "learning_rate": 1.2579617834394904e-05, + "loss": 27.55, + "step": 85 + }, + { + "ce_ib": 53.3626594543457, + "ce_orig": 1.2040644884109497, + "epoch": 0.024444604213099432, + "kl_loss": 2776.72802734375, + "loss_ib": 27.77261734008789, + "step": 85 + }, + { + "ce_ib": 50.8441276550293, + "ce_orig": 0.3849184215068817, + "epoch": 0.024444604213099432, + "kl_loss": 2446.23681640625, + "loss_ib": 24.467453002929688, + "step": 85 + }, + { + "ce_ib": 56.2274055480957, + "ce_orig": 1.5932576656341553, + "epoch": 0.024444604213099432, + "kl_loss": 2712.46728515625, + "loss_ib": 27.130294799804688, + "step": 85 + }, + { + "ce_ib": 51.763954162597656, + "ce_orig": 0.8911384344100952, + "epoch": 0.024444604213099432, + "kl_loss": 2615.62939453125, + "loss_ib": 26.161468505859375, + "step": 85 + }, + { + "ce_ib": 58.18117904663086, + "ce_orig": 2.2738187313079834, + "epoch": 0.024732187792077073, + "kl_loss": 2173.28564453125, + "loss_ib": 21.73867416381836, + "step": 86 + }, + { + "ce_ib": 52.2686882019043, + "ce_orig": 0.9880481958389282, + "epoch": 0.024732187792077073, + "kl_loss": 2540.4462890625, + "loss_ib": 25.40968894958496, + "step": 86 + }, + { + "ce_ib": 52.55824661254883, + "ce_orig": 0.7514367699623108, + "epoch": 0.024732187792077073, + "kl_loss": 2694.514404296875, + "loss_ib": 26.95039939880371, + "step": 86 + }, + { + "ce_ib": 54.02186584472656, + "ce_orig": 1.0578229427337646, + "epoch": 0.024732187792077073, + "kl_loss": 2508.5751953125, + "loss_ib": 25.091154098510742, + "step": 86 + }, + { + "ce_ib": 51.11041259765625, + "ce_orig": 1.0314545631408691, + "epoch": 0.025019771371054714, + "kl_loss": 2482.1767578125, + "loss_ib": 24.82687759399414, + "step": 87 + }, + { + "ce_ib": 52.022422790527344, + "ce_orig": 0.9868292212486267, + "epoch": 0.025019771371054714, + "kl_loss": 2379.2041015625, + "loss_ib": 23.797243118286133, + "step": 87 + }, + { + "ce_ib": 51.21082305908203, + "ce_orig": 0.7481355667114258, + "epoch": 0.025019771371054714, + "kl_loss": 2631.20556640625, + "loss_ib": 26.317176818847656, + "step": 87 + }, + { + "ce_ib": 55.54100799560547, + "ce_orig": 1.7815814018249512, + "epoch": 0.025019771371054714, + "kl_loss": 2545.77587890625, + "loss_ib": 25.46331214904785, + "step": 87 + }, + { + "ce_ib": 52.07685470581055, + "ce_orig": 0.6155886054039001, + "epoch": 0.025307354950032352, + "kl_loss": 2352.36767578125, + "loss_ib": 23.52888298034668, + "step": 88 + }, + { + "ce_ib": 53.431034088134766, + "ce_orig": 0.2792898416519165, + "epoch": 0.025307354950032352, + "kl_loss": 2138.5185546875, + "loss_ib": 21.39052963256836, + "step": 88 + }, + { + "ce_ib": 51.92123794555664, + "ce_orig": 0.8495407104492188, + "epoch": 0.025307354950032352, + "kl_loss": 2580.02490234375, + "loss_ib": 25.80544090270996, + "step": 88 + }, + { + "ce_ib": 51.645992279052734, + "ce_orig": 0.8546672463417053, + "epoch": 0.025307354950032352, + "kl_loss": 2461.83203125, + "loss_ib": 24.623483657836914, + "step": 88 + }, + { + "ce_ib": 54.053565979003906, + "ce_orig": 1.8279736042022705, + "epoch": 0.025594938529009993, + "kl_loss": 2367.5888671875, + "loss_ib": 23.681293487548828, + "step": 89 + }, + { + "ce_ib": 51.267242431640625, + "ce_orig": 1.1525427103042603, + "epoch": 0.025594938529009993, + "kl_loss": 2426.50537109375, + "loss_ib": 24.270179748535156, + "step": 89 + }, + { + "ce_ib": 49.84978485107422, + "ce_orig": 0.9793948531150818, + "epoch": 0.025594938529009993, + "kl_loss": 2521.16650390625, + "loss_ib": 25.216650009155273, + "step": 89 + }, + { + "ce_ib": 53.35862350463867, + "ce_orig": 0.9854567646980286, + "epoch": 0.025594938529009993, + "kl_loss": 2478.33984375, + "loss_ib": 24.788734436035156, + "step": 89 + }, + { + "epoch": 0.025882522107987634, + "grad_norm": 397.4322814941406, + "learning_rate": 1.337579617834395e-05, + "loss": 25.8688, + "step": 90 + }, + { + "ce_ib": 50.910614013671875, + "ce_orig": 0.6994275450706482, + "epoch": 0.025882522107987634, + "kl_loss": 2584.65478515625, + "loss_ib": 25.85163688659668, + "step": 90 + }, + { + "ce_ib": 53.02171325683594, + "ce_orig": 1.0508131980895996, + "epoch": 0.025882522107987634, + "kl_loss": 2165.5048828125, + "loss_ib": 21.660350799560547, + "step": 90 + }, + { + "ce_ib": 55.81970977783203, + "ce_orig": 2.0338478088378906, + "epoch": 0.025882522107987634, + "kl_loss": 2277.224609375, + "loss_ib": 22.7778263092041, + "step": 90 + }, + { + "ce_ib": 52.05428695678711, + "ce_orig": 1.1036865711212158, + "epoch": 0.025882522107987634, + "kl_loss": 2340.133056640625, + "loss_ib": 23.40653419494629, + "step": 90 + }, + { + "ce_ib": 50.32669448852539, + "ce_orig": 0.6009736657142639, + "epoch": 0.026170105686965275, + "kl_loss": 2234.68017578125, + "loss_ib": 22.35183334350586, + "step": 91 + }, + { + "ce_ib": 53.1220817565918, + "ce_orig": 1.201808214187622, + "epoch": 0.026170105686965275, + "kl_loss": 2135.09423828125, + "loss_ib": 21.35625457763672, + "step": 91 + }, + { + "ce_ib": 49.957176208496094, + "ce_orig": 1.040064811706543, + "epoch": 0.026170105686965275, + "kl_loss": 2486.378662109375, + "loss_ib": 24.86878204345703, + "step": 91 + }, + { + "ce_ib": 50.12799835205078, + "ce_orig": 0.863908588886261, + "epoch": 0.026170105686965275, + "kl_loss": 2400.52392578125, + "loss_ib": 24.010250091552734, + "step": 91 + }, + { + "ce_ib": 52.6755256652832, + "ce_orig": 1.1070929765701294, + "epoch": 0.026457689265942913, + "kl_loss": 1971.570068359375, + "loss_ib": 19.72096824645996, + "step": 92 + }, + { + "ce_ib": 49.004791259765625, + "ce_orig": 1.1243679523468018, + "epoch": 0.026457689265942913, + "kl_loss": 2431.189453125, + "loss_ib": 24.31679344177246, + "step": 92 + }, + { + "ce_ib": 56.39695739746094, + "ce_orig": 2.0895683765411377, + "epoch": 0.026457689265942913, + "kl_loss": 2032.292724609375, + "loss_ib": 20.32856559753418, + "step": 92 + }, + { + "ce_ib": 48.332069396972656, + "ce_orig": 0.7159590721130371, + "epoch": 0.026457689265942913, + "kl_loss": 2333.5986328125, + "loss_ib": 23.3408203125, + "step": 92 + }, + { + "ce_ib": 51.314674377441406, + "ce_orig": 0.5955004692077637, + "epoch": 0.026745272844920554, + "kl_loss": 2113.81005859375, + "loss_ib": 21.143232345581055, + "step": 93 + }, + { + "ce_ib": 50.85389709472656, + "ce_orig": 1.063989281654358, + "epoch": 0.026745272844920554, + "kl_loss": 2374.11474609375, + "loss_ib": 23.746232986450195, + "step": 93 + }, + { + "ce_ib": 52.245399475097656, + "ce_orig": 0.854840099811554, + "epoch": 0.026745272844920554, + "kl_loss": 1991.779052734375, + "loss_ib": 19.923015594482422, + "step": 93 + }, + { + "ce_ib": 49.68756103515625, + "ce_orig": 0.7530232667922974, + "epoch": 0.026745272844920554, + "kl_loss": 2126.8818359375, + "loss_ib": 21.273786544799805, + "step": 93 + }, + { + "ce_ib": 54.24198913574219, + "ce_orig": 1.3871289491653442, + "epoch": 0.027032856423898195, + "kl_loss": 2152.40869140625, + "loss_ib": 21.529510498046875, + "step": 94 + }, + { + "ce_ib": 51.039283752441406, + "ce_orig": 1.1029423475265503, + "epoch": 0.027032856423898195, + "kl_loss": 2147.513916015625, + "loss_ib": 21.480243682861328, + "step": 94 + }, + { + "ce_ib": 48.558746337890625, + "ce_orig": 0.8018097877502441, + "epoch": 0.027032856423898195, + "kl_loss": 2464.363525390625, + "loss_ib": 24.64849090576172, + "step": 94 + }, + { + "ce_ib": 51.13576889038086, + "ce_orig": 1.1009808778762817, + "epoch": 0.027032856423898195, + "kl_loss": 2014.6015625, + "loss_ib": 20.1511287689209, + "step": 94 + }, + { + "epoch": 0.027320440002875836, + "grad_norm": 380.0746154785156, + "learning_rate": 1.4171974522292993e-05, + "loss": 23.87, + "step": 95 + }, + { + "ce_ib": 55.49427795410156, + "ce_orig": 1.7911261320114136, + "epoch": 0.027320440002875836, + "kl_loss": 2262.322265625, + "loss_ib": 22.62877082824707, + "step": 95 + }, + { + "ce_ib": 49.06692886352539, + "ce_orig": 1.022802710533142, + "epoch": 0.027320440002875836, + "kl_loss": 2229.0361328125, + "loss_ib": 22.29526710510254, + "step": 95 + }, + { + "ce_ib": 50.922794342041016, + "ce_orig": 0.8836882710456848, + "epoch": 0.027320440002875836, + "kl_loss": 2195.3564453125, + "loss_ib": 21.958656311035156, + "step": 95 + }, + { + "ce_ib": 49.9268913269043, + "ce_orig": 1.0127633810043335, + "epoch": 0.027320440002875836, + "kl_loss": 2146.888671875, + "loss_ib": 21.473878860473633, + "step": 95 + }, + { + "ce_ib": 49.77082061767578, + "ce_orig": 0.9023265838623047, + "epoch": 0.027608023581853477, + "kl_loss": 2299.9072265625, + "loss_ib": 23.00404930114746, + "step": 96 + }, + { + "ce_ib": 47.34916305541992, + "ce_orig": 0.5646235346794128, + "epoch": 0.027608023581853477, + "kl_loss": 2068.25634765625, + "loss_ib": 20.687297821044922, + "step": 96 + }, + { + "ce_ib": 49.27576446533203, + "ce_orig": 0.8970661163330078, + "epoch": 0.027608023581853477, + "kl_loss": 2058.2939453125, + "loss_ib": 20.587865829467773, + "step": 96 + }, + { + "ce_ib": 51.644412994384766, + "ce_orig": 1.6223132610321045, + "epoch": 0.027608023581853477, + "kl_loss": 1921.7979736328125, + "loss_ib": 19.22314453125, + "step": 96 + }, + { + "ce_ib": 49.01395034790039, + "ce_orig": 0.74750155210495, + "epoch": 0.027895607160831115, + "kl_loss": 1786.2314453125, + "loss_ib": 17.867216110229492, + "step": 97 + }, + { + "ce_ib": 49.16642761230469, + "ce_orig": 1.0198180675506592, + "epoch": 0.027895607160831115, + "kl_loss": 2137.5, + "loss_ib": 21.37991714477539, + "step": 97 + }, + { + "ce_ib": 51.9681396484375, + "ce_orig": 1.1882346868515015, + "epoch": 0.027895607160831115, + "kl_loss": 2058.308837890625, + "loss_ib": 20.588285446166992, + "step": 97 + }, + { + "ce_ib": 49.95888900756836, + "ce_orig": 0.9148277640342712, + "epoch": 0.027895607160831115, + "kl_loss": 2065.841552734375, + "loss_ib": 20.663410186767578, + "step": 97 + }, + { + "ce_ib": 49.571800231933594, + "ce_orig": 1.0728422403335571, + "epoch": 0.028183190739808756, + "kl_loss": 2083.6865234375, + "loss_ib": 20.841821670532227, + "step": 98 + }, + { + "ce_ib": 49.93168640136719, + "ce_orig": 0.8260626792907715, + "epoch": 0.028183190739808756, + "kl_loss": 1862.836181640625, + "loss_ib": 18.63335418701172, + "step": 98 + }, + { + "ce_ib": 48.66516876220703, + "ce_orig": 0.562609851360321, + "epoch": 0.028183190739808756, + "kl_loss": 2181.225830078125, + "loss_ib": 21.817123413085938, + "step": 98 + }, + { + "ce_ib": 47.39895248413086, + "ce_orig": 1.3863259553909302, + "epoch": 0.028183190739808756, + "kl_loss": 2107.5283203125, + "loss_ib": 21.080020904541016, + "step": 98 + }, + { + "ce_ib": 48.56808090209961, + "ce_orig": 0.7511693835258484, + "epoch": 0.028470774318786397, + "kl_loss": 2047.9061279296875, + "loss_ib": 20.483917236328125, + "step": 99 + }, + { + "ce_ib": 48.201148986816406, + "ce_orig": 1.3514686822891235, + "epoch": 0.028470774318786397, + "kl_loss": 2207.186279296875, + "loss_ib": 22.07668113708496, + "step": 99 + }, + { + "ce_ib": 47.37411880493164, + "ce_orig": 0.8630917072296143, + "epoch": 0.028470774318786397, + "kl_loss": 2191.63232421875, + "loss_ib": 21.921058654785156, + "step": 99 + }, + { + "ce_ib": 50.7148323059082, + "ce_orig": 1.3532038927078247, + "epoch": 0.028470774318786397, + "kl_loss": 2022.049560546875, + "loss_ib": 20.225566864013672, + "step": 99 + }, + { + "epoch": 0.02875835789776404, + "grad_norm": 356.2680358886719, + "learning_rate": 1.4968152866242039e-05, + "loss": 21.7419, + "step": 100 + }, + { + "ce_ib": 47.18466567993164, + "ce_orig": 1.1329602003097534, + "epoch": 0.02875835789776404, + "kl_loss": 1989.36962890625, + "loss_ib": 19.898414611816406, + "step": 100 + }, + { + "ce_ib": 53.88701248168945, + "ce_orig": 2.2805113792419434, + "epoch": 0.02875835789776404, + "kl_loss": 1755.5928955078125, + "loss_ib": 17.561317443847656, + "step": 100 + }, + { + "ce_ib": 48.18925094604492, + "ce_orig": 0.8274984359741211, + "epoch": 0.02875835789776404, + "kl_loss": 1906.422119140625, + "loss_ib": 19.069040298461914, + "step": 100 + }, + { + "ce_ib": 46.921417236328125, + "ce_orig": 0.7587900757789612, + "epoch": 0.02875835789776404, + "kl_loss": 2072.403076171875, + "loss_ib": 20.728723526000977, + "step": 100 + }, + { + "ce_ib": 53.836181640625, + "ce_orig": 2.063023805618286, + "epoch": 0.02904594147674168, + "kl_loss": 1652.205322265625, + "loss_ib": 16.527435302734375, + "step": 101 + }, + { + "ce_ib": 47.936073303222656, + "ce_orig": 0.6192855834960938, + "epoch": 0.02904594147674168, + "kl_loss": 1928.050048828125, + "loss_ib": 19.285293579101562, + "step": 101 + }, + { + "ce_ib": 52.24326705932617, + "ce_orig": 1.2729721069335938, + "epoch": 0.02904594147674168, + "kl_loss": 1928.1400146484375, + "loss_ib": 19.286624908447266, + "step": 101 + }, + { + "ce_ib": 46.775421142578125, + "ce_orig": 0.7013092041015625, + "epoch": 0.02904594147674168, + "kl_loss": 2082.877685546875, + "loss_ib": 20.833454132080078, + "step": 101 + }, + { + "ce_ib": 46.20343780517578, + "ce_orig": 0.7948331832885742, + "epoch": 0.029333525055719317, + "kl_loss": 1859.212890625, + "loss_ib": 18.59674835205078, + "step": 102 + }, + { + "ce_ib": 48.856143951416016, + "ce_orig": 1.1838785409927368, + "epoch": 0.029333525055719317, + "kl_loss": 1660.9873046875, + "loss_ib": 16.614757537841797, + "step": 102 + }, + { + "ce_ib": 47.809078216552734, + "ce_orig": 1.779065728187561, + "epoch": 0.029333525055719317, + "kl_loss": 1773.1181640625, + "loss_ib": 17.7359619140625, + "step": 102 + }, + { + "ce_ib": 48.513916015625, + "ce_orig": 1.4937797784805298, + "epoch": 0.029333525055719317, + "kl_loss": 1870.419189453125, + "loss_ib": 18.709043502807617, + "step": 102 + }, + { + "ce_ib": 50.835166931152344, + "ce_orig": 1.46725594997406, + "epoch": 0.02962110863469696, + "kl_loss": 1848.5126953125, + "loss_ib": 18.490209579467773, + "step": 103 + }, + { + "ce_ib": 49.164024353027344, + "ce_orig": 1.4439281225204468, + "epoch": 0.02962110863469696, + "kl_loss": 1786.349365234375, + "loss_ib": 17.868410110473633, + "step": 103 + }, + { + "ce_ib": 47.89384841918945, + "ce_orig": 1.4249969720840454, + "epoch": 0.02962110863469696, + "kl_loss": 1961.390625, + "loss_ib": 19.618694305419922, + "step": 103 + }, + { + "ce_ib": 50.38489532470703, + "ce_orig": 1.2643296718597412, + "epoch": 0.02962110863469696, + "kl_loss": 1844.09521484375, + "loss_ib": 18.44599151611328, + "step": 103 + }, + { + "ce_ib": 47.76396942138672, + "ce_orig": 0.7680091857910156, + "epoch": 0.0299086922136746, + "kl_loss": 1581.564697265625, + "loss_ib": 15.820423126220703, + "step": 104 + }, + { + "ce_ib": 47.076053619384766, + "ce_orig": 1.2261803150177002, + "epoch": 0.0299086922136746, + "kl_loss": 1776.835693359375, + "loss_ib": 17.77306365966797, + "step": 104 + }, + { + "ce_ib": 47.24263000488281, + "ce_orig": 0.8971230983734131, + "epoch": 0.0299086922136746, + "kl_loss": 1715.18798828125, + "loss_ib": 17.15660285949707, + "step": 104 + }, + { + "ce_ib": 45.86969757080078, + "ce_orig": 0.2927989065647125, + "epoch": 0.0299086922136746, + "kl_loss": 1346.9228515625, + "loss_ib": 13.47381591796875, + "step": 104 + }, + { + "epoch": 0.03019627579265224, + "grad_norm": 313.57281494140625, + "learning_rate": 1.5764331210191083e-05, + "loss": 19.1552, + "step": 105 + }, + { + "ce_ib": 46.8266487121582, + "ce_orig": 1.2301392555236816, + "epoch": 0.03019627579265224, + "kl_loss": 1715.3831787109375, + "loss_ib": 17.15851402282715, + "step": 105 + }, + { + "ce_ib": 48.934349060058594, + "ce_orig": 1.5663868188858032, + "epoch": 0.03019627579265224, + "kl_loss": 1713.754638671875, + "loss_ib": 17.142438888549805, + "step": 105 + }, + { + "ce_ib": 47.73679733276367, + "ce_orig": 0.7973002791404724, + "epoch": 0.03019627579265224, + "kl_loss": 1519.5120849609375, + "loss_ib": 15.199894905090332, + "step": 105 + }, + { + "ce_ib": 48.780006408691406, + "ce_orig": 1.2290194034576416, + "epoch": 0.03019627579265224, + "kl_loss": 1714.5169677734375, + "loss_ib": 17.150047302246094, + "step": 105 + }, + { + "ce_ib": 46.12453079223633, + "ce_orig": 0.7794104218482971, + "epoch": 0.030483859371629878, + "kl_loss": 1571.0771484375, + "loss_ib": 15.715384483337402, + "step": 106 + }, + { + "ce_ib": 46.5201530456543, + "ce_orig": 0.8720536231994629, + "epoch": 0.030483859371629878, + "kl_loss": 1566.1361083984375, + "loss_ib": 15.66601276397705, + "step": 106 + }, + { + "ce_ib": 44.020755767822266, + "ce_orig": 0.22585166990756989, + "epoch": 0.030483859371629878, + "kl_loss": 1053.362548828125, + "loss_ib": 10.538026809692383, + "step": 106 + }, + { + "ce_ib": 44.2620735168457, + "ce_orig": 0.26073363423347473, + "epoch": 0.030483859371629878, + "kl_loss": 1120.464599609375, + "loss_ib": 11.20907211303711, + "step": 106 + }, + { + "ce_ib": 47.4915885925293, + "ce_orig": 1.2189853191375732, + "epoch": 0.03077144295060752, + "kl_loss": 947.7132568359375, + "loss_ib": 9.481881141662598, + "step": 107 + }, + { + "ce_ib": 47.91807556152344, + "ce_orig": 1.3612654209136963, + "epoch": 0.03077144295060752, + "kl_loss": 1467.544677734375, + "loss_ib": 14.680237770080566, + "step": 107 + }, + { + "ce_ib": 42.33379364013672, + "ce_orig": 0.8033524751663208, + "epoch": 0.03077144295060752, + "kl_loss": 1744.4759521484375, + "loss_ib": 17.448991775512695, + "step": 107 + }, + { + "ce_ib": 47.13089370727539, + "ce_orig": 0.9136131405830383, + "epoch": 0.03077144295060752, + "kl_loss": 1629.6527099609375, + "loss_ib": 16.301239013671875, + "step": 107 + }, + { + "ce_ib": 46.97206115722656, + "ce_orig": 1.0767881870269775, + "epoch": 0.03105902652958516, + "kl_loss": 1675.902099609375, + "loss_ib": 16.763717651367188, + "step": 108 + }, + { + "ce_ib": 43.823402404785156, + "ce_orig": 0.7930386662483215, + "epoch": 0.03105902652958516, + "kl_loss": 1553.387939453125, + "loss_ib": 15.538261413574219, + "step": 108 + }, + { + "ce_ib": 45.571510314941406, + "ce_orig": 0.781028151512146, + "epoch": 0.03105902652958516, + "kl_loss": 1472.5889892578125, + "loss_ib": 14.730446815490723, + "step": 108 + }, + { + "ce_ib": 48.90847396850586, + "ce_orig": 1.6240291595458984, + "epoch": 0.03105902652958516, + "kl_loss": 1453.4114990234375, + "loss_ib": 14.539006233215332, + "step": 108 + }, + { + "ce_ib": 47.367916107177734, + "ce_orig": 0.615014374256134, + "epoch": 0.0313466101085628, + "kl_loss": 1492.5986328125, + "loss_ib": 14.9307222366333, + "step": 109 + }, + { + "ce_ib": 46.28598403930664, + "ce_orig": 1.3170636892318726, + "epoch": 0.0313466101085628, + "kl_loss": 1374.111572265625, + "loss_ib": 13.745744705200195, + "step": 109 + }, + { + "ce_ib": 47.25022506713867, + "ce_orig": 1.893700361251831, + "epoch": 0.0313466101085628, + "kl_loss": 1464.014404296875, + "loss_ib": 14.644868850708008, + "step": 109 + }, + { + "ce_ib": 43.77083969116211, + "ce_orig": 0.5421922206878662, + "epoch": 0.0313466101085628, + "kl_loss": 1321.385986328125, + "loss_ib": 13.218236923217773, + "step": 109 + }, + { + "epoch": 0.03163419368754044, + "grad_norm": 294.16033935546875, + "learning_rate": 1.6560509554140128e-05, + "loss": 16.7628, + "step": 110 + }, + { + "ce_ib": 47.9586067199707, + "ce_orig": 0.962894082069397, + "epoch": 0.03163419368754044, + "kl_loss": 1451.75830078125, + "loss_ib": 14.522378921508789, + "step": 110 + }, + { + "ce_ib": 43.98506546020508, + "ce_orig": 0.734940767288208, + "epoch": 0.03163419368754044, + "kl_loss": 1582.23291015625, + "loss_ib": 15.826726913452148, + "step": 110 + }, + { + "ce_ib": 43.784725189208984, + "ce_orig": 1.0270369052886963, + "epoch": 0.03163419368754044, + "kl_loss": 1439.91748046875, + "loss_ib": 14.403553009033203, + "step": 110 + }, + { + "ce_ib": 47.88432312011719, + "ce_orig": 1.8923044204711914, + "epoch": 0.03163419368754044, + "kl_loss": 1339.40087890625, + "loss_ib": 13.398797035217285, + "step": 110 + }, + { + "ce_ib": 45.02385711669922, + "ce_orig": 0.563847005367279, + "epoch": 0.031921777266518084, + "kl_loss": 1429.29736328125, + "loss_ib": 14.29747486114502, + "step": 111 + }, + { + "ce_ib": 44.20392990112305, + "ce_orig": 1.070389986038208, + "epoch": 0.031921777266518084, + "kl_loss": 1417.1707763671875, + "loss_ib": 14.176127433776855, + "step": 111 + }, + { + "ce_ib": 47.985328674316406, + "ce_orig": 1.82245671749115, + "epoch": 0.031921777266518084, + "kl_loss": 1312.49853515625, + "loss_ib": 13.129783630371094, + "step": 111 + }, + { + "ce_ib": 47.73635482788086, + "ce_orig": 1.0577436685562134, + "epoch": 0.031921777266518084, + "kl_loss": 1424.302734375, + "loss_ib": 14.247800827026367, + "step": 111 + }, + { + "ce_ib": 46.29003143310547, + "ce_orig": 1.197394847869873, + "epoch": 0.03220936084549572, + "kl_loss": 1341.0858154296875, + "loss_ib": 13.415486335754395, + "step": 112 + }, + { + "ce_ib": 45.729007720947266, + "ce_orig": 0.9139751195907593, + "epoch": 0.03220936084549572, + "kl_loss": 1310.7313232421875, + "loss_ib": 13.111886024475098, + "step": 112 + }, + { + "ce_ib": 46.864864349365234, + "ce_orig": 1.5965396165847778, + "epoch": 0.03220936084549572, + "kl_loss": 1156.0203857421875, + "loss_ib": 11.564889907836914, + "step": 112 + }, + { + "ce_ib": 43.946414947509766, + "ce_orig": 1.2731986045837402, + "epoch": 0.03220936084549572, + "kl_loss": 1306.838134765625, + "loss_ib": 13.072775840759277, + "step": 112 + }, + { + "ce_ib": 44.451026916503906, + "ce_orig": 1.2415810823440552, + "epoch": 0.032496944424473366, + "kl_loss": 1380.1820068359375, + "loss_ib": 13.80626392364502, + "step": 113 + }, + { + "ce_ib": 45.05312728881836, + "ce_orig": 0.7081640362739563, + "epoch": 0.032496944424473366, + "kl_loss": 1326.8614501953125, + "loss_ib": 13.273119926452637, + "step": 113 + }, + { + "ce_ib": 45.38166809082031, + "ce_orig": 0.49601882696151733, + "epoch": 0.032496944424473366, + "kl_loss": 1228.1025390625, + "loss_ib": 12.285563468933105, + "step": 113 + }, + { + "ce_ib": 42.399070739746094, + "ce_orig": 1.1342860460281372, + "epoch": 0.032496944424473366, + "kl_loss": 1330.390380859375, + "loss_ib": 13.30814266204834, + "step": 113 + }, + { + "ce_ib": 42.90398406982422, + "ce_orig": 0.8459790945053101, + "epoch": 0.032784528003451004, + "kl_loss": 1192.5108642578125, + "loss_ib": 11.929399490356445, + "step": 114 + }, + { + "ce_ib": 42.922142028808594, + "ce_orig": 0.657600462436676, + "epoch": 0.032784528003451004, + "kl_loss": 1351.73291015625, + "loss_ib": 13.521620750427246, + "step": 114 + }, + { + "ce_ib": 43.3038215637207, + "ce_orig": 0.5711429119110107, + "epoch": 0.032784528003451004, + "kl_loss": 1127.6776123046875, + "loss_ib": 11.281105995178223, + "step": 114 + }, + { + "ce_ib": 45.69675064086914, + "ce_orig": 1.5162954330444336, + "epoch": 0.032784528003451004, + "kl_loss": 1221.5166015625, + "loss_ib": 12.219735145568848, + "step": 114 + }, + { + "epoch": 0.03307211158242864, + "grad_norm": 255.8572540283203, + "learning_rate": 1.7356687898089173e-05, + "loss": 14.1527, + "step": 115 + }, + { + "ce_ib": 45.3430061340332, + "ce_orig": 1.2573144435882568, + "epoch": 0.03307211158242864, + "kl_loss": 1201.44775390625, + "loss_ib": 12.019010543823242, + "step": 115 + }, + { + "ce_ib": 41.08205795288086, + "ce_orig": 0.44548746943473816, + "epoch": 0.03307211158242864, + "kl_loss": 553.1367797851562, + "loss_ib": 5.535475730895996, + "step": 115 + }, + { + "ce_ib": 42.01618957519531, + "ce_orig": 0.9748629927635193, + "epoch": 0.03307211158242864, + "kl_loss": 1253.167724609375, + "loss_ib": 12.53587818145752, + "step": 115 + }, + { + "ce_ib": 46.25815200805664, + "ce_orig": 1.704155445098877, + "epoch": 0.03307211158242864, + "kl_loss": 1103.8121337890625, + "loss_ib": 11.042746543884277, + "step": 115 + }, + { + "ce_ib": 41.26174545288086, + "ce_orig": 0.6826565861701965, + "epoch": 0.033359695161406286, + "kl_loss": 1163.051025390625, + "loss_ib": 11.634635925292969, + "step": 116 + }, + { + "ce_ib": 40.63665008544922, + "ce_orig": 0.900534987449646, + "epoch": 0.033359695161406286, + "kl_loss": 1172.418212890625, + "loss_ib": 11.728245735168457, + "step": 116 + }, + { + "ce_ib": 44.31190872192383, + "ce_orig": 1.1153950691223145, + "epoch": 0.033359695161406286, + "kl_loss": 1033.94287109375, + "loss_ib": 10.343859672546387, + "step": 116 + }, + { + "ce_ib": 43.1805305480957, + "ce_orig": 0.9051750302314758, + "epoch": 0.033359695161406286, + "kl_loss": 1077.079833984375, + "loss_ib": 10.775115966796875, + "step": 116 + }, + { + "ce_ib": 41.2710075378418, + "ce_orig": 0.38958603143692017, + "epoch": 0.033647278740383924, + "kl_loss": 966.39892578125, + "loss_ib": 9.668116569519043, + "step": 117 + }, + { + "ce_ib": 42.25935745239258, + "ce_orig": 0.8274361491203308, + "epoch": 0.033647278740383924, + "kl_loss": 1053.1240234375, + "loss_ib": 10.535466194152832, + "step": 117 + }, + { + "ce_ib": 40.06983947753906, + "ce_orig": 0.7041372060775757, + "epoch": 0.033647278740383924, + "kl_loss": 1147.9034423828125, + "loss_ib": 11.483041763305664, + "step": 117 + }, + { + "ce_ib": 46.431880950927734, + "ce_orig": 1.3825441598892212, + "epoch": 0.033647278740383924, + "kl_loss": 1083.45751953125, + "loss_ib": 10.839218139648438, + "step": 117 + }, + { + "ce_ib": 43.25281524658203, + "ce_orig": 0.9396786689758301, + "epoch": 0.03393486231936156, + "kl_loss": 1038.500244140625, + "loss_ib": 10.389327049255371, + "step": 118 + }, + { + "ce_ib": 40.72803497314453, + "ce_orig": 0.5316663980484009, + "epoch": 0.03393486231936156, + "kl_loss": 935.3412475585938, + "loss_ib": 9.357484817504883, + "step": 118 + }, + { + "ce_ib": 40.19837951660156, + "ce_orig": 0.7874443531036377, + "epoch": 0.03393486231936156, + "kl_loss": 987.9114379882812, + "loss_ib": 9.883133888244629, + "step": 118 + }, + { + "ce_ib": 42.92312240600586, + "ce_orig": 0.8642221093177795, + "epoch": 0.03393486231936156, + "kl_loss": 1002.868896484375, + "loss_ib": 10.032980918884277, + "step": 118 + }, + { + "ce_ib": 43.140602111816406, + "ce_orig": 0.9903743863105774, + "epoch": 0.034222445898339206, + "kl_loss": 945.7418212890625, + "loss_ib": 9.461731910705566, + "step": 119 + }, + { + "ce_ib": 44.95377731323242, + "ce_orig": 0.7084935903549194, + "epoch": 0.034222445898339206, + "kl_loss": 1001.4967041015625, + "loss_ib": 10.019461631774902, + "step": 119 + }, + { + "ce_ib": 43.81313705444336, + "ce_orig": 1.0507792234420776, + "epoch": 0.034222445898339206, + "kl_loss": 966.8169555664062, + "loss_ib": 9.672550201416016, + "step": 119 + }, + { + "ce_ib": 38.509605407714844, + "ce_orig": 0.24314048886299133, + "epoch": 0.034222445898339206, + "kl_loss": 900.1303100585938, + "loss_ib": 9.00515365600586, + "step": 119 + }, + { + "epoch": 0.03451002947731684, + "grad_norm": 213.6476287841797, + "learning_rate": 1.8152866242038215e-05, + "loss": 11.2209, + "step": 120 + }, + { + "ce_ib": 43.3798942565918, + "ce_orig": 1.417758822441101, + "epoch": 0.03451002947731684, + "kl_loss": 874.8845825195312, + "loss_ib": 8.753183364868164, + "step": 120 + }, + { + "ce_ib": 41.32734298706055, + "ce_orig": 0.8705493807792664, + "epoch": 0.03451002947731684, + "kl_loss": 937.0117797851562, + "loss_ib": 9.374250411987305, + "step": 120 + }, + { + "ce_ib": 38.6541633605957, + "ce_orig": 0.23277077078819275, + "epoch": 0.03451002947731684, + "kl_loss": 378.5799560546875, + "loss_ib": 3.7896647453308105, + "step": 120 + }, + { + "ce_ib": 43.36669921875, + "ce_orig": 1.0161871910095215, + "epoch": 0.03451002947731684, + "kl_loss": 853.2371215820312, + "loss_ib": 8.536707878112793, + "step": 120 + }, + { + "ce_ib": 41.16870880126953, + "ce_orig": 0.9253172874450684, + "epoch": 0.03479761305629449, + "kl_loss": 846.5045776367188, + "loss_ib": 8.469161987304688, + "step": 121 + }, + { + "ce_ib": 44.02971267700195, + "ce_orig": 1.2445998191833496, + "epoch": 0.03479761305629449, + "kl_loss": 718.2892456054688, + "loss_ib": 7.187295436859131, + "step": 121 + }, + { + "ce_ib": 43.64518737792969, + "ce_orig": 1.7942239046096802, + "epoch": 0.03479761305629449, + "kl_loss": 756.7442626953125, + "loss_ib": 7.571806907653809, + "step": 121 + }, + { + "ce_ib": 42.69846725463867, + "ce_orig": 0.8896026015281677, + "epoch": 0.03479761305629449, + "kl_loss": 883.087890625, + "loss_ib": 8.835148811340332, + "step": 121 + }, + { + "ce_ib": 40.64472579956055, + "ce_orig": 0.8089054822921753, + "epoch": 0.035085196635272126, + "kl_loss": 727.42724609375, + "loss_ib": 7.278336524963379, + "step": 122 + }, + { + "ce_ib": 41.40364456176758, + "ce_orig": 0.8790702819824219, + "epoch": 0.035085196635272126, + "kl_loss": 701.3001708984375, + "loss_ib": 7.017142295837402, + "step": 122 + }, + { + "ce_ib": 45.067813873291016, + "ce_orig": 0.3258854150772095, + "epoch": 0.035085196635272126, + "kl_loss": 577.9033203125, + "loss_ib": 5.783539772033691, + "step": 122 + }, + { + "ce_ib": 44.684349060058594, + "ce_orig": 1.5608466863632202, + "epoch": 0.035085196635272126, + "kl_loss": 660.216796875, + "loss_ib": 6.606636047363281, + "step": 122 + }, + { + "ce_ib": 42.09290313720703, + "ce_orig": 0.7853221297264099, + "epoch": 0.03537278021424976, + "kl_loss": 688.5507202148438, + "loss_ib": 6.889716625213623, + "step": 123 + }, + { + "ce_ib": 41.19681930541992, + "ce_orig": 1.0058292150497437, + "epoch": 0.03537278021424976, + "kl_loss": 660.992431640625, + "loss_ib": 6.614044189453125, + "step": 123 + }, + { + "ce_ib": 41.700340270996094, + "ce_orig": 0.7194111347198486, + "epoch": 0.03537278021424976, + "kl_loss": 460.14190673828125, + "loss_ib": 4.605588912963867, + "step": 123 + }, + { + "ce_ib": 44.97040939331055, + "ce_orig": 1.1617707014083862, + "epoch": 0.03537278021424976, + "kl_loss": 618.4761962890625, + "loss_ib": 6.189259052276611, + "step": 123 + }, + { + "ce_ib": 42.433712005615234, + "ce_orig": 0.7316823601722717, + "epoch": 0.03566036379322741, + "kl_loss": 761.43359375, + "loss_ib": 7.618578910827637, + "step": 124 + }, + { + "ce_ib": 42.72594451904297, + "ce_orig": 0.9406179189682007, + "epoch": 0.03566036379322741, + "kl_loss": 608.378173828125, + "loss_ib": 6.088054180145264, + "step": 124 + }, + { + "ce_ib": 44.054351806640625, + "ce_orig": 1.101775050163269, + "epoch": 0.03566036379322741, + "kl_loss": 533.7700805664062, + "loss_ib": 5.342106342315674, + "step": 124 + }, + { + "ce_ib": 41.86262130737305, + "ce_orig": 1.2485934495925903, + "epoch": 0.03566036379322741, + "kl_loss": 604.7998657226562, + "loss_ib": 6.05218505859375, + "step": 124 + }, + { + "epoch": 0.035947947372205045, + "grad_norm": 165.4596405029297, + "learning_rate": 1.8949044585987264e-05, + "loss": 8.3603, + "step": 125 + }, + { + "ce_ib": 41.704227447509766, + "ce_orig": 0.7984323501586914, + "epoch": 0.035947947372205045, + "kl_loss": 570.8909912109375, + "loss_ib": 5.713080406188965, + "step": 125 + }, + { + "ce_ib": 44.47398376464844, + "ce_orig": 1.532689094543457, + "epoch": 0.035947947372205045, + "kl_loss": 572.6158447265625, + "loss_ib": 5.7306060791015625, + "step": 125 + }, + { + "ce_ib": 44.11103820800781, + "ce_orig": 0.8605639338493347, + "epoch": 0.035947947372205045, + "kl_loss": 560.5389404296875, + "loss_ib": 5.609800338745117, + "step": 125 + }, + { + "ce_ib": 42.20602798461914, + "ce_orig": 1.0192476511001587, + "epoch": 0.035947947372205045, + "kl_loss": 583.3988647460938, + "loss_ib": 5.83820915222168, + "step": 125 + }, + { + "ce_ib": 43.26830291748047, + "ce_orig": 0.7587823271751404, + "epoch": 0.03623553095118269, + "kl_loss": 595.424072265625, + "loss_ib": 5.958567142486572, + "step": 126 + }, + { + "ce_ib": 42.44752502441406, + "ce_orig": 1.2043931484222412, + "epoch": 0.03623553095118269, + "kl_loss": 496.97802734375, + "loss_ib": 4.974024772644043, + "step": 126 + }, + { + "ce_ib": 43.29387283325195, + "ce_orig": 1.735470175743103, + "epoch": 0.03623553095118269, + "kl_loss": 459.98992919921875, + "loss_ib": 4.604228496551514, + "step": 126 + }, + { + "ce_ib": 44.7207145690918, + "ce_orig": 1.0822900533676147, + "epoch": 0.03623553095118269, + "kl_loss": 513.75048828125, + "loss_ib": 5.141976833343506, + "step": 126 + }, + { + "ce_ib": 43.62824249267578, + "ce_orig": 1.2997839450836182, + "epoch": 0.03652311453016033, + "kl_loss": 499.59527587890625, + "loss_ib": 5.0003156661987305, + "step": 127 + }, + { + "ce_ib": 45.198951721191406, + "ce_orig": 1.4282422065734863, + "epoch": 0.03652311453016033, + "kl_loss": 442.3271484375, + "loss_ib": 4.427791118621826, + "step": 127 + }, + { + "ce_ib": 41.611297607421875, + "ce_orig": 0.6139658093452454, + "epoch": 0.03652311453016033, + "kl_loss": 492.67706298828125, + "loss_ib": 4.930931568145752, + "step": 127 + }, + { + "ce_ib": 43.24065017700195, + "ce_orig": 1.5706660747528076, + "epoch": 0.03652311453016033, + "kl_loss": 477.58251953125, + "loss_ib": 4.780148983001709, + "step": 127 + }, + { + "ce_ib": 45.77122497558594, + "ce_orig": 1.3252004384994507, + "epoch": 0.036810698109137965, + "kl_loss": 367.55364990234375, + "loss_ib": 3.6801135540008545, + "step": 128 + }, + { + "ce_ib": 43.11273956298828, + "ce_orig": 1.3362116813659668, + "epoch": 0.036810698109137965, + "kl_loss": 402.22039794921875, + "loss_ib": 4.026515007019043, + "step": 128 + }, + { + "ce_ib": 43.218231201171875, + "ce_orig": 1.3605029582977295, + "epoch": 0.036810698109137965, + "kl_loss": 467.728759765625, + "loss_ib": 4.681609630584717, + "step": 128 + }, + { + "ce_ib": 42.986263275146484, + "ce_orig": 1.012925386428833, + "epoch": 0.036810698109137965, + "kl_loss": 461.2456359863281, + "loss_ib": 4.61675500869751, + "step": 128 + }, + { + "ce_ib": 45.84535598754883, + "ce_orig": 0.9996353387832642, + "epoch": 0.03709828168811561, + "kl_loss": 307.2117919921875, + "loss_ib": 3.07670259475708, + "step": 129 + }, + { + "ce_ib": 47.88697052001953, + "ce_orig": 1.4389865398406982, + "epoch": 0.03709828168811561, + "kl_loss": 270.8951416015625, + "loss_ib": 2.713740110397339, + "step": 129 + }, + { + "ce_ib": 44.36796569824219, + "ce_orig": 0.28512611985206604, + "epoch": 0.03709828168811561, + "kl_loss": 424.74493408203125, + "loss_ib": 4.251885890960693, + "step": 129 + }, + { + "ce_ib": 49.39015197753906, + "ce_orig": 1.1395325660705566, + "epoch": 0.03709828168811561, + "kl_loss": 358.5738220214844, + "loss_ib": 3.59067702293396, + "step": 129 + }, + { + "epoch": 0.03738586526709325, + "grad_norm": 114.32688903808594, + "learning_rate": 1.974522292993631e-05, + "loss": 5.7247, + "step": 130 + }, + { + "ce_ib": 46.33614730834961, + "ce_orig": 1.3037816286087036, + "epoch": 0.03738586526709325, + "kl_loss": 291.12872314453125, + "loss_ib": 2.9159207344055176, + "step": 130 + }, + { + "ce_ib": 47.692874908447266, + "ce_orig": 0.864368736743927, + "epoch": 0.03738586526709325, + "kl_loss": 327.73089599609375, + "loss_ib": 3.282078266143799, + "step": 130 + }, + { + "ce_ib": 49.18111038208008, + "ce_orig": 1.450368046760559, + "epoch": 0.03738586526709325, + "kl_loss": 326.9405212402344, + "loss_ib": 3.2743234634399414, + "step": 130 + }, + { + "ce_ib": 47.151817321777344, + "ce_orig": 0.6973881125450134, + "epoch": 0.03738586526709325, + "kl_loss": 341.2000732421875, + "loss_ib": 3.4167158603668213, + "step": 130 + }, + { + "ce_ib": 47.11798858642578, + "ce_orig": 0.9874345660209656, + "epoch": 0.03767344884607089, + "kl_loss": 245.39163208007812, + "loss_ib": 2.458627939224243, + "step": 131 + }, + { + "ce_ib": 48.82184600830078, + "ce_orig": 1.7434450387954712, + "epoch": 0.03767344884607089, + "kl_loss": 233.43666076660156, + "loss_ib": 2.3392486572265625, + "step": 131 + }, + { + "ce_ib": 52.432861328125, + "ce_orig": 1.6617094278335571, + "epoch": 0.03767344884607089, + "kl_loss": 249.0355682373047, + "loss_ib": 2.495598793029785, + "step": 131 + }, + { + "ce_ib": 48.34469223022461, + "ce_orig": 0.8209026455879211, + "epoch": 0.03767344884607089, + "kl_loss": 258.18017578125, + "loss_ib": 2.5866363048553467, + "step": 131 + }, + { + "ce_ib": 49.49347686767578, + "ce_orig": 1.1256669759750366, + "epoch": 0.03796103242504853, + "kl_loss": 249.56777954101562, + "loss_ib": 2.500627040863037, + "step": 132 + }, + { + "ce_ib": 53.37258529663086, + "ce_orig": 1.1655960083007812, + "epoch": 0.03796103242504853, + "kl_loss": 296.350830078125, + "loss_ib": 2.9688453674316406, + "step": 132 + }, + { + "ce_ib": 48.557525634765625, + "ce_orig": 1.608039140701294, + "epoch": 0.03796103242504853, + "kl_loss": 236.46792602539062, + "loss_ib": 2.369534969329834, + "step": 132 + }, + { + "ce_ib": 49.64237594604492, + "ce_orig": 1.220885157585144, + "epoch": 0.03796103242504853, + "kl_loss": 232.15313720703125, + "loss_ib": 2.32649564743042, + "step": 132 + }, + { + "ce_ib": 53.12397384643555, + "ce_orig": 1.4053157567977905, + "epoch": 0.03824861600402617, + "kl_loss": 148.06582641601562, + "loss_ib": 1.4859706163406372, + "step": 133 + }, + { + "ce_ib": 46.214569091796875, + "ce_orig": 0.5235381722450256, + "epoch": 0.03824861600402617, + "kl_loss": 216.90960693359375, + "loss_ib": 2.173717498779297, + "step": 133 + }, + { + "ce_ib": 63.17025375366211, + "ce_orig": 1.6942404508590698, + "epoch": 0.03824861600402617, + "kl_loss": 157.73348999023438, + "loss_ib": 1.5836519002914429, + "step": 133 + }, + { + "ce_ib": 56.94756317138672, + "ce_orig": 1.1503974199295044, + "epoch": 0.03824861600402617, + "kl_loss": 172.39581298828125, + "loss_ib": 1.7296528816223145, + "step": 133 + }, + { + "ce_ib": 56.051700592041016, + "ce_orig": 1.2757078409194946, + "epoch": 0.03853619958300381, + "kl_loss": 132.28765869140625, + "loss_ib": 1.328481674194336, + "step": 134 + }, + { + "ce_ib": 57.61357879638672, + "ce_orig": 0.6087625026702881, + "epoch": 0.03853619958300381, + "kl_loss": 177.25326538085938, + "loss_ib": 1.7782940864562988, + "step": 134 + }, + { + "ce_ib": 62.553672790527344, + "ce_orig": 0.979148805141449, + "epoch": 0.03853619958300381, + "kl_loss": 148.2458038330078, + "loss_ib": 1.4887133836746216, + "step": 134 + }, + { + "ce_ib": 59.60444259643555, + "ce_orig": 0.786743700504303, + "epoch": 0.03853619958300381, + "kl_loss": 177.24185180664062, + "loss_ib": 1.7783788442611694, + "step": 134 + }, + { + "epoch": 0.03882378316198145, + "grad_norm": 61.325218200683594, + "learning_rate": 2.054140127388535e-05, + "loss": 3.423, + "step": 135 + }, + { + "ce_ib": 47.83711624145508, + "ce_orig": 1.0823129415512085, + "epoch": 0.03882378316198145, + "kl_loss": 117.60155487060547, + "loss_ib": 1.1807992458343506, + "step": 135 + }, + { + "ce_ib": 53.23299789428711, + "ce_orig": 2.0539369583129883, + "epoch": 0.03882378316198145, + "kl_loss": 105.29367065429688, + "loss_ib": 1.0582599639892578, + "step": 135 + }, + { + "ce_ib": 62.1235466003418, + "ce_orig": 1.288967251777649, + "epoch": 0.03882378316198145, + "kl_loss": 117.70156860351562, + "loss_ib": 1.1832280158996582, + "step": 135 + }, + { + "ce_ib": 60.246185302734375, + "ce_orig": 1.0303462743759155, + "epoch": 0.03882378316198145, + "kl_loss": 119.00950622558594, + "loss_ib": 1.1961196660995483, + "step": 135 + }, + { + "ce_ib": 64.75760650634766, + "ce_orig": 1.0299265384674072, + "epoch": 0.039111366740959094, + "kl_loss": 97.26797485351562, + "loss_ib": 0.9791554808616638, + "step": 136 + }, + { + "ce_ib": 59.91645812988281, + "ce_orig": 0.9491832256317139, + "epoch": 0.039111366740959094, + "kl_loss": 101.67373657226562, + "loss_ib": 1.0227290391921997, + "step": 136 + }, + { + "ce_ib": 64.09386444091797, + "ce_orig": 1.0575294494628906, + "epoch": 0.039111366740959094, + "kl_loss": 86.674072265625, + "loss_ib": 0.873150110244751, + "step": 136 + }, + { + "ce_ib": 55.84811782836914, + "ce_orig": 0.5318177342414856, + "epoch": 0.039111366740959094, + "kl_loss": 69.2609634399414, + "loss_ib": 0.6981943845748901, + "step": 136 + }, + { + "ce_ib": 61.201900482177734, + "ce_orig": 1.5152733325958252, + "epoch": 0.03939895031993673, + "kl_loss": 86.82037353515625, + "loss_ib": 0.8743239641189575, + "step": 137 + }, + { + "ce_ib": 48.1711540222168, + "ce_orig": 0.9321234822273254, + "epoch": 0.03939895031993673, + "kl_loss": 88.01079559326172, + "loss_ib": 0.8849250674247742, + "step": 137 + }, + { + "ce_ib": 56.62932205200195, + "ce_orig": 0.9663710594177246, + "epoch": 0.03939895031993673, + "kl_loss": 92.86305236816406, + "loss_ib": 0.9342934489250183, + "step": 137 + }, + { + "ce_ib": 50.61298751831055, + "ce_orig": 1.012882113456726, + "epoch": 0.03939895031993673, + "kl_loss": 100.32122802734375, + "loss_ib": 1.0082734823226929, + "step": 137 + }, + { + "ce_ib": 52.78557586669922, + "ce_orig": 1.3567779064178467, + "epoch": 0.03968653389891437, + "kl_loss": 75.2830581665039, + "loss_ib": 0.7581090927124023, + "step": 138 + }, + { + "ce_ib": 52.53969192504883, + "ce_orig": 1.0360667705535889, + "epoch": 0.03968653389891437, + "kl_loss": 80.51203918457031, + "loss_ib": 0.81037437915802, + "step": 138 + }, + { + "ce_ib": 42.97282028198242, + "ce_orig": 0.6911470890045166, + "epoch": 0.03968653389891437, + "kl_loss": 107.92098236083984, + "loss_ib": 1.0835070610046387, + "step": 138 + }, + { + "ce_ib": 55.01441192626953, + "ce_orig": 1.0413540601730347, + "epoch": 0.03968653389891437, + "kl_loss": 84.3485107421875, + "loss_ib": 0.8489865064620972, + "step": 138 + }, + { + "ce_ib": 49.41832733154297, + "ce_orig": 0.5004691481590271, + "epoch": 0.039974117477892014, + "kl_loss": 64.56187438964844, + "loss_ib": 0.6505606174468994, + "step": 139 + }, + { + "ce_ib": 54.80555725097656, + "ce_orig": 0.8709143996238708, + "epoch": 0.039974117477892014, + "kl_loss": 75.56375122070312, + "loss_ib": 0.7611180543899536, + "step": 139 + }, + { + "ce_ib": 44.817134857177734, + "ce_orig": 1.1028708219528198, + "epoch": 0.039974117477892014, + "kl_loss": 56.54993438720703, + "loss_ib": 0.569981038570404, + "step": 139 + }, + { + "ce_ib": 50.21042251586914, + "ce_orig": 1.0824670791625977, + "epoch": 0.039974117477892014, + "kl_loss": 69.47157287597656, + "loss_ib": 0.6997367739677429, + "step": 139 + }, + { + "epoch": 0.04026170105686965, + "grad_norm": 27.144216537475586, + "learning_rate": 2.1337579617834397e-05, + "loss": 2.0774, + "step": 140 + }, + { + "ce_ib": 47.516990661621094, + "ce_orig": 1.2334574460983276, + "epoch": 0.04026170105686965, + "kl_loss": 63.68349838256836, + "loss_ib": 0.6415866613388062, + "step": 140 + }, + { + "ce_ib": 42.7164306640625, + "ce_orig": 0.7245992422103882, + "epoch": 0.04026170105686965, + "kl_loss": 62.23073196411133, + "loss_ib": 0.6265789866447449, + "step": 140 + }, + { + "ce_ib": 47.90711212158203, + "ce_orig": 0.797220766544342, + "epoch": 0.04026170105686965, + "kl_loss": 70.8463134765625, + "loss_ib": 0.7132538557052612, + "step": 140 + }, + { + "ce_ib": 44.96827697753906, + "ce_orig": 0.9708709716796875, + "epoch": 0.04026170105686965, + "kl_loss": 62.58702087402344, + "loss_ib": 0.6303670406341553, + "step": 140 + }, + { + "ce_ib": 43.66420364379883, + "ce_orig": 1.5154752731323242, + "epoch": 0.040549284635847296, + "kl_loss": 63.67985916137695, + "loss_ib": 0.641165018081665, + "step": 141 + }, + { + "ce_ib": 39.74589920043945, + "ce_orig": 1.1597050428390503, + "epoch": 0.040549284635847296, + "kl_loss": 68.15312194824219, + "loss_ib": 0.6855058073997498, + "step": 141 + }, + { + "ce_ib": 38.30898666381836, + "ce_orig": 1.208406686782837, + "epoch": 0.040549284635847296, + "kl_loss": 54.45480728149414, + "loss_ib": 0.5483789443969727, + "step": 141 + }, + { + "ce_ib": 42.6765022277832, + "ce_orig": 1.5113545656204224, + "epoch": 0.040549284635847296, + "kl_loss": 45.85490417480469, + "loss_ib": 0.46281668543815613, + "step": 141 + }, + { + "ce_ib": 40.479461669921875, + "ce_orig": 1.2261719703674316, + "epoch": 0.040836868214824934, + "kl_loss": 44.18513488769531, + "loss_ib": 0.44589927792549133, + "step": 142 + }, + { + "ce_ib": 38.052547454833984, + "ce_orig": 0.6562057137489319, + "epoch": 0.040836868214824934, + "kl_loss": 54.23804473876953, + "loss_ib": 0.5461856722831726, + "step": 142 + }, + { + "ce_ib": 42.021270751953125, + "ce_orig": 0.31955811381340027, + "epoch": 0.040836868214824934, + "kl_loss": 29.33365249633789, + "loss_ib": 0.2975386381149292, + "step": 142 + }, + { + "ce_ib": 41.40554428100586, + "ce_orig": 0.9634075164794922, + "epoch": 0.040836868214824934, + "kl_loss": 54.530174255371094, + "loss_ib": 0.5494422912597656, + "step": 142 + }, + { + "ce_ib": 38.900028228759766, + "ce_orig": 1.0080485343933105, + "epoch": 0.04112445179380257, + "kl_loss": 52.973106384277344, + "loss_ib": 0.533621072769165, + "step": 143 + }, + { + "ce_ib": 38.12443923950195, + "ce_orig": 0.9282627105712891, + "epoch": 0.04112445179380257, + "kl_loss": 42.263893127441406, + "loss_ib": 0.42645135521888733, + "step": 143 + }, + { + "ce_ib": 43.536231994628906, + "ce_orig": 1.5404144525527954, + "epoch": 0.04112445179380257, + "kl_loss": 39.663185119628906, + "loss_ib": 0.4009854793548584, + "step": 143 + }, + { + "ce_ib": 39.83261489868164, + "ce_orig": 1.2672309875488281, + "epoch": 0.04112445179380257, + "kl_loss": 42.35781478881836, + "loss_ib": 0.4275614023208618, + "step": 143 + }, + { + "ce_ib": 41.01529312133789, + "ce_orig": 1.457834005355835, + "epoch": 0.041412035372780216, + "kl_loss": 34.49407958984375, + "loss_ib": 0.3490423262119293, + "step": 144 + }, + { + "ce_ib": 29.345317840576172, + "ce_orig": 0.3651731610298157, + "epoch": 0.041412035372780216, + "kl_loss": 68.94469451904297, + "loss_ib": 0.6923814415931702, + "step": 144 + }, + { + "ce_ib": 34.595951080322266, + "ce_orig": 0.5874239802360535, + "epoch": 0.041412035372780216, + "kl_loss": 42.69541931152344, + "loss_ib": 0.43041378259658813, + "step": 144 + }, + { + "ce_ib": 33.79957962036133, + "ce_orig": 0.6981248259544373, + "epoch": 0.041412035372780216, + "kl_loss": 43.602195739746094, + "loss_ib": 0.43940192461013794, + "step": 144 + }, + { + "epoch": 0.041699618951757854, + "grad_norm": 11.803001403808594, + "learning_rate": 2.2133757961783442e-05, + "loss": 1.5408, + "step": 145 + }, + { + "ce_ib": 35.549190521240234, + "ce_orig": 1.1565806865692139, + "epoch": 0.041699618951757854, + "kl_loss": 33.884830474853516, + "loss_ib": 0.34240320324897766, + "step": 145 + }, + { + "ce_ib": 36.85725021362305, + "ce_orig": 0.614605188369751, + "epoch": 0.041699618951757854, + "kl_loss": 38.68310546875, + "loss_ib": 0.3905167579650879, + "step": 145 + }, + { + "ce_ib": 33.613216400146484, + "ce_orig": 0.774656355381012, + "epoch": 0.041699618951757854, + "kl_loss": 35.071624755859375, + "loss_ib": 0.35407754778862, + "step": 145 + }, + { + "ce_ib": 38.60401916503906, + "ce_orig": 1.3087610006332397, + "epoch": 0.041699618951757854, + "kl_loss": 27.925447463989258, + "loss_ib": 0.28311488032341003, + "step": 145 + }, + { + "ce_ib": 31.10846519470215, + "ce_orig": 0.8307720422744751, + "epoch": 0.0419872025307355, + "kl_loss": 35.52260208129883, + "loss_ib": 0.358336865901947, + "step": 146 + }, + { + "ce_ib": 35.52298355102539, + "ce_orig": 0.6402543187141418, + "epoch": 0.0419872025307355, + "kl_loss": 20.296550750732422, + "loss_ib": 0.2065178006887436, + "step": 146 + }, + { + "ce_ib": 32.333797454833984, + "ce_orig": 0.5773739814758301, + "epoch": 0.0419872025307355, + "kl_loss": 33.569828033447266, + "loss_ib": 0.3389316499233246, + "step": 146 + }, + { + "ce_ib": 36.07624816894531, + "ce_orig": 1.3285282850265503, + "epoch": 0.0419872025307355, + "kl_loss": 28.952056884765625, + "loss_ib": 0.29312819242477417, + "step": 146 + }, + { + "ce_ib": 34.523563385009766, + "ce_orig": 1.455711841583252, + "epoch": 0.042274786109713136, + "kl_loss": 33.6051025390625, + "loss_ib": 0.33950334787368774, + "step": 147 + }, + { + "ce_ib": 32.496185302734375, + "ce_orig": 0.8119601011276245, + "epoch": 0.042274786109713136, + "kl_loss": 30.560955047607422, + "loss_ib": 0.3088591694831848, + "step": 147 + }, + { + "ce_ib": 31.24298858642578, + "ce_orig": 0.6599155068397522, + "epoch": 0.042274786109713136, + "kl_loss": 25.330509185791016, + "loss_ib": 0.25642937421798706, + "step": 147 + }, + { + "ce_ib": 34.15837097167969, + "ce_orig": 0.7831727862358093, + "epoch": 0.042274786109713136, + "kl_loss": 32.8238410949707, + "loss_ib": 0.33165425062179565, + "step": 147 + }, + { + "ce_ib": 36.50813674926758, + "ce_orig": 1.3959016799926758, + "epoch": 0.042562369688690774, + "kl_loss": 24.21875762939453, + "loss_ib": 0.24583838880062103, + "step": 148 + }, + { + "ce_ib": 33.1202392578125, + "ce_orig": 0.511696457862854, + "epoch": 0.042562369688690774, + "kl_loss": 32.21922302246094, + "loss_ib": 0.32550424337387085, + "step": 148 + }, + { + "ce_ib": 31.0117130279541, + "ce_orig": 0.6812951564788818, + "epoch": 0.042562369688690774, + "kl_loss": 24.291759490966797, + "loss_ib": 0.24601876735687256, + "step": 148 + }, + { + "ce_ib": 31.82808494567871, + "ce_orig": 0.6159489750862122, + "epoch": 0.042562369688690774, + "kl_loss": 21.070880889892578, + "loss_ib": 0.21389161050319672, + "step": 148 + }, + { + "ce_ib": 30.777088165283203, + "ce_orig": 0.6892868280410767, + "epoch": 0.04284995326766842, + "kl_loss": 22.594371795654297, + "loss_ib": 0.22902143001556396, + "step": 149 + }, + { + "ce_ib": 37.14453887939453, + "ce_orig": 1.9816077947616577, + "epoch": 0.04284995326766842, + "kl_loss": 44.33348083496094, + "loss_ib": 0.44704926013946533, + "step": 149 + }, + { + "ce_ib": 28.821805953979492, + "ce_orig": 0.8520447611808777, + "epoch": 0.04284995326766842, + "kl_loss": 25.312294006347656, + "loss_ib": 0.25600510835647583, + "step": 149 + }, + { + "ce_ib": 34.31684494018555, + "ce_orig": 1.2896530628204346, + "epoch": 0.04284995326766842, + "kl_loss": 29.53026580810547, + "loss_ib": 0.2987343370914459, + "step": 149 + }, + { + "epoch": 0.043137536846646056, + "grad_norm": 6.311826705932617, + "learning_rate": 2.2929936305732484e-05, + "loss": 1.2869, + "step": 150 + }, + { + "ce_ib": 29.53162384033203, + "ce_orig": 0.9975224733352661, + "epoch": 0.043137536846646056, + "kl_loss": 21.389690399169922, + "loss_ib": 0.21685007214546204, + "step": 150 + }, + { + "ce_ib": 33.31801986694336, + "ce_orig": 1.2958406209945679, + "epoch": 0.043137536846646056, + "kl_loss": 24.239055633544922, + "loss_ib": 0.24572233855724335, + "step": 150 + }, + { + "ce_ib": 35.665565490722656, + "ce_orig": 0.5235558152198792, + "epoch": 0.043137536846646056, + "kl_loss": 22.637462615966797, + "loss_ib": 0.22994117438793182, + "step": 150 + }, + { + "ce_ib": 35.29521179199219, + "ce_orig": 1.2123780250549316, + "epoch": 0.043137536846646056, + "kl_loss": 21.82353401184082, + "loss_ib": 0.22176486253738403, + "step": 150 + }, + { + "ce_ib": 33.05928421020508, + "ce_orig": 0.8596100807189941, + "epoch": 0.043425120425623694, + "kl_loss": 24.44532012939453, + "loss_ib": 0.2477591335773468, + "step": 151 + }, + { + "ce_ib": 38.77718734741211, + "ce_orig": 2.1412835121154785, + "epoch": 0.043425120425623694, + "kl_loss": 28.010578155517578, + "loss_ib": 0.2839834988117218, + "step": 151 + }, + { + "ce_ib": 34.24200439453125, + "ce_orig": 0.6295925974845886, + "epoch": 0.043425120425623694, + "kl_loss": 21.05971908569336, + "loss_ib": 0.21402138471603394, + "step": 151 + }, + { + "ce_ib": 33.19257736206055, + "ce_orig": 1.2560220956802368, + "epoch": 0.043425120425623694, + "kl_loss": 21.76863670349121, + "loss_ib": 0.2210056185722351, + "step": 151 + }, + { + "ce_ib": 32.58409881591797, + "ce_orig": 0.7950013279914856, + "epoch": 0.04371270400460134, + "kl_loss": 28.509788513183594, + "loss_ib": 0.28835630416870117, + "step": 152 + }, + { + "ce_ib": 32.49606704711914, + "ce_orig": 1.8779208660125732, + "epoch": 0.04371270400460134, + "kl_loss": 30.838430404663086, + "loss_ib": 0.3116339147090912, + "step": 152 + }, + { + "ce_ib": 33.429622650146484, + "ce_orig": 0.7865967154502869, + "epoch": 0.04371270400460134, + "kl_loss": 29.228368759155273, + "loss_ib": 0.2956266403198242, + "step": 152 + }, + { + "ce_ib": 29.401348114013672, + "ce_orig": 0.7986537218093872, + "epoch": 0.04371270400460134, + "kl_loss": 24.602405548095703, + "loss_ib": 0.24896419048309326, + "step": 152 + }, + { + "ce_ib": 35.428165435791016, + "ce_orig": 1.3112716674804688, + "epoch": 0.044000287583578976, + "kl_loss": 18.217554092407227, + "loss_ib": 0.1857183575630188, + "step": 153 + }, + { + "ce_ib": 33.20622634887695, + "ce_orig": 1.091870903968811, + "epoch": 0.044000287583578976, + "kl_loss": 18.279142379760742, + "loss_ib": 0.18611203134059906, + "step": 153 + }, + { + "ce_ib": 32.40380859375, + "ce_orig": 1.0627433061599731, + "epoch": 0.044000287583578976, + "kl_loss": 20.22241973876953, + "loss_ib": 0.20546457171440125, + "step": 153 + }, + { + "ce_ib": 29.084455490112305, + "ce_orig": 0.823095440864563, + "epoch": 0.044000287583578976, + "kl_loss": 23.27497673034668, + "loss_ib": 0.235658198595047, + "step": 153 + }, + { + "ce_ib": 30.08700180053711, + "ce_orig": 0.6791905164718628, + "epoch": 0.04428787116255662, + "kl_loss": 17.341888427734375, + "loss_ib": 0.17642758786678314, + "step": 154 + }, + { + "ce_ib": 28.275983810424805, + "ce_orig": 0.40569692850112915, + "epoch": 0.04428787116255662, + "kl_loss": 19.95773696899414, + "loss_ib": 0.20240497589111328, + "step": 154 + }, + { + "ce_ib": 33.74617004394531, + "ce_orig": 1.7346209287643433, + "epoch": 0.04428787116255662, + "kl_loss": 18.556991577148438, + "loss_ib": 0.18894453346729279, + "step": 154 + }, + { + "ce_ib": 30.752422332763672, + "ce_orig": 0.71451336145401, + "epoch": 0.04428787116255662, + "kl_loss": 16.57543182373047, + "loss_ib": 0.168829545378685, + "step": 154 + }, + { + "epoch": 0.04457545474153426, + "grad_norm": 3.4597690105438232, + "learning_rate": 2.372611464968153e-05, + "loss": 1.2585, + "step": 155 + }, + { + "ce_ib": 28.92538833618164, + "ce_orig": 1.050588607788086, + "epoch": 0.04457545474153426, + "kl_loss": 19.674413681030273, + "loss_ib": 0.19963666796684265, + "step": 155 + }, + { + "ce_ib": 31.494068145751953, + "ce_orig": 1.3162670135498047, + "epoch": 0.04457545474153426, + "kl_loss": 15.291072845458984, + "loss_ib": 0.156060129404068, + "step": 155 + }, + { + "ce_ib": 31.52849769592285, + "ce_orig": 0.6012848615646362, + "epoch": 0.04457545474153426, + "kl_loss": 15.920844078063965, + "loss_ib": 0.1623612940311432, + "step": 155 + }, + { + "ce_ib": 33.46098709106445, + "ce_orig": 1.0411237478256226, + "epoch": 0.04457545474153426, + "kl_loss": 17.932607650756836, + "loss_ib": 0.1826721727848053, + "step": 155 + }, + { + "ce_ib": 34.06367874145508, + "ce_orig": 0.7581042647361755, + "epoch": 0.044863038320511896, + "kl_loss": 22.63808822631836, + "loss_ib": 0.22978724539279938, + "step": 156 + }, + { + "ce_ib": 30.403427124023438, + "ce_orig": 0.5148236751556396, + "epoch": 0.044863038320511896, + "kl_loss": 14.46303939819336, + "loss_ib": 0.14767073094844818, + "step": 156 + }, + { + "ce_ib": 29.40231704711914, + "ce_orig": 0.7519353032112122, + "epoch": 0.044863038320511896, + "kl_loss": 21.479459762573242, + "loss_ib": 0.21773482859134674, + "step": 156 + }, + { + "ce_ib": 27.831212997436523, + "ce_orig": 0.80788654088974, + "epoch": 0.044863038320511896, + "kl_loss": 16.518142700195312, + "loss_ib": 0.16796454787254333, + "step": 156 + }, + { + "ce_ib": 27.716188430786133, + "ce_orig": 0.7496557831764221, + "epoch": 0.04515062189948954, + "kl_loss": 13.905786514282227, + "loss_ib": 0.1418294757604599, + "step": 157 + }, + { + "ce_ib": 28.782617568969727, + "ce_orig": 0.7090852856636047, + "epoch": 0.04515062189948954, + "kl_loss": 15.777366638183594, + "loss_ib": 0.16065192222595215, + "step": 157 + }, + { + "ce_ib": 26.00276756286621, + "ce_orig": 0.494842529296875, + "epoch": 0.04515062189948954, + "kl_loss": 16.35750389099121, + "loss_ib": 0.16617530584335327, + "step": 157 + }, + { + "ce_ib": 28.558490753173828, + "ce_orig": 0.948776364326477, + "epoch": 0.04515062189948954, + "kl_loss": 15.582597732543945, + "loss_ib": 0.15868182480335236, + "step": 157 + }, + { + "ce_ib": 34.348106384277344, + "ce_orig": 1.4037892818450928, + "epoch": 0.04543820547846718, + "kl_loss": 15.35753059387207, + "loss_ib": 0.15701010823249817, + "step": 158 + }, + { + "ce_ib": 30.06648826599121, + "ce_orig": 1.0562583208084106, + "epoch": 0.04543820547846718, + "kl_loss": 13.857028007507324, + "loss_ib": 0.14157693088054657, + "step": 158 + }, + { + "ce_ib": 33.296878814697266, + "ce_orig": 1.184076189994812, + "epoch": 0.04543820547846718, + "kl_loss": 14.362920761108398, + "loss_ib": 0.14695888757705688, + "step": 158 + }, + { + "ce_ib": 30.477880477905273, + "ce_orig": 0.6938384771347046, + "epoch": 0.04543820547846718, + "kl_loss": 13.156094551086426, + "loss_ib": 0.13460873067378998, + "step": 158 + }, + { + "ce_ib": 30.92000961303711, + "ce_orig": 0.8126051425933838, + "epoch": 0.04572578905744482, + "kl_loss": 13.102733612060547, + "loss_ib": 0.13411933183670044, + "step": 159 + }, + { + "ce_ib": 32.433162689208984, + "ce_orig": 1.181881070137024, + "epoch": 0.04572578905744482, + "kl_loss": 14.020172119140625, + "loss_ib": 0.14344502985477448, + "step": 159 + }, + { + "ce_ib": 33.700931549072266, + "ce_orig": 1.5680264234542847, + "epoch": 0.04572578905744482, + "kl_loss": 13.899885177612305, + "loss_ib": 0.14236894249916077, + "step": 159 + }, + { + "ce_ib": 28.371702194213867, + "ce_orig": 0.9268200397491455, + "epoch": 0.04572578905744482, + "kl_loss": 18.150760650634766, + "loss_ib": 0.18434476852416992, + "step": 159 + }, + { + "epoch": 0.04601337263642246, + "grad_norm": 2.066725254058838, + "learning_rate": 2.4522292993630575e-05, + "loss": 1.0186, + "step": 160 + }, + { + "ce_ib": 29.896162033081055, + "ce_orig": 0.8601086735725403, + "epoch": 0.04601337263642246, + "kl_loss": 12.835603713989258, + "loss_ib": 0.13134564459323883, + "step": 160 + }, + { + "ce_ib": 29.533695220947266, + "ce_orig": 1.1664679050445557, + "epoch": 0.04601337263642246, + "kl_loss": 15.90629768371582, + "loss_ib": 0.1620163470506668, + "step": 160 + }, + { + "ce_ib": 28.180938720703125, + "ce_orig": 0.8322929739952087, + "epoch": 0.04601337263642246, + "kl_loss": 12.797597885131836, + "loss_ib": 0.13079407811164856, + "step": 160 + }, + { + "ce_ib": 28.38677215576172, + "ce_orig": 0.8806703090667725, + "epoch": 0.04601337263642246, + "kl_loss": 16.691715240478516, + "loss_ib": 0.16975581645965576, + "step": 160 + }, + { + "ce_ib": 27.899879455566406, + "ce_orig": 0.6471708416938782, + "epoch": 0.0463009562154001, + "kl_loss": 14.290294647216797, + "loss_ib": 0.14569292962551117, + "step": 161 + }, + { + "ce_ib": 27.485563278198242, + "ce_orig": 0.9937444925308228, + "epoch": 0.0463009562154001, + "kl_loss": 15.568538665771484, + "loss_ib": 0.1584339439868927, + "step": 161 + }, + { + "ce_ib": 30.291170120239258, + "ce_orig": 0.7304977178573608, + "epoch": 0.0463009562154001, + "kl_loss": 9.206818580627441, + "loss_ib": 0.09509730339050293, + "step": 161 + }, + { + "ce_ib": 29.51616859436035, + "ce_orig": 1.386801838874817, + "epoch": 0.0463009562154001, + "kl_loss": 20.9112548828125, + "loss_ib": 0.21206416189670563, + "step": 161 + }, + { + "ce_ib": 27.004371643066406, + "ce_orig": 0.6013516783714294, + "epoch": 0.04658853979437774, + "kl_loss": 15.80407428741455, + "loss_ib": 0.1607411801815033, + "step": 162 + }, + { + "ce_ib": 25.159454345703125, + "ce_orig": 0.9960594773292542, + "epoch": 0.04658853979437774, + "kl_loss": 13.249858856201172, + "loss_ib": 0.13501453399658203, + "step": 162 + }, + { + "ce_ib": 26.201725006103516, + "ce_orig": 0.5098617076873779, + "epoch": 0.04658853979437774, + "kl_loss": 11.842464447021484, + "loss_ib": 0.1210448145866394, + "step": 162 + }, + { + "ce_ib": 29.0825138092041, + "ce_orig": 0.8241496086120605, + "epoch": 0.04658853979437774, + "kl_loss": 16.010656356811523, + "loss_ib": 0.16301481425762177, + "step": 162 + }, + { + "ce_ib": 26.992971420288086, + "ce_orig": 0.8256320357322693, + "epoch": 0.04687612337335538, + "kl_loss": 11.819284439086914, + "loss_ib": 0.12089213728904724, + "step": 163 + }, + { + "ce_ib": 27.297061920166016, + "ce_orig": 0.9797989726066589, + "epoch": 0.04687612337335538, + "kl_loss": 12.12143325805664, + "loss_ib": 0.12394402921199799, + "step": 163 + }, + { + "ce_ib": 26.038820266723633, + "ce_orig": 0.779868483543396, + "epoch": 0.04687612337335538, + "kl_loss": 12.200529098510742, + "loss_ib": 0.12460917234420776, + "step": 163 + }, + { + "ce_ib": 30.567201614379883, + "ce_orig": 1.4183546304702759, + "epoch": 0.04687612337335538, + "kl_loss": 11.150833129882812, + "loss_ib": 0.11456504464149475, + "step": 163 + }, + { + "ce_ib": 27.423969268798828, + "ce_orig": 0.6227314472198486, + "epoch": 0.047163706952333025, + "kl_loss": 11.591859817504883, + "loss_ib": 0.11866099387407303, + "step": 164 + }, + { + "ce_ib": 31.1706485748291, + "ce_orig": 0.592538595199585, + "epoch": 0.047163706952333025, + "kl_loss": 11.143152236938477, + "loss_ib": 0.11454858630895615, + "step": 164 + }, + { + "ce_ib": 32.50811004638672, + "ce_orig": 0.8140405416488647, + "epoch": 0.047163706952333025, + "kl_loss": 13.64712905883789, + "loss_ib": 0.1397220939397812, + "step": 164 + }, + { + "ce_ib": 27.141164779663086, + "ce_orig": 0.41711243987083435, + "epoch": 0.047163706952333025, + "kl_loss": 12.186077117919922, + "loss_ib": 0.12457488477230072, + "step": 164 + }, + { + "epoch": 0.04745129053131066, + "grad_norm": 1.150227665901184, + "learning_rate": 2.531847133757962e-05, + "loss": 0.983, + "step": 165 + }, + { + "ce_ib": 25.222728729248047, + "ce_orig": 0.6572214365005493, + "epoch": 0.04745129053131066, + "kl_loss": 11.52933120727539, + "loss_ib": 0.11781557649374008, + "step": 165 + }, + { + "ce_ib": 33.31783676147461, + "ce_orig": 0.8566097617149353, + "epoch": 0.04745129053131066, + "kl_loss": 12.30784797668457, + "loss_ib": 0.12641026079654694, + "step": 165 + }, + { + "ce_ib": 24.890148162841797, + "ce_orig": 0.5485845804214478, + "epoch": 0.04745129053131066, + "kl_loss": 13.047416687011719, + "loss_ib": 0.1329631805419922, + "step": 165 + }, + { + "ce_ib": 28.270605087280273, + "ce_orig": 0.3421739935874939, + "epoch": 0.04745129053131066, + "kl_loss": 15.006128311157227, + "loss_ib": 0.15288834273815155, + "step": 165 + }, + { + "ce_ib": 26.621320724487305, + "ce_orig": 1.039825439453125, + "epoch": 0.0477388741102883, + "kl_loss": 11.003231048583984, + "loss_ib": 0.11269444227218628, + "step": 166 + }, + { + "ce_ib": 17.792619705200195, + "ce_orig": 0.09731145948171616, + "epoch": 0.0477388741102883, + "kl_loss": 6.299266338348389, + "loss_ib": 0.06477192044258118, + "step": 166 + }, + { + "ce_ib": 29.130701065063477, + "ce_orig": 1.3552623987197876, + "epoch": 0.0477388741102883, + "kl_loss": 12.245400428771973, + "loss_ib": 0.12536707520484924, + "step": 166 + }, + { + "ce_ib": 29.495161056518555, + "ce_orig": 0.5962749123573303, + "epoch": 0.0477388741102883, + "kl_loss": 11.876587867736816, + "loss_ib": 0.12171538919210434, + "step": 166 + }, + { + "ce_ib": 28.59954833984375, + "ce_orig": 1.011759638786316, + "epoch": 0.048026457689265944, + "kl_loss": 10.475525856018066, + "loss_ib": 0.10761521011590958, + "step": 167 + }, + { + "ce_ib": 26.95580291748047, + "ce_orig": 0.7863696813583374, + "epoch": 0.048026457689265944, + "kl_loss": 11.420799255371094, + "loss_ib": 0.11690356582403183, + "step": 167 + }, + { + "ce_ib": 25.404388427734375, + "ce_orig": 0.49368423223495483, + "epoch": 0.048026457689265944, + "kl_loss": 10.898456573486328, + "loss_ib": 0.11152499914169312, + "step": 167 + }, + { + "ce_ib": 27.980323791503906, + "ce_orig": 0.8852983713150024, + "epoch": 0.048026457689265944, + "kl_loss": 11.366129875183105, + "loss_ib": 0.11645933240652084, + "step": 167 + }, + { + "ce_ib": 25.69623374938965, + "ce_orig": 0.875866174697876, + "epoch": 0.04831404126824358, + "kl_loss": 11.159571647644043, + "loss_ib": 0.11416534334421158, + "step": 168 + }, + { + "ce_ib": 26.52794647216797, + "ce_orig": 0.7182326912879944, + "epoch": 0.04831404126824358, + "kl_loss": 10.362823486328125, + "loss_ib": 0.10628102719783783, + "step": 168 + }, + { + "ce_ib": 26.30867576599121, + "ce_orig": 0.8692768216133118, + "epoch": 0.04831404126824358, + "kl_loss": 10.388944625854492, + "loss_ib": 0.10652031004428864, + "step": 168 + }, + { + "ce_ib": 24.659727096557617, + "ce_orig": 0.6755059361457825, + "epoch": 0.04831404126824358, + "kl_loss": 10.561405181884766, + "loss_ib": 0.10808002203702927, + "step": 168 + }, + { + "ce_ib": 29.72395896911621, + "ce_orig": 1.1792970895767212, + "epoch": 0.04860162484722123, + "kl_loss": 10.214838981628418, + "loss_ib": 0.10512077808380127, + "step": 169 + }, + { + "ce_ib": 27.70913314819336, + "ce_orig": 0.9696344137191772, + "epoch": 0.04860162484722123, + "kl_loss": 10.429807662963867, + "loss_ib": 0.1070689857006073, + "step": 169 + }, + { + "ce_ib": 27.571584701538086, + "ce_orig": 0.9312324523925781, + "epoch": 0.04860162484722123, + "kl_loss": 10.552055358886719, + "loss_ib": 0.10827770829200745, + "step": 169 + }, + { + "ce_ib": 25.53492546081543, + "ce_orig": 0.8822551369667053, + "epoch": 0.04860162484722123, + "kl_loss": 11.001655578613281, + "loss_ib": 0.11257004737854004, + "step": 169 + }, + { + "epoch": 0.048889208426198864, + "grad_norm": 0.8837189674377441, + "learning_rate": 2.6114649681528662e-05, + "loss": 0.9786, + "step": 170 + }, + { + "ce_ib": 25.924278259277344, + "ce_orig": 1.155822992324829, + "epoch": 0.048889208426198864, + "kl_loss": 10.456619262695312, + "loss_ib": 0.10715862363576889, + "step": 170 + }, + { + "ce_ib": 27.14344024658203, + "ce_orig": 1.013275146484375, + "epoch": 0.048889208426198864, + "kl_loss": 9.088029861450195, + "loss_ib": 0.09359464794397354, + "step": 170 + }, + { + "ce_ib": 23.778573989868164, + "ce_orig": 0.5937850475311279, + "epoch": 0.048889208426198864, + "kl_loss": 11.208532333374023, + "loss_ib": 0.11446317285299301, + "step": 170 + }, + { + "ce_ib": 28.437326431274414, + "ce_orig": 1.3872705698013306, + "epoch": 0.048889208426198864, + "kl_loss": 11.0403413772583, + "loss_ib": 0.11324714124202728, + "step": 170 + }, + { + "ce_ib": 29.6293888092041, + "ce_orig": 1.264078974723816, + "epoch": 0.0491767920051765, + "kl_loss": 10.278146743774414, + "loss_ib": 0.10574440658092499, + "step": 171 + }, + { + "ce_ib": 23.555601119995117, + "ce_orig": 0.5615886449813843, + "epoch": 0.0491767920051765, + "kl_loss": 10.424758911132812, + "loss_ib": 0.10660314559936523, + "step": 171 + }, + { + "ce_ib": 29.939388275146484, + "ce_orig": 0.7696157097816467, + "epoch": 0.0491767920051765, + "kl_loss": 10.607294082641602, + "loss_ib": 0.10906687378883362, + "step": 171 + }, + { + "ce_ib": 21.64013671875, + "ce_orig": 0.8276143074035645, + "epoch": 0.0491767920051765, + "kl_loss": 11.212567329406738, + "loss_ib": 0.11428967863321304, + "step": 171 + }, + { + "ce_ib": 27.828157424926758, + "ce_orig": 0.9696255922317505, + "epoch": 0.04946437558415415, + "kl_loss": 10.001435279846191, + "loss_ib": 0.10279716551303864, + "step": 172 + }, + { + "ce_ib": 23.958757400512695, + "ce_orig": 0.6945645213127136, + "epoch": 0.04946437558415415, + "kl_loss": 11.062480926513672, + "loss_ib": 0.11302068829536438, + "step": 172 + }, + { + "ce_ib": 27.76424217224121, + "ce_orig": 0.9959214329719543, + "epoch": 0.04946437558415415, + "kl_loss": 10.427704811096191, + "loss_ib": 0.10705346614122391, + "step": 172 + }, + { + "ce_ib": 25.078935623168945, + "ce_orig": 0.5796197056770325, + "epoch": 0.04946437558415415, + "kl_loss": 10.434064865112305, + "loss_ib": 0.10684854537248611, + "step": 172 + }, + { + "ce_ib": 28.147438049316406, + "ce_orig": 0.8044544458389282, + "epoch": 0.049751959163131784, + "kl_loss": 10.538864135742188, + "loss_ib": 0.10820338129997253, + "step": 173 + }, + { + "ce_ib": 29.065446853637695, + "ce_orig": 0.8273786902427673, + "epoch": 0.049751959163131784, + "kl_loss": 9.853753089904785, + "loss_ib": 0.1014440730214119, + "step": 173 + }, + { + "ce_ib": 27.74785614013672, + "ce_orig": 1.2104791402816772, + "epoch": 0.049751959163131784, + "kl_loss": 10.07681655883789, + "loss_ib": 0.10354294627904892, + "step": 173 + }, + { + "ce_ib": 26.78622817993164, + "ce_orig": 1.5120453834533691, + "epoch": 0.049751959163131784, + "kl_loss": 9.62009334564209, + "loss_ib": 0.09887955337762833, + "step": 173 + }, + { + "ce_ib": 19.017391204833984, + "ce_orig": 0.5162482857704163, + "epoch": 0.05003954274210943, + "kl_loss": 7.353050231933594, + "loss_ib": 0.07543224096298218, + "step": 174 + }, + { + "ce_ib": 23.64644432067871, + "ce_orig": 0.9337442517280579, + "epoch": 0.05003954274210943, + "kl_loss": 10.005657196044922, + "loss_ib": 0.10242121666669846, + "step": 174 + }, + { + "ce_ib": 26.815704345703125, + "ce_orig": 0.6984226107597351, + "epoch": 0.05003954274210943, + "kl_loss": 10.047138214111328, + "loss_ib": 0.10315295308828354, + "step": 174 + }, + { + "ce_ib": 23.5247859954834, + "ce_orig": 0.6298738718032837, + "epoch": 0.05003954274210943, + "kl_loss": 9.880701065063477, + "loss_ib": 0.1011594831943512, + "step": 174 + }, + { + "epoch": 0.050327126321087066, + "grad_norm": 0.5031439661979675, + "learning_rate": 2.6910828025477707e-05, + "loss": 0.9779, + "step": 175 + }, + { + "ce_ib": 27.915422439575195, + "ce_orig": 1.2774735689163208, + "epoch": 0.050327126321087066, + "kl_loss": 10.713859558105469, + "loss_ib": 0.1099301278591156, + "step": 175 + }, + { + "ce_ib": 25.17244529724121, + "ce_orig": 0.5252784490585327, + "epoch": 0.050327126321087066, + "kl_loss": 10.287009239196777, + "loss_ib": 0.10538733005523682, + "step": 175 + }, + { + "ce_ib": 25.812246322631836, + "ce_orig": 0.7732113599777222, + "epoch": 0.050327126321087066, + "kl_loss": 8.927350997924805, + "loss_ib": 0.09185472875833511, + "step": 175 + }, + { + "ce_ib": 23.02685546875, + "ce_orig": 0.7139325141906738, + "epoch": 0.050327126321087066, + "kl_loss": 10.248601913452148, + "loss_ib": 0.10478869825601578, + "step": 175 + }, + { + "ce_ib": 22.77245330810547, + "ce_orig": 0.8318886756896973, + "epoch": 0.050614709900064704, + "kl_loss": 10.135682106018066, + "loss_ib": 0.1036340594291687, + "step": 176 + }, + { + "ce_ib": 27.42522430419922, + "ce_orig": 0.9915688037872314, + "epoch": 0.050614709900064704, + "kl_loss": 10.458263397216797, + "loss_ib": 0.10732515156269073, + "step": 176 + }, + { + "ce_ib": 24.290016174316406, + "ce_orig": 0.7032797932624817, + "epoch": 0.050614709900064704, + "kl_loss": 10.18847370147705, + "loss_ib": 0.10431373119354248, + "step": 176 + }, + { + "ce_ib": 27.81938362121582, + "ce_orig": 1.1106735467910767, + "epoch": 0.050614709900064704, + "kl_loss": 9.77632999420166, + "loss_ib": 0.100545234978199, + "step": 176 + }, + { + "ce_ib": 28.1321964263916, + "ce_orig": 1.3234449625015259, + "epoch": 0.05090229347904235, + "kl_loss": 9.504875183105469, + "loss_ib": 0.09786196798086166, + "step": 177 + }, + { + "ce_ib": 25.004257202148438, + "ce_orig": 0.7698526382446289, + "epoch": 0.05090229347904235, + "kl_loss": 10.274667739868164, + "loss_ib": 0.10524710267782211, + "step": 177 + }, + { + "ce_ib": 25.05718421936035, + "ce_orig": 0.8450519442558289, + "epoch": 0.05090229347904235, + "kl_loss": 7.935550689697266, + "loss_ib": 0.0818612277507782, + "step": 177 + }, + { + "ce_ib": 24.45059585571289, + "ce_orig": 0.6560284495353699, + "epoch": 0.05090229347904235, + "kl_loss": 10.264579772949219, + "loss_ib": 0.10509085655212402, + "step": 177 + }, + { + "ce_ib": 22.231950759887695, + "ce_orig": 0.8478792309761047, + "epoch": 0.051189877058019986, + "kl_loss": 9.088963508605957, + "loss_ib": 0.09311282634735107, + "step": 178 + }, + { + "ce_ib": 25.44860076904297, + "ce_orig": 0.7396875619888306, + "epoch": 0.051189877058019986, + "kl_loss": 9.547811508178711, + "loss_ib": 0.09802297502756119, + "step": 178 + }, + { + "ce_ib": 26.52227783203125, + "ce_orig": 1.3045439720153809, + "epoch": 0.051189877058019986, + "kl_loss": 9.957924842834473, + "loss_ib": 0.1022314727306366, + "step": 178 + }, + { + "ce_ib": 25.924222946166992, + "ce_orig": 1.1649706363677979, + "epoch": 0.051189877058019986, + "kl_loss": 10.241584777832031, + "loss_ib": 0.10500826686620712, + "step": 178 + }, + { + "ce_ib": 27.674495697021484, + "ce_orig": 0.5262369513511658, + "epoch": 0.051477460636997624, + "kl_loss": 8.583837509155273, + "loss_ib": 0.08860582113265991, + "step": 179 + }, + { + "ce_ib": 29.368635177612305, + "ce_orig": 1.8323945999145508, + "epoch": 0.051477460636997624, + "kl_loss": 10.265556335449219, + "loss_ib": 0.10559242218732834, + "step": 179 + }, + { + "ce_ib": 24.87542152404785, + "ce_orig": 0.8622165322303772, + "epoch": 0.051477460636997624, + "kl_loss": 9.931290626525879, + "loss_ib": 0.10180044919252396, + "step": 179 + }, + { + "ce_ib": 24.743249893188477, + "ce_orig": 0.8221871852874756, + "epoch": 0.051477460636997624, + "kl_loss": 9.529619216918945, + "loss_ib": 0.09777051955461502, + "step": 179 + }, + { + "epoch": 0.05176504421597527, + "grad_norm": 0.6572920680046082, + "learning_rate": 2.7707006369426753e-05, + "loss": 0.9762, + "step": 180 + }, + { + "ce_ib": 26.228139877319336, + "ce_orig": 0.9346477389335632, + "epoch": 0.05176504421597527, + "kl_loss": 9.228906631469727, + "loss_ib": 0.09491188079118729, + "step": 180 + }, + { + "ce_ib": 27.96097183227539, + "ce_orig": 1.4497267007827759, + "epoch": 0.05176504421597527, + "kl_loss": 9.41794204711914, + "loss_ib": 0.09697551280260086, + "step": 180 + }, + { + "ce_ib": 25.501893997192383, + "ce_orig": 1.3674439191818237, + "epoch": 0.05176504421597527, + "kl_loss": 9.394105911254883, + "loss_ib": 0.0964912474155426, + "step": 180 + }, + { + "ce_ib": 27.754831314086914, + "ce_orig": 0.9353328943252563, + "epoch": 0.05176504421597527, + "kl_loss": 9.990425109863281, + "loss_ib": 0.10267972946166992, + "step": 180 + }, + { + "ce_ib": 24.477588653564453, + "ce_orig": 1.223670482635498, + "epoch": 0.052052627794952906, + "kl_loss": 9.197659492492676, + "loss_ib": 0.09442435204982758, + "step": 181 + }, + { + "ce_ib": 23.42432403564453, + "ce_orig": 0.34692513942718506, + "epoch": 0.052052627794952906, + "kl_loss": 9.035377502441406, + "loss_ib": 0.09269620478153229, + "step": 181 + }, + { + "ce_ib": 27.408384323120117, + "ce_orig": 1.0176830291748047, + "epoch": 0.052052627794952906, + "kl_loss": 8.829448699951172, + "loss_ib": 0.09103532880544662, + "step": 181 + }, + { + "ce_ib": 20.15254020690918, + "ce_orig": 0.4916859269142151, + "epoch": 0.052052627794952906, + "kl_loss": 8.9959716796875, + "loss_ib": 0.09197497367858887, + "step": 181 + }, + { + "ce_ib": 28.84882164001465, + "ce_orig": 1.4133418798446655, + "epoch": 0.05234021137393055, + "kl_loss": 9.433625221252441, + "loss_ib": 0.09722113609313965, + "step": 182 + }, + { + "ce_ib": 24.220762252807617, + "ce_orig": 0.7626959085464478, + "epoch": 0.05234021137393055, + "kl_loss": 9.004999160766602, + "loss_ib": 0.09247206151485443, + "step": 182 + }, + { + "ce_ib": 26.707427978515625, + "ce_orig": 0.950811505317688, + "epoch": 0.05234021137393055, + "kl_loss": 9.357291221618652, + "loss_ib": 0.09624365717172623, + "step": 182 + }, + { + "ce_ib": 23.827503204345703, + "ce_orig": 0.5993396639823914, + "epoch": 0.05234021137393055, + "kl_loss": 9.675762176513672, + "loss_ib": 0.09914036840200424, + "step": 182 + }, + { + "ce_ib": 25.040048599243164, + "ce_orig": 1.0414315462112427, + "epoch": 0.05262779495290819, + "kl_loss": 9.17612075805664, + "loss_ib": 0.09426520764827728, + "step": 183 + }, + { + "ce_ib": 26.632596969604492, + "ce_orig": 1.2410509586334229, + "epoch": 0.05262779495290819, + "kl_loss": 9.67950439453125, + "loss_ib": 0.09945829957723618, + "step": 183 + }, + { + "ce_ib": 22.586328506469727, + "ce_orig": 0.5787039399147034, + "epoch": 0.05262779495290819, + "kl_loss": 9.118326187133789, + "loss_ib": 0.09344189614057541, + "step": 183 + }, + { + "ce_ib": 29.002498626708984, + "ce_orig": 1.1482164859771729, + "epoch": 0.05262779495290819, + "kl_loss": 10.303224563598633, + "loss_ib": 0.10593248903751373, + "step": 183 + }, + { + "ce_ib": 23.188966751098633, + "ce_orig": 0.7345482110977173, + "epoch": 0.052915378531885826, + "kl_loss": 9.377893447875977, + "loss_ib": 0.09609782695770264, + "step": 184 + }, + { + "ce_ib": 25.167457580566406, + "ce_orig": 1.279574990272522, + "epoch": 0.052915378531885826, + "kl_loss": 8.157093048095703, + "loss_ib": 0.08408767729997635, + "step": 184 + }, + { + "ce_ib": 25.17441749572754, + "ce_orig": 1.2902156114578247, + "epoch": 0.052915378531885826, + "kl_loss": 8.778035163879395, + "loss_ib": 0.09029779583215714, + "step": 184 + }, + { + "ce_ib": 27.5651798248291, + "ce_orig": 0.6481632590293884, + "epoch": 0.052915378531885826, + "kl_loss": 8.615208625793457, + "loss_ib": 0.0889086052775383, + "step": 184 + }, + { + "epoch": 0.05320296211086347, + "grad_norm": 0.774932861328125, + "learning_rate": 2.8503184713375798e-05, + "loss": 1.0273, + "step": 185 + }, + { + "ce_ib": 28.26412582397461, + "ce_orig": 0.9693439602851868, + "epoch": 0.05320296211086347, + "kl_loss": 9.72558307647705, + "loss_ib": 0.10008224099874496, + "step": 185 + }, + { + "ce_ib": 20.355464935302734, + "ce_orig": 0.6961947679519653, + "epoch": 0.05320296211086347, + "kl_loss": 8.683027267456055, + "loss_ib": 0.08886582404375076, + "step": 185 + }, + { + "ce_ib": 19.516334533691406, + "ce_orig": 0.6023780703544617, + "epoch": 0.05320296211086347, + "kl_loss": 9.514884948730469, + "loss_ib": 0.09710048139095306, + "step": 185 + }, + { + "ce_ib": 21.90512466430664, + "ce_orig": 0.8949795961380005, + "epoch": 0.05320296211086347, + "kl_loss": 9.39652156829834, + "loss_ib": 0.09615572541952133, + "step": 185 + }, + { + "ce_ib": 24.16393280029297, + "ce_orig": 0.730219304561615, + "epoch": 0.05349054568984111, + "kl_loss": 8.413753509521484, + "loss_ib": 0.08655392378568649, + "step": 186 + }, + { + "ce_ib": 24.4334774017334, + "ce_orig": 1.2984904050827026, + "epoch": 0.05349054568984111, + "kl_loss": 8.700630187988281, + "loss_ib": 0.08944965153932571, + "step": 186 + }, + { + "ce_ib": 22.994918823242188, + "ce_orig": 0.9692792296409607, + "epoch": 0.05349054568984111, + "kl_loss": 8.794185638427734, + "loss_ib": 0.09024134278297424, + "step": 186 + }, + { + "ce_ib": 27.755258560180664, + "ce_orig": 1.4234228134155273, + "epoch": 0.05349054568984111, + "kl_loss": 5.130355358123779, + "loss_ib": 0.0540790781378746, + "step": 186 + }, + { + "ce_ib": 19.920040130615234, + "ce_orig": 0.459452748298645, + "epoch": 0.05377812926881875, + "kl_loss": 7.401340484619141, + "loss_ib": 0.07600540667772293, + "step": 187 + }, + { + "ce_ib": 22.339643478393555, + "ce_orig": 0.7629045844078064, + "epoch": 0.05377812926881875, + "kl_loss": 8.446830749511719, + "loss_ib": 0.08670226484537125, + "step": 187 + }, + { + "ce_ib": 23.796178817749023, + "ce_orig": 1.3895570039749146, + "epoch": 0.05377812926881875, + "kl_loss": 9.165254592895508, + "loss_ib": 0.0940321609377861, + "step": 187 + }, + { + "ce_ib": 21.33721351623535, + "ce_orig": 0.2807011902332306, + "epoch": 0.05377812926881875, + "kl_loss": 8.006156921386719, + "loss_ib": 0.08219528943300247, + "step": 187 + }, + { + "ce_ib": 20.01226234436035, + "ce_orig": 0.7121122479438782, + "epoch": 0.05406571284779639, + "kl_loss": 8.959085464477539, + "loss_ib": 0.09159208089113235, + "step": 188 + }, + { + "ce_ib": 19.18909454345703, + "ce_orig": 0.7582953572273254, + "epoch": 0.05406571284779639, + "kl_loss": 8.402653694152832, + "loss_ib": 0.08594544231891632, + "step": 188 + }, + { + "ce_ib": 23.931289672851562, + "ce_orig": 0.8940808773040771, + "epoch": 0.05406571284779639, + "kl_loss": 8.514259338378906, + "loss_ib": 0.08753572404384613, + "step": 188 + }, + { + "ce_ib": 20.879886627197266, + "ce_orig": 0.5851081609725952, + "epoch": 0.05406571284779639, + "kl_loss": 9.260396957397461, + "loss_ib": 0.09469195455312729, + "step": 188 + }, + { + "ce_ib": 20.511985778808594, + "ce_orig": 0.8533673882484436, + "epoch": 0.05435329642677403, + "kl_loss": 8.759720802307129, + "loss_ib": 0.08964840322732925, + "step": 189 + }, + { + "ce_ib": 23.13450050354004, + "ce_orig": 1.0011026859283447, + "epoch": 0.05435329642677403, + "kl_loss": 8.676036834716797, + "loss_ib": 0.08907381445169449, + "step": 189 + }, + { + "ce_ib": 22.484384536743164, + "ce_orig": 0.5926994681358337, + "epoch": 0.05435329642677403, + "kl_loss": 9.435342788696289, + "loss_ib": 0.09660186618566513, + "step": 189 + }, + { + "ce_ib": 21.21821403503418, + "ce_orig": 0.8962640166282654, + "epoch": 0.05435329642677403, + "kl_loss": 8.669075012207031, + "loss_ib": 0.08881256729364395, + "step": 189 + }, + { + "epoch": 0.05464088000575167, + "grad_norm": 0.43721508979797363, + "learning_rate": 2.929936305732484e-05, + "loss": 1.0218, + "step": 190 + }, + { + "ce_ib": 21.65335464477539, + "ce_orig": 0.7994527816772461, + "epoch": 0.05464088000575167, + "kl_loss": 8.212764739990234, + "loss_ib": 0.08429298549890518, + "step": 190 + }, + { + "ce_ib": 25.716175079345703, + "ce_orig": 1.026253342628479, + "epoch": 0.05464088000575167, + "kl_loss": 8.664275169372559, + "loss_ib": 0.08921436965465546, + "step": 190 + }, + { + "ce_ib": 19.4307861328125, + "ce_orig": 0.8895479440689087, + "epoch": 0.05464088000575167, + "kl_loss": 8.509403228759766, + "loss_ib": 0.0870371162891388, + "step": 190 + }, + { + "ce_ib": 21.85231590270996, + "ce_orig": 0.7853972911834717, + "epoch": 0.05464088000575167, + "kl_loss": 8.377355575561523, + "loss_ib": 0.08595878630876541, + "step": 190 + }, + { + "ce_ib": 21.575359344482422, + "ce_orig": 1.0062997341156006, + "epoch": 0.05492846358472931, + "kl_loss": 9.271797180175781, + "loss_ib": 0.0948755070567131, + "step": 191 + }, + { + "ce_ib": 18.518245697021484, + "ce_orig": 0.6092102527618408, + "epoch": 0.05492846358472931, + "kl_loss": 8.641265869140625, + "loss_ib": 0.08826448023319244, + "step": 191 + }, + { + "ce_ib": 25.908557891845703, + "ce_orig": 0.7555634379386902, + "epoch": 0.05492846358472931, + "kl_loss": 9.02600383758545, + "loss_ib": 0.09285089373588562, + "step": 191 + }, + { + "ce_ib": 22.47454071044922, + "ce_orig": 0.5190201997756958, + "epoch": 0.05492846358472931, + "kl_loss": 9.089900970458984, + "loss_ib": 0.09314646571874619, + "step": 191 + }, + { + "ce_ib": 19.4965763092041, + "ce_orig": 0.8628413081169128, + "epoch": 0.055216047163706955, + "kl_loss": 9.05790901184082, + "loss_ib": 0.09252873808145523, + "step": 192 + }, + { + "ce_ib": 24.495662689208984, + "ce_orig": 1.0552870035171509, + "epoch": 0.055216047163706955, + "kl_loss": 8.362863540649414, + "loss_ib": 0.0860782042145729, + "step": 192 + }, + { + "ce_ib": 24.617902755737305, + "ce_orig": 1.3669184446334839, + "epoch": 0.055216047163706955, + "kl_loss": 8.252336502075195, + "loss_ib": 0.08498515188694, + "step": 192 + }, + { + "ce_ib": 21.620195388793945, + "ce_orig": 0.9216135144233704, + "epoch": 0.055216047163706955, + "kl_loss": 8.622823715209961, + "loss_ib": 0.08839025348424911, + "step": 192 + }, + { + "ce_ib": 23.482017517089844, + "ce_orig": 1.2965989112854004, + "epoch": 0.05550363074268459, + "kl_loss": 8.686678886413574, + "loss_ib": 0.089214988052845, + "step": 193 + }, + { + "ce_ib": 21.503093719482422, + "ce_orig": 1.1102378368377686, + "epoch": 0.05550363074268459, + "kl_loss": 8.758203506469727, + "loss_ib": 0.0897323414683342, + "step": 193 + }, + { + "ce_ib": 21.88249969482422, + "ce_orig": 0.3644579350948334, + "epoch": 0.05550363074268459, + "kl_loss": 7.924787998199463, + "loss_ib": 0.08143612742424011, + "step": 193 + }, + { + "ce_ib": 21.49346160888672, + "ce_orig": 0.8568457961082458, + "epoch": 0.05550363074268459, + "kl_loss": 8.320171356201172, + "loss_ib": 0.08535105735063553, + "step": 193 + }, + { + "ce_ib": 18.73956298828125, + "ce_orig": 0.8066674470901489, + "epoch": 0.05579121432166223, + "kl_loss": 8.048727035522461, + "loss_ib": 0.08236122876405716, + "step": 194 + }, + { + "ce_ib": 24.636383056640625, + "ce_orig": 0.97906494140625, + "epoch": 0.05579121432166223, + "kl_loss": 7.121569633483887, + "loss_ib": 0.07367932796478271, + "step": 194 + }, + { + "ce_ib": 20.886672973632812, + "ce_orig": 1.25295090675354, + "epoch": 0.05579121432166223, + "kl_loss": 8.471221923828125, + "loss_ib": 0.08680088818073273, + "step": 194 + }, + { + "ce_ib": 19.9046630859375, + "ce_orig": 0.5161154866218567, + "epoch": 0.05579121432166223, + "kl_loss": 7.873350143432617, + "loss_ib": 0.08072397112846375, + "step": 194 + }, + { + "epoch": 0.056078797900639875, + "grad_norm": 0.8270230293273926, + "learning_rate": 3.0095541401273885e-05, + "loss": 0.9806, + "step": 195 + }, + { + "ce_ib": 20.66087532043457, + "ce_orig": 0.6626381874084473, + "epoch": 0.056078797900639875, + "kl_loss": 8.434722900390625, + "loss_ib": 0.08641331642866135, + "step": 195 + }, + { + "ce_ib": 22.705623626708984, + "ce_orig": 0.9331481456756592, + "epoch": 0.056078797900639875, + "kl_loss": 7.2313232421875, + "loss_ib": 0.07458379119634628, + "step": 195 + }, + { + "ce_ib": 23.993696212768555, + "ce_orig": 0.9489652514457703, + "epoch": 0.056078797900639875, + "kl_loss": 7.374420166015625, + "loss_ib": 0.07614357024431229, + "step": 195 + }, + { + "ce_ib": 24.617033004760742, + "ce_orig": 0.7637354135513306, + "epoch": 0.056078797900639875, + "kl_loss": 7.686088562011719, + "loss_ib": 0.07932259142398834, + "step": 195 + }, + { + "ce_ib": 21.54843521118164, + "ce_orig": 0.8521741032600403, + "epoch": 0.05636638147961751, + "kl_loss": 8.026320457458496, + "loss_ib": 0.0824180468916893, + "step": 196 + }, + { + "ce_ib": 20.1884822845459, + "ce_orig": 0.8504369258880615, + "epoch": 0.05636638147961751, + "kl_loss": 7.845184326171875, + "loss_ib": 0.08047069609165192, + "step": 196 + }, + { + "ce_ib": 22.211240768432617, + "ce_orig": 0.4319168031215668, + "epoch": 0.05636638147961751, + "kl_loss": 6.318869590759277, + "loss_ib": 0.065409816801548, + "step": 196 + }, + { + "ce_ib": 20.042993545532227, + "ce_orig": 0.6225204467773438, + "epoch": 0.05636638147961751, + "kl_loss": 8.074682235717773, + "loss_ib": 0.08275111764669418, + "step": 196 + }, + { + "ce_ib": 13.801077842712402, + "ce_orig": 0.4428274929523468, + "epoch": 0.05665396505859516, + "kl_loss": 7.092032432556152, + "loss_ib": 0.07230043411254883, + "step": 197 + }, + { + "ce_ib": 19.726043701171875, + "ce_orig": 0.5650824904441833, + "epoch": 0.05665396505859516, + "kl_loss": 7.761396408081055, + "loss_ib": 0.07958656549453735, + "step": 197 + }, + { + "ce_ib": 23.52407455444336, + "ce_orig": 1.374847650527954, + "epoch": 0.05665396505859516, + "kl_loss": 6.945611000061035, + "loss_ib": 0.07180851697921753, + "step": 197 + }, + { + "ce_ib": 21.02933120727539, + "ce_orig": 0.9913616180419922, + "epoch": 0.05665396505859516, + "kl_loss": 7.67958402633667, + "loss_ib": 0.07889877259731293, + "step": 197 + }, + { + "ce_ib": 26.16086196899414, + "ce_orig": 1.5300548076629639, + "epoch": 0.056941548637572795, + "kl_loss": 7.4000701904296875, + "loss_ib": 0.0766167864203453, + "step": 198 + }, + { + "ce_ib": 20.750835418701172, + "ce_orig": 0.9555485844612122, + "epoch": 0.056941548637572795, + "kl_loss": 7.1511125564575195, + "loss_ib": 0.07358621060848236, + "step": 198 + }, + { + "ce_ib": 23.05903434753418, + "ce_orig": 1.1008634567260742, + "epoch": 0.056941548637572795, + "kl_loss": 7.473138332366943, + "loss_ib": 0.0770372822880745, + "step": 198 + }, + { + "ce_ib": 21.61954689025879, + "ce_orig": 1.4359227418899536, + "epoch": 0.056941548637572795, + "kl_loss": 7.772992134094238, + "loss_ib": 0.0798918753862381, + "step": 198 + }, + { + "ce_ib": 22.668001174926758, + "ce_orig": 1.0338892936706543, + "epoch": 0.05722913221655043, + "kl_loss": 7.578032493591309, + "loss_ib": 0.07804711908102036, + "step": 199 + }, + { + "ce_ib": 20.854860305786133, + "ce_orig": 0.9726830124855042, + "epoch": 0.05722913221655043, + "kl_loss": 7.427217483520508, + "loss_ib": 0.0763576552271843, + "step": 199 + }, + { + "ce_ib": 19.557754516601562, + "ce_orig": 0.8703896403312683, + "epoch": 0.05722913221655043, + "kl_loss": 7.243409633636475, + "loss_ib": 0.07438986748456955, + "step": 199 + }, + { + "ce_ib": 18.793437957763672, + "ce_orig": 0.8299582004547119, + "epoch": 0.05722913221655043, + "kl_loss": 6.471531867980957, + "loss_ib": 0.06659466028213501, + "step": 199 + }, + { + "epoch": 0.05751671579552808, + "grad_norm": 0.6937683820724487, + "learning_rate": 3.089171974522293e-05, + "loss": 0.982, + "step": 200 + }, + { + "ce_ib": 18.451953887939453, + "ce_orig": 1.0577921867370605, + "epoch": 0.05751671579552808, + "kl_loss": 7.2733869552612305, + "loss_ib": 0.07457906752824783, + "step": 200 + }, + { + "ce_ib": 23.337678909301758, + "ce_orig": 1.3253329992294312, + "epoch": 0.05751671579552808, + "kl_loss": 7.374900817871094, + "loss_ib": 0.07608277350664139, + "step": 200 + }, + { + "ce_ib": 18.122037887573242, + "ce_orig": 0.9964814782142639, + "epoch": 0.05751671579552808, + "kl_loss": 7.532997131347656, + "loss_ib": 0.07714217156171799, + "step": 200 + }, + { + "ce_ib": 19.866018295288086, + "ce_orig": 0.7532010078430176, + "epoch": 0.05751671579552808, + "kl_loss": 6.963897705078125, + "loss_ib": 0.07162558287382126, + "step": 200 + }, + { + "ce_ib": 18.71930503845215, + "ce_orig": 0.8960237503051758, + "epoch": 0.057804299374505715, + "kl_loss": 6.8690900802612305, + "loss_ib": 0.07056283205747604, + "step": 201 + }, + { + "ce_ib": 22.056734085083008, + "ce_orig": 1.0263980627059937, + "epoch": 0.057804299374505715, + "kl_loss": 6.602439880371094, + "loss_ib": 0.06823007017374039, + "step": 201 + }, + { + "ce_ib": 24.96833038330078, + "ce_orig": 1.6670337915420532, + "epoch": 0.057804299374505715, + "kl_loss": 6.63405704498291, + "loss_ib": 0.06883740425109863, + "step": 201 + }, + { + "ce_ib": 17.40508460998535, + "ce_orig": 0.3124699592590332, + "epoch": 0.057804299374505715, + "kl_loss": 6.398665428161621, + "loss_ib": 0.06572715938091278, + "step": 201 + }, + { + "ce_ib": 13.636467933654785, + "ce_orig": 0.5163049697875977, + "epoch": 0.05809188295348336, + "kl_loss": 6.814591407775879, + "loss_ib": 0.06950955837965012, + "step": 202 + }, + { + "ce_ib": 18.503398895263672, + "ce_orig": 0.8511436581611633, + "epoch": 0.05809188295348336, + "kl_loss": 4.558845043182373, + "loss_ib": 0.047438789159059525, + "step": 202 + }, + { + "ce_ib": 22.386396408081055, + "ce_orig": 0.974443793296814, + "epoch": 0.05809188295348336, + "kl_loss": 6.523048400878906, + "loss_ib": 0.06746912002563477, + "step": 202 + }, + { + "ce_ib": 20.26010513305664, + "ce_orig": 0.7885109186172485, + "epoch": 0.05809188295348336, + "kl_loss": 6.658895015716553, + "loss_ib": 0.06861495971679688, + "step": 202 + }, + { + "ce_ib": 24.684518814086914, + "ce_orig": 1.362154245376587, + "epoch": 0.058379466532461, + "kl_loss": 5.672746658325195, + "loss_ib": 0.05919591709971428, + "step": 203 + }, + { + "ce_ib": 18.035612106323242, + "ce_orig": 0.6344237923622131, + "epoch": 0.058379466532461, + "kl_loss": 6.552053451538086, + "loss_ib": 0.06732409447431564, + "step": 203 + }, + { + "ce_ib": 19.674922943115234, + "ce_orig": 1.129352331161499, + "epoch": 0.058379466532461, + "kl_loss": 6.27418327331543, + "loss_ib": 0.06470932066440582, + "step": 203 + }, + { + "ce_ib": 12.673866271972656, + "ce_orig": 0.2503475248813629, + "epoch": 0.058379466532461, + "kl_loss": 4.017500877380371, + "loss_ib": 0.0414423942565918, + "step": 203 + }, + { + "ce_ib": 16.628164291381836, + "ce_orig": 0.6799634099006653, + "epoch": 0.058667050111438634, + "kl_loss": 5.470815181732178, + "loss_ib": 0.056370966136455536, + "step": 204 + }, + { + "ce_ib": 23.80594825744629, + "ce_orig": 1.2403467893600464, + "epoch": 0.058667050111438634, + "kl_loss": 5.936471462249756, + "loss_ib": 0.061745308339595795, + "step": 204 + }, + { + "ce_ib": 22.816349029541016, + "ce_orig": 1.0042665004730225, + "epoch": 0.058667050111438634, + "kl_loss": 6.33897066116333, + "loss_ib": 0.06567133963108063, + "step": 204 + }, + { + "ce_ib": 15.579545021057129, + "ce_orig": 0.5806044936180115, + "epoch": 0.058667050111438634, + "kl_loss": 6.2884297370910645, + "loss_ib": 0.06444225460290909, + "step": 204 + }, + { + "epoch": 0.05895463369041628, + "grad_norm": 0.43801939487457275, + "learning_rate": 3.1687898089171976e-05, + "loss": 0.9615, + "step": 205 + }, + { + "ce_ib": 20.434833526611328, + "ce_orig": 0.9689016342163086, + "epoch": 0.05895463369041628, + "kl_loss": 6.424372673034668, + "loss_ib": 0.06628721207380295, + "step": 205 + }, + { + "ce_ib": 15.206514358520508, + "ce_orig": 0.4593224823474884, + "epoch": 0.05895463369041628, + "kl_loss": 6.005724906921387, + "loss_ib": 0.06157790124416351, + "step": 205 + }, + { + "ce_ib": 21.497190475463867, + "ce_orig": 1.140707015991211, + "epoch": 0.05895463369041628, + "kl_loss": 6.157401084899902, + "loss_ib": 0.06372372806072235, + "step": 205 + }, + { + "ce_ib": 21.64202308654785, + "ce_orig": 0.8690503239631653, + "epoch": 0.05895463369041628, + "kl_loss": 6.049506187438965, + "loss_ib": 0.06265926361083984, + "step": 205 + }, + { + "ce_ib": 17.65264129638672, + "ce_orig": 0.8661278486251831, + "epoch": 0.05924221726939392, + "kl_loss": 6.142066955566406, + "loss_ib": 0.0631859302520752, + "step": 206 + }, + { + "ce_ib": 18.6512451171875, + "ce_orig": 1.1120545864105225, + "epoch": 0.05924221726939392, + "kl_loss": 6.5508646965026855, + "loss_ib": 0.06737376749515533, + "step": 206 + }, + { + "ce_ib": 19.080215454101562, + "ce_orig": 1.205805778503418, + "epoch": 0.05924221726939392, + "kl_loss": 5.709317207336426, + "loss_ib": 0.059001192450523376, + "step": 206 + }, + { + "ce_ib": 18.595766067504883, + "ce_orig": 0.9016050696372986, + "epoch": 0.05924221726939392, + "kl_loss": 5.711783409118652, + "loss_ib": 0.0589774064719677, + "step": 206 + }, + { + "ce_ib": 20.492830276489258, + "ce_orig": 0.8852484822273254, + "epoch": 0.05952980084837156, + "kl_loss": 5.075399398803711, + "loss_ib": 0.05280327796936035, + "step": 207 + }, + { + "ce_ib": 20.912809371948242, + "ce_orig": 0.784504771232605, + "epoch": 0.05952980084837156, + "kl_loss": 3.873704433441162, + "loss_ib": 0.04082832112908363, + "step": 207 + }, + { + "ce_ib": 16.91316795349121, + "ce_orig": 0.7453713417053223, + "epoch": 0.05952980084837156, + "kl_loss": 4.164481163024902, + "loss_ib": 0.04333612695336342, + "step": 207 + }, + { + "ce_ib": 23.150854110717773, + "ce_orig": 1.8038743734359741, + "epoch": 0.05952980084837156, + "kl_loss": 6.1978583335876465, + "loss_ib": 0.06429366767406464, + "step": 207 + }, + { + "ce_ib": 20.63064193725586, + "ce_orig": 1.5274888277053833, + "epoch": 0.0598173844273492, + "kl_loss": 4.736423492431641, + "loss_ib": 0.049427296966314316, + "step": 208 + }, + { + "ce_ib": 20.130807876586914, + "ce_orig": 0.9102981686592102, + "epoch": 0.0598173844273492, + "kl_loss": 4.98231315612793, + "loss_ib": 0.05183621123433113, + "step": 208 + }, + { + "ce_ib": 21.33695411682129, + "ce_orig": 0.3839934766292572, + "epoch": 0.0598173844273492, + "kl_loss": 3.1306653022766113, + "loss_ib": 0.033440347760915756, + "step": 208 + }, + { + "ce_ib": 19.694299697875977, + "ce_orig": 0.9525083899497986, + "epoch": 0.0598173844273492, + "kl_loss": 4.653975486755371, + "loss_ib": 0.048509180545806885, + "step": 208 + }, + { + "ce_ib": 21.432010650634766, + "ce_orig": 1.1178271770477295, + "epoch": 0.06010496800632684, + "kl_loss": 3.3814926147460938, + "loss_ib": 0.035958126187324524, + "step": 209 + }, + { + "ce_ib": 19.939306259155273, + "ce_orig": 1.0498489141464233, + "epoch": 0.06010496800632684, + "kl_loss": 4.5651397705078125, + "loss_ib": 0.04764533042907715, + "step": 209 + }, + { + "ce_ib": 19.62514877319336, + "ce_orig": 1.1263172626495361, + "epoch": 0.06010496800632684, + "kl_loss": 4.4086761474609375, + "loss_ib": 0.04604927450418472, + "step": 209 + }, + { + "ce_ib": 17.709300994873047, + "ce_orig": 1.0799542665481567, + "epoch": 0.06010496800632684, + "kl_loss": 4.823720932006836, + "loss_ib": 0.0500081367790699, + "step": 209 + }, + { + "epoch": 0.06039255158530448, + "grad_norm": 0.667425274848938, + "learning_rate": 3.248407643312102e-05, + "loss": 0.9474, + "step": 210 + }, + { + "ce_ib": 22.428897857666016, + "ce_orig": 1.325988531112671, + "epoch": 0.06039255158530448, + "kl_loss": 3.7257208824157715, + "loss_ib": 0.039500098675489426, + "step": 210 + }, + { + "ce_ib": 18.08632469177246, + "ce_orig": 0.3834853172302246, + "epoch": 0.06039255158530448, + "kl_loss": 3.6763598918914795, + "loss_ib": 0.03857222944498062, + "step": 210 + }, + { + "ce_ib": 18.410423278808594, + "ce_orig": 0.5081047415733337, + "epoch": 0.06039255158530448, + "kl_loss": 3.751244068145752, + "loss_ib": 0.03935348242521286, + "step": 210 + }, + { + "ce_ib": 17.127031326293945, + "ce_orig": 0.6193530559539795, + "epoch": 0.06039255158530448, + "kl_loss": 4.159435749053955, + "loss_ib": 0.04330705851316452, + "step": 210 + }, + { + "ce_ib": 21.037677764892578, + "ce_orig": 0.9653246998786926, + "epoch": 0.06068013516428212, + "kl_loss": 3.6135926246643066, + "loss_ib": 0.03823969140648842, + "step": 211 + }, + { + "ce_ib": 15.933859825134277, + "ce_orig": 0.5138083696365356, + "epoch": 0.06068013516428212, + "kl_loss": 2.297349452972412, + "loss_ib": 0.024566879495978355, + "step": 211 + }, + { + "ce_ib": 16.323041915893555, + "ce_orig": 1.1270829439163208, + "epoch": 0.06068013516428212, + "kl_loss": 2.4675984382629395, + "loss_ib": 0.026308288797736168, + "step": 211 + }, + { + "ce_ib": 18.137113571166992, + "ce_orig": 0.5509803891181946, + "epoch": 0.06068013516428212, + "kl_loss": 2.595515251159668, + "loss_ib": 0.02776886336505413, + "step": 211 + }, + { + "ce_ib": 14.729268074035645, + "ce_orig": 0.4266526401042938, + "epoch": 0.060967718743259756, + "kl_loss": 1.8660860061645508, + "loss_ib": 0.020133785903453827, + "step": 212 + }, + { + "ce_ib": 19.452571868896484, + "ce_orig": 0.8632349967956543, + "epoch": 0.060967718743259756, + "kl_loss": 2.0922412872314453, + "loss_ib": 0.02286767028272152, + "step": 212 + }, + { + "ce_ib": 21.427040100097656, + "ce_orig": 1.345961332321167, + "epoch": 0.060967718743259756, + "kl_loss": 1.7945568561553955, + "loss_ib": 0.02008827216923237, + "step": 212 + }, + { + "ce_ib": 21.80542755126953, + "ce_orig": 1.136615514755249, + "epoch": 0.060967718743259756, + "kl_loss": 1.982521891593933, + "loss_ib": 0.022005761042237282, + "step": 212 + }, + { + "ce_ib": 23.523645401000977, + "ce_orig": 1.9374449253082275, + "epoch": 0.0612553023222374, + "kl_loss": 2.183800220489502, + "loss_ib": 0.02419036626815796, + "step": 213 + }, + { + "ce_ib": 23.139501571655273, + "ce_orig": 1.261841058731079, + "epoch": 0.0612553023222374, + "kl_loss": 1.225545048713684, + "loss_ib": 0.014569399878382683, + "step": 213 + }, + { + "ce_ib": 14.595914840698242, + "ce_orig": 0.4094107151031494, + "epoch": 0.0612553023222374, + "kl_loss": 1.4455546140670776, + "loss_ib": 0.01591513678431511, + "step": 213 + }, + { + "ce_ib": 18.374540328979492, + "ce_orig": 0.9666364789009094, + "epoch": 0.0612553023222374, + "kl_loss": 1.5520057678222656, + "loss_ib": 0.017357511445879936, + "step": 213 + }, + { + "ce_ib": 17.489238739013672, + "ce_orig": 0.4721967875957489, + "epoch": 0.06154288590121504, + "kl_loss": 1.1278434991836548, + "loss_ib": 0.013027358800172806, + "step": 214 + }, + { + "ce_ib": 21.929288864135742, + "ce_orig": 0.6112910509109497, + "epoch": 0.06154288590121504, + "kl_loss": 1.07718026638031, + "loss_ib": 0.012964731082320213, + "step": 214 + }, + { + "ce_ib": 18.024003982543945, + "ce_orig": 0.4354954957962036, + "epoch": 0.06154288590121504, + "kl_loss": 1.1616055965423584, + "loss_ib": 0.013418455608189106, + "step": 214 + }, + { + "ce_ib": 19.154476165771484, + "ce_orig": 0.9074000120162964, + "epoch": 0.06154288590121504, + "kl_loss": 1.048647165298462, + "loss_ib": 0.01240191888064146, + "step": 214 + }, + { + "epoch": 0.06183046948019268, + "grad_norm": 0.19470971822738647, + "learning_rate": 3.328025477707007e-05, + "loss": 0.8823, + "step": 215 + }, + { + "ce_ib": 16.662282943725586, + "ce_orig": 0.5908299088478088, + "epoch": 0.06183046948019268, + "kl_loss": 1.0955469608306885, + "loss_ib": 0.012621697969734669, + "step": 215 + }, + { + "ce_ib": 15.762285232543945, + "ce_orig": 0.5801149606704712, + "epoch": 0.06183046948019268, + "kl_loss": 0.9997921586036682, + "loss_ib": 0.011574150063097477, + "step": 215 + }, + { + "ce_ib": 20.8988094329834, + "ce_orig": 0.9614391922950745, + "epoch": 0.06183046948019268, + "kl_loss": 0.9723953008651733, + "loss_ib": 0.011813833378255367, + "step": 215 + }, + { + "ce_ib": 20.39583969116211, + "ce_orig": 1.0183390378952026, + "epoch": 0.06183046948019268, + "kl_loss": 0.9245635867118835, + "loss_ib": 0.011285219341516495, + "step": 215 + }, + { + "ce_ib": 14.578946113586426, + "ce_orig": 0.734819769859314, + "epoch": 0.06211805305917032, + "kl_loss": 0.8773603439331055, + "loss_ib": 0.010231498628854752, + "step": 216 + }, + { + "ce_ib": 13.65113639831543, + "ce_orig": 0.4453405439853668, + "epoch": 0.06211805305917032, + "kl_loss": 0.902603030204773, + "loss_ib": 0.010391143150627613, + "step": 216 + }, + { + "ce_ib": 24.090421676635742, + "ce_orig": 1.5431694984436035, + "epoch": 0.06211805305917032, + "kl_loss": 0.8628696203231812, + "loss_ib": 0.011037738062441349, + "step": 216 + }, + { + "ce_ib": 16.371984481811523, + "ce_orig": 0.6436638832092285, + "epoch": 0.06211805305917032, + "kl_loss": 0.9481015205383301, + "loss_ib": 0.011118213646113873, + "step": 216 + }, + { + "ce_ib": 16.45316505432129, + "ce_orig": 0.6905233263969421, + "epoch": 0.06240563663814796, + "kl_loss": 0.8371706008911133, + "loss_ib": 0.010017022490501404, + "step": 217 + }, + { + "ce_ib": 19.796371459960938, + "ce_orig": 0.9113252758979797, + "epoch": 0.06240563663814796, + "kl_loss": 0.7912114262580872, + "loss_ib": 0.00989175122231245, + "step": 217 + }, + { + "ce_ib": 20.719688415527344, + "ce_orig": 0.6585960388183594, + "epoch": 0.06240563663814796, + "kl_loss": 0.7871678471565247, + "loss_ib": 0.009943647310137749, + "step": 217 + }, + { + "ce_ib": 18.301244735717773, + "ce_orig": 0.9191728234291077, + "epoch": 0.06240563663814796, + "kl_loss": 0.7824192643165588, + "loss_ib": 0.009654317051172256, + "step": 217 + }, + { + "ce_ib": 16.72066879272461, + "ce_orig": 0.46977195143699646, + "epoch": 0.0626932202171256, + "kl_loss": 0.7237412929534912, + "loss_ib": 0.008909479714930058, + "step": 218 + }, + { + "ce_ib": 16.60516357421875, + "ce_orig": 0.8432900309562683, + "epoch": 0.0626932202171256, + "kl_loss": 0.7562671899795532, + "loss_ib": 0.009223188273608685, + "step": 218 + }, + { + "ce_ib": 19.466259002685547, + "ce_orig": 0.9356642365455627, + "epoch": 0.0626932202171256, + "kl_loss": 0.7408407926559448, + "loss_ib": 0.009355033747851849, + "step": 218 + }, + { + "ce_ib": 19.53274917602539, + "ce_orig": 0.6844194531440735, + "epoch": 0.0626932202171256, + "kl_loss": 0.7530844807624817, + "loss_ib": 0.009484118781983852, + "step": 218 + }, + { + "ce_ib": 22.165111541748047, + "ce_orig": 1.2755643129348755, + "epoch": 0.06298080379610324, + "kl_loss": 0.6112433671951294, + "loss_ib": 0.008328944444656372, + "step": 219 + }, + { + "ce_ib": 15.414247512817383, + "ce_orig": 0.8299206495285034, + "epoch": 0.06298080379610324, + "kl_loss": 0.7032531499862671, + "loss_ib": 0.008573955856263638, + "step": 219 + }, + { + "ce_ib": 16.366025924682617, + "ce_orig": 0.660663902759552, + "epoch": 0.06298080379610324, + "kl_loss": 0.6545971632003784, + "loss_ib": 0.008182574063539505, + "step": 219 + }, + { + "ce_ib": 19.620121002197266, + "ce_orig": 1.2592724561691284, + "epoch": 0.06298080379610324, + "kl_loss": 0.7279493808746338, + "loss_ib": 0.009241505526006222, + "step": 219 + }, + { + "epoch": 0.06326838737508088, + "grad_norm": 0.07929490506649017, + "learning_rate": 3.407643312101911e-05, + "loss": 0.8749, + "step": 220 + }, + { + "ce_ib": 20.658096313476562, + "ce_orig": 1.0993177890777588, + "epoch": 0.06326838737508088, + "kl_loss": 0.6090418100357056, + "loss_ib": 0.008156226947903633, + "step": 220 + }, + { + "ce_ib": 17.3227481842041, + "ce_orig": 0.6923868656158447, + "epoch": 0.06326838737508088, + "kl_loss": 0.6292073130607605, + "loss_ib": 0.008024347946047783, + "step": 220 + }, + { + "ce_ib": 15.15152359008789, + "ce_orig": 0.697593092918396, + "epoch": 0.06326838737508088, + "kl_loss": 0.604870617389679, + "loss_ib": 0.007563858292996883, + "step": 220 + }, + { + "ce_ib": 20.777067184448242, + "ce_orig": 1.269119381904602, + "epoch": 0.06326838737508088, + "kl_loss": 0.6507552862167358, + "loss_ib": 0.008585259318351746, + "step": 220 + }, + { + "ce_ib": 18.540620803833008, + "ce_orig": 1.0002384185791016, + "epoch": 0.06355597095405853, + "kl_loss": 0.578797459602356, + "loss_ib": 0.0076420363038778305, + "step": 221 + }, + { + "ce_ib": 13.15246868133545, + "ce_orig": 0.25608256459236145, + "epoch": 0.06355597095405853, + "kl_loss": 0.7502469420433044, + "loss_ib": 0.008817716501653194, + "step": 221 + }, + { + "ce_ib": 13.858514785766602, + "ce_orig": 0.682886004447937, + "epoch": 0.06355597095405853, + "kl_loss": 0.538476824760437, + "loss_ib": 0.006770619656890631, + "step": 221 + }, + { + "ce_ib": 17.7680606842041, + "ce_orig": 0.6039354801177979, + "epoch": 0.06355597095405853, + "kl_loss": 0.5650777816772461, + "loss_ib": 0.007427583914250135, + "step": 221 + }, + { + "ce_ib": 19.38729476928711, + "ce_orig": 0.8789693117141724, + "epoch": 0.06384355453303617, + "kl_loss": 0.6542633771896362, + "loss_ib": 0.008481362834572792, + "step": 222 + }, + { + "ce_ib": 18.89866065979004, + "ce_orig": 0.7741104960441589, + "epoch": 0.06384355453303617, + "kl_loss": 0.5847321152687073, + "loss_ib": 0.007737187203019857, + "step": 222 + }, + { + "ce_ib": 21.54572296142578, + "ce_orig": 0.7229393124580383, + "epoch": 0.06384355453303617, + "kl_loss": 0.5832604169845581, + "loss_ib": 0.007987176068127155, + "step": 222 + }, + { + "ce_ib": 13.869481086730957, + "ce_orig": 0.5696704387664795, + "epoch": 0.06384355453303617, + "kl_loss": 0.5188637375831604, + "loss_ib": 0.006575585342943668, + "step": 222 + }, + { + "ce_ib": 14.482152938842773, + "ce_orig": 0.5589219331741333, + "epoch": 0.0641311381120138, + "kl_loss": 0.47938820719718933, + "loss_ib": 0.006242097355425358, + "step": 223 + }, + { + "ce_ib": 16.80389976501465, + "ce_orig": 0.5980596542358398, + "epoch": 0.0641311381120138, + "kl_loss": 0.6172512769699097, + "loss_ib": 0.007852902635931969, + "step": 223 + }, + { + "ce_ib": 22.49806785583496, + "ce_orig": 0.7062133550643921, + "epoch": 0.0641311381120138, + "kl_loss": 0.5784010887145996, + "loss_ib": 0.008033817633986473, + "step": 223 + }, + { + "ce_ib": 17.86919593811035, + "ce_orig": 0.6028913259506226, + "epoch": 0.0641311381120138, + "kl_loss": 0.5538998246192932, + "loss_ib": 0.007325917482376099, + "step": 223 + }, + { + "ce_ib": 21.530719757080078, + "ce_orig": 1.1314647197723389, + "epoch": 0.06441872169099144, + "kl_loss": 0.5061776638031006, + "loss_ib": 0.007214848417788744, + "step": 224 + }, + { + "ce_ib": 17.580371856689453, + "ce_orig": 0.8268778920173645, + "epoch": 0.06441872169099144, + "kl_loss": 0.5295155644416809, + "loss_ib": 0.007053192704916, + "step": 224 + }, + { + "ce_ib": 16.985912322998047, + "ce_orig": 0.92490553855896, + "epoch": 0.06441872169099144, + "kl_loss": 0.47081345319747925, + "loss_ib": 0.006406725384294987, + "step": 224 + }, + { + "ce_ib": 20.34337043762207, + "ce_orig": 1.7162299156188965, + "epoch": 0.06441872169099144, + "kl_loss": 0.5065193772315979, + "loss_ib": 0.0070995306596159935, + "step": 224 + }, + { + "epoch": 0.06470630526996908, + "grad_norm": 0.07722701877355576, + "learning_rate": 3.487261146496815e-05, + "loss": 0.8907, + "step": 225 + }, + { + "ce_ib": 15.637423515319824, + "ce_orig": 0.8265129923820496, + "epoch": 0.06470630526996908, + "kl_loss": 0.6796841025352478, + "loss_ib": 0.008360583335161209, + "step": 225 + }, + { + "ce_ib": 21.416494369506836, + "ce_orig": 1.278948426246643, + "epoch": 0.06470630526996908, + "kl_loss": 0.48355668783187866, + "loss_ib": 0.006977215874940157, + "step": 225 + }, + { + "ce_ib": 19.162933349609375, + "ce_orig": 0.33566343784332275, + "epoch": 0.06470630526996908, + "kl_loss": 0.4927806854248047, + "loss_ib": 0.006844100076705217, + "step": 225 + }, + { + "ce_ib": 17.001327514648438, + "ce_orig": 0.8918877840042114, + "epoch": 0.06470630526996908, + "kl_loss": 0.4844684898853302, + "loss_ib": 0.006544817704707384, + "step": 225 + }, + { + "ce_ib": 19.425342559814453, + "ce_orig": 1.1151187419891357, + "epoch": 0.06499388884894673, + "kl_loss": 0.44420889019966125, + "loss_ib": 0.006384622771292925, + "step": 226 + }, + { + "ce_ib": 20.979902267456055, + "ce_orig": 1.1682270765304565, + "epoch": 0.06499388884894673, + "kl_loss": 0.5223791003227234, + "loss_ib": 0.007321780547499657, + "step": 226 + }, + { + "ce_ib": 21.13734245300293, + "ce_orig": 0.6381849050521851, + "epoch": 0.06499388884894673, + "kl_loss": 0.5622669458389282, + "loss_ib": 0.007736403960734606, + "step": 226 + }, + { + "ce_ib": 19.859725952148438, + "ce_orig": 1.4218535423278809, + "epoch": 0.06499388884894673, + "kl_loss": 0.5652158260345459, + "loss_ib": 0.0076381308026611805, + "step": 226 + }, + { + "ce_ib": 18.34955596923828, + "ce_orig": 0.8892937302589417, + "epoch": 0.06528147242792437, + "kl_loss": 0.477683424949646, + "loss_ib": 0.006611789111047983, + "step": 227 + }, + { + "ce_ib": 16.387413024902344, + "ce_orig": 0.5451152324676514, + "epoch": 0.06528147242792437, + "kl_loss": 0.4405210614204407, + "loss_ib": 0.006043951492756605, + "step": 227 + }, + { + "ce_ib": 16.179128646850586, + "ce_orig": 0.5826038718223572, + "epoch": 0.06528147242792437, + "kl_loss": 0.4468899965286255, + "loss_ib": 0.006086812354624271, + "step": 227 + }, + { + "ce_ib": 15.503174781799316, + "ce_orig": 0.4542520344257355, + "epoch": 0.06528147242792437, + "kl_loss": 0.4687623083591461, + "loss_ib": 0.006237940862774849, + "step": 227 + }, + { + "ce_ib": 17.556617736816406, + "ce_orig": 0.6604429483413696, + "epoch": 0.06556905600690201, + "kl_loss": 0.4429520070552826, + "loss_ib": 0.006185181438922882, + "step": 228 + }, + { + "ce_ib": 14.565351486206055, + "ce_orig": 0.663815438747406, + "epoch": 0.06556905600690201, + "kl_loss": 0.41016486287117004, + "loss_ib": 0.005558183882385492, + "step": 228 + }, + { + "ce_ib": 9.793392181396484, + "ce_orig": 0.1998930275440216, + "epoch": 0.06556905600690201, + "kl_loss": 0.5553406476974487, + "loss_ib": 0.006532745435833931, + "step": 228 + }, + { + "ce_ib": 15.705013275146484, + "ce_orig": 0.949131965637207, + "epoch": 0.06556905600690201, + "kl_loss": 0.5142616033554077, + "loss_ib": 0.006713117007166147, + "step": 228 + }, + { + "ce_ib": 22.55472755432129, + "ce_orig": 1.56355881690979, + "epoch": 0.06585663958587964, + "kl_loss": 0.508346676826477, + "loss_ib": 0.007338939234614372, + "step": 229 + }, + { + "ce_ib": 20.364665985107422, + "ce_orig": 1.1723278760910034, + "epoch": 0.06585663958587964, + "kl_loss": 0.4082661271095276, + "loss_ib": 0.006119127850979567, + "step": 229 + }, + { + "ce_ib": 16.15178680419922, + "ce_orig": 0.8450407981872559, + "epoch": 0.06585663958587964, + "kl_loss": 0.5447847843170166, + "loss_ib": 0.007063026074320078, + "step": 229 + }, + { + "ce_ib": 18.998653411865234, + "ce_orig": 1.2076853513717651, + "epoch": 0.06585663958587964, + "kl_loss": 0.42832180857658386, + "loss_ib": 0.006183082703500986, + "step": 229 + }, + { + "epoch": 0.06614422316485728, + "grad_norm": 0.07635564357042313, + "learning_rate": 3.56687898089172e-05, + "loss": 0.8962, + "step": 230 + }, + { + "ce_ib": 22.920480728149414, + "ce_orig": 1.7977490425109863, + "epoch": 0.06614422316485728, + "kl_loss": 0.4276275932788849, + "loss_ib": 0.0065683238208293915, + "step": 230 + }, + { + "ce_ib": 18.89206886291504, + "ce_orig": 1.2325646877288818, + "epoch": 0.06614422316485728, + "kl_loss": 0.42932283878326416, + "loss_ib": 0.006182434968650341, + "step": 230 + }, + { + "ce_ib": 19.41433334350586, + "ce_orig": 1.1008139848709106, + "epoch": 0.06614422316485728, + "kl_loss": 0.4227140247821808, + "loss_ib": 0.006168573163449764, + "step": 230 + }, + { + "ce_ib": 16.897308349609375, + "ce_orig": 0.8783938884735107, + "epoch": 0.06614422316485728, + "kl_loss": 0.4387606680393219, + "loss_ib": 0.0060773370787501335, + "step": 230 + }, + { + "ce_ib": 20.402490615844727, + "ce_orig": 0.4892929196357727, + "epoch": 0.06643180674383492, + "kl_loss": 0.5622318387031555, + "loss_ib": 0.007662567310035229, + "step": 231 + }, + { + "ce_ib": 18.776615142822266, + "ce_orig": 0.8454554080963135, + "epoch": 0.06643180674383492, + "kl_loss": 0.3648257553577423, + "loss_ib": 0.005525919143110514, + "step": 231 + }, + { + "ce_ib": 19.490280151367188, + "ce_orig": 0.5873563289642334, + "epoch": 0.06643180674383492, + "kl_loss": 0.49844107031822205, + "loss_ib": 0.006933438591659069, + "step": 231 + }, + { + "ce_ib": 18.226577758789062, + "ce_orig": 0.9095281362533569, + "epoch": 0.06643180674383492, + "kl_loss": 0.42676785588264465, + "loss_ib": 0.006090336479246616, + "step": 231 + }, + { + "ce_ib": 14.32204818725586, + "ce_orig": 0.4136866331100464, + "epoch": 0.06671939032281257, + "kl_loss": 0.5454199910163879, + "loss_ib": 0.006886404473334551, + "step": 232 + }, + { + "ce_ib": 15.030553817749023, + "ce_orig": 1.1399730443954468, + "epoch": 0.06671939032281257, + "kl_loss": 0.3817654848098755, + "loss_ib": 0.00532070966437459, + "step": 232 + }, + { + "ce_ib": 17.033527374267578, + "ce_orig": 0.9704214334487915, + "epoch": 0.06671939032281257, + "kl_loss": 0.45833879709243774, + "loss_ib": 0.006286740303039551, + "step": 232 + }, + { + "ce_ib": 16.526573181152344, + "ce_orig": 0.6879238486289978, + "epoch": 0.06671939032281257, + "kl_loss": 0.3568248152732849, + "loss_ib": 0.005220905411988497, + "step": 232 + }, + { + "ce_ib": 22.991727828979492, + "ce_orig": 1.4614430665969849, + "epoch": 0.06700697390179021, + "kl_loss": 0.42173314094543457, + "loss_ib": 0.006516504101455212, + "step": 233 + }, + { + "ce_ib": 18.632497787475586, + "ce_orig": 0.5046707391738892, + "epoch": 0.06700697390179021, + "kl_loss": 0.41837000846862793, + "loss_ib": 0.006046949420124292, + "step": 233 + }, + { + "ce_ib": 18.9344425201416, + "ce_orig": 1.0734100341796875, + "epoch": 0.06700697390179021, + "kl_loss": 0.40839213132858276, + "loss_ib": 0.00597736518830061, + "step": 233 + }, + { + "ce_ib": 11.921408653259277, + "ce_orig": 0.525043785572052, + "epoch": 0.06700697390179021, + "kl_loss": 0.35900092124938965, + "loss_ib": 0.004782150033861399, + "step": 233 + }, + { + "ce_ib": 16.360872268676758, + "ce_orig": 0.9665996432304382, + "epoch": 0.06729455748076785, + "kl_loss": 0.352176696062088, + "loss_ib": 0.005157853942364454, + "step": 234 + }, + { + "ce_ib": 17.86981964111328, + "ce_orig": 0.9465886354446411, + "epoch": 0.06729455748076785, + "kl_loss": 0.39093858003616333, + "loss_ib": 0.005696367472410202, + "step": 234 + }, + { + "ce_ib": 18.097686767578125, + "ce_orig": 0.8353486657142639, + "epoch": 0.06729455748076785, + "kl_loss": 0.3744758367538452, + "loss_ib": 0.005554527044296265, + "step": 234 + }, + { + "ce_ib": 22.27129554748535, + "ce_orig": 1.4471676349639893, + "epoch": 0.06729455748076785, + "kl_loss": 0.41227924823760986, + "loss_ib": 0.006349921692162752, + "step": 234 + }, + { + "epoch": 0.06758214105974548, + "grad_norm": 0.08016426116228104, + "learning_rate": 3.646496815286624e-05, + "loss": 0.8961, + "step": 235 + }, + { + "ce_ib": 17.102169036865234, + "ce_orig": 0.6802967190742493, + "epoch": 0.06758214105974548, + "kl_loss": 0.3811526298522949, + "loss_ib": 0.005521743092685938, + "step": 235 + }, + { + "ce_ib": 23.21323585510254, + "ce_orig": 1.7242603302001953, + "epoch": 0.06758214105974548, + "kl_loss": 0.43173903226852417, + "loss_ib": 0.006638714112341404, + "step": 235 + }, + { + "ce_ib": 13.367447853088379, + "ce_orig": 0.7693591713905334, + "epoch": 0.06758214105974548, + "kl_loss": 0.33568012714385986, + "loss_ib": 0.004693545866757631, + "step": 235 + }, + { + "ce_ib": 16.364444732666016, + "ce_orig": 0.9041774868965149, + "epoch": 0.06758214105974548, + "kl_loss": 0.3745976388454437, + "loss_ib": 0.005382420960813761, + "step": 235 + }, + { + "ce_ib": 17.089984893798828, + "ce_orig": 0.8724990487098694, + "epoch": 0.06786972463872312, + "kl_loss": 0.3381209969520569, + "loss_ib": 0.005090207792818546, + "step": 236 + }, + { + "ce_ib": 17.797290802001953, + "ce_orig": 1.2699850797653198, + "epoch": 0.06786972463872312, + "kl_loss": 0.4074021279811859, + "loss_ib": 0.0058537498116493225, + "step": 236 + }, + { + "ce_ib": 10.40896224975586, + "ce_orig": 0.2636343240737915, + "epoch": 0.06786972463872312, + "kl_loss": 0.6993351578712463, + "loss_ib": 0.008034247905015945, + "step": 236 + }, + { + "ce_ib": 23.780147552490234, + "ce_orig": 1.2295787334442139, + "epoch": 0.06786972463872312, + "kl_loss": 0.47904911637306213, + "loss_ib": 0.007168505806475878, + "step": 236 + }, + { + "ce_ib": 17.171159744262695, + "ce_orig": 1.21332848072052, + "epoch": 0.06815730821770077, + "kl_loss": 0.34175872802734375, + "loss_ib": 0.005134702660143375, + "step": 237 + }, + { + "ce_ib": 18.72576904296875, + "ce_orig": 0.8414787650108337, + "epoch": 0.06815730821770077, + "kl_loss": 0.41571539640426636, + "loss_ib": 0.006029731128364801, + "step": 237 + }, + { + "ce_ib": 18.983478546142578, + "ce_orig": 0.5497841238975525, + "epoch": 0.06815730821770077, + "kl_loss": 0.400782972574234, + "loss_ib": 0.005906177684664726, + "step": 237 + }, + { + "ce_ib": 15.720340728759766, + "ce_orig": 0.6456199884414673, + "epoch": 0.06815730821770077, + "kl_loss": 0.3572655916213989, + "loss_ib": 0.005144690163433552, + "step": 237 + }, + { + "ce_ib": 14.64540958404541, + "ce_orig": 0.5299506783485413, + "epoch": 0.06844489179667841, + "kl_loss": 0.3614061176776886, + "loss_ib": 0.005078601650893688, + "step": 238 + }, + { + "ce_ib": 14.622838973999023, + "ce_orig": 0.6512343883514404, + "epoch": 0.06844489179667841, + "kl_loss": 0.3499015271663666, + "loss_ib": 0.004961299244314432, + "step": 238 + }, + { + "ce_ib": 18.794681549072266, + "ce_orig": 0.5560781359672546, + "epoch": 0.06844489179667841, + "kl_loss": 0.3744252324104309, + "loss_ib": 0.005623720120638609, + "step": 238 + }, + { + "ce_ib": 16.820043563842773, + "ce_orig": 0.7689210772514343, + "epoch": 0.06844489179667841, + "kl_loss": 0.3898257613182068, + "loss_ib": 0.005580261815339327, + "step": 238 + }, + { + "ce_ib": 20.637855529785156, + "ce_orig": 1.1541659832000732, + "epoch": 0.06873247537565605, + "kl_loss": 0.4067099392414093, + "loss_ib": 0.006130884867161512, + "step": 239 + }, + { + "ce_ib": 14.409849166870117, + "ce_orig": 0.6937656402587891, + "epoch": 0.06873247537565605, + "kl_loss": 0.30936238169670105, + "loss_ib": 0.00453460868448019, + "step": 239 + }, + { + "ce_ib": 19.96538543701172, + "ce_orig": 1.6420783996582031, + "epoch": 0.06873247537565605, + "kl_loss": 0.3878486752510071, + "loss_ib": 0.005875025410205126, + "step": 239 + }, + { + "ce_ib": 16.174251556396484, + "ce_orig": 0.7331187725067139, + "epoch": 0.06873247537565605, + "kl_loss": 0.3374932110309601, + "loss_ib": 0.004992356989532709, + "step": 239 + }, + { + "epoch": 0.06902005895463369, + "grad_norm": 0.09181191027164459, + "learning_rate": 3.7261146496815283e-05, + "loss": 0.9216, + "step": 240 + }, + { + "ce_ib": 16.239824295043945, + "ce_orig": 0.45115554332733154, + "epoch": 0.06902005895463369, + "kl_loss": 0.3430927097797394, + "loss_ib": 0.005054909270256758, + "step": 240 + }, + { + "ce_ib": 20.204030990600586, + "ce_orig": 1.1928750276565552, + "epoch": 0.06902005895463369, + "kl_loss": 0.37950411438941956, + "loss_ib": 0.005815444048494101, + "step": 240 + }, + { + "ce_ib": 11.103907585144043, + "ce_orig": 0.41011255979537964, + "epoch": 0.06902005895463369, + "kl_loss": 0.3157821595668793, + "loss_ib": 0.004268212243914604, + "step": 240 + }, + { + "ce_ib": 16.61946678161621, + "ce_orig": 0.958586573600769, + "epoch": 0.06902005895463369, + "kl_loss": 0.472045361995697, + "loss_ib": 0.006382400169968605, + "step": 240 + }, + { + "ce_ib": 18.43888282775879, + "ce_orig": 0.8224636912345886, + "epoch": 0.06930764253361132, + "kl_loss": 0.31457871198654175, + "loss_ib": 0.004989675246179104, + "step": 241 + }, + { + "ce_ib": 19.861295700073242, + "ce_orig": 0.5294429659843445, + "epoch": 0.06930764253361132, + "kl_loss": 0.30837568640708923, + "loss_ib": 0.0050698863342404366, + "step": 241 + }, + { + "ce_ib": 17.056270599365234, + "ce_orig": 0.6633943319320679, + "epoch": 0.06930764253361132, + "kl_loss": 0.3993522524833679, + "loss_ib": 0.0056991493329405785, + "step": 241 + }, + { + "ce_ib": 13.237872123718262, + "ce_orig": 0.6085235476493835, + "epoch": 0.06930764253361132, + "kl_loss": 0.276342511177063, + "loss_ib": 0.0040872120298445225, + "step": 241 + }, + { + "ce_ib": 15.212370872497559, + "ce_orig": 0.5519753694534302, + "epoch": 0.06959522611258898, + "kl_loss": 0.3807227611541748, + "loss_ib": 0.005328464321792126, + "step": 242 + }, + { + "ce_ib": 14.194107055664062, + "ce_orig": 0.6263343691825867, + "epoch": 0.06959522611258898, + "kl_loss": 0.33415570855140686, + "loss_ib": 0.004760967567563057, + "step": 242 + }, + { + "ce_ib": 18.368473052978516, + "ce_orig": 0.8124864101409912, + "epoch": 0.06959522611258898, + "kl_loss": 0.3696582317352295, + "loss_ib": 0.005533429328352213, + "step": 242 + }, + { + "ce_ib": 18.977922439575195, + "ce_orig": 0.7746420502662659, + "epoch": 0.06959522611258898, + "kl_loss": 0.3186471462249756, + "loss_ib": 0.005084263626486063, + "step": 242 + }, + { + "ce_ib": 18.268203735351562, + "ce_orig": 0.9710350632667542, + "epoch": 0.06988280969156661, + "kl_loss": 0.32097768783569336, + "loss_ib": 0.005036597140133381, + "step": 243 + }, + { + "ce_ib": 20.087373733520508, + "ce_orig": 1.0895576477050781, + "epoch": 0.06988280969156661, + "kl_loss": 0.3057764768600464, + "loss_ib": 0.005066501908004284, + "step": 243 + }, + { + "ce_ib": 18.099958419799805, + "ce_orig": 0.907975971698761, + "epoch": 0.06988280969156661, + "kl_loss": 0.35890674591064453, + "loss_ib": 0.005399063229560852, + "step": 243 + }, + { + "ce_ib": 18.00782012939453, + "ce_orig": 0.520527184009552, + "epoch": 0.06988280969156661, + "kl_loss": 0.36176708340644836, + "loss_ib": 0.005418452434241772, + "step": 243 + }, + { + "ce_ib": 15.336793899536133, + "ce_orig": 0.755825936794281, + "epoch": 0.07017039327054425, + "kl_loss": 0.27832502126693726, + "loss_ib": 0.0043169292621314526, + "step": 244 + }, + { + "ce_ib": 14.561528205871582, + "ce_orig": 0.5862277150154114, + "epoch": 0.07017039327054425, + "kl_loss": 0.3262811303138733, + "loss_ib": 0.004718963988125324, + "step": 244 + }, + { + "ce_ib": 21.186973571777344, + "ce_orig": 1.1542885303497314, + "epoch": 0.07017039327054425, + "kl_loss": 0.3798186182975769, + "loss_ib": 0.005916883237659931, + "step": 244 + }, + { + "ce_ib": 17.660518646240234, + "ce_orig": 0.7000678181648254, + "epoch": 0.07017039327054425, + "kl_loss": 0.40329715609550476, + "loss_ib": 0.005799023434519768, + "step": 244 + }, + { + "epoch": 0.07045797684952189, + "grad_norm": 0.07606582343578339, + "learning_rate": 3.805732484076434e-05, + "loss": 0.8966, + "step": 245 + }, + { + "ce_ib": 17.2858829498291, + "ce_orig": 0.6848281025886536, + "epoch": 0.07045797684952189, + "kl_loss": 0.3115922510623932, + "loss_ib": 0.0048445104621350765, + "step": 245 + }, + { + "ce_ib": 12.877274513244629, + "ce_orig": 0.42091885209083557, + "epoch": 0.07045797684952189, + "kl_loss": 0.33227574825286865, + "loss_ib": 0.004610484931617975, + "step": 245 + }, + { + "ce_ib": 19.50572967529297, + "ce_orig": 1.2013055086135864, + "epoch": 0.07045797684952189, + "kl_loss": 0.29557040333747864, + "loss_ib": 0.004906277172267437, + "step": 245 + }, + { + "ce_ib": 17.724828720092773, + "ce_orig": 0.7979478240013123, + "epoch": 0.07045797684952189, + "kl_loss": 0.3825852870941162, + "loss_ib": 0.005598335526883602, + "step": 245 + }, + { + "ce_ib": 17.733549118041992, + "ce_orig": 1.1459635496139526, + "epoch": 0.07074556042849953, + "kl_loss": 0.29438281059265137, + "loss_ib": 0.004717182833701372, + "step": 246 + }, + { + "ce_ib": 16.1971378326416, + "ce_orig": 0.13715046644210815, + "epoch": 0.07074556042849953, + "kl_loss": 0.6951940059661865, + "loss_ib": 0.008571653626859188, + "step": 246 + }, + { + "ce_ib": 20.530433654785156, + "ce_orig": 1.4083482027053833, + "epoch": 0.07074556042849953, + "kl_loss": 0.35388949513435364, + "loss_ib": 0.005591938272118568, + "step": 246 + }, + { + "ce_ib": 16.526330947875977, + "ce_orig": 0.6567316055297852, + "epoch": 0.07074556042849953, + "kl_loss": 0.3231682777404785, + "loss_ib": 0.004884315654635429, + "step": 246 + }, + { + "ce_ib": 14.093873977661133, + "ce_orig": 0.871584951877594, + "epoch": 0.07103314400747718, + "kl_loss": 0.2740858793258667, + "loss_ib": 0.004150246270000935, + "step": 247 + }, + { + "ce_ib": 21.621118545532227, + "ce_orig": 1.2882436513900757, + "epoch": 0.07103314400747718, + "kl_loss": 0.4834282696247101, + "loss_ib": 0.006996394135057926, + "step": 247 + }, + { + "ce_ib": 19.13994026184082, + "ce_orig": 1.2011432647705078, + "epoch": 0.07103314400747718, + "kl_loss": 0.31072482466697693, + "loss_ib": 0.005021241959184408, + "step": 247 + }, + { + "ce_ib": 13.94855785369873, + "ce_orig": 0.6923986673355103, + "epoch": 0.07103314400747718, + "kl_loss": 0.34611976146698, + "loss_ib": 0.004856053274124861, + "step": 247 + }, + { + "ce_ib": 18.997570037841797, + "ce_orig": 1.2123730182647705, + "epoch": 0.07132072758645482, + "kl_loss": 0.3029173016548157, + "loss_ib": 0.004928929731249809, + "step": 248 + }, + { + "ce_ib": 18.082622528076172, + "ce_orig": 0.7000762224197388, + "epoch": 0.07132072758645482, + "kl_loss": 0.3445127308368683, + "loss_ib": 0.005253389477729797, + "step": 248 + }, + { + "ce_ib": 13.205419540405273, + "ce_orig": 0.6170308589935303, + "epoch": 0.07132072758645482, + "kl_loss": 0.2936919927597046, + "loss_ib": 0.004257461987435818, + "step": 248 + }, + { + "ce_ib": 16.406328201293945, + "ce_orig": 0.8249359130859375, + "epoch": 0.07132072758645482, + "kl_loss": 0.2896481454372406, + "loss_ib": 0.004537113942205906, + "step": 248 + }, + { + "ce_ib": 19.35923194885254, + "ce_orig": 0.9184130430221558, + "epoch": 0.07160831116543245, + "kl_loss": 0.2877388000488281, + "loss_ib": 0.004813311155885458, + "step": 249 + }, + { + "ce_ib": 14.84507942199707, + "ce_orig": 0.962078332901001, + "epoch": 0.07160831116543245, + "kl_loss": 0.33685553073883057, + "loss_ib": 0.004853063262999058, + "step": 249 + }, + { + "ce_ib": 13.66109848022461, + "ce_orig": 0.8815235495567322, + "epoch": 0.07160831116543245, + "kl_loss": 0.2970173954963684, + "loss_ib": 0.004336283542215824, + "step": 249 + }, + { + "ce_ib": 15.64456844329834, + "ce_orig": 0.9901912808418274, + "epoch": 0.07160831116543245, + "kl_loss": 0.30990880727767944, + "loss_ib": 0.004663544707000256, + "step": 249 + }, + { + "epoch": 0.07189589474441009, + "grad_norm": 0.07667157799005508, + "learning_rate": 3.885350318471338e-05, + "loss": 0.8786, + "step": 250 + }, + { + "ce_ib": 17.655288696289062, + "ce_orig": 0.9249431490898132, + "epoch": 0.07189589474441009, + "kl_loss": 0.24412468075752258, + "loss_ib": 0.004206775221973658, + "step": 250 + }, + { + "ce_ib": 16.22374153137207, + "ce_orig": 0.8454200625419617, + "epoch": 0.07189589474441009, + "kl_loss": 0.3324206471443176, + "loss_ib": 0.004946580622345209, + "step": 250 + }, + { + "ce_ib": 18.696596145629883, + "ce_orig": 1.0471431016921997, + "epoch": 0.07189589474441009, + "kl_loss": 0.3605830669403076, + "loss_ib": 0.005475489888340235, + "step": 250 + }, + { + "ce_ib": 19.617197036743164, + "ce_orig": 0.630739152431488, + "epoch": 0.07189589474441009, + "kl_loss": 0.42468297481536865, + "loss_ib": 0.006208549719303846, + "step": 250 + }, + { + "ce_ib": 14.190434455871582, + "ce_orig": 0.5681670904159546, + "epoch": 0.07218347832338773, + "kl_loss": 0.2779189646244049, + "loss_ib": 0.004198232665657997, + "step": 251 + }, + { + "ce_ib": 12.449499130249023, + "ce_orig": 0.7335292100906372, + "epoch": 0.07218347832338773, + "kl_loss": 0.30513036251068115, + "loss_ib": 0.004296253435313702, + "step": 251 + }, + { + "ce_ib": 17.19744300842285, + "ce_orig": 0.664770245552063, + "epoch": 0.07218347832338773, + "kl_loss": 0.3403986096382141, + "loss_ib": 0.005123730283230543, + "step": 251 + }, + { + "ce_ib": 16.57467269897461, + "ce_orig": 1.2152647972106934, + "epoch": 0.07218347832338773, + "kl_loss": 0.2757129669189453, + "loss_ib": 0.0044145965948700905, + "step": 251 + }, + { + "ce_ib": 20.365676879882812, + "ce_orig": 0.9634372591972351, + "epoch": 0.07247106190236538, + "kl_loss": 0.31626924872398376, + "loss_ib": 0.005199260078370571, + "step": 252 + }, + { + "ce_ib": 18.716487884521484, + "ce_orig": 1.0937143564224243, + "epoch": 0.07247106190236538, + "kl_loss": 0.3098085820674896, + "loss_ib": 0.0049697342328727245, + "step": 252 + }, + { + "ce_ib": 16.123247146606445, + "ce_orig": 1.0408896207809448, + "epoch": 0.07247106190236538, + "kl_loss": 0.31008970737457275, + "loss_ib": 0.004713221453130245, + "step": 252 + }, + { + "ce_ib": 17.019351959228516, + "ce_orig": 1.1104483604431152, + "epoch": 0.07247106190236538, + "kl_loss": 0.30466794967651367, + "loss_ib": 0.004748614504933357, + "step": 252 + }, + { + "ce_ib": 14.710082054138184, + "ce_orig": 0.9470803737640381, + "epoch": 0.07275864548134302, + "kl_loss": 0.37655502557754517, + "loss_ib": 0.005236558150500059, + "step": 253 + }, + { + "ce_ib": 20.534164428710938, + "ce_orig": 0.901342511177063, + "epoch": 0.07275864548134302, + "kl_loss": 0.3438325524330139, + "loss_ib": 0.005491741932928562, + "step": 253 + }, + { + "ce_ib": 16.916545867919922, + "ce_orig": 0.9539148211479187, + "epoch": 0.07275864548134302, + "kl_loss": 0.35449427366256714, + "loss_ib": 0.005236596800386906, + "step": 253 + }, + { + "ce_ib": 15.45893383026123, + "ce_orig": 0.6202948689460754, + "epoch": 0.07275864548134302, + "kl_loss": 0.3998444080352783, + "loss_ib": 0.005544337444007397, + "step": 253 + }, + { + "ce_ib": 17.64470100402832, + "ce_orig": 1.3979955911636353, + "epoch": 0.07304622906032066, + "kl_loss": 0.31596821546554565, + "loss_ib": 0.004924152046442032, + "step": 254 + }, + { + "ce_ib": 17.690441131591797, + "ce_orig": 0.8207519054412842, + "epoch": 0.07304622906032066, + "kl_loss": 0.2767926752567291, + "loss_ib": 0.0045369709841907024, + "step": 254 + }, + { + "ce_ib": 10.424705505371094, + "ce_orig": 0.5839744210243225, + "epoch": 0.07304622906032066, + "kl_loss": 0.22676922380924225, + "loss_ib": 0.0033101625740528107, + "step": 254 + }, + { + "ce_ib": 20.686954498291016, + "ce_orig": 0.6900187730789185, + "epoch": 0.07304622906032066, + "kl_loss": 0.4284232258796692, + "loss_ib": 0.006352927535772324, + "step": 254 + }, + { + "epoch": 0.0733338126392983, + "grad_norm": 0.06607817858457565, + "learning_rate": 3.964968152866242e-05, + "loss": 0.846, + "step": 255 + }, + { + "ce_ib": 14.843269348144531, + "ce_orig": 0.8040740489959717, + "epoch": 0.0733338126392983, + "kl_loss": 0.37673041224479675, + "loss_ib": 0.005251631140708923, + "step": 255 + }, + { + "ce_ib": 14.139528274536133, + "ce_orig": 0.7245256304740906, + "epoch": 0.0733338126392983, + "kl_loss": 0.3231876790523529, + "loss_ib": 0.004645829554647207, + "step": 255 + }, + { + "ce_ib": 24.16304588317871, + "ce_orig": 1.9036223888397217, + "epoch": 0.0733338126392983, + "kl_loss": 0.2994362413883209, + "loss_ib": 0.005410667043179274, + "step": 255 + }, + { + "ce_ib": 17.201786041259766, + "ce_orig": 0.680133044719696, + "epoch": 0.0733338126392983, + "kl_loss": 0.3443969786167145, + "loss_ib": 0.005164148285984993, + "step": 255 + }, + { + "ce_ib": 11.843783378601074, + "ce_orig": 0.5880969166755676, + "epoch": 0.07362139621827593, + "kl_loss": 0.2765531539916992, + "loss_ib": 0.003949909936636686, + "step": 256 + }, + { + "ce_ib": 15.530258178710938, + "ce_orig": 0.7509983777999878, + "epoch": 0.07362139621827593, + "kl_loss": 0.3052097260951996, + "loss_ib": 0.004605122841894627, + "step": 256 + }, + { + "ce_ib": 18.292640686035156, + "ce_orig": 0.9828827977180481, + "epoch": 0.07362139621827593, + "kl_loss": 0.29785263538360596, + "loss_ib": 0.0048077902756631374, + "step": 256 + }, + { + "ce_ib": 12.437490463256836, + "ce_orig": 0.5975197553634644, + "epoch": 0.07362139621827593, + "kl_loss": 0.32125842571258545, + "loss_ib": 0.004456333350390196, + "step": 256 + }, + { + "ce_ib": 17.51129722595215, + "ce_orig": 0.6895196437835693, + "epoch": 0.07390897979725358, + "kl_loss": 0.29928696155548096, + "loss_ib": 0.00474399933591485, + "step": 257 + }, + { + "ce_ib": 19.982250213623047, + "ce_orig": 1.2780667543411255, + "epoch": 0.07390897979725358, + "kl_loss": 0.4109703004360199, + "loss_ib": 0.006107928231358528, + "step": 257 + }, + { + "ce_ib": 18.59293556213379, + "ce_orig": 0.7878507375717163, + "epoch": 0.07390897979725358, + "kl_loss": 0.2821478247642517, + "loss_ib": 0.004680771846324205, + "step": 257 + }, + { + "ce_ib": 16.528976440429688, + "ce_orig": 0.9417824149131775, + "epoch": 0.07390897979725358, + "kl_loss": 0.2769574522972107, + "loss_ib": 0.004422471858561039, + "step": 257 + }, + { + "ce_ib": 12.966187477111816, + "ce_orig": 0.5238648653030396, + "epoch": 0.07419656337623122, + "kl_loss": 0.31731730699539185, + "loss_ib": 0.004469791427254677, + "step": 258 + }, + { + "ce_ib": 13.36031723022461, + "ce_orig": 0.5534040927886963, + "epoch": 0.07419656337623122, + "kl_loss": 0.2720886170864105, + "loss_ib": 0.00405691796913743, + "step": 258 + }, + { + "ce_ib": 19.149700164794922, + "ce_orig": 1.4191524982452393, + "epoch": 0.07419656337623122, + "kl_loss": 0.34151729941368103, + "loss_ib": 0.00533014303073287, + "step": 258 + }, + { + "ce_ib": 19.49608612060547, + "ce_orig": 1.3304085731506348, + "epoch": 0.07419656337623122, + "kl_loss": 0.303983211517334, + "loss_ib": 0.004989440552890301, + "step": 258 + }, + { + "ce_ib": 16.158462524414062, + "ce_orig": 0.7256084680557251, + "epoch": 0.07448414695520886, + "kl_loss": 0.3417550027370453, + "loss_ib": 0.0050333961844444275, + "step": 259 + }, + { + "ce_ib": 15.630940437316895, + "ce_orig": 0.45199069380760193, + "epoch": 0.07448414695520886, + "kl_loss": 0.3961242437362671, + "loss_ib": 0.005524335894733667, + "step": 259 + }, + { + "ce_ib": 15.765962600708008, + "ce_orig": 1.1196238994598389, + "epoch": 0.07448414695520886, + "kl_loss": 0.25270766019821167, + "loss_ib": 0.004103672690689564, + "step": 259 + }, + { + "ce_ib": 13.915310859680176, + "ce_orig": 0.8033282160758972, + "epoch": 0.07448414695520886, + "kl_loss": 0.292714923620224, + "loss_ib": 0.00431868014857173, + "step": 259 + }, + { + "epoch": 0.0747717305341865, + "grad_norm": 0.08863961696624756, + "learning_rate": 4.044585987261147e-05, + "loss": 0.8927, + "step": 260 + }, + { + "ce_ib": 15.066596031188965, + "ce_orig": 0.8063942193984985, + "epoch": 0.0747717305341865, + "kl_loss": 0.28827041387557983, + "loss_ib": 0.004389363341033459, + "step": 260 + }, + { + "ce_ib": 12.796646118164062, + "ce_orig": 0.41930192708969116, + "epoch": 0.0747717305341865, + "kl_loss": 0.2615140378475189, + "loss_ib": 0.0038948049768805504, + "step": 260 + }, + { + "ce_ib": 14.110474586486816, + "ce_orig": 0.68003249168396, + "epoch": 0.0747717305341865, + "kl_loss": 0.261357843875885, + "loss_ib": 0.0040246257558465, + "step": 260 + }, + { + "ce_ib": 17.80391502380371, + "ce_orig": 0.7285661697387695, + "epoch": 0.0747717305341865, + "kl_loss": 0.29259398579597473, + "loss_ib": 0.00470633152872324, + "step": 260 + }, + { + "ce_ib": 10.909024238586426, + "ce_orig": 0.47324129939079285, + "epoch": 0.07505931411316413, + "kl_loss": 0.2249765694141388, + "loss_ib": 0.0033406680449843407, + "step": 261 + }, + { + "ce_ib": 15.371655464172363, + "ce_orig": 0.9706589579582214, + "epoch": 0.07505931411316413, + "kl_loss": 0.2712176442146301, + "loss_ib": 0.004249341785907745, + "step": 261 + }, + { + "ce_ib": 13.265528678894043, + "ce_orig": 0.8591080904006958, + "epoch": 0.07505931411316413, + "kl_loss": 0.2495938241481781, + "loss_ib": 0.0038224910385906696, + "step": 261 + }, + { + "ce_ib": 10.983420372009277, + "ce_orig": 0.6682037711143494, + "epoch": 0.07505931411316413, + "kl_loss": 0.18751020729541779, + "loss_ib": 0.0029734440613538027, + "step": 261 + }, + { + "ce_ib": 11.951087951660156, + "ce_orig": 0.6800048351287842, + "epoch": 0.07534689769214178, + "kl_loss": 0.3001071512699127, + "loss_ib": 0.004196180030703545, + "step": 262 + }, + { + "ce_ib": 17.0267276763916, + "ce_orig": 0.4345322549343109, + "epoch": 0.07534689769214178, + "kl_loss": 0.3833320736885071, + "loss_ib": 0.005535993259400129, + "step": 262 + }, + { + "ce_ib": 18.44808006286621, + "ce_orig": 0.9550086259841919, + "epoch": 0.07534689769214178, + "kl_loss": 0.2870803773403168, + "loss_ib": 0.004715611692517996, + "step": 262 + }, + { + "ce_ib": 16.392032623291016, + "ce_orig": 1.0675876140594482, + "epoch": 0.07534689769214178, + "kl_loss": 0.30814555287361145, + "loss_ib": 0.004720658529549837, + "step": 262 + }, + { + "ce_ib": 16.68846321105957, + "ce_orig": 0.6256803274154663, + "epoch": 0.07563448127111942, + "kl_loss": 0.2585268020629883, + "loss_ib": 0.004254114348441362, + "step": 263 + }, + { + "ce_ib": 12.127674102783203, + "ce_orig": 0.5661578178405762, + "epoch": 0.07563448127111942, + "kl_loss": 0.23702625930309296, + "loss_ib": 0.0035830300766974688, + "step": 263 + }, + { + "ce_ib": 16.941381454467773, + "ce_orig": 0.6445264220237732, + "epoch": 0.07563448127111942, + "kl_loss": 0.29147881269454956, + "loss_ib": 0.004608925897628069, + "step": 263 + }, + { + "ce_ib": 11.159895896911621, + "ce_orig": 0.6294872164726257, + "epoch": 0.07563448127111942, + "kl_loss": 0.25180041790008545, + "loss_ib": 0.0036339936777949333, + "step": 263 + }, + { + "ce_ib": 16.635135650634766, + "ce_orig": 1.0586961507797241, + "epoch": 0.07592206485009706, + "kl_loss": 0.2687499523162842, + "loss_ib": 0.004351012874394655, + "step": 264 + }, + { + "ce_ib": 12.455622673034668, + "ce_orig": 0.8253864049911499, + "epoch": 0.07592206485009706, + "kl_loss": 0.24592146277427673, + "loss_ib": 0.00370477675460279, + "step": 264 + }, + { + "ce_ib": 11.99234676361084, + "ce_orig": 0.571262776851654, + "epoch": 0.07592206485009706, + "kl_loss": 0.19673100113868713, + "loss_ib": 0.0031665447168052197, + "step": 264 + }, + { + "ce_ib": 16.71959114074707, + "ce_orig": 0.8490833044052124, + "epoch": 0.07592206485009706, + "kl_loss": 0.3431280553340912, + "loss_ib": 0.005103239323943853, + "step": 264 + }, + { + "epoch": 0.0762096484290747, + "grad_norm": 0.09324845671653748, + "learning_rate": 4.1242038216560514e-05, + "loss": 0.8594, + "step": 265 + }, + { + "ce_ib": 13.668450355529785, + "ce_orig": 0.7410028576850891, + "epoch": 0.0762096484290747, + "kl_loss": 0.3923606872558594, + "loss_ib": 0.005290451925247908, + "step": 265 + }, + { + "ce_ib": 16.50493621826172, + "ce_orig": 0.904811441898346, + "epoch": 0.0762096484290747, + "kl_loss": 0.32142162322998047, + "loss_ib": 0.004864709917455912, + "step": 265 + }, + { + "ce_ib": 14.940958976745605, + "ce_orig": 1.0615205764770508, + "epoch": 0.0762096484290747, + "kl_loss": 0.2736600637435913, + "loss_ib": 0.004230696242302656, + "step": 265 + }, + { + "ce_ib": 20.21286964416504, + "ce_orig": 0.936401903629303, + "epoch": 0.0762096484290747, + "kl_loss": 0.2895187437534332, + "loss_ib": 0.004916474223136902, + "step": 265 + }, + { + "ce_ib": 15.859089851379395, + "ce_orig": 0.9013702869415283, + "epoch": 0.07649723200805233, + "kl_loss": 0.2485354244709015, + "loss_ib": 0.004071263130754232, + "step": 266 + }, + { + "ce_ib": 14.475261688232422, + "ce_orig": 0.9954730272293091, + "epoch": 0.07649723200805233, + "kl_loss": 0.2513744533061981, + "loss_ib": 0.003961270209401846, + "step": 266 + }, + { + "ce_ib": 20.038143157958984, + "ce_orig": 1.3421083688735962, + "epoch": 0.07649723200805233, + "kl_loss": 0.281283974647522, + "loss_ib": 0.00481665413826704, + "step": 266 + }, + { + "ce_ib": 22.541851043701172, + "ce_orig": 1.7132771015167236, + "epoch": 0.07649723200805233, + "kl_loss": 0.31567251682281494, + "loss_ib": 0.005410910118371248, + "step": 266 + }, + { + "ce_ib": 14.337822914123535, + "ce_orig": 0.8144393563270569, + "epoch": 0.07678481558702999, + "kl_loss": 0.31490829586982727, + "loss_ib": 0.004582865163683891, + "step": 267 + }, + { + "ce_ib": 16.432825088500977, + "ce_orig": 0.5571436285972595, + "epoch": 0.07678481558702999, + "kl_loss": 0.3173448443412781, + "loss_ib": 0.004816730972379446, + "step": 267 + }, + { + "ce_ib": 18.915435791015625, + "ce_orig": 1.2111248970031738, + "epoch": 0.07678481558702999, + "kl_loss": 0.27650901675224304, + "loss_ib": 0.004656633827835321, + "step": 267 + }, + { + "ce_ib": 20.70977783203125, + "ce_orig": 1.6732383966445923, + "epoch": 0.07678481558702999, + "kl_loss": 0.32756784558296204, + "loss_ib": 0.00534665584564209, + "step": 267 + }, + { + "ce_ib": 19.59430503845215, + "ce_orig": 1.263615369796753, + "epoch": 0.07707239916600762, + "kl_loss": 0.2561582028865814, + "loss_ib": 0.004521012306213379, + "step": 268 + }, + { + "ce_ib": 24.518985748291016, + "ce_orig": 2.1271183490753174, + "epoch": 0.07707239916600762, + "kl_loss": 0.36851945519447327, + "loss_ib": 0.006137093063443899, + "step": 268 + }, + { + "ce_ib": 16.20216941833496, + "ce_orig": 0.524202823638916, + "epoch": 0.07707239916600762, + "kl_loss": 0.30454859137535095, + "loss_ib": 0.00466570258140564, + "step": 268 + }, + { + "ce_ib": 16.534713745117188, + "ce_orig": 0.59481281042099, + "epoch": 0.07707239916600762, + "kl_loss": 0.24704763293266296, + "loss_ib": 0.004123947583138943, + "step": 268 + }, + { + "ce_ib": 15.644123077392578, + "ce_orig": 0.9609376788139343, + "epoch": 0.07735998274498526, + "kl_loss": 0.24495989084243774, + "loss_ib": 0.0040140110068023205, + "step": 269 + }, + { + "ce_ib": 13.680428504943848, + "ce_orig": 0.7830809950828552, + "epoch": 0.07735998274498526, + "kl_loss": 0.23677141964435577, + "loss_ib": 0.0037357567343860865, + "step": 269 + }, + { + "ce_ib": 19.454843521118164, + "ce_orig": 0.8037047386169434, + "epoch": 0.07735998274498526, + "kl_loss": 0.33364683389663696, + "loss_ib": 0.005281952675431967, + "step": 269 + }, + { + "ce_ib": 13.62769889831543, + "ce_orig": 0.7561288475990295, + "epoch": 0.07735998274498526, + "kl_loss": 0.3251601457595825, + "loss_ib": 0.004614371340721846, + "step": 269 + }, + { + "epoch": 0.0776475663239629, + "grad_norm": 0.06999674439430237, + "learning_rate": 4.2038216560509556e-05, + "loss": 0.8477, + "step": 270 + }, + { + "ce_ib": 15.570327758789062, + "ce_orig": 0.5456323027610779, + "epoch": 0.0776475663239629, + "kl_loss": 0.24074603617191315, + "loss_ib": 0.003964493051171303, + "step": 270 + }, + { + "ce_ib": 12.79995346069336, + "ce_orig": 0.5012090802192688, + "epoch": 0.0776475663239629, + "kl_loss": 0.3022935688495636, + "loss_ib": 0.004302930552512407, + "step": 270 + }, + { + "ce_ib": 16.223758697509766, + "ce_orig": 0.3916482925415039, + "epoch": 0.0776475663239629, + "kl_loss": 0.27299919724464417, + "loss_ib": 0.004352367948740721, + "step": 270 + }, + { + "ce_ib": 14.882962226867676, + "ce_orig": 0.9101399183273315, + "epoch": 0.0776475663239629, + "kl_loss": 0.2818550765514374, + "loss_ib": 0.004306846763938665, + "step": 270 + }, + { + "ce_ib": 15.584421157836914, + "ce_orig": 0.616856575012207, + "epoch": 0.07793514990294054, + "kl_loss": 0.2159929871559143, + "loss_ib": 0.003718371968716383, + "step": 271 + }, + { + "ce_ib": 14.746891975402832, + "ce_orig": 0.5636629462242126, + "epoch": 0.07793514990294054, + "kl_loss": 0.3384188413619995, + "loss_ib": 0.004858877509832382, + "step": 271 + }, + { + "ce_ib": 19.72770881652832, + "ce_orig": 1.6866846084594727, + "epoch": 0.07793514990294054, + "kl_loss": 0.2658270597457886, + "loss_ib": 0.004631041083484888, + "step": 271 + }, + { + "ce_ib": 13.834111213684082, + "ce_orig": 0.4504989981651306, + "epoch": 0.07793514990294054, + "kl_loss": 0.309722900390625, + "loss_ib": 0.0044806404039263725, + "step": 271 + }, + { + "ce_ib": 16.82283592224121, + "ce_orig": 1.4967201948165894, + "epoch": 0.07822273348191819, + "kl_loss": 0.2537575364112854, + "loss_ib": 0.004219858907163143, + "step": 272 + }, + { + "ce_ib": 16.692188262939453, + "ce_orig": 0.7724244594573975, + "epoch": 0.07822273348191819, + "kl_loss": 0.3247263431549072, + "loss_ib": 0.004916482139378786, + "step": 272 + }, + { + "ce_ib": 17.720109939575195, + "ce_orig": 1.0197162628173828, + "epoch": 0.07822273348191819, + "kl_loss": 0.3129570484161377, + "loss_ib": 0.004901581443846226, + "step": 272 + }, + { + "ce_ib": 11.472055435180664, + "ce_orig": 0.7266469597816467, + "epoch": 0.07822273348191819, + "kl_loss": 0.2540128827095032, + "loss_ib": 0.0036873342469334602, + "step": 272 + }, + { + "ce_ib": 14.627097129821777, + "ce_orig": 0.5554067492485046, + "epoch": 0.07851031706089583, + "kl_loss": 0.27136164903640747, + "loss_ib": 0.004176326096057892, + "step": 273 + }, + { + "ce_ib": 12.32707405090332, + "ce_orig": 0.832676887512207, + "epoch": 0.07851031706089583, + "kl_loss": 0.23146983981132507, + "loss_ib": 0.0035474055912345648, + "step": 273 + }, + { + "ce_ib": 14.380395889282227, + "ce_orig": 0.5141070485115051, + "epoch": 0.07851031706089583, + "kl_loss": 0.2842778265476227, + "loss_ib": 0.004280817694962025, + "step": 273 + }, + { + "ce_ib": 16.54999351501465, + "ce_orig": 1.1931746006011963, + "epoch": 0.07851031706089583, + "kl_loss": 0.3062623143196106, + "loss_ib": 0.004717622417956591, + "step": 273 + }, + { + "ce_ib": 16.448566436767578, + "ce_orig": 0.9063194394111633, + "epoch": 0.07879790063987346, + "kl_loss": 0.254788339138031, + "loss_ib": 0.004192739725112915, + "step": 274 + }, + { + "ce_ib": 20.326473236083984, + "ce_orig": 1.289271593093872, + "epoch": 0.07879790063987346, + "kl_loss": 0.21474193036556244, + "loss_ib": 0.004180066287517548, + "step": 274 + }, + { + "ce_ib": 14.406719207763672, + "ce_orig": 1.0166672468185425, + "epoch": 0.07879790063987346, + "kl_loss": 0.2415088266134262, + "loss_ib": 0.0038557599764317274, + "step": 274 + }, + { + "ce_ib": 17.027986526489258, + "ce_orig": 1.0573188066482544, + "epoch": 0.07879790063987346, + "kl_loss": 0.33195120096206665, + "loss_ib": 0.0050223106518387794, + "step": 274 + }, + { + "epoch": 0.0790854842188511, + "grad_norm": 0.08076049387454987, + "learning_rate": 4.2834394904458604e-05, + "loss": 0.908, + "step": 275 + }, + { + "ce_ib": 11.249130249023438, + "ce_orig": 0.5311962962150574, + "epoch": 0.0790854842188511, + "kl_loss": 0.2099292278289795, + "loss_ib": 0.0032242052257061005, + "step": 275 + }, + { + "ce_ib": 17.736249923706055, + "ce_orig": 0.9784615635871887, + "epoch": 0.0790854842188511, + "kl_loss": 0.27468031644821167, + "loss_ib": 0.004520427901297808, + "step": 275 + }, + { + "ce_ib": 14.927811622619629, + "ce_orig": 0.7807605862617493, + "epoch": 0.0790854842188511, + "kl_loss": 0.35637491941452026, + "loss_ib": 0.005056530237197876, + "step": 275 + }, + { + "ce_ib": 12.488973617553711, + "ce_orig": 0.8185478448867798, + "epoch": 0.0790854842188511, + "kl_loss": 0.31116726994514465, + "loss_ib": 0.004360570106655359, + "step": 275 + }, + { + "ce_ib": 15.501134872436523, + "ce_orig": 1.238783597946167, + "epoch": 0.07937306779782874, + "kl_loss": 0.20382773876190186, + "loss_ib": 0.0035883907694369555, + "step": 276 + }, + { + "ce_ib": 17.22933578491211, + "ce_orig": 1.2730385065078735, + "epoch": 0.07937306779782874, + "kl_loss": 0.47506648302078247, + "loss_ib": 0.006473598536103964, + "step": 276 + }, + { + "ce_ib": 13.309030532836914, + "ce_orig": 0.5584474802017212, + "epoch": 0.07937306779782874, + "kl_loss": 0.23811832070350647, + "loss_ib": 0.003712086006999016, + "step": 276 + }, + { + "ce_ib": 12.37324333190918, + "ce_orig": 0.8598084449768066, + "epoch": 0.07937306779782874, + "kl_loss": 0.20274879038333893, + "loss_ib": 0.0032648120541125536, + "step": 276 + }, + { + "ce_ib": 18.88533592224121, + "ce_orig": 0.7922468781471252, + "epoch": 0.07966065137680639, + "kl_loss": 0.2773568034172058, + "loss_ib": 0.004662101622670889, + "step": 277 + }, + { + "ce_ib": 16.412996292114258, + "ce_orig": 0.7500933408737183, + "epoch": 0.07966065137680639, + "kl_loss": 0.27461355924606323, + "loss_ib": 0.004387435037642717, + "step": 277 + }, + { + "ce_ib": 15.230081558227539, + "ce_orig": 1.1697120666503906, + "epoch": 0.07966065137680639, + "kl_loss": 0.2722022533416748, + "loss_ib": 0.00424503069370985, + "step": 277 + }, + { + "ce_ib": 15.583248138427734, + "ce_orig": 0.8734590411186218, + "epoch": 0.07966065137680639, + "kl_loss": 0.2992432117462158, + "loss_ib": 0.004550756886601448, + "step": 277 + }, + { + "ce_ib": 15.06824779510498, + "ce_orig": 1.1103448867797852, + "epoch": 0.07994823495578403, + "kl_loss": 0.2773784101009369, + "loss_ib": 0.0042806086130440235, + "step": 278 + }, + { + "ce_ib": 19.022869110107422, + "ce_orig": 1.0327725410461426, + "epoch": 0.07994823495578403, + "kl_loss": 0.33331602811813354, + "loss_ib": 0.005235447082668543, + "step": 278 + }, + { + "ce_ib": 15.828374862670898, + "ce_orig": 0.5875866413116455, + "epoch": 0.07994823495578403, + "kl_loss": 0.1845521628856659, + "loss_ib": 0.0034283590503036976, + "step": 278 + }, + { + "ce_ib": 10.436365127563477, + "ce_orig": 0.6552335023880005, + "epoch": 0.07994823495578403, + "kl_loss": 0.18279395997524261, + "loss_ib": 0.0028715759981423616, + "step": 278 + }, + { + "ce_ib": 12.258537292480469, + "ce_orig": 0.6182965636253357, + "epoch": 0.08023581853476167, + "kl_loss": 0.2173025906085968, + "loss_ib": 0.00339887966401875, + "step": 279 + }, + { + "ce_ib": 16.939525604248047, + "ce_orig": 1.2229260206222534, + "epoch": 0.08023581853476167, + "kl_loss": 0.249847412109375, + "loss_ib": 0.004192426800727844, + "step": 279 + }, + { + "ce_ib": 16.706846237182617, + "ce_orig": 1.1211984157562256, + "epoch": 0.08023581853476167, + "kl_loss": 0.2522197961807251, + "loss_ib": 0.004192882217466831, + "step": 279 + }, + { + "ce_ib": 14.674199104309082, + "ce_orig": 0.790857195854187, + "epoch": 0.08023581853476167, + "kl_loss": 0.20820698142051697, + "loss_ib": 0.0035494896583259106, + "step": 279 + }, + { + "epoch": 0.0805234021137393, + "grad_norm": 0.07456893473863602, + "learning_rate": 4.3630573248407646e-05, + "loss": 0.8909, + "step": 280 + }, + { + "ce_ib": 17.366165161132812, + "ce_orig": 0.7224763035774231, + "epoch": 0.0805234021137393, + "kl_loss": 0.2818063497543335, + "loss_ib": 0.004554680082947016, + "step": 280 + }, + { + "ce_ib": 15.292021751403809, + "ce_orig": 0.5337414145469666, + "epoch": 0.0805234021137393, + "kl_loss": 0.24038422107696533, + "loss_ib": 0.003933044150471687, + "step": 280 + }, + { + "ce_ib": 13.491898536682129, + "ce_orig": 0.5026684999465942, + "epoch": 0.0805234021137393, + "kl_loss": 0.19754835963249207, + "loss_ib": 0.0033246735110878944, + "step": 280 + }, + { + "ce_ib": 18.61618995666504, + "ce_orig": 1.3851393461227417, + "epoch": 0.0805234021137393, + "kl_loss": 0.3309285044670105, + "loss_ib": 0.005170903634279966, + "step": 280 + }, + { + "ce_ib": 12.345269203186035, + "ce_orig": 0.6989408135414124, + "epoch": 0.08081098569271694, + "kl_loss": 0.22234660387039185, + "loss_ib": 0.0034579928033053875, + "step": 281 + }, + { + "ce_ib": 11.875962257385254, + "ce_orig": 0.6694311499595642, + "epoch": 0.08081098569271694, + "kl_loss": 0.19901394844055176, + "loss_ib": 0.003177735721692443, + "step": 281 + }, + { + "ce_ib": 17.15048599243164, + "ce_orig": 0.5024470090866089, + "epoch": 0.08081098569271694, + "kl_loss": 0.3105819821357727, + "loss_ib": 0.004820868372917175, + "step": 281 + }, + { + "ce_ib": 18.85089111328125, + "ce_orig": 1.5827473402023315, + "epoch": 0.08081098569271694, + "kl_loss": 0.2236773669719696, + "loss_ib": 0.004121862351894379, + "step": 281 + }, + { + "ce_ib": 14.070013999938965, + "ce_orig": 0.9050841331481934, + "epoch": 0.08109856927169459, + "kl_loss": 0.26318368315696716, + "loss_ib": 0.00403883820399642, + "step": 282 + }, + { + "ce_ib": 15.759684562683105, + "ce_orig": 0.9596781134605408, + "epoch": 0.08109856927169459, + "kl_loss": 0.3149911165237427, + "loss_ib": 0.0047258795239031315, + "step": 282 + }, + { + "ce_ib": 18.05628776550293, + "ce_orig": 0.9637153148651123, + "epoch": 0.08109856927169459, + "kl_loss": 0.27664974331855774, + "loss_ib": 0.004572126083076, + "step": 282 + }, + { + "ce_ib": 18.621143341064453, + "ce_orig": 1.693290114402771, + "epoch": 0.08109856927169459, + "kl_loss": 0.2584025263786316, + "loss_ib": 0.004446139093488455, + "step": 282 + }, + { + "ce_ib": 11.559967041015625, + "ce_orig": 0.8692753911018372, + "epoch": 0.08138615285067223, + "kl_loss": 0.26356419920921326, + "loss_ib": 0.0037916384171694517, + "step": 283 + }, + { + "ce_ib": 16.148008346557617, + "ce_orig": 1.3362113237380981, + "epoch": 0.08138615285067223, + "kl_loss": 0.316663920879364, + "loss_ib": 0.004781439900398254, + "step": 283 + }, + { + "ce_ib": 21.178966522216797, + "ce_orig": 1.0704444646835327, + "epoch": 0.08138615285067223, + "kl_loss": 0.310921311378479, + "loss_ib": 0.005227109882980585, + "step": 283 + }, + { + "ce_ib": 14.54597282409668, + "ce_orig": 0.6114582419395447, + "epoch": 0.08138615285067223, + "kl_loss": 0.2650757431983948, + "loss_ib": 0.004105354659259319, + "step": 283 + }, + { + "ce_ib": 12.013655662536621, + "ce_orig": 0.8571666479110718, + "epoch": 0.08167373642964987, + "kl_loss": 0.2690030634403229, + "loss_ib": 0.003891396103426814, + "step": 284 + }, + { + "ce_ib": 16.22176170349121, + "ce_orig": 0.6218030452728271, + "epoch": 0.08167373642964987, + "kl_loss": 0.291636198759079, + "loss_ib": 0.004538537934422493, + "step": 284 + }, + { + "ce_ib": 16.29920768737793, + "ce_orig": 1.2184849977493286, + "epoch": 0.08167373642964987, + "kl_loss": 0.2997811734676361, + "loss_ib": 0.004627732560038567, + "step": 284 + }, + { + "ce_ib": 12.09832763671875, + "ce_orig": 0.6244350075721741, + "epoch": 0.08167373642964987, + "kl_loss": 0.2652917504310608, + "loss_ib": 0.0038627502508461475, + "step": 284 + }, + { + "epoch": 0.0819613200086275, + "grad_norm": 0.07042936980724335, + "learning_rate": 4.442675159235669e-05, + "loss": 0.9056, + "step": 285 + }, + { + "ce_ib": 13.477680206298828, + "ce_orig": 0.6597225666046143, + "epoch": 0.0819613200086275, + "kl_loss": 0.26786327362060547, + "loss_ib": 0.0040264008566737175, + "step": 285 + }, + { + "ce_ib": 16.648954391479492, + "ce_orig": 0.7288675904273987, + "epoch": 0.0819613200086275, + "kl_loss": 0.234561488032341, + "loss_ib": 0.004010510165244341, + "step": 285 + }, + { + "ce_ib": 16.058998107910156, + "ce_orig": 0.9044990539550781, + "epoch": 0.0819613200086275, + "kl_loss": 0.438146710395813, + "loss_ib": 0.0059873671270906925, + "step": 285 + }, + { + "ce_ib": 14.675004005432129, + "ce_orig": 0.6858831644058228, + "epoch": 0.0819613200086275, + "kl_loss": 0.41518154740333557, + "loss_ib": 0.005619315896183252, + "step": 285 + }, + { + "ce_ib": 19.281421661376953, + "ce_orig": 1.3555938005447388, + "epoch": 0.08224890358760514, + "kl_loss": 0.27201730012893677, + "loss_ib": 0.004648315254598856, + "step": 286 + }, + { + "ce_ib": 14.502872467041016, + "ce_orig": 0.7407470345497131, + "epoch": 0.08224890358760514, + "kl_loss": 0.16434180736541748, + "loss_ib": 0.0030937050469219685, + "step": 286 + }, + { + "ce_ib": 16.41741180419922, + "ce_orig": 1.1262027025222778, + "epoch": 0.08224890358760514, + "kl_loss": 0.2105521857738495, + "loss_ib": 0.0037472627591341734, + "step": 286 + }, + { + "ce_ib": 19.66245460510254, + "ce_orig": 1.3670978546142578, + "epoch": 0.08224890358760514, + "kl_loss": 0.2873149514198303, + "loss_ib": 0.004839394707232714, + "step": 286 + }, + { + "ce_ib": 13.970117568969727, + "ce_orig": 0.588868260383606, + "epoch": 0.0825364871665828, + "kl_loss": 0.27275267243385315, + "loss_ib": 0.004124538041651249, + "step": 287 + }, + { + "ce_ib": 17.439908981323242, + "ce_orig": 1.1837517023086548, + "epoch": 0.0825364871665828, + "kl_loss": 0.1923927515745163, + "loss_ib": 0.0036679182667285204, + "step": 287 + }, + { + "ce_ib": 10.239778518676758, + "ce_orig": 0.5817263722419739, + "epoch": 0.0825364871665828, + "kl_loss": 0.2323988527059555, + "loss_ib": 0.003347966354340315, + "step": 287 + }, + { + "ce_ib": 13.104039192199707, + "ce_orig": 0.8184726238250732, + "epoch": 0.0825364871665828, + "kl_loss": 0.272987425327301, + "loss_ib": 0.0040402780286967754, + "step": 287 + }, + { + "ce_ib": 11.393816947937012, + "ce_orig": 0.882415235042572, + "epoch": 0.08282407074556043, + "kl_loss": 0.17284469306468964, + "loss_ib": 0.0028678285889327526, + "step": 288 + }, + { + "ce_ib": 14.922150611877441, + "ce_orig": 0.6627479791641235, + "epoch": 0.08282407074556043, + "kl_loss": 0.20301824808120728, + "loss_ib": 0.0035223974846303463, + "step": 288 + }, + { + "ce_ib": 17.70345687866211, + "ce_orig": 0.603179931640625, + "epoch": 0.08282407074556043, + "kl_loss": 0.32347768545150757, + "loss_ib": 0.005005122162401676, + "step": 288 + }, + { + "ce_ib": 11.391678810119629, + "ce_orig": 0.6061888337135315, + "epoch": 0.08282407074556043, + "kl_loss": 0.26528626680374146, + "loss_ib": 0.003792030503973365, + "step": 288 + }, + { + "ce_ib": 13.464797019958496, + "ce_orig": 0.5479658246040344, + "epoch": 0.08311165432453807, + "kl_loss": 0.21538397669792175, + "loss_ib": 0.003500319318845868, + "step": 289 + }, + { + "ce_ib": 19.783390045166016, + "ce_orig": 1.4441841840744019, + "epoch": 0.08311165432453807, + "kl_loss": 0.2706390619277954, + "loss_ib": 0.004684729501605034, + "step": 289 + }, + { + "ce_ib": 16.319841384887695, + "ce_orig": 1.0974111557006836, + "epoch": 0.08311165432453807, + "kl_loss": 0.3325914144515991, + "loss_ib": 0.004957898054271936, + "step": 289 + }, + { + "ce_ib": 15.902824401855469, + "ce_orig": 0.6798999905586243, + "epoch": 0.08311165432453807, + "kl_loss": 0.2273552566766739, + "loss_ib": 0.003863835008814931, + "step": 289 + }, + { + "epoch": 0.08339923790351571, + "grad_norm": 0.07085248827934265, + "learning_rate": 4.522292993630574e-05, + "loss": 0.8988, + "step": 290 + }, + { + "ce_ib": 12.853606224060059, + "ce_orig": 0.5977413654327393, + "epoch": 0.08339923790351571, + "kl_loss": 0.258215069770813, + "loss_ib": 0.003867511171847582, + "step": 290 + }, + { + "ce_ib": 13.45907211303711, + "ce_orig": 0.7957695722579956, + "epoch": 0.08339923790351571, + "kl_loss": 0.2829551100730896, + "loss_ib": 0.00417545810341835, + "step": 290 + }, + { + "ce_ib": 16.264999389648438, + "ce_orig": 0.921114444732666, + "epoch": 0.08339923790351571, + "kl_loss": 0.2568701505661011, + "loss_ib": 0.004195201210677624, + "step": 290 + }, + { + "ce_ib": 11.070430755615234, + "ce_orig": 0.828477680683136, + "epoch": 0.08339923790351571, + "kl_loss": 0.18585243821144104, + "loss_ib": 0.0029655674006789923, + "step": 290 + }, + { + "ce_ib": 15.938783645629883, + "ce_orig": 0.8985275626182556, + "epoch": 0.08368682148249335, + "kl_loss": 0.322698712348938, + "loss_ib": 0.004820865113288164, + "step": 291 + }, + { + "ce_ib": 12.973055839538574, + "ce_orig": 0.9591237902641296, + "epoch": 0.08368682148249335, + "kl_loss": 0.1751486361026764, + "loss_ib": 0.0030487917829304934, + "step": 291 + }, + { + "ce_ib": 15.191100120544434, + "ce_orig": 0.9854549765586853, + "epoch": 0.08368682148249335, + "kl_loss": 0.2032536417245865, + "loss_ib": 0.0035516463685780764, + "step": 291 + }, + { + "ce_ib": 15.310328483581543, + "ce_orig": 0.8211847543716431, + "epoch": 0.08368682148249335, + "kl_loss": 0.2667645514011383, + "loss_ib": 0.004198677837848663, + "step": 291 + }, + { + "ce_ib": 16.732818603515625, + "ce_orig": 0.9525948166847229, + "epoch": 0.083974405061471, + "kl_loss": 0.24904996156692505, + "loss_ib": 0.004163781180977821, + "step": 292 + }, + { + "ce_ib": 14.454859733581543, + "ce_orig": 0.8364003896713257, + "epoch": 0.083974405061471, + "kl_loss": 0.300500750541687, + "loss_ib": 0.0044504934921860695, + "step": 292 + }, + { + "ce_ib": 16.7276611328125, + "ce_orig": 1.103460431098938, + "epoch": 0.083974405061471, + "kl_loss": 0.25054287910461426, + "loss_ib": 0.004178194794803858, + "step": 292 + }, + { + "ce_ib": 18.47587776184082, + "ce_orig": 1.1350774765014648, + "epoch": 0.083974405061471, + "kl_loss": 0.2811344861984253, + "loss_ib": 0.00465893279761076, + "step": 292 + }, + { + "ce_ib": 15.593929290771484, + "ce_orig": 0.7370646595954895, + "epoch": 0.08426198864044863, + "kl_loss": 0.21932320296764374, + "loss_ib": 0.003752624848857522, + "step": 293 + }, + { + "ce_ib": 15.527499198913574, + "ce_orig": 1.2750834226608276, + "epoch": 0.08426198864044863, + "kl_loss": 0.23349913954734802, + "loss_ib": 0.0038877411279827356, + "step": 293 + }, + { + "ce_ib": 12.307111740112305, + "ce_orig": 0.68174147605896, + "epoch": 0.08426198864044863, + "kl_loss": 0.29035478830337524, + "loss_ib": 0.004134258721023798, + "step": 293 + }, + { + "ce_ib": 11.335613250732422, + "ce_orig": 0.65586918592453, + "epoch": 0.08426198864044863, + "kl_loss": 0.21021895110607147, + "loss_ib": 0.0032357508316636086, + "step": 293 + }, + { + "ce_ib": 12.788888931274414, + "ce_orig": 1.1009180545806885, + "epoch": 0.08454957221942627, + "kl_loss": 0.2401624619960785, + "loss_ib": 0.00368051347322762, + "step": 294 + }, + { + "ce_ib": 16.075815200805664, + "ce_orig": 0.7001639604568481, + "epoch": 0.08454957221942627, + "kl_loss": 0.32635319232940674, + "loss_ib": 0.00487111322581768, + "step": 294 + }, + { + "ce_ib": 12.539616584777832, + "ce_orig": 0.7154040932655334, + "epoch": 0.08454957221942627, + "kl_loss": 0.1870480477809906, + "loss_ib": 0.0031244419515132904, + "step": 294 + }, + { + "ce_ib": 12.290575981140137, + "ce_orig": 0.8135526180267334, + "epoch": 0.08454957221942627, + "kl_loss": 0.2699443995952606, + "loss_ib": 0.003928501624614, + "step": 294 + }, + { + "epoch": 0.08483715579840391, + "grad_norm": 0.07897292077541351, + "learning_rate": 4.601910828025478e-05, + "loss": 0.8706, + "step": 295 + }, + { + "ce_ib": 18.31114959716797, + "ce_orig": 0.6431681513786316, + "epoch": 0.08483715579840391, + "kl_loss": 0.27935224771499634, + "loss_ib": 0.004624637309461832, + "step": 295 + }, + { + "ce_ib": 13.731470108032227, + "ce_orig": 0.8691681623458862, + "epoch": 0.08483715579840391, + "kl_loss": 0.19459585845470428, + "loss_ib": 0.003319105366244912, + "step": 295 + }, + { + "ce_ib": 11.220470428466797, + "ce_orig": 0.7137093544006348, + "epoch": 0.08483715579840391, + "kl_loss": 0.1932218372821808, + "loss_ib": 0.003054265398532152, + "step": 295 + }, + { + "ce_ib": 13.056318283081055, + "ce_orig": 0.5071139931678772, + "epoch": 0.08483715579840391, + "kl_loss": 0.21704959869384766, + "loss_ib": 0.0034761279821395874, + "step": 295 + }, + { + "ce_ib": 11.606334686279297, + "ce_orig": 0.7217742800712585, + "epoch": 0.08512473937738155, + "kl_loss": 0.1716514676809311, + "loss_ib": 0.002877148101106286, + "step": 296 + }, + { + "ce_ib": 17.7640438079834, + "ce_orig": 1.607155203819275, + "epoch": 0.08512473937738155, + "kl_loss": 0.29561007022857666, + "loss_ib": 0.004732504952698946, + "step": 296 + }, + { + "ce_ib": 17.47099494934082, + "ce_orig": 0.9363014698028564, + "epoch": 0.08512473937738155, + "kl_loss": 0.27039089798927307, + "loss_ib": 0.004451008513569832, + "step": 296 + }, + { + "ce_ib": 17.832012176513672, + "ce_orig": 1.6501764059066772, + "epoch": 0.08512473937738155, + "kl_loss": 0.5178996324539185, + "loss_ib": 0.006962197367101908, + "step": 296 + }, + { + "ce_ib": 13.307634353637695, + "ce_orig": 0.8553745746612549, + "epoch": 0.08541232295635919, + "kl_loss": 0.28302001953125, + "loss_ib": 0.004160963464528322, + "step": 297 + }, + { + "ce_ib": 13.746207237243652, + "ce_orig": 0.6645457744598389, + "epoch": 0.08541232295635919, + "kl_loss": 0.2449042946100235, + "loss_ib": 0.0038236635737121105, + "step": 297 + }, + { + "ce_ib": 11.31932544708252, + "ce_orig": 0.4558902978897095, + "epoch": 0.08541232295635919, + "kl_loss": 0.1459667682647705, + "loss_ib": 0.002591600175946951, + "step": 297 + }, + { + "ce_ib": 10.261171340942383, + "ce_orig": 0.7316778302192688, + "epoch": 0.08541232295635919, + "kl_loss": 0.18448230624198914, + "loss_ib": 0.002870940137654543, + "step": 297 + }, + { + "ce_ib": 15.214677810668945, + "ce_orig": 0.7406959533691406, + "epoch": 0.08569990653533684, + "kl_loss": 0.3475422263145447, + "loss_ib": 0.004996889736503363, + "step": 298 + }, + { + "ce_ib": 12.732254981994629, + "ce_orig": 0.9583520889282227, + "epoch": 0.08569990653533684, + "kl_loss": 0.2888698875904083, + "loss_ib": 0.004161924123764038, + "step": 298 + }, + { + "ce_ib": 13.56235408782959, + "ce_orig": 0.7003698348999023, + "epoch": 0.08569990653533684, + "kl_loss": 0.22925713658332825, + "loss_ib": 0.00364880682900548, + "step": 298 + }, + { + "ce_ib": 17.023927688598633, + "ce_orig": 0.7627611756324768, + "epoch": 0.08569990653533684, + "kl_loss": 0.3114122152328491, + "loss_ib": 0.004816514905542135, + "step": 298 + }, + { + "ce_ib": 12.935884475708008, + "ce_orig": 0.5755088329315186, + "epoch": 0.08598749011431447, + "kl_loss": 0.23949839174747467, + "loss_ib": 0.003688572207465768, + "step": 299 + }, + { + "ce_ib": 15.182515144348145, + "ce_orig": 0.7093390226364136, + "epoch": 0.08598749011431447, + "kl_loss": 0.254234254360199, + "loss_ib": 0.0040605938993394375, + "step": 299 + }, + { + "ce_ib": 10.937736511230469, + "ce_orig": 0.7415173649787903, + "epoch": 0.08598749011431447, + "kl_loss": 0.21827897429466248, + "loss_ib": 0.003276563249528408, + "step": 299 + }, + { + "ce_ib": 14.389042854309082, + "ce_orig": 0.6369369029998779, + "epoch": 0.08598749011431447, + "kl_loss": 0.25055113434791565, + "loss_ib": 0.003944415133446455, + "step": 299 + }, + { + "epoch": 0.08627507369329211, + "grad_norm": 0.08575107157230377, + "learning_rate": 4.681528662420383e-05, + "loss": 0.8178, + "step": 300 + }, + { + "ce_ib": 15.538275718688965, + "ce_orig": 0.7116795778274536, + "epoch": 0.08627507369329211, + "kl_loss": 0.1944524198770523, + "loss_ib": 0.0034983514342457056, + "step": 300 + }, + { + "ce_ib": 14.46932315826416, + "ce_orig": 1.0576528310775757, + "epoch": 0.08627507369329211, + "kl_loss": 0.1829913854598999, + "loss_ib": 0.0032768461387604475, + "step": 300 + }, + { + "ce_ib": 12.83568000793457, + "ce_orig": 1.0147863626480103, + "epoch": 0.08627507369329211, + "kl_loss": 0.25184863805770874, + "loss_ib": 0.0038020543288439512, + "step": 300 + }, + { + "ce_ib": 11.396459579467773, + "ce_orig": 0.6744865775108337, + "epoch": 0.08627507369329211, + "kl_loss": 0.21117661893367767, + "loss_ib": 0.003251412184908986, + "step": 300 + }, + { + "ce_ib": 17.51881980895996, + "ce_orig": 1.0844104290008545, + "epoch": 0.08656265727226975, + "kl_loss": 0.24072128534317017, + "loss_ib": 0.004159094765782356, + "step": 301 + }, + { + "ce_ib": 16.883913040161133, + "ce_orig": 0.7893635034561157, + "epoch": 0.08656265727226975, + "kl_loss": 0.21162733435630798, + "loss_ib": 0.0038046645931899548, + "step": 301 + }, + { + "ce_ib": 20.599260330200195, + "ce_orig": 1.6651158332824707, + "epoch": 0.08656265727226975, + "kl_loss": 0.26889339089393616, + "loss_ib": 0.004748859908431768, + "step": 301 + }, + { + "ce_ib": 13.75562858581543, + "ce_orig": 0.9824258685112, + "epoch": 0.08656265727226975, + "kl_loss": 0.2234395146369934, + "loss_ib": 0.0036099578719586134, + "step": 301 + }, + { + "ce_ib": 13.83020305633545, + "ce_orig": 0.5410613417625427, + "epoch": 0.08685024085124739, + "kl_loss": 0.2865542471408844, + "loss_ib": 0.0042485627345740795, + "step": 302 + }, + { + "ce_ib": 17.551300048828125, + "ce_orig": 0.7735820412635803, + "epoch": 0.08685024085124739, + "kl_loss": 0.25298500061035156, + "loss_ib": 0.004284979775547981, + "step": 302 + }, + { + "ce_ib": 14.208580017089844, + "ce_orig": 0.8505828380584717, + "epoch": 0.08685024085124739, + "kl_loss": 0.5346580147743225, + "loss_ib": 0.006767437793314457, + "step": 302 + }, + { + "ce_ib": 19.750244140625, + "ce_orig": 1.176154613494873, + "epoch": 0.08685024085124739, + "kl_loss": 0.2657851576805115, + "loss_ib": 0.004632875788956881, + "step": 302 + }, + { + "ce_ib": 14.662118911743164, + "ce_orig": 1.080672264099121, + "epoch": 0.08713782443022504, + "kl_loss": 0.18392251431941986, + "loss_ib": 0.003305436810478568, + "step": 303 + }, + { + "ce_ib": 11.911545753479004, + "ce_orig": 0.9907505512237549, + "epoch": 0.08713782443022504, + "kl_loss": 0.18622635304927826, + "loss_ib": 0.0030534181278198957, + "step": 303 + }, + { + "ce_ib": 15.689916610717773, + "ce_orig": 1.1870077848434448, + "epoch": 0.08713782443022504, + "kl_loss": 0.39201515913009644, + "loss_ib": 0.005489143077284098, + "step": 303 + }, + { + "ce_ib": 12.059552192687988, + "ce_orig": 0.5444112420082092, + "epoch": 0.08713782443022504, + "kl_loss": 0.23845581710338593, + "loss_ib": 0.0035905134864151478, + "step": 303 + }, + { + "ce_ib": 14.907177925109863, + "ce_orig": 1.255871295928955, + "epoch": 0.08742540800920268, + "kl_loss": 0.22089144587516785, + "loss_ib": 0.003699632128700614, + "step": 304 + }, + { + "ce_ib": 15.632399559020996, + "ce_orig": 1.2641204595565796, + "epoch": 0.08742540800920268, + "kl_loss": 0.18549595773220062, + "loss_ib": 0.0034181992523372173, + "step": 304 + }, + { + "ce_ib": 11.078656196594238, + "ce_orig": 0.4686051607131958, + "epoch": 0.08742540800920268, + "kl_loss": 0.2145492285490036, + "loss_ib": 0.0032533579505980015, + "step": 304 + }, + { + "ce_ib": 16.48569107055664, + "ce_orig": 1.2514501810073853, + "epoch": 0.08742540800920268, + "kl_loss": 0.15880295634269714, + "loss_ib": 0.0032365985680371523, + "step": 304 + }, + { + "epoch": 0.08771299158818031, + "grad_norm": 0.08359609544277191, + "learning_rate": 4.761146496815287e-05, + "loss": 0.8481, + "step": 305 + }, + { + "ce_ib": 20.280155181884766, + "ce_orig": 1.9041812419891357, + "epoch": 0.08771299158818031, + "kl_loss": 0.2823646068572998, + "loss_ib": 0.004851661156862974, + "step": 305 + }, + { + "ce_ib": 8.3218994140625, + "ce_orig": 0.7794256806373596, + "epoch": 0.08771299158818031, + "kl_loss": 0.15604904294013977, + "loss_ib": 0.0023926803842186928, + "step": 305 + }, + { + "ce_ib": 11.23653793334961, + "ce_orig": 0.9274653792381287, + "epoch": 0.08771299158818031, + "kl_loss": 0.22056221961975098, + "loss_ib": 0.0033292758744210005, + "step": 305 + }, + { + "ce_ib": 15.99108600616455, + "ce_orig": 0.8273392915725708, + "epoch": 0.08771299158818031, + "kl_loss": 0.287705659866333, + "loss_ib": 0.004476164933294058, + "step": 305 + }, + { + "ce_ib": 12.447872161865234, + "ce_orig": 1.0994980335235596, + "epoch": 0.08800057516715795, + "kl_loss": 0.1934283971786499, + "loss_ib": 0.0031790712382644415, + "step": 306 + }, + { + "ce_ib": 13.02301025390625, + "ce_orig": 0.9902395009994507, + "epoch": 0.08800057516715795, + "kl_loss": 0.18311861157417297, + "loss_ib": 0.003133486956357956, + "step": 306 + }, + { + "ce_ib": 15.912498474121094, + "ce_orig": 0.8098648190498352, + "epoch": 0.08800057516715795, + "kl_loss": 0.302369087934494, + "loss_ib": 0.004614940844476223, + "step": 306 + }, + { + "ce_ib": 11.363813400268555, + "ce_orig": 0.7102072238922119, + "epoch": 0.08800057516715795, + "kl_loss": 0.21124617755413055, + "loss_ib": 0.0032488428987562656, + "step": 306 + }, + { + "ce_ib": 19.32331085205078, + "ce_orig": 1.889481544494629, + "epoch": 0.08828815874613559, + "kl_loss": 0.2169814109802246, + "loss_ib": 0.004102144856005907, + "step": 307 + }, + { + "ce_ib": 12.5094633102417, + "ce_orig": 0.49748650193214417, + "epoch": 0.08828815874613559, + "kl_loss": 0.19575588405132294, + "loss_ib": 0.0032085052225738764, + "step": 307 + }, + { + "ce_ib": 11.513327598571777, + "ce_orig": 0.46595466136932373, + "epoch": 0.08828815874613559, + "kl_loss": 0.22934874892234802, + "loss_ib": 0.0034448199439793825, + "step": 307 + }, + { + "ce_ib": 15.249246597290039, + "ce_orig": 0.3422534167766571, + "epoch": 0.08828815874613559, + "kl_loss": 0.20166319608688354, + "loss_ib": 0.0035415564198046923, + "step": 307 + }, + { + "ce_ib": 17.809545516967773, + "ce_orig": 1.352950096130371, + "epoch": 0.08857574232511324, + "kl_loss": 0.26321953535079956, + "loss_ib": 0.004413149785250425, + "step": 308 + }, + { + "ce_ib": 17.206872940063477, + "ce_orig": 1.341861605644226, + "epoch": 0.08857574232511324, + "kl_loss": 0.19497671723365784, + "loss_ib": 0.003670454490929842, + "step": 308 + }, + { + "ce_ib": 15.39387321472168, + "ce_orig": 0.6654926538467407, + "epoch": 0.08857574232511324, + "kl_loss": 0.25926852226257324, + "loss_ib": 0.004132072441279888, + "step": 308 + }, + { + "ce_ib": 14.418597221374512, + "ce_orig": 0.6665434837341309, + "epoch": 0.08857574232511324, + "kl_loss": 0.23031118512153625, + "loss_ib": 0.0037449717056006193, + "step": 308 + }, + { + "ce_ib": 14.938549995422363, + "ce_orig": 0.7124409079551697, + "epoch": 0.08886332590409088, + "kl_loss": 0.28744351863861084, + "loss_ib": 0.004368290305137634, + "step": 309 + }, + { + "ce_ib": 15.685905456542969, + "ce_orig": 1.2443615198135376, + "epoch": 0.08886332590409088, + "kl_loss": 0.29595404863357544, + "loss_ib": 0.004528130870312452, + "step": 309 + }, + { + "ce_ib": 15.15534496307373, + "ce_orig": 1.0202579498291016, + "epoch": 0.08886332590409088, + "kl_loss": 0.36547189950942993, + "loss_ib": 0.005170253571122885, + "step": 309 + }, + { + "ce_ib": 16.149023056030273, + "ce_orig": 1.1084511280059814, + "epoch": 0.08886332590409088, + "kl_loss": 0.20774857699871063, + "loss_ib": 0.0036923878360539675, + "step": 309 + }, + { + "epoch": 0.08915090948306852, + "grad_norm": 0.08066050708293915, + "learning_rate": 4.840764331210191e-05, + "loss": 0.9081, + "step": 310 + }, + { + "ce_ib": 11.725279808044434, + "ce_orig": 0.6793712973594666, + "epoch": 0.08915090948306852, + "kl_loss": 0.21743886172771454, + "loss_ib": 0.003346916288137436, + "step": 310 + }, + { + "ce_ib": 9.597118377685547, + "ce_orig": 0.5881115794181824, + "epoch": 0.08915090948306852, + "kl_loss": 0.20812270045280457, + "loss_ib": 0.0030409388709813356, + "step": 310 + }, + { + "ce_ib": 17.555280685424805, + "ce_orig": 1.6664210557937622, + "epoch": 0.08915090948306852, + "kl_loss": 0.3189018964767456, + "loss_ib": 0.004944546613842249, + "step": 310 + }, + { + "ce_ib": 10.35605239868164, + "ce_orig": 0.6129300594329834, + "epoch": 0.08915090948306852, + "kl_loss": 0.19374847412109375, + "loss_ib": 0.0029730896931141615, + "step": 310 + }, + { + "ce_ib": 10.367539405822754, + "ce_orig": 0.27700501680374146, + "epoch": 0.08943849306204615, + "kl_loss": 0.37522298097610474, + "loss_ib": 0.00478898361325264, + "step": 311 + }, + { + "ce_ib": 12.798125267028809, + "ce_orig": 0.35544443130493164, + "epoch": 0.08943849306204615, + "kl_loss": 0.21808908879756927, + "loss_ib": 0.003460703417658806, + "step": 311 + }, + { + "ce_ib": 18.546096801757812, + "ce_orig": 1.2737302780151367, + "epoch": 0.08943849306204615, + "kl_loss": 0.1624414622783661, + "loss_ib": 0.003479024162515998, + "step": 311 + }, + { + "ce_ib": 19.247314453125, + "ce_orig": 1.827391266822815, + "epoch": 0.08943849306204615, + "kl_loss": 0.24330879747867584, + "loss_ib": 0.0043578194454312325, + "step": 311 + }, + { + "ce_ib": 12.14682674407959, + "ce_orig": 0.4805859923362732, + "epoch": 0.08972607664102379, + "kl_loss": 0.2552676796913147, + "loss_ib": 0.003767359536141157, + "step": 312 + }, + { + "ce_ib": 13.024700164794922, + "ce_orig": 0.9543373584747314, + "epoch": 0.08972607664102379, + "kl_loss": 0.2680973708629608, + "loss_ib": 0.0039834436029195786, + "step": 312 + }, + { + "ce_ib": 15.072936058044434, + "ce_orig": 1.2514426708221436, + "epoch": 0.08972607664102379, + "kl_loss": 0.19017720222473145, + "loss_ib": 0.0034090655390173197, + "step": 312 + }, + { + "ce_ib": 11.06240177154541, + "ce_orig": 0.6679608225822449, + "epoch": 0.08972607664102379, + "kl_loss": 0.17655614018440247, + "loss_ib": 0.0028718013782054186, + "step": 312 + }, + { + "ce_ib": 11.684070587158203, + "ce_orig": 0.5835570096969604, + "epoch": 0.09001366022000144, + "kl_loss": 0.18113084137439728, + "loss_ib": 0.0029797153547406197, + "step": 313 + }, + { + "ce_ib": 15.398133277893066, + "ce_orig": 0.5319973230361938, + "epoch": 0.09001366022000144, + "kl_loss": 0.2180468738079071, + "loss_ib": 0.0037202818784862757, + "step": 313 + }, + { + "ce_ib": 18.00635528564453, + "ce_orig": 1.5159127712249756, + "epoch": 0.09001366022000144, + "kl_loss": 0.25306442379951477, + "loss_ib": 0.004331279546022415, + "step": 313 + }, + { + "ce_ib": 16.58292007446289, + "ce_orig": 1.6001865863800049, + "epoch": 0.09001366022000144, + "kl_loss": 0.2032628208398819, + "loss_ib": 0.0036909200716763735, + "step": 313 + }, + { + "ce_ib": 9.98106575012207, + "ce_orig": 0.5567405223846436, + "epoch": 0.09030124379897908, + "kl_loss": 0.18281474709510803, + "loss_ib": 0.002826254116371274, + "step": 314 + }, + { + "ce_ib": 14.181166648864746, + "ce_orig": 0.7332763075828552, + "epoch": 0.09030124379897908, + "kl_loss": 0.2009732574224472, + "loss_ib": 0.0034278493840247393, + "step": 314 + }, + { + "ce_ib": 14.160673141479492, + "ce_orig": 1.0669901371002197, + "epoch": 0.09030124379897908, + "kl_loss": 0.1853453665971756, + "loss_ib": 0.0032695208210498095, + "step": 314 + }, + { + "ce_ib": 13.65168285369873, + "ce_orig": 0.6072067618370056, + "epoch": 0.09030124379897908, + "kl_loss": 0.2821107506752014, + "loss_ib": 0.004186275415122509, + "step": 314 + }, + { + "epoch": 0.09058882737795672, + "grad_norm": 0.0828433707356453, + "learning_rate": 4.920382165605096e-05, + "loss": 0.9224, + "step": 315 + }, + { + "ce_ib": 17.264680862426758, + "ce_orig": 1.208104133605957, + "epoch": 0.09058882737795672, + "kl_loss": 0.2404087483882904, + "loss_ib": 0.0041305553168058395, + "step": 315 + }, + { + "ce_ib": 9.675837516784668, + "ce_orig": 0.6803741455078125, + "epoch": 0.09058882737795672, + "kl_loss": 0.17154952883720398, + "loss_ib": 0.002683078870177269, + "step": 315 + }, + { + "ce_ib": 7.8654890060424805, + "ce_orig": 0.27281931042671204, + "epoch": 0.09058882737795672, + "kl_loss": 0.3646223545074463, + "loss_ib": 0.004432772286236286, + "step": 315 + }, + { + "ce_ib": 10.074167251586914, + "ce_orig": 0.7812896370887756, + "epoch": 0.09058882737795672, + "kl_loss": 0.1766500473022461, + "loss_ib": 0.002773917280137539, + "step": 315 + }, + { + "ce_ib": 9.807964324951172, + "ce_orig": 0.7548955082893372, + "epoch": 0.09087641095693436, + "kl_loss": 0.15925069153308868, + "loss_ib": 0.0025733031798154116, + "step": 316 + }, + { + "ce_ib": 12.453634262084961, + "ce_orig": 0.3912903964519501, + "epoch": 0.09087641095693436, + "kl_loss": 0.28723639249801636, + "loss_ib": 0.004117727279663086, + "step": 316 + }, + { + "ce_ib": 15.324727058410645, + "ce_orig": 0.6814879179000854, + "epoch": 0.09087641095693436, + "kl_loss": 0.2591487467288971, + "loss_ib": 0.0041239601559937, + "step": 316 + }, + { + "ce_ib": 14.351997375488281, + "ce_orig": 1.1507986783981323, + "epoch": 0.09087641095693436, + "kl_loss": 0.1707887053489685, + "loss_ib": 0.003143086563795805, + "step": 316 + }, + { + "ce_ib": 11.927781105041504, + "ce_orig": 0.5891753435134888, + "epoch": 0.091163994535912, + "kl_loss": 0.2667624056339264, + "loss_ib": 0.0038604019209742546, + "step": 317 + }, + { + "ce_ib": 13.470232009887695, + "ce_orig": 0.6153560280799866, + "epoch": 0.091163994535912, + "kl_loss": 0.14252835512161255, + "loss_ib": 0.002772306790575385, + "step": 317 + }, + { + "ce_ib": 14.50222396850586, + "ce_orig": 0.888332724571228, + "epoch": 0.091163994535912, + "kl_loss": 0.16025802493095398, + "loss_ib": 0.003052802523598075, + "step": 317 + }, + { + "ce_ib": 16.769819259643555, + "ce_orig": 1.285980463027954, + "epoch": 0.091163994535912, + "kl_loss": 0.21203196048736572, + "loss_ib": 0.003797301556915045, + "step": 317 + }, + { + "ce_ib": 18.3016414642334, + "ce_orig": 0.6251075863838196, + "epoch": 0.09145157811488965, + "kl_loss": 0.22406277060508728, + "loss_ib": 0.00407079141587019, + "step": 318 + }, + { + "ce_ib": 13.008817672729492, + "ce_orig": 0.5530975461006165, + "epoch": 0.09145157811488965, + "kl_loss": 0.20128408074378967, + "loss_ib": 0.0033137225545942783, + "step": 318 + }, + { + "ce_ib": 16.728418350219727, + "ce_orig": 1.2118330001831055, + "epoch": 0.09145157811488965, + "kl_loss": 0.23463211953639984, + "loss_ib": 0.004019163083285093, + "step": 318 + }, + { + "ce_ib": 11.389937400817871, + "ce_orig": 0.665806770324707, + "epoch": 0.09145157811488965, + "kl_loss": 0.44625556468963623, + "loss_ib": 0.005601549055427313, + "step": 318 + }, + { + "ce_ib": 17.052478790283203, + "ce_orig": 1.3070660829544067, + "epoch": 0.09173916169386728, + "kl_loss": 0.22904498875141144, + "loss_ib": 0.003995697479695082, + "step": 319 + }, + { + "ce_ib": 16.69203758239746, + "ce_orig": 0.9425002336502075, + "epoch": 0.09173916169386728, + "kl_loss": 0.4092777371406555, + "loss_ib": 0.005761981010437012, + "step": 319 + }, + { + "ce_ib": 9.762811660766602, + "ce_orig": 0.8007186651229858, + "epoch": 0.09173916169386728, + "kl_loss": 0.19042058289051056, + "loss_ib": 0.0028804868925362825, + "step": 319 + }, + { + "ce_ib": 15.089471817016602, + "ce_orig": 0.9387741088867188, + "epoch": 0.09173916169386728, + "kl_loss": 0.1873193383216858, + "loss_ib": 0.0033821403048932552, + "step": 319 + }, + { + "epoch": 0.09202674527284492, + "grad_norm": 0.09044753015041351, + "learning_rate": 5e-05, + "loss": 0.8929, + "step": 320 + }, + { + "ce_ib": 8.850724220275879, + "ce_orig": 0.6556456089019775, + "epoch": 0.09202674527284492, + "kl_loss": 0.12834425270557404, + "loss_ib": 0.002168514998629689, + "step": 320 + }, + { + "ce_ib": 14.284525871276855, + "ce_orig": 1.2114744186401367, + "epoch": 0.09202674527284492, + "kl_loss": 0.2224428355693817, + "loss_ib": 0.0036528806667774916, + "step": 320 + }, + { + "ce_ib": 16.129127502441406, + "ce_orig": 0.861198902130127, + "epoch": 0.09202674527284492, + "kl_loss": 0.1843017041683197, + "loss_ib": 0.0034559296909719706, + "step": 320 + }, + { + "ce_ib": 13.054864883422852, + "ce_orig": 0.7228923439979553, + "epoch": 0.09202674527284492, + "kl_loss": 0.20991846919059753, + "loss_ib": 0.003404670860618353, + "step": 320 + }, + { + "ce_ib": 14.091462135314941, + "ce_orig": 0.6277745962142944, + "epoch": 0.09231432885182256, + "kl_loss": 0.29013434052467346, + "loss_ib": 0.004310489632189274, + "step": 321 + }, + { + "ce_ib": 11.505946159362793, + "ce_orig": 0.8680534958839417, + "epoch": 0.09231432885182256, + "kl_loss": 0.19910269975662231, + "loss_ib": 0.003141621593385935, + "step": 321 + }, + { + "ce_ib": 15.06042194366455, + "ce_orig": 1.2571470737457275, + "epoch": 0.09231432885182256, + "kl_loss": 0.18096956610679626, + "loss_ib": 0.0033157377038151026, + "step": 321 + }, + { + "ce_ib": 14.149365425109863, + "ce_orig": 0.4702112078666687, + "epoch": 0.09231432885182256, + "kl_loss": 0.24852751195430756, + "loss_ib": 0.0039002113044261932, + "step": 321 + }, + { + "ce_ib": 21.1373348236084, + "ce_orig": 1.5176701545715332, + "epoch": 0.0926019124308002, + "kl_loss": 0.2831588089466095, + "loss_ib": 0.004945321474224329, + "step": 322 + }, + { + "ce_ib": 14.581371307373047, + "ce_orig": 0.9066507816314697, + "epoch": 0.0926019124308002, + "kl_loss": 0.4094810485839844, + "loss_ib": 0.005552947521209717, + "step": 322 + }, + { + "ce_ib": 9.24908447265625, + "ce_orig": 0.6094862818717957, + "epoch": 0.0926019124308002, + "kl_loss": 0.16086669266223907, + "loss_ib": 0.002533575287088752, + "step": 322 + }, + { + "ce_ib": 12.565040588378906, + "ce_orig": 0.8015382289886475, + "epoch": 0.0926019124308002, + "kl_loss": 0.27496886253356934, + "loss_ib": 0.0040061925537884235, + "step": 322 + }, + { + "ce_ib": 11.96411418914795, + "ce_orig": 0.6789979934692383, + "epoch": 0.09288949600977785, + "kl_loss": 0.14112810790538788, + "loss_ib": 0.0026076924987137318, + "step": 323 + }, + { + "ce_ib": 14.88967227935791, + "ce_orig": 0.8319886326789856, + "epoch": 0.09288949600977785, + "kl_loss": 0.2537601888179779, + "loss_ib": 0.004026568960398436, + "step": 323 + }, + { + "ce_ib": 10.79393482208252, + "ce_orig": 0.5163490176200867, + "epoch": 0.09288949600977785, + "kl_loss": 0.1722070872783661, + "loss_ib": 0.002801464172080159, + "step": 323 + }, + { + "ce_ib": 10.035134315490723, + "ce_orig": 0.5581719875335693, + "epoch": 0.09288949600977785, + "kl_loss": 0.2455272376537323, + "loss_ib": 0.003458785591647029, + "step": 323 + }, + { + "ce_ib": 15.038890838623047, + "ce_orig": 1.1411057710647583, + "epoch": 0.09317707958875548, + "kl_loss": 0.2383771389722824, + "loss_ib": 0.0038876603357493877, + "step": 324 + }, + { + "ce_ib": 11.690932273864746, + "ce_orig": 0.6252244114875793, + "epoch": 0.09317707958875548, + "kl_loss": 0.1746075451374054, + "loss_ib": 0.002915168646723032, + "step": 324 + }, + { + "ce_ib": 14.972967147827148, + "ce_orig": 1.1634771823883057, + "epoch": 0.09317707958875548, + "kl_loss": 0.24864430725574493, + "loss_ib": 0.003983739297837019, + "step": 324 + }, + { + "ce_ib": 13.491129875183105, + "ce_orig": 0.9431242346763611, + "epoch": 0.09317707958875548, + "kl_loss": 0.19168300926685333, + "loss_ib": 0.0032659429125487804, + "step": 324 + }, + { + "epoch": 0.09346466316773312, + "grad_norm": 0.07285797595977783, + "learning_rate": 4.999996988459869e-05, + "loss": 0.9029, + "step": 325 + }, + { + "ce_ib": 10.874665260314941, + "ce_orig": 0.8838172554969788, + "epoch": 0.09346466316773312, + "kl_loss": 0.21639001369476318, + "loss_ib": 0.00325136655010283, + "step": 325 + }, + { + "ce_ib": 12.071526527404785, + "ce_orig": 0.715691089630127, + "epoch": 0.09346466316773312, + "kl_loss": 0.17473536729812622, + "loss_ib": 0.002954506315290928, + "step": 325 + }, + { + "ce_ib": 9.097990036010742, + "ce_orig": 0.6916231513023376, + "epoch": 0.09346466316773312, + "kl_loss": 0.14074796438217163, + "loss_ib": 0.0023172786459326744, + "step": 325 + }, + { + "ce_ib": 7.898350238800049, + "ce_orig": 0.7045942544937134, + "epoch": 0.09346466316773312, + "kl_loss": 0.19722947478294373, + "loss_ib": 0.0027621297631412745, + "step": 325 + }, + { + "ce_ib": 19.925533294677734, + "ce_orig": 1.8780890703201294, + "epoch": 0.09375224674671076, + "kl_loss": 0.29034847021102905, + "loss_ib": 0.004896038211882114, + "step": 326 + }, + { + "ce_ib": 11.936103820800781, + "ce_orig": 0.8948700428009033, + "epoch": 0.09375224674671076, + "kl_loss": 0.22047904133796692, + "loss_ib": 0.003398400731384754, + "step": 326 + }, + { + "ce_ib": 14.651416778564453, + "ce_orig": 1.5577762126922607, + "epoch": 0.09375224674671076, + "kl_loss": 0.2572871744632721, + "loss_ib": 0.004038013052195311, + "step": 326 + }, + { + "ce_ib": 14.2105073928833, + "ce_orig": 0.6887364983558655, + "epoch": 0.09375224674671076, + "kl_loss": 0.1484694480895996, + "loss_ib": 0.0029057450592517853, + "step": 326 + }, + { + "ce_ib": 9.847541809082031, + "ce_orig": 0.7175891995429993, + "epoch": 0.0940398303256884, + "kl_loss": 0.251788467168808, + "loss_ib": 0.003502638777717948, + "step": 327 + }, + { + "ce_ib": 8.781983375549316, + "ce_orig": 0.6928913593292236, + "epoch": 0.0940398303256884, + "kl_loss": 0.1348564177751541, + "loss_ib": 0.0022267624735832214, + "step": 327 + }, + { + "ce_ib": 11.540362358093262, + "ce_orig": 0.7704603672027588, + "epoch": 0.0940398303256884, + "kl_loss": 0.16489502787590027, + "loss_ib": 0.002802986418828368, + "step": 327 + }, + { + "ce_ib": 13.85096263885498, + "ce_orig": 0.8789340853691101, + "epoch": 0.0940398303256884, + "kl_loss": 0.17469263076782227, + "loss_ib": 0.003132022451609373, + "step": 327 + }, + { + "ce_ib": 9.01992130279541, + "ce_orig": 0.39120611548423767, + "epoch": 0.09432741390466605, + "kl_loss": 0.17137807607650757, + "loss_ib": 0.0026157726533710957, + "step": 328 + }, + { + "ce_ib": 12.827068328857422, + "ce_orig": 0.6231464147567749, + "epoch": 0.09432741390466605, + "kl_loss": 0.24973925948143005, + "loss_ib": 0.003780099330469966, + "step": 328 + }, + { + "ce_ib": 13.935663223266602, + "ce_orig": 0.9439969062805176, + "epoch": 0.09432741390466605, + "kl_loss": 0.2583736777305603, + "loss_ib": 0.003977302927523851, + "step": 328 + }, + { + "ce_ib": 15.863046646118164, + "ce_orig": 1.0275061130523682, + "epoch": 0.09432741390466605, + "kl_loss": 0.22072093188762665, + "loss_ib": 0.00379351363517344, + "step": 328 + }, + { + "ce_ib": 16.495813369750977, + "ce_orig": 1.1426280736923218, + "epoch": 0.09461499748364369, + "kl_loss": 0.1818699985742569, + "loss_ib": 0.0034682813566178083, + "step": 329 + }, + { + "ce_ib": 12.4020357131958, + "ce_orig": 0.8381017446517944, + "epoch": 0.09461499748364369, + "kl_loss": 0.16994866728782654, + "loss_ib": 0.0029396903701126575, + "step": 329 + }, + { + "ce_ib": 10.978039741516113, + "ce_orig": 0.6018507480621338, + "epoch": 0.09461499748364369, + "kl_loss": 0.1774011105298996, + "loss_ib": 0.0028718148823827505, + "step": 329 + }, + { + "ce_ib": 13.26439380645752, + "ce_orig": 0.8519594669342041, + "epoch": 0.09461499748364369, + "kl_loss": 0.1966477632522583, + "loss_ib": 0.003292917041108012, + "step": 329 + }, + { + "epoch": 0.09490258106262132, + "grad_norm": 0.08392878621816635, + "learning_rate": 4.9999879538467306e-05, + "loss": 0.9175, + "step": 330 + }, + { + "ce_ib": 14.5608549118042, + "ce_orig": 1.4586288928985596, + "epoch": 0.09490258106262132, + "kl_loss": 0.220241978764534, + "loss_ib": 0.0036585049238055944, + "step": 330 + }, + { + "ce_ib": 16.367464065551758, + "ce_orig": 0.7915551662445068, + "epoch": 0.09490258106262132, + "kl_loss": 0.22915303707122803, + "loss_ib": 0.003928276710212231, + "step": 330 + }, + { + "ce_ib": 13.064582824707031, + "ce_orig": 0.6998893618583679, + "epoch": 0.09490258106262132, + "kl_loss": 0.38041651248931885, + "loss_ib": 0.005110623314976692, + "step": 330 + }, + { + "ce_ib": 11.392269134521484, + "ce_orig": 0.36753031611442566, + "epoch": 0.09490258106262132, + "kl_loss": 0.4851597547531128, + "loss_ib": 0.0059908246621489525, + "step": 330 + }, + { + "ce_ib": 12.86525821685791, + "ce_orig": 0.7186346054077148, + "epoch": 0.09519016464159896, + "kl_loss": 0.15524542331695557, + "loss_ib": 0.0028389799408614635, + "step": 331 + }, + { + "ce_ib": 14.1820707321167, + "ce_orig": 0.8073091506958008, + "epoch": 0.09519016464159896, + "kl_loss": 0.22929759323596954, + "loss_ib": 0.003711183089762926, + "step": 331 + }, + { + "ce_ib": 12.15315055847168, + "ce_orig": 0.5545368194580078, + "epoch": 0.09519016464159896, + "kl_loss": 0.22285009920597076, + "loss_ib": 0.003443815978243947, + "step": 331 + }, + { + "ce_ib": 12.464353561401367, + "ce_orig": 0.864552915096283, + "epoch": 0.09519016464159896, + "kl_loss": 0.2674625515937805, + "loss_ib": 0.00392106082290411, + "step": 331 + }, + { + "ce_ib": 8.458319664001465, + "ce_orig": 0.49817538261413574, + "epoch": 0.0954777482205766, + "kl_loss": 0.20442625880241394, + "loss_ib": 0.002890094416216016, + "step": 332 + }, + { + "ce_ib": 15.385281562805176, + "ce_orig": 1.422017216682434, + "epoch": 0.0954777482205766, + "kl_loss": 0.25458666682243347, + "loss_ib": 0.004084394313395023, + "step": 332 + }, + { + "ce_ib": 13.032305717468262, + "ce_orig": 0.5466614365577698, + "epoch": 0.0954777482205766, + "kl_loss": 0.2672412097454071, + "loss_ib": 0.003975642379373312, + "step": 332 + }, + { + "ce_ib": 12.244396209716797, + "ce_orig": 0.4979858100414276, + "epoch": 0.0954777482205766, + "kl_loss": 0.1899423450231552, + "loss_ib": 0.0031238629017025232, + "step": 332 + }, + { + "ce_ib": 9.587015151977539, + "ce_orig": 0.3958915174007416, + "epoch": 0.09576533179955425, + "kl_loss": 0.31766587495803833, + "loss_ib": 0.004135360009968281, + "step": 333 + }, + { + "ce_ib": 16.561017990112305, + "ce_orig": 1.007829189300537, + "epoch": 0.09576533179955425, + "kl_loss": 0.20323669910430908, + "loss_ib": 0.0036884688306599855, + "step": 333 + }, + { + "ce_ib": 13.782530784606934, + "ce_orig": 0.8161399960517883, + "epoch": 0.09576533179955425, + "kl_loss": 0.2164289653301239, + "loss_ib": 0.00354254269041121, + "step": 333 + }, + { + "ce_ib": 9.973050117492676, + "ce_orig": 0.6706444025039673, + "epoch": 0.09576533179955425, + "kl_loss": 0.20162354409694672, + "loss_ib": 0.0030135405249893665, + "step": 333 + }, + { + "ce_ib": 12.195393562316895, + "ce_orig": 0.613254964351654, + "epoch": 0.09605291537853189, + "kl_loss": 0.2231156826019287, + "loss_ib": 0.003450696123763919, + "step": 334 + }, + { + "ce_ib": 8.084924697875977, + "ce_orig": 0.27541494369506836, + "epoch": 0.09605291537853189, + "kl_loss": 0.5247204303741455, + "loss_ib": 0.006055696401745081, + "step": 334 + }, + { + "ce_ib": 11.561151504516602, + "ce_orig": 0.5130017995834351, + "epoch": 0.09605291537853189, + "kl_loss": 0.15488451719284058, + "loss_ib": 0.0027049602940678596, + "step": 334 + }, + { + "ce_ib": 12.512232780456543, + "ce_orig": 0.6367411613464355, + "epoch": 0.09605291537853189, + "kl_loss": 0.2246486246585846, + "loss_ib": 0.003497709520161152, + "step": 334 + }, + { + "epoch": 0.09634049895750953, + "grad_norm": 0.07655756175518036, + "learning_rate": 4.999972896182352e-05, + "loss": 0.8394, + "step": 335 + }, + { + "ce_ib": 12.717463493347168, + "ce_orig": 0.9400395750999451, + "epoch": 0.09634049895750953, + "kl_loss": 0.17612457275390625, + "loss_ib": 0.0030329918954521418, + "step": 335 + }, + { + "ce_ib": 12.413676261901855, + "ce_orig": 0.973748505115509, + "epoch": 0.09634049895750953, + "kl_loss": 0.24444803595542908, + "loss_ib": 0.0036858480889350176, + "step": 335 + }, + { + "ce_ib": 14.029793739318848, + "ce_orig": 0.7324392795562744, + "epoch": 0.09634049895750953, + "kl_loss": 0.18520355224609375, + "loss_ib": 0.003255015006288886, + "step": 335 + }, + { + "ce_ib": 15.990370750427246, + "ce_orig": 0.6844960451126099, + "epoch": 0.09634049895750953, + "kl_loss": 0.27585840225219727, + "loss_ib": 0.004357621073722839, + "step": 335 + }, + { + "ce_ib": 14.750761985778809, + "ce_orig": 1.194319725036621, + "epoch": 0.09662808253648716, + "kl_loss": 0.67383873462677, + "loss_ib": 0.008213463239371777, + "step": 336 + }, + { + "ce_ib": 17.5143985748291, + "ce_orig": 1.5835182666778564, + "epoch": 0.09662808253648716, + "kl_loss": 0.2557618021965027, + "loss_ib": 0.0043090577237308025, + "step": 336 + }, + { + "ce_ib": 17.389286041259766, + "ce_orig": 1.3591183423995972, + "epoch": 0.09662808253648716, + "kl_loss": 0.27419230341911316, + "loss_ib": 0.004480851348489523, + "step": 336 + }, + { + "ce_ib": 9.241145133972168, + "ce_orig": 0.607307493686676, + "epoch": 0.09662808253648716, + "kl_loss": 0.11753670126199722, + "loss_ib": 0.002099481411278248, + "step": 336 + }, + { + "ce_ib": 15.914252281188965, + "ce_orig": 1.711224913597107, + "epoch": 0.0969156661154648, + "kl_loss": 0.2505919933319092, + "loss_ib": 0.004097345285117626, + "step": 337 + }, + { + "ce_ib": 14.283632278442383, + "ce_orig": 0.9926325082778931, + "epoch": 0.0969156661154648, + "kl_loss": 0.22402063012123108, + "loss_ib": 0.0036685692612081766, + "step": 337 + }, + { + "ce_ib": 16.994945526123047, + "ce_orig": 0.8979167938232422, + "epoch": 0.0969156661154648, + "kl_loss": 0.22958716750144958, + "loss_ib": 0.003995365928858519, + "step": 337 + }, + { + "ce_ib": 15.639780044555664, + "ce_orig": 0.5237170457839966, + "epoch": 0.0969156661154648, + "kl_loss": 0.2666119337081909, + "loss_ib": 0.004230096936225891, + "step": 337 + }, + { + "ce_ib": 18.789344787597656, + "ce_orig": 1.6600208282470703, + "epoch": 0.09720324969444245, + "kl_loss": 0.2622734010219574, + "loss_ib": 0.004501668270677328, + "step": 338 + }, + { + "ce_ib": 14.16718578338623, + "ce_orig": 0.8235701322555542, + "epoch": 0.09720324969444245, + "kl_loss": 0.1999812126159668, + "loss_ib": 0.0034165303222835064, + "step": 338 + }, + { + "ce_ib": 15.722373008728027, + "ce_orig": 0.8121756315231323, + "epoch": 0.09720324969444245, + "kl_loss": 0.17169177532196045, + "loss_ib": 0.003289154963567853, + "step": 338 + }, + { + "ce_ib": 12.625021934509277, + "ce_orig": 0.4633500277996063, + "epoch": 0.09720324969444245, + "kl_loss": 0.17351466417312622, + "loss_ib": 0.0029976486694067717, + "step": 338 + }, + { + "ce_ib": 14.416272163391113, + "ce_orig": 0.5611670613288879, + "epoch": 0.09749083327342009, + "kl_loss": 0.29108044505119324, + "loss_ib": 0.004352431278675795, + "step": 339 + }, + { + "ce_ib": 10.808735847473145, + "ce_orig": 0.768107533454895, + "epoch": 0.09749083327342009, + "kl_loss": 0.22904689610004425, + "loss_ib": 0.0033713423181325197, + "step": 339 + }, + { + "ce_ib": 17.312829971313477, + "ce_orig": 1.2763899564743042, + "epoch": 0.09749083327342009, + "kl_loss": 0.19993865489959717, + "loss_ib": 0.003730669617652893, + "step": 339 + }, + { + "ce_ib": 17.75509262084961, + "ce_orig": 1.1115076541900635, + "epoch": 0.09749083327342009, + "kl_loss": 0.26826444268226624, + "loss_ib": 0.004458153620362282, + "step": 339 + }, + { + "epoch": 0.09777841685239773, + "grad_norm": 0.07021026313304901, + "learning_rate": 4.999951815503011e-05, + "loss": 0.8976, + "step": 340 + }, + { + "ce_ib": 7.319950103759766, + "ce_orig": 0.25490763783454895, + "epoch": 0.09777841685239773, + "kl_loss": 0.4792310297489166, + "loss_ib": 0.005524305161088705, + "step": 340 + }, + { + "ce_ib": 14.642142295837402, + "ce_orig": 0.5069236755371094, + "epoch": 0.09777841685239773, + "kl_loss": 0.23783719539642334, + "loss_ib": 0.003842586185783148, + "step": 340 + }, + { + "ce_ib": 9.56615924835205, + "ce_orig": 0.686457633972168, + "epoch": 0.09777841685239773, + "kl_loss": 0.14670798182487488, + "loss_ib": 0.002423695521429181, + "step": 340 + }, + { + "ce_ib": 15.014992713928223, + "ce_orig": 0.9454907178878784, + "epoch": 0.09777841685239773, + "kl_loss": 0.18971547484397888, + "loss_ib": 0.003398653818294406, + "step": 340 + }, + { + "ce_ib": 13.204034805297852, + "ce_orig": 0.6687142252922058, + "epoch": 0.09806600043137537, + "kl_loss": 0.286098837852478, + "loss_ib": 0.004181392025202513, + "step": 341 + }, + { + "ce_ib": 15.514037132263184, + "ce_orig": 0.5012982487678528, + "epoch": 0.09806600043137537, + "kl_loss": 0.2910040020942688, + "loss_ib": 0.004461443517357111, + "step": 341 + }, + { + "ce_ib": 12.121696472167969, + "ce_orig": 0.6653417348861694, + "epoch": 0.09806600043137537, + "kl_loss": 0.24229881167411804, + "loss_ib": 0.003635157598182559, + "step": 341 + }, + { + "ce_ib": 20.12578773498535, + "ce_orig": 0.7655023336410522, + "epoch": 0.09806600043137537, + "kl_loss": 0.22568227350711823, + "loss_ib": 0.004269401542842388, + "step": 341 + }, + { + "ce_ib": 9.653379440307617, + "ce_orig": 0.7402390837669373, + "epoch": 0.098353584010353, + "kl_loss": 0.1966065615415573, + "loss_ib": 0.002931403461843729, + "step": 342 + }, + { + "ce_ib": 12.452383995056152, + "ce_orig": 0.8609440326690674, + "epoch": 0.098353584010353, + "kl_loss": 0.14747576415538788, + "loss_ib": 0.0027199957985430956, + "step": 342 + }, + { + "ce_ib": 11.025045394897461, + "ce_orig": 0.6533346772193909, + "epoch": 0.098353584010353, + "kl_loss": 0.16730177402496338, + "loss_ib": 0.002775522181764245, + "step": 342 + }, + { + "ce_ib": 16.325525283813477, + "ce_orig": 1.0561354160308838, + "epoch": 0.098353584010353, + "kl_loss": 0.18068361282348633, + "loss_ib": 0.003439388470724225, + "step": 342 + }, + { + "ce_ib": 17.46106719970703, + "ce_orig": 1.6512928009033203, + "epoch": 0.09864116758933066, + "kl_loss": 0.4481199383735657, + "loss_ib": 0.006227306090295315, + "step": 343 + }, + { + "ce_ib": 16.864215850830078, + "ce_orig": 1.4105547666549683, + "epoch": 0.09864116758933066, + "kl_loss": 0.22357934713363647, + "loss_ib": 0.003922215197235346, + "step": 343 + }, + { + "ce_ib": 9.996209144592285, + "ce_orig": 0.7162432670593262, + "epoch": 0.09864116758933066, + "kl_loss": 0.1958695501089096, + "loss_ib": 0.00295831635594368, + "step": 343 + }, + { + "ce_ib": 8.547676086425781, + "ce_orig": 0.41816598176956177, + "epoch": 0.09864116758933066, + "kl_loss": 0.1747799813747406, + "loss_ib": 0.002602567430585623, + "step": 343 + }, + { + "ce_ib": 12.569132804870605, + "ce_orig": 0.5451200008392334, + "epoch": 0.0989287511683083, + "kl_loss": 0.22563554346561432, + "loss_ib": 0.0035132686607539654, + "step": 344 + }, + { + "ce_ib": 12.415586471557617, + "ce_orig": 0.7706530690193176, + "epoch": 0.0989287511683083, + "kl_loss": 0.1709377020597458, + "loss_ib": 0.002950935624539852, + "step": 344 + }, + { + "ce_ib": 19.655101776123047, + "ce_orig": 1.3017544746398926, + "epoch": 0.0989287511683083, + "kl_loss": 0.29058775305747986, + "loss_ib": 0.004871387500315905, + "step": 344 + }, + { + "ce_ib": 17.928071975708008, + "ce_orig": 1.330518126487732, + "epoch": 0.0989287511683083, + "kl_loss": 0.23996703326702118, + "loss_ib": 0.004192477557808161, + "step": 344 + }, + { + "epoch": 0.09921633474728593, + "grad_norm": 0.0755721926689148, + "learning_rate": 4.999924711859495e-05, + "loss": 0.8515, + "step": 345 + }, + { + "ce_ib": 14.76935863494873, + "ce_orig": 1.2034357786178589, + "epoch": 0.09921633474728593, + "kl_loss": 0.22408181428909302, + "loss_ib": 0.003717753803357482, + "step": 345 + }, + { + "ce_ib": 11.515120506286621, + "ce_orig": 0.7314639687538147, + "epoch": 0.09921633474728593, + "kl_loss": 0.19642382860183716, + "loss_ib": 0.003115750150755048, + "step": 345 + }, + { + "ce_ib": 11.988049507141113, + "ce_orig": 0.5762550830841064, + "epoch": 0.09921633474728593, + "kl_loss": 0.25246232748031616, + "loss_ib": 0.0037234281189739704, + "step": 345 + }, + { + "ce_ib": 13.44121265411377, + "ce_orig": 0.9119555950164795, + "epoch": 0.09921633474728593, + "kl_loss": 0.19339382648468018, + "loss_ib": 0.003278059186413884, + "step": 345 + }, + { + "ce_ib": 16.88981819152832, + "ce_orig": 1.815802812576294, + "epoch": 0.09950391832626357, + "kl_loss": 0.22369977831840515, + "loss_ib": 0.003925979603081942, + "step": 346 + }, + { + "ce_ib": 11.10827350616455, + "ce_orig": 0.8919088244438171, + "epoch": 0.09950391832626357, + "kl_loss": 0.20008933544158936, + "loss_ib": 0.0031117205508053303, + "step": 346 + }, + { + "ce_ib": 16.906099319458008, + "ce_orig": 1.4990653991699219, + "epoch": 0.09950391832626357, + "kl_loss": 0.19835729897022247, + "loss_ib": 0.0036741828080266714, + "step": 346 + }, + { + "ce_ib": 8.436453819274902, + "ce_orig": 0.6757850050926208, + "epoch": 0.09950391832626357, + "kl_loss": 0.16680480539798737, + "loss_ib": 0.00251169316470623, + "step": 346 + }, + { + "ce_ib": 9.981969833374023, + "ce_orig": 0.7898837327957153, + "epoch": 0.0997915019052412, + "kl_loss": 0.13752737641334534, + "loss_ib": 0.0023734706919640303, + "step": 347 + }, + { + "ce_ib": 12.792706489562988, + "ce_orig": 0.8162432909011841, + "epoch": 0.0997915019052412, + "kl_loss": 0.14307913184165955, + "loss_ib": 0.0027100618463009596, + "step": 347 + }, + { + "ce_ib": 12.92000675201416, + "ce_orig": 1.0188729763031006, + "epoch": 0.0997915019052412, + "kl_loss": 0.18868780136108398, + "loss_ib": 0.0031788786873221397, + "step": 347 + }, + { + "ce_ib": 17.870426177978516, + "ce_orig": 1.1175206899642944, + "epoch": 0.0997915019052412, + "kl_loss": 0.2027907520532608, + "loss_ib": 0.0038149498868733644, + "step": 347 + }, + { + "ce_ib": 15.885435104370117, + "ce_orig": 0.41733163595199585, + "epoch": 0.10007908548421886, + "kl_loss": 0.20918244123458862, + "loss_ib": 0.0036803679540753365, + "step": 348 + }, + { + "ce_ib": 12.231342315673828, + "ce_orig": 0.48961135745048523, + "epoch": 0.10007908548421886, + "kl_loss": 0.20116449892520905, + "loss_ib": 0.003234779229387641, + "step": 348 + }, + { + "ce_ib": 9.306249618530273, + "ce_orig": 0.6630443334579468, + "epoch": 0.10007908548421886, + "kl_loss": 0.1751163899898529, + "loss_ib": 0.002681788755580783, + "step": 348 + }, + { + "ce_ib": 11.609100341796875, + "ce_orig": 0.879733145236969, + "epoch": 0.10007908548421886, + "kl_loss": 0.41010767221450806, + "loss_ib": 0.005261986516416073, + "step": 348 + }, + { + "ce_ib": 10.676206588745117, + "ce_orig": 0.6579650640487671, + "epoch": 0.1003666690631965, + "kl_loss": 0.2183065414428711, + "loss_ib": 0.003250685753300786, + "step": 349 + }, + { + "ce_ib": 11.89196491241455, + "ce_orig": 0.8634635806083679, + "epoch": 0.1003666690631965, + "kl_loss": 0.21364440023899078, + "loss_ib": 0.003325640456750989, + "step": 349 + }, + { + "ce_ib": 17.328413009643555, + "ce_orig": 0.9897369146347046, + "epoch": 0.1003666690631965, + "kl_loss": 0.27497774362564087, + "loss_ib": 0.004482618533074856, + "step": 349 + }, + { + "ce_ib": 15.745963096618652, + "ce_orig": 1.2580201625823975, + "epoch": 0.1003666690631965, + "kl_loss": 0.24680611491203308, + "loss_ib": 0.004042657557874918, + "step": 349 + }, + { + "epoch": 0.10065425264217413, + "grad_norm": 0.0724608451128006, + "learning_rate": 4.999891585317103e-05, + "loss": 0.857, + "step": 350 + }, + { + "ce_ib": 15.896184921264648, + "ce_orig": 0.8773839473724365, + "epoch": 0.10065425264217413, + "kl_loss": 0.25826671719551086, + "loss_ib": 0.004172285553067923, + "step": 350 + }, + { + "ce_ib": 10.407938003540039, + "ce_orig": 0.7755264639854431, + "epoch": 0.10065425264217413, + "kl_loss": 0.13498742878437042, + "loss_ib": 0.0023906680289655924, + "step": 350 + }, + { + "ce_ib": 15.827054023742676, + "ce_orig": 1.3559359312057495, + "epoch": 0.10065425264217413, + "kl_loss": 0.21587374806404114, + "loss_ib": 0.0037414426915347576, + "step": 350 + }, + { + "ce_ib": 12.431255340576172, + "ce_orig": 1.0548380613327026, + "epoch": 0.10065425264217413, + "kl_loss": 0.17993846535682678, + "loss_ib": 0.003042510012164712, + "step": 350 + }, + { + "ce_ib": 11.040481567382812, + "ce_orig": 0.8403714895248413, + "epoch": 0.10094183622115177, + "kl_loss": 0.1840890645980835, + "loss_ib": 0.002944938838481903, + "step": 351 + }, + { + "ce_ib": 15.373204231262207, + "ce_orig": 1.075166940689087, + "epoch": 0.10094183622115177, + "kl_loss": 0.18732014298439026, + "loss_ib": 0.003410521661862731, + "step": 351 + }, + { + "ce_ib": 12.945959091186523, + "ce_orig": 0.897395133972168, + "epoch": 0.10094183622115177, + "kl_loss": 0.1956326961517334, + "loss_ib": 0.0032509227748960257, + "step": 351 + }, + { + "ce_ib": 6.2110090255737305, + "ce_orig": 0.1484360694885254, + "epoch": 0.10094183622115177, + "kl_loss": 0.3617279529571533, + "loss_ib": 0.0042383805848658085, + "step": 351 + }, + { + "ce_ib": 12.221625328063965, + "ce_orig": 0.7823323607444763, + "epoch": 0.10122941980012941, + "kl_loss": 0.18742753565311432, + "loss_ib": 0.003096437780186534, + "step": 352 + }, + { + "ce_ib": 13.974717140197754, + "ce_orig": 0.5945900082588196, + "epoch": 0.10122941980012941, + "kl_loss": 0.149112731218338, + "loss_ib": 0.002888598944991827, + "step": 352 + }, + { + "ce_ib": 14.455492973327637, + "ce_orig": 0.8753033876419067, + "epoch": 0.10122941980012941, + "kl_loss": 0.28339695930480957, + "loss_ib": 0.004279518499970436, + "step": 352 + }, + { + "ce_ib": 12.000777244567871, + "ce_orig": 0.676581859588623, + "epoch": 0.10122941980012941, + "kl_loss": 0.24054169654846191, + "loss_ib": 0.0036054945085197687, + "step": 352 + }, + { + "ce_ib": 10.058462142944336, + "ce_orig": 0.9673516154289246, + "epoch": 0.10151700337910706, + "kl_loss": 0.18438181281089783, + "loss_ib": 0.002849664306268096, + "step": 353 + }, + { + "ce_ib": 14.86198616027832, + "ce_orig": 0.6923442482948303, + "epoch": 0.10151700337910706, + "kl_loss": 0.3370734453201294, + "loss_ib": 0.004856932908296585, + "step": 353 + }, + { + "ce_ib": 16.324743270874023, + "ce_orig": 1.7405447959899902, + "epoch": 0.10151700337910706, + "kl_loss": 0.1804196834564209, + "loss_ib": 0.0034366711042821407, + "step": 353 + }, + { + "ce_ib": 10.453534126281738, + "ce_orig": 0.6495627164840698, + "epoch": 0.10151700337910706, + "kl_loss": 0.18358883261680603, + "loss_ib": 0.0028812417294830084, + "step": 353 + }, + { + "ce_ib": 10.957155227661133, + "ce_orig": 0.9104872941970825, + "epoch": 0.1018045869580847, + "kl_loss": 0.18098904192447662, + "loss_ib": 0.002905606059357524, + "step": 354 + }, + { + "ce_ib": 12.083660125732422, + "ce_orig": 0.5857529640197754, + "epoch": 0.1018045869580847, + "kl_loss": 0.2257942408323288, + "loss_ib": 0.0034663083497434855, + "step": 354 + }, + { + "ce_ib": 14.075161933898926, + "ce_orig": 0.827916145324707, + "epoch": 0.1018045869580847, + "kl_loss": 0.22436052560806274, + "loss_ib": 0.0036511211656033993, + "step": 354 + }, + { + "ce_ib": 9.636879920959473, + "ce_orig": 0.3787972331047058, + "epoch": 0.1018045869580847, + "kl_loss": 0.4520171582698822, + "loss_ib": 0.005483859684318304, + "step": 354 + }, + { + "epoch": 0.10209217053706234, + "grad_norm": 0.07327523827552795, + "learning_rate": 4.9998524359556445e-05, + "loss": 0.8569, + "step": 355 + }, + { + "ce_ib": 14.932779312133789, + "ce_orig": 0.9075332880020142, + "epoch": 0.10209217053706234, + "kl_loss": 0.2982865273952484, + "loss_ib": 0.004476143047213554, + "step": 355 + }, + { + "ce_ib": 14.173552513122559, + "ce_orig": 0.8084387183189392, + "epoch": 0.10209217053706234, + "kl_loss": 0.33803728222846985, + "loss_ib": 0.0047977278009057045, + "step": 355 + }, + { + "ce_ib": 8.768068313598633, + "ce_orig": 0.1545993983745575, + "epoch": 0.10209217053706234, + "kl_loss": 0.22588716447353363, + "loss_ib": 0.003135678358376026, + "step": 355 + }, + { + "ce_ib": 11.144165992736816, + "ce_orig": 0.8069305419921875, + "epoch": 0.10209217053706234, + "kl_loss": 0.1619873195886612, + "loss_ib": 0.002734289737418294, + "step": 355 + }, + { + "ce_ib": 12.587420463562012, + "ce_orig": 1.174453616142273, + "epoch": 0.10237975411603997, + "kl_loss": 0.25647827982902527, + "loss_ib": 0.003823524573817849, + "step": 356 + }, + { + "ce_ib": 9.973530769348145, + "ce_orig": 0.5818334221839905, + "epoch": 0.10237975411603997, + "kl_loss": 0.14407533407211304, + "loss_ib": 0.0024381063412874937, + "step": 356 + }, + { + "ce_ib": 12.697566032409668, + "ce_orig": 1.0796477794647217, + "epoch": 0.10237975411603997, + "kl_loss": 0.24205471575260162, + "loss_ib": 0.003690303536131978, + "step": 356 + }, + { + "ce_ib": 12.71401596069336, + "ce_orig": 0.7969520688056946, + "epoch": 0.10237975411603997, + "kl_loss": 0.2004072666168213, + "loss_ib": 0.003275474300608039, + "step": 356 + }, + { + "ce_ib": 11.712475776672363, + "ce_orig": 0.8881208896636963, + "epoch": 0.10266733769501761, + "kl_loss": 0.17075558006763458, + "loss_ib": 0.0028788032941520214, + "step": 357 + }, + { + "ce_ib": 13.805883407592773, + "ce_orig": 0.6269095540046692, + "epoch": 0.10266733769501761, + "kl_loss": 0.2602432370185852, + "loss_ib": 0.003983020782470703, + "step": 357 + }, + { + "ce_ib": 9.764449119567871, + "ce_orig": 0.7314236760139465, + "epoch": 0.10266733769501761, + "kl_loss": 0.2458636462688446, + "loss_ib": 0.0034350811038166285, + "step": 357 + }, + { + "ce_ib": 12.507657051086426, + "ce_orig": 0.6812151670455933, + "epoch": 0.10266733769501761, + "kl_loss": 0.19368158280849457, + "loss_ib": 0.0031875811982899904, + "step": 357 + }, + { + "ce_ib": 11.365423202514648, + "ce_orig": 0.6420213580131531, + "epoch": 0.10295492127399525, + "kl_loss": 0.4926440715789795, + "loss_ib": 0.0060629830695688725, + "step": 358 + }, + { + "ce_ib": 7.592077255249023, + "ce_orig": 0.265299916267395, + "epoch": 0.10295492127399525, + "kl_loss": 0.4861292541027069, + "loss_ib": 0.005620500538498163, + "step": 358 + }, + { + "ce_ib": 15.327858924865723, + "ce_orig": 0.9397932887077332, + "epoch": 0.10295492127399525, + "kl_loss": 0.17111369967460632, + "loss_ib": 0.003243922721594572, + "step": 358 + }, + { + "ce_ib": 13.333529472351074, + "ce_orig": 0.787560224533081, + "epoch": 0.10295492127399525, + "kl_loss": 0.20652362704277039, + "loss_ib": 0.00339858909137547, + "step": 358 + }, + { + "ce_ib": 13.753911972045898, + "ce_orig": 0.3446463644504547, + "epoch": 0.1032425048529729, + "kl_loss": 0.4839940667152405, + "loss_ib": 0.006215331610292196, + "step": 359 + }, + { + "ce_ib": 16.900297164916992, + "ce_orig": 1.5462009906768799, + "epoch": 0.1032425048529729, + "kl_loss": 0.1991090476512909, + "loss_ib": 0.003681120229884982, + "step": 359 + }, + { + "ce_ib": 14.061060905456543, + "ce_orig": 0.8726510405540466, + "epoch": 0.1032425048529729, + "kl_loss": 0.19025549292564392, + "loss_ib": 0.003308660816401243, + "step": 359 + }, + { + "ce_ib": 13.532440185546875, + "ce_orig": 1.3304260969161987, + "epoch": 0.1032425048529729, + "kl_loss": 0.16146531701087952, + "loss_ib": 0.002967897104099393, + "step": 359 + }, + { + "epoch": 0.10353008843195054, + "grad_norm": 0.08045380562543869, + "learning_rate": 4.99980726386944e-05, + "loss": 0.9013, + "step": 360 + }, + { + "ce_ib": 16.031070709228516, + "ce_orig": 0.641457200050354, + "epoch": 0.10353008843195054, + "kl_loss": 0.3759670853614807, + "loss_ib": 0.005362777505069971, + "step": 360 + }, + { + "ce_ib": 10.804533004760742, + "ce_orig": 0.6665270924568176, + "epoch": 0.10353008843195054, + "kl_loss": 0.16297683119773865, + "loss_ib": 0.0027102213352918625, + "step": 360 + }, + { + "ce_ib": 11.559240341186523, + "ce_orig": 0.8196082711219788, + "epoch": 0.10353008843195054, + "kl_loss": 0.1367412805557251, + "loss_ib": 0.002523336559534073, + "step": 360 + }, + { + "ce_ib": 11.892834663391113, + "ce_orig": 0.439602255821228, + "epoch": 0.10353008843195054, + "kl_loss": 0.3415584862232208, + "loss_ib": 0.004604868125170469, + "step": 360 + }, + { + "ce_ib": 8.562467575073242, + "ce_orig": 0.45625510811805725, + "epoch": 0.10381767201092817, + "kl_loss": 0.1342378854751587, + "loss_ib": 0.0021986253559589386, + "step": 361 + }, + { + "ce_ib": 9.903914451599121, + "ce_orig": 0.8635908365249634, + "epoch": 0.10381767201092817, + "kl_loss": 0.22620341181755066, + "loss_ib": 0.0032524254638701677, + "step": 361 + }, + { + "ce_ib": 17.545305252075195, + "ce_orig": 1.781671166419983, + "epoch": 0.10381767201092817, + "kl_loss": 0.217964768409729, + "loss_ib": 0.003934178035706282, + "step": 361 + }, + { + "ce_ib": 12.740987777709961, + "ce_orig": 0.7617380619049072, + "epoch": 0.10381767201092817, + "kl_loss": 0.26160410046577454, + "loss_ib": 0.003890139749273658, + "step": 361 + }, + { + "ce_ib": 10.342143058776855, + "ce_orig": 1.0058131217956543, + "epoch": 0.10410525558990581, + "kl_loss": 0.3163241147994995, + "loss_ib": 0.0041974554769694805, + "step": 362 + }, + { + "ce_ib": 19.41802406311035, + "ce_orig": 1.730543851852417, + "epoch": 0.10410525558990581, + "kl_loss": 0.23577484488487244, + "loss_ib": 0.004299550782889128, + "step": 362 + }, + { + "ce_ib": 15.791916847229004, + "ce_orig": 1.4829944372177124, + "epoch": 0.10410525558990581, + "kl_loss": 0.14233699440956116, + "loss_ib": 0.003002561628818512, + "step": 362 + }, + { + "ce_ib": 9.117117881774902, + "ce_orig": 0.4645211696624756, + "epoch": 0.10410525558990581, + "kl_loss": 0.1363692581653595, + "loss_ib": 0.0022754042875021696, + "step": 362 + }, + { + "ce_ib": 13.213629722595215, + "ce_orig": 1.1761356592178345, + "epoch": 0.10439283916888345, + "kl_loss": 0.2095613181591034, + "loss_ib": 0.0034169761929661036, + "step": 363 + }, + { + "ce_ib": 12.37755012512207, + "ce_orig": 0.8143442273139954, + "epoch": 0.10439283916888345, + "kl_loss": 0.14410914480686188, + "loss_ib": 0.002678846474736929, + "step": 363 + }, + { + "ce_ib": 17.626813888549805, + "ce_orig": 1.4411890506744385, + "epoch": 0.10439283916888345, + "kl_loss": 0.1780979335308075, + "loss_ib": 0.0035436605103313923, + "step": 363 + }, + { + "ce_ib": 9.846484184265137, + "ce_orig": 0.5962998270988464, + "epoch": 0.10439283916888345, + "kl_loss": 0.1512468159198761, + "loss_ib": 0.002497116569429636, + "step": 363 + }, + { + "ce_ib": 12.717291831970215, + "ce_orig": 0.9116876721382141, + "epoch": 0.1046804227478611, + "kl_loss": 0.16959382593631744, + "loss_ib": 0.00296766753308475, + "step": 364 + }, + { + "ce_ib": 12.457673072814941, + "ce_orig": 0.09528730809688568, + "epoch": 0.1046804227478611, + "kl_loss": 0.38302385807037354, + "loss_ib": 0.005076006054878235, + "step": 364 + }, + { + "ce_ib": 12.967721939086914, + "ce_orig": 0.7480602860450745, + "epoch": 0.1046804227478611, + "kl_loss": 0.41338658332824707, + "loss_ib": 0.005430637858808041, + "step": 364 + }, + { + "ce_ib": 15.809802055358887, + "ce_orig": 1.5115008354187012, + "epoch": 0.1046804227478611, + "kl_loss": 0.2618887722492218, + "loss_ib": 0.004199867602437735, + "step": 364 + }, + { + "epoch": 0.10496800632683874, + "grad_norm": 0.1035882830619812, + "learning_rate": 4.9997560691673194e-05, + "loss": 0.9193, + "step": 365 + }, + { + "ce_ib": 13.008557319641113, + "ce_orig": 0.8673336505889893, + "epoch": 0.10496800632683874, + "kl_loss": 0.2006014883518219, + "loss_ib": 0.003306870348751545, + "step": 365 + }, + { + "ce_ib": 9.279165267944336, + "ce_orig": 0.656031608581543, + "epoch": 0.10496800632683874, + "kl_loss": 0.24030038714408875, + "loss_ib": 0.0033309203572571278, + "step": 365 + }, + { + "ce_ib": 14.56029987335205, + "ce_orig": 1.059706211090088, + "epoch": 0.10496800632683874, + "kl_loss": 0.22321242094039917, + "loss_ib": 0.0036881540436297655, + "step": 365 + }, + { + "ce_ib": 10.043885231018066, + "ce_orig": 0.7288112044334412, + "epoch": 0.10496800632683874, + "kl_loss": 0.1112736165523529, + "loss_ib": 0.002117124618962407, + "step": 365 + }, + { + "ce_ib": 14.350020408630371, + "ce_orig": 1.2987266778945923, + "epoch": 0.10525558990581638, + "kl_loss": 0.21527716517448425, + "loss_ib": 0.0035877733025699854, + "step": 366 + }, + { + "ce_ib": 9.209977149963379, + "ce_orig": 0.7698776721954346, + "epoch": 0.10525558990581638, + "kl_loss": 0.20659813284873962, + "loss_ib": 0.0029869787395000458, + "step": 366 + }, + { + "ce_ib": 12.112702369689941, + "ce_orig": 0.7612364888191223, + "epoch": 0.10525558990581638, + "kl_loss": 0.2407711148262024, + "loss_ib": 0.003618981223553419, + "step": 366 + }, + { + "ce_ib": 11.292844772338867, + "ce_orig": 0.769523024559021, + "epoch": 0.10525558990581638, + "kl_loss": 0.23566709458827972, + "loss_ib": 0.00348595529794693, + "step": 366 + }, + { + "ce_ib": 12.479425430297852, + "ce_orig": 0.8336954116821289, + "epoch": 0.10554317348479401, + "kl_loss": 0.16503103077411652, + "loss_ib": 0.0028982528019696474, + "step": 367 + }, + { + "ce_ib": 11.297480583190918, + "ce_orig": 0.7182275056838989, + "epoch": 0.10554317348479401, + "kl_loss": 0.2943016290664673, + "loss_ib": 0.004072763957083225, + "step": 367 + }, + { + "ce_ib": 13.729005813598633, + "ce_orig": 1.3682771921157837, + "epoch": 0.10554317348479401, + "kl_loss": 0.1870647370815277, + "loss_ib": 0.0032435478642582893, + "step": 367 + }, + { + "ce_ib": 14.036231994628906, + "ce_orig": 1.112056851387024, + "epoch": 0.10554317348479401, + "kl_loss": 0.15229341387748718, + "loss_ib": 0.0029265573248267174, + "step": 367 + }, + { + "ce_ib": 15.093040466308594, + "ce_orig": 1.170424222946167, + "epoch": 0.10583075706377165, + "kl_loss": 0.2388845682144165, + "loss_ib": 0.0038981495890766382, + "step": 368 + }, + { + "ce_ib": 15.43298053741455, + "ce_orig": 1.196273922920227, + "epoch": 0.10583075706377165, + "kl_loss": 0.19452136754989624, + "loss_ib": 0.0034885117784142494, + "step": 368 + }, + { + "ce_ib": 13.480894088745117, + "ce_orig": 1.0033950805664062, + "epoch": 0.10583075706377165, + "kl_loss": 0.24435830116271973, + "loss_ib": 0.003791672410443425, + "step": 368 + }, + { + "ce_ib": 15.991963386535645, + "ce_orig": 1.5334750413894653, + "epoch": 0.10583075706377165, + "kl_loss": 0.1978762149810791, + "loss_ib": 0.0035779583267867565, + "step": 368 + }, + { + "ce_ib": 9.99071979522705, + "ce_orig": 0.521371066570282, + "epoch": 0.1061183406427493, + "kl_loss": 0.14332063496112823, + "loss_ib": 0.0024322783574461937, + "step": 369 + }, + { + "ce_ib": 14.23776912689209, + "ce_orig": 1.3547425270080566, + "epoch": 0.1061183406427493, + "kl_loss": 0.18942734599113464, + "loss_ib": 0.0033180504105985165, + "step": 369 + }, + { + "ce_ib": 10.9193754196167, + "ce_orig": 0.7963501214981079, + "epoch": 0.1061183406427493, + "kl_loss": 0.19985352456569672, + "loss_ib": 0.003090472659096122, + "step": 369 + }, + { + "ce_ib": 11.383045196533203, + "ce_orig": 0.7126600742340088, + "epoch": 0.1061183406427493, + "kl_loss": 0.17306244373321533, + "loss_ib": 0.002868928946554661, + "step": 369 + }, + { + "epoch": 0.10640592422172694, + "grad_norm": 0.08818119019269943, + "learning_rate": 4.999698851972622e-05, + "loss": 0.9172, + "step": 370 + }, + { + "ce_ib": 13.027803421020508, + "ce_orig": 0.8471740484237671, + "epoch": 0.10640592422172694, + "kl_loss": 0.1602221429347992, + "loss_ib": 0.0029050016310065985, + "step": 370 + }, + { + "ce_ib": 12.130885124206543, + "ce_orig": 0.5990825891494751, + "epoch": 0.10640592422172694, + "kl_loss": 0.19608041644096375, + "loss_ib": 0.0031738923862576485, + "step": 370 + }, + { + "ce_ib": 11.950088500976562, + "ce_orig": 0.6112602353096008, + "epoch": 0.10640592422172694, + "kl_loss": 0.1965973824262619, + "loss_ib": 0.0031609826255589724, + "step": 370 + }, + { + "ce_ib": 9.864639282226562, + "ce_orig": 0.7072968482971191, + "epoch": 0.10640592422172694, + "kl_loss": 0.1892612874507904, + "loss_ib": 0.002879076637327671, + "step": 370 + }, + { + "ce_ib": 7.837416648864746, + "ce_orig": 0.22909517586231232, + "epoch": 0.10669350780070458, + "kl_loss": 0.4259476363658905, + "loss_ib": 0.005043217912316322, + "step": 371 + }, + { + "ce_ib": 16.050464630126953, + "ce_orig": 1.3338783979415894, + "epoch": 0.10669350780070458, + "kl_loss": 0.18167968094348907, + "loss_ib": 0.0034218430519104004, + "step": 371 + }, + { + "ce_ib": 7.732363224029541, + "ce_orig": 0.6679915189743042, + "epoch": 0.10669350780070458, + "kl_loss": 0.19385015964508057, + "loss_ib": 0.00271173776127398, + "step": 371 + }, + { + "ce_ib": 17.768779754638672, + "ce_orig": 1.290654182434082, + "epoch": 0.10669350780070458, + "kl_loss": 0.18727800250053406, + "loss_ib": 0.0036496578250080347, + "step": 371 + }, + { + "ce_ib": 16.024396896362305, + "ce_orig": 1.4784846305847168, + "epoch": 0.10698109137968222, + "kl_loss": 0.17223666608333588, + "loss_ib": 0.0033248059917241335, + "step": 372 + }, + { + "ce_ib": 9.74294376373291, + "ce_orig": 0.5707986950874329, + "epoch": 0.10698109137968222, + "kl_loss": 0.16291573643684387, + "loss_ib": 0.0026034514885395765, + "step": 372 + }, + { + "ce_ib": 16.458507537841797, + "ce_orig": 0.9432319402694702, + "epoch": 0.10698109137968222, + "kl_loss": 0.1663103997707367, + "loss_ib": 0.0033089546486735344, + "step": 372 + }, + { + "ce_ib": 15.392163276672363, + "ce_orig": 1.5210273265838623, + "epoch": 0.10698109137968222, + "kl_loss": 0.22455021739006042, + "loss_ib": 0.003784718457609415, + "step": 372 + }, + { + "ce_ib": 14.316435813903809, + "ce_orig": 0.8189164400100708, + "epoch": 0.10726867495865985, + "kl_loss": 0.16313040256500244, + "loss_ib": 0.0030629474204033613, + "step": 373 + }, + { + "ce_ib": 11.636860847473145, + "ce_orig": 0.7378359436988831, + "epoch": 0.10726867495865985, + "kl_loss": 0.1494181752204895, + "loss_ib": 0.0026578675024211407, + "step": 373 + }, + { + "ce_ib": 17.269123077392578, + "ce_orig": 1.541763424873352, + "epoch": 0.10726867495865985, + "kl_loss": 0.26547926664352417, + "loss_ib": 0.004381704609841108, + "step": 373 + }, + { + "ce_ib": 10.263904571533203, + "ce_orig": 0.8089870810508728, + "epoch": 0.10726867495865985, + "kl_loss": 0.11840936541557312, + "loss_ib": 0.002210484119132161, + "step": 373 + }, + { + "ce_ib": 15.512616157531738, + "ce_orig": 0.9539732933044434, + "epoch": 0.1075562585376375, + "kl_loss": 0.28004124760627747, + "loss_ib": 0.004351674113422632, + "step": 374 + }, + { + "ce_ib": 13.218826293945312, + "ce_orig": 0.9203503131866455, + "epoch": 0.1075562585376375, + "kl_loss": 0.17954644560813904, + "loss_ib": 0.003117346903309226, + "step": 374 + }, + { + "ce_ib": 12.426115036010742, + "ce_orig": 1.001791000366211, + "epoch": 0.1075562585376375, + "kl_loss": 0.24035847187042236, + "loss_ib": 0.003646196098998189, + "step": 374 + }, + { + "ce_ib": 13.382880210876465, + "ce_orig": 1.0965611934661865, + "epoch": 0.1075562585376375, + "kl_loss": 0.16744542121887207, + "loss_ib": 0.003012742381542921, + "step": 374 + }, + { + "epoch": 0.10784384211661514, + "grad_norm": 0.0786062702536583, + "learning_rate": 4.999635612423198e-05, + "loss": 0.8711, + "step": 375 + }, + { + "ce_ib": 7.170506477355957, + "ce_orig": 0.2852292060852051, + "epoch": 0.10784384211661514, + "kl_loss": 0.37380170822143555, + "loss_ib": 0.004455067683011293, + "step": 375 + }, + { + "ce_ib": 10.11276626586914, + "ce_orig": 0.5102769136428833, + "epoch": 0.10784384211661514, + "kl_loss": 0.18036767840385437, + "loss_ib": 0.002814953215420246, + "step": 375 + }, + { + "ce_ib": 11.423179626464844, + "ce_orig": 1.2335662841796875, + "epoch": 0.10784384211661514, + "kl_loss": 0.19887100160121918, + "loss_ib": 0.0031310277990996838, + "step": 375 + }, + { + "ce_ib": 14.86428451538086, + "ce_orig": 0.6654768586158752, + "epoch": 0.10784384211661514, + "kl_loss": 0.19978465139865875, + "loss_ib": 0.0034842747263610363, + "step": 375 + }, + { + "ce_ib": 12.019170761108398, + "ce_orig": 0.830958902835846, + "epoch": 0.10813142569559278, + "kl_loss": 0.19432778656482697, + "loss_ib": 0.0031451948452740908, + "step": 376 + }, + { + "ce_ib": 15.544536590576172, + "ce_orig": 0.608134388923645, + "epoch": 0.10813142569559278, + "kl_loss": 0.3602546155452728, + "loss_ib": 0.005156999919563532, + "step": 376 + }, + { + "ce_ib": 10.856096267700195, + "ce_orig": 0.7232799530029297, + "epoch": 0.10813142569559278, + "kl_loss": 0.15802645683288574, + "loss_ib": 0.00266587408259511, + "step": 376 + }, + { + "ce_ib": 13.540989875793457, + "ce_orig": 1.0887871980667114, + "epoch": 0.10813142569559278, + "kl_loss": 0.19060353934764862, + "loss_ib": 0.0032601344864815474, + "step": 376 + }, + { + "ce_ib": 13.389488220214844, + "ce_orig": 0.8150464296340942, + "epoch": 0.10841900927457042, + "kl_loss": 0.19611474871635437, + "loss_ib": 0.0033000963740050793, + "step": 377 + }, + { + "ce_ib": 7.958892822265625, + "ce_orig": 0.558684229850769, + "epoch": 0.10841900927457042, + "kl_loss": 0.12297540158033371, + "loss_ib": 0.002025643130764365, + "step": 377 + }, + { + "ce_ib": 13.451696395874023, + "ce_orig": 0.9451778531074524, + "epoch": 0.10841900927457042, + "kl_loss": 0.22830717265605927, + "loss_ib": 0.003628241363912821, + "step": 377 + }, + { + "ce_ib": 11.917108535766602, + "ce_orig": 1.090754747390747, + "epoch": 0.10841900927457042, + "kl_loss": 0.18710459768772125, + "loss_ib": 0.003062756499275565, + "step": 377 + }, + { + "ce_ib": 11.53650951385498, + "ce_orig": 0.4949207007884979, + "epoch": 0.10870659285354806, + "kl_loss": 0.2471131980419159, + "loss_ib": 0.0036247826647013426, + "step": 378 + }, + { + "ce_ib": 9.730238914489746, + "ce_orig": 0.40288040041923523, + "epoch": 0.10870659285354806, + "kl_loss": 0.18191629648208618, + "loss_ib": 0.0027921865694224834, + "step": 378 + }, + { + "ce_ib": 9.56220817565918, + "ce_orig": 0.5649886727333069, + "epoch": 0.10870659285354806, + "kl_loss": 0.17736974358558655, + "loss_ib": 0.0027299183420836926, + "step": 378 + }, + { + "ce_ib": 16.986759185791016, + "ce_orig": 1.3326983451843262, + "epoch": 0.10870659285354806, + "kl_loss": 0.27190378308296204, + "loss_ib": 0.0044177137315273285, + "step": 378 + }, + { + "ce_ib": 10.621078491210938, + "ce_orig": 0.9211146831512451, + "epoch": 0.10899417643252571, + "kl_loss": 0.1395106166601181, + "loss_ib": 0.002457214053720236, + "step": 379 + }, + { + "ce_ib": 8.39609432220459, + "ce_orig": 0.679756224155426, + "epoch": 0.10899417643252571, + "kl_loss": 0.1734372079372406, + "loss_ib": 0.0025739814154803753, + "step": 379 + }, + { + "ce_ib": 13.126282691955566, + "ce_orig": 0.5308454036712646, + "epoch": 0.10899417643252571, + "kl_loss": 0.23796755075454712, + "loss_ib": 0.0036923037841916084, + "step": 379 + }, + { + "ce_ib": 15.385143280029297, + "ce_orig": 0.9731943607330322, + "epoch": 0.10899417643252571, + "kl_loss": 0.2400083839893341, + "loss_ib": 0.003938598092645407, + "step": 379 + }, + { + "epoch": 0.10928176001150335, + "grad_norm": 0.07642538100481033, + "learning_rate": 4.9995663506714054e-05, + "loss": 0.8705, + "step": 380 + }, + { + "ce_ib": 10.140104293823242, + "ce_orig": 0.55213463306427, + "epoch": 0.10928176001150335, + "kl_loss": 0.2998710870742798, + "loss_ib": 0.004012721125036478, + "step": 380 + }, + { + "ce_ib": 12.016203880310059, + "ce_orig": 0.2663145363330841, + "epoch": 0.10928176001150335, + "kl_loss": 0.20258453488349915, + "loss_ib": 0.0032274657860398293, + "step": 380 + }, + { + "ce_ib": 10.269865989685059, + "ce_orig": 0.5478006601333618, + "epoch": 0.10928176001150335, + "kl_loss": 0.17457614839076996, + "loss_ib": 0.0027727477718144655, + "step": 380 + }, + { + "ce_ib": 10.28240966796875, + "ce_orig": 0.46482110023498535, + "epoch": 0.10928176001150335, + "kl_loss": 0.14175333082675934, + "loss_ib": 0.0024457741528749466, + "step": 380 + }, + { + "ce_ib": 18.375350952148438, + "ce_orig": 1.6469268798828125, + "epoch": 0.10956934359048098, + "kl_loss": 0.2459729164838791, + "loss_ib": 0.004297263920307159, + "step": 381 + }, + { + "ce_ib": 13.591058731079102, + "ce_orig": 0.654187798500061, + "epoch": 0.10956934359048098, + "kl_loss": 0.33038705587387085, + "loss_ib": 0.004662976134568453, + "step": 381 + }, + { + "ce_ib": 18.45250129699707, + "ce_orig": 1.5424803495407104, + "epoch": 0.10956934359048098, + "kl_loss": 0.25389549136161804, + "loss_ib": 0.004384204745292664, + "step": 381 + }, + { + "ce_ib": 15.972155570983887, + "ce_orig": 1.1866068840026855, + "epoch": 0.10956934359048098, + "kl_loss": 0.26268285512924194, + "loss_ib": 0.004224043805152178, + "step": 381 + }, + { + "ce_ib": 10.061860084533691, + "ce_orig": 0.5798574090003967, + "epoch": 0.10985692716945862, + "kl_loss": 0.18744969367980957, + "loss_ib": 0.002880682935938239, + "step": 382 + }, + { + "ce_ib": 8.527949333190918, + "ce_orig": 0.7655165195465088, + "epoch": 0.10985692716945862, + "kl_loss": 0.19700750708580017, + "loss_ib": 0.002822869922965765, + "step": 382 + }, + { + "ce_ib": 7.723175525665283, + "ce_orig": 0.46251556277275085, + "epoch": 0.10985692716945862, + "kl_loss": 0.1479792296886444, + "loss_ib": 0.0022521098144352436, + "step": 382 + }, + { + "ce_ib": 16.92078399658203, + "ce_orig": 1.5507680177688599, + "epoch": 0.10985692716945862, + "kl_loss": 0.17009945213794708, + "loss_ib": 0.0033930731005966663, + "step": 382 + }, + { + "ce_ib": 11.547307014465332, + "ce_orig": 0.9038912057876587, + "epoch": 0.11014451074843626, + "kl_loss": 0.18578889966011047, + "loss_ib": 0.003012619446963072, + "step": 383 + }, + { + "ce_ib": 7.641073226928711, + "ce_orig": 0.6663032174110413, + "epoch": 0.11014451074843626, + "kl_loss": 0.12631307542324066, + "loss_ib": 0.002027238020673394, + "step": 383 + }, + { + "ce_ib": 7.092626094818115, + "ce_orig": 0.4686228632926941, + "epoch": 0.11014451074843626, + "kl_loss": 0.1317451447248459, + "loss_ib": 0.002026714151725173, + "step": 383 + }, + { + "ce_ib": 9.421049118041992, + "ce_orig": 0.6183243989944458, + "epoch": 0.11014451074843626, + "kl_loss": 0.15724687278270721, + "loss_ib": 0.002514573512598872, + "step": 383 + }, + { + "ce_ib": 11.571142196655273, + "ce_orig": 0.4451025128364563, + "epoch": 0.11043209432741391, + "kl_loss": 0.2353937327861786, + "loss_ib": 0.0035110514145344496, + "step": 384 + }, + { + "ce_ib": 16.550886154174805, + "ce_orig": 1.2699673175811768, + "epoch": 0.11043209432741391, + "kl_loss": 0.2898581027984619, + "loss_ib": 0.004553669597953558, + "step": 384 + }, + { + "ce_ib": 12.445303916931152, + "ce_orig": 0.6089316606521606, + "epoch": 0.11043209432741391, + "kl_loss": 0.5906498432159424, + "loss_ib": 0.007151029072701931, + "step": 384 + }, + { + "ce_ib": 11.304732322692871, + "ce_orig": 0.6872734427452087, + "epoch": 0.11043209432741391, + "kl_loss": 0.132551908493042, + "loss_ib": 0.002455992391332984, + "step": 384 + }, + { + "epoch": 0.11071967790639155, + "grad_norm": 0.08736824244260788, + "learning_rate": 4.999491066884113e-05, + "loss": 0.8343, + "step": 385 + }, + { + "ce_ib": 12.851602554321289, + "ce_orig": 1.4722890853881836, + "epoch": 0.11071967790639155, + "kl_loss": 0.16855554282665253, + "loss_ib": 0.0029707157518714666, + "step": 385 + }, + { + "ce_ib": 12.488396644592285, + "ce_orig": 1.012579321861267, + "epoch": 0.11071967790639155, + "kl_loss": 0.24618947505950928, + "loss_ib": 0.0037107341922819614, + "step": 385 + }, + { + "ce_ib": 9.774558067321777, + "ce_orig": 0.6036505103111267, + "epoch": 0.11071967790639155, + "kl_loss": 0.14310193061828613, + "loss_ib": 0.0024084749165922403, + "step": 385 + }, + { + "ce_ib": 10.53403091430664, + "ce_orig": 0.8375312089920044, + "epoch": 0.11071967790639155, + "kl_loss": 0.2290131151676178, + "loss_ib": 0.00334353419020772, + "step": 385 + }, + { + "ce_ib": 13.784265518188477, + "ce_orig": 0.7486900091171265, + "epoch": 0.11100726148536919, + "kl_loss": 0.275905966758728, + "loss_ib": 0.004137486219406128, + "step": 386 + }, + { + "ce_ib": 13.386645317077637, + "ce_orig": 0.8458417654037476, + "epoch": 0.11100726148536919, + "kl_loss": 0.2864159941673279, + "loss_ib": 0.0042028240859508514, + "step": 386 + }, + { + "ce_ib": 4.843447685241699, + "ce_orig": 0.1851879358291626, + "epoch": 0.11100726148536919, + "kl_loss": 0.44297945499420166, + "loss_ib": 0.0049141389317810535, + "step": 386 + }, + { + "ce_ib": 11.102249145507812, + "ce_orig": 0.7399924397468567, + "epoch": 0.11100726148536919, + "kl_loss": 0.2577285170555115, + "loss_ib": 0.0036875098012387753, + "step": 386 + }, + { + "ce_ib": 13.290738105773926, + "ce_orig": 0.8414790034294128, + "epoch": 0.11129484506434682, + "kl_loss": 0.1596413552761078, + "loss_ib": 0.0029254870023578405, + "step": 387 + }, + { + "ce_ib": 12.116263389587402, + "ce_orig": 1.0983738899230957, + "epoch": 0.11129484506434682, + "kl_loss": 0.22985008358955383, + "loss_ib": 0.0035101270768791437, + "step": 387 + }, + { + "ce_ib": 11.536699295043945, + "ce_orig": 0.7217467427253723, + "epoch": 0.11129484506434682, + "kl_loss": 0.23231491446495056, + "loss_ib": 0.003476819023489952, + "step": 387 + }, + { + "ce_ib": 10.348511695861816, + "ce_orig": 0.9353364706039429, + "epoch": 0.11129484506434682, + "kl_loss": 0.16923439502716064, + "loss_ib": 0.0027271949220448732, + "step": 387 + }, + { + "ce_ib": 8.719440460205078, + "ce_orig": 0.6035894155502319, + "epoch": 0.11158242864332446, + "kl_loss": 0.22707051038742065, + "loss_ib": 0.0031426490750163794, + "step": 388 + }, + { + "ce_ib": 12.744837760925293, + "ce_orig": 0.7636030316352844, + "epoch": 0.11158242864332446, + "kl_loss": 0.19510197639465332, + "loss_ib": 0.0032255034893751144, + "step": 388 + }, + { + "ce_ib": 10.507633209228516, + "ce_orig": 0.5344758033752441, + "epoch": 0.11158242864332446, + "kl_loss": 0.23824182152748108, + "loss_ib": 0.0034331816714257, + "step": 388 + }, + { + "ce_ib": 13.216085433959961, + "ce_orig": 0.9561126232147217, + "epoch": 0.11158242864332446, + "kl_loss": 0.19082219898700714, + "loss_ib": 0.003229830414056778, + "step": 388 + }, + { + "ce_ib": 12.790705680847168, + "ce_orig": 0.7942649722099304, + "epoch": 0.11187001222230211, + "kl_loss": 0.19223928451538086, + "loss_ib": 0.003201463259756565, + "step": 389 + }, + { + "ce_ib": 12.605323791503906, + "ce_orig": 0.9660126566886902, + "epoch": 0.11187001222230211, + "kl_loss": 0.18265745043754578, + "loss_ib": 0.0030871068593114614, + "step": 389 + }, + { + "ce_ib": 16.678693771362305, + "ce_orig": 1.4949008226394653, + "epoch": 0.11187001222230211, + "kl_loss": 0.2443901002407074, + "loss_ib": 0.004111770074814558, + "step": 389 + }, + { + "ce_ib": 15.18307876586914, + "ce_orig": 1.5530723333358765, + "epoch": 0.11187001222230211, + "kl_loss": 0.18980354070663452, + "loss_ib": 0.0034163433592766523, + "step": 389 + }, + { + "epoch": 0.11215759580127975, + "grad_norm": 0.0806172788143158, + "learning_rate": 4.999409761242696e-05, + "loss": 0.889, + "step": 390 + }, + { + "ce_ib": 12.329959869384766, + "ce_orig": 0.6557547450065613, + "epoch": 0.11215759580127975, + "kl_loss": 0.11406560242176056, + "loss_ib": 0.0023736520670354366, + "step": 390 + }, + { + "ce_ib": 10.382856369018555, + "ce_orig": 0.36463189125061035, + "epoch": 0.11215759580127975, + "kl_loss": 0.2578916549682617, + "loss_ib": 0.0036172019317746162, + "step": 390 + }, + { + "ce_ib": 13.944049835205078, + "ce_orig": 0.8022533655166626, + "epoch": 0.11215759580127975, + "kl_loss": 0.23697403073310852, + "loss_ib": 0.0037641453091055155, + "step": 390 + }, + { + "ce_ib": 10.437244415283203, + "ce_orig": 0.6617816090583801, + "epoch": 0.11215759580127975, + "kl_loss": 0.15966740250587463, + "loss_ib": 0.0026403984520584345, + "step": 390 + }, + { + "ce_ib": 11.55241584777832, + "ce_orig": 0.7899225950241089, + "epoch": 0.11244517938025739, + "kl_loss": 0.12170088291168213, + "loss_ib": 0.0023722504265606403, + "step": 391 + }, + { + "ce_ib": 14.663440704345703, + "ce_orig": 0.9394941329956055, + "epoch": 0.11244517938025739, + "kl_loss": 0.23881687223911285, + "loss_ib": 0.00385451246984303, + "step": 391 + }, + { + "ce_ib": 9.422616004943848, + "ce_orig": 0.5803003907203674, + "epoch": 0.11244517938025739, + "kl_loss": 0.16054609417915344, + "loss_ib": 0.0025477223098278046, + "step": 391 + }, + { + "ce_ib": 11.833211898803711, + "ce_orig": 0.6175609827041626, + "epoch": 0.11244517938025739, + "kl_loss": 0.2186504304409027, + "loss_ib": 0.0033698254264891148, + "step": 391 + }, + { + "ce_ib": 11.895299911499023, + "ce_orig": 0.6896355152130127, + "epoch": 0.11273276295923502, + "kl_loss": 0.17966461181640625, + "loss_ib": 0.0029861759394407272, + "step": 392 + }, + { + "ce_ib": 11.261984825134277, + "ce_orig": 0.7158202528953552, + "epoch": 0.11273276295923502, + "kl_loss": 0.19520384073257446, + "loss_ib": 0.00307823671028018, + "step": 392 + }, + { + "ce_ib": 12.316457748413086, + "ce_orig": 0.8367967009544373, + "epoch": 0.11273276295923502, + "kl_loss": 0.2615872323513031, + "loss_ib": 0.0038475177716463804, + "step": 392 + }, + { + "ce_ib": 10.227145195007324, + "ce_orig": 0.7019678950309753, + "epoch": 0.11273276295923502, + "kl_loss": 0.21493881940841675, + "loss_ib": 0.0031721023842692375, + "step": 392 + }, + { + "ce_ib": 16.03828239440918, + "ce_orig": 1.3268945217132568, + "epoch": 0.11302034653821266, + "kl_loss": 0.23496520519256592, + "loss_ib": 0.003953480161726475, + "step": 393 + }, + { + "ce_ib": 7.363077163696289, + "ce_orig": 0.6715714335441589, + "epoch": 0.11302034653821266, + "kl_loss": 0.12172873318195343, + "loss_ib": 0.0019535948522388935, + "step": 393 + }, + { + "ce_ib": 10.327352523803711, + "ce_orig": 0.9410114884376526, + "epoch": 0.11302034653821266, + "kl_loss": 0.11792122572660446, + "loss_ib": 0.0022119474597275257, + "step": 393 + }, + { + "ce_ib": 5.172538757324219, + "ce_orig": 0.20720714330673218, + "epoch": 0.11302034653821266, + "kl_loss": 0.41349154710769653, + "loss_ib": 0.004652169067412615, + "step": 393 + }, + { + "ce_ib": 10.094765663146973, + "ce_orig": 0.6660728454589844, + "epoch": 0.11330793011719031, + "kl_loss": 0.1545972228050232, + "loss_ib": 0.0025554485619068146, + "step": 394 + }, + { + "ce_ib": 12.061307907104492, + "ce_orig": 0.5917040109634399, + "epoch": 0.11330793011719031, + "kl_loss": 0.19155195355415344, + "loss_ib": 0.003121650079265237, + "step": 394 + }, + { + "ce_ib": 13.97226333618164, + "ce_orig": 1.101852536201477, + "epoch": 0.11330793011719031, + "kl_loss": 0.19805079698562622, + "loss_ib": 0.003377734450623393, + "step": 394 + }, + { + "ce_ib": 14.349608421325684, + "ce_orig": 0.5646123886108398, + "epoch": 0.11330793011719031, + "kl_loss": 0.17135578393936157, + "loss_ib": 0.0031485187355428934, + "step": 394 + }, + { + "epoch": 0.11359551369616795, + "grad_norm": 0.08417540043592453, + "learning_rate": 4.999322433943038e-05, + "loss": 0.8409, + "step": 395 + }, + { + "ce_ib": 12.859439849853516, + "ce_orig": 0.9198188185691833, + "epoch": 0.11359551369616795, + "kl_loss": 0.1865035444498062, + "loss_ib": 0.0031509792897850275, + "step": 395 + }, + { + "ce_ib": 10.23005485534668, + "ce_orig": 0.6933926939964294, + "epoch": 0.11359551369616795, + "kl_loss": 0.14806464314460754, + "loss_ib": 0.0025036518927663565, + "step": 395 + }, + { + "ce_ib": 14.05105972290039, + "ce_orig": 0.6480773687362671, + "epoch": 0.11359551369616795, + "kl_loss": 0.2830086648464203, + "loss_ib": 0.0042351926676929, + "step": 395 + }, + { + "ce_ib": 12.827180862426758, + "ce_orig": 1.2222548723220825, + "epoch": 0.11359551369616795, + "kl_loss": 0.19166235625743866, + "loss_ib": 0.0031993414741009474, + "step": 395 + }, + { + "ce_ib": 14.012380599975586, + "ce_orig": 1.3009854555130005, + "epoch": 0.11388309727514559, + "kl_loss": 0.2228410542011261, + "loss_ib": 0.003629648592323065, + "step": 396 + }, + { + "ce_ib": 7.52554178237915, + "ce_orig": 0.6004323363304138, + "epoch": 0.11388309727514559, + "kl_loss": 0.15014877915382385, + "loss_ib": 0.002254042075946927, + "step": 396 + }, + { + "ce_ib": 11.584349632263184, + "ce_orig": 0.8162614703178406, + "epoch": 0.11388309727514559, + "kl_loss": 0.1859622299671173, + "loss_ib": 0.003018057206645608, + "step": 396 + }, + { + "ce_ib": 11.174099922180176, + "ce_orig": 0.7962226271629333, + "epoch": 0.11388309727514559, + "kl_loss": 0.16366738080978394, + "loss_ib": 0.0027540838345885277, + "step": 396 + }, + { + "ce_ib": 13.57970905303955, + "ce_orig": 1.1856755018234253, + "epoch": 0.11417068085412323, + "kl_loss": 0.272286593914032, + "loss_ib": 0.004080836661159992, + "step": 397 + }, + { + "ce_ib": 6.8682122230529785, + "ce_orig": 0.43402042984962463, + "epoch": 0.11417068085412323, + "kl_loss": 0.1285514086484909, + "loss_ib": 0.00197233515791595, + "step": 397 + }, + { + "ce_ib": 11.961787223815918, + "ce_orig": 0.782745897769928, + "epoch": 0.11417068085412323, + "kl_loss": 0.15355338156223297, + "loss_ib": 0.0027317123021930456, + "step": 397 + }, + { + "ce_ib": 10.129254341125488, + "ce_orig": 0.8172139525413513, + "epoch": 0.11417068085412323, + "kl_loss": 0.17509959638118744, + "loss_ib": 0.0027639211621135473, + "step": 397 + }, + { + "ce_ib": 8.069706916809082, + "ce_orig": 0.6624881029129028, + "epoch": 0.11445826443310086, + "kl_loss": 0.12610213458538055, + "loss_ib": 0.0020679919980466366, + "step": 398 + }, + { + "ce_ib": 7.447951793670654, + "ce_orig": 0.44954437017440796, + "epoch": 0.11445826443310086, + "kl_loss": 0.16658943891525269, + "loss_ib": 0.002410689601674676, + "step": 398 + }, + { + "ce_ib": 11.109574317932129, + "ce_orig": 0.47779056429862976, + "epoch": 0.11445826443310086, + "kl_loss": 0.23340463638305664, + "loss_ib": 0.0034450036473572254, + "step": 398 + }, + { + "ce_ib": 15.319025039672852, + "ce_orig": 1.082602620124817, + "epoch": 0.11445826443310086, + "kl_loss": 0.1651686728000641, + "loss_ib": 0.0031835888512432575, + "step": 398 + }, + { + "ce_ib": 16.39435386657715, + "ce_orig": 1.458138108253479, + "epoch": 0.11474584801207852, + "kl_loss": 0.23157645761966705, + "loss_ib": 0.003955199848860502, + "step": 399 + }, + { + "ce_ib": 12.060273170471191, + "ce_orig": 1.2219758033752441, + "epoch": 0.11474584801207852, + "kl_loss": 0.18119218945503235, + "loss_ib": 0.0030179491732269526, + "step": 399 + }, + { + "ce_ib": 9.549758911132812, + "ce_orig": 0.5686326026916504, + "epoch": 0.11474584801207852, + "kl_loss": 0.1396368145942688, + "loss_ib": 0.0023513438645750284, + "step": 399 + }, + { + "ce_ib": 12.140963554382324, + "ce_orig": 0.8329185247421265, + "epoch": 0.11474584801207852, + "kl_loss": 0.1271989345550537, + "loss_ib": 0.002486085519194603, + "step": 399 + }, + { + "epoch": 0.11503343159105615, + "grad_norm": 0.08466464281082153, + "learning_rate": 4.9992290851955325e-05, + "loss": 0.8643, + "step": 400 + }, + { + "ce_ib": 16.369003295898438, + "ce_orig": 0.8453714847564697, + "epoch": 0.11503343159105615, + "kl_loss": 0.20362722873687744, + "loss_ib": 0.0036731725558638573, + "step": 400 + }, + { + "ce_ib": 10.80587387084961, + "ce_orig": 0.9257553219795227, + "epoch": 0.11503343159105615, + "kl_loss": 0.18605493009090424, + "loss_ib": 0.0029411364812403917, + "step": 400 + }, + { + "ce_ib": 6.573936939239502, + "ce_orig": 0.6002892851829529, + "epoch": 0.11503343159105615, + "kl_loss": 0.15251712501049042, + "loss_ib": 0.0021825649309903383, + "step": 400 + }, + { + "ce_ib": 13.615230560302734, + "ce_orig": 1.3942912817001343, + "epoch": 0.11503343159105615, + "kl_loss": 0.171320840716362, + "loss_ib": 0.0030747312121093273, + "step": 400 + }, + { + "ce_ib": 12.808405876159668, + "ce_orig": 1.0415929555892944, + "epoch": 0.11532101517003379, + "kl_loss": 0.17026068270206451, + "loss_ib": 0.002983447164297104, + "step": 401 + }, + { + "ce_ib": 7.786767959594727, + "ce_orig": 0.5461778044700623, + "epoch": 0.11532101517003379, + "kl_loss": 0.1809147298336029, + "loss_ib": 0.0025878241285681725, + "step": 401 + }, + { + "ce_ib": 9.318531036376953, + "ce_orig": 1.0691779851913452, + "epoch": 0.11532101517003379, + "kl_loss": 0.12710833549499512, + "loss_ib": 0.0022029364481568336, + "step": 401 + }, + { + "ce_ib": 15.711153030395508, + "ce_orig": 1.2399132251739502, + "epoch": 0.11532101517003379, + "kl_loss": 0.20981115102767944, + "loss_ib": 0.003669226774945855, + "step": 401 + }, + { + "ce_ib": 13.523634910583496, + "ce_orig": 1.0126256942749023, + "epoch": 0.11560859874901143, + "kl_loss": 0.17583820223808289, + "loss_ib": 0.003110745456069708, + "step": 402 + }, + { + "ce_ib": 10.453843116760254, + "ce_orig": 0.7707417607307434, + "epoch": 0.11560859874901143, + "kl_loss": 0.13824975490570068, + "loss_ib": 0.0024278818164020777, + "step": 402 + }, + { + "ce_ib": 12.620675086975098, + "ce_orig": 0.9837019443511963, + "epoch": 0.11560859874901143, + "kl_loss": 0.15091584622859955, + "loss_ib": 0.0027712257578969, + "step": 402 + }, + { + "ce_ib": 14.844929695129395, + "ce_orig": 1.3407633304595947, + "epoch": 0.11560859874901143, + "kl_loss": 0.30624860525131226, + "loss_ib": 0.00454697897657752, + "step": 402 + }, + { + "ce_ib": 11.737789154052734, + "ce_orig": 1.1215554475784302, + "epoch": 0.11589618232798907, + "kl_loss": 0.16605661809444427, + "loss_ib": 0.002834344981238246, + "step": 403 + }, + { + "ce_ib": 11.818507194519043, + "ce_orig": 0.4051503837108612, + "epoch": 0.11589618232798907, + "kl_loss": 0.2137288749217987, + "loss_ib": 0.0033191393595188856, + "step": 403 + }, + { + "ce_ib": 10.569378852844238, + "ce_orig": 0.7266899347305298, + "epoch": 0.11589618232798907, + "kl_loss": 0.19015324115753174, + "loss_ib": 0.002958470256999135, + "step": 403 + }, + { + "ce_ib": 10.655888557434082, + "ce_orig": 0.640403151512146, + "epoch": 0.11589618232798907, + "kl_loss": 0.18397970497608185, + "loss_ib": 0.0029053858015686274, + "step": 403 + }, + { + "ce_ib": 10.284205436706543, + "ce_orig": 1.0149288177490234, + "epoch": 0.11618376590696672, + "kl_loss": 0.16832667589187622, + "loss_ib": 0.0027116872370243073, + "step": 404 + }, + { + "ce_ib": 13.454412460327148, + "ce_orig": 1.0919182300567627, + "epoch": 0.11618376590696672, + "kl_loss": 0.15390118956565857, + "loss_ib": 0.0028844529297202826, + "step": 404 + }, + { + "ce_ib": 8.412931442260742, + "ce_orig": 0.6086220145225525, + "epoch": 0.11618376590696672, + "kl_loss": 0.13261398673057556, + "loss_ib": 0.002167432801797986, + "step": 404 + }, + { + "ce_ib": 16.36043930053711, + "ce_orig": 1.389167308807373, + "epoch": 0.11618376590696672, + "kl_loss": 0.13541021943092346, + "loss_ib": 0.002990146167576313, + "step": 404 + }, + { + "epoch": 0.11647134948594436, + "grad_norm": 0.08868135511875153, + "learning_rate": 4.999129715225077e-05, + "loss": 0.8893, + "step": 405 + }, + { + "ce_ib": 11.492820739746094, + "ce_orig": 0.746446967124939, + "epoch": 0.11647134948594436, + "kl_loss": 0.1755758821964264, + "loss_ib": 0.002905040979385376, + "step": 405 + }, + { + "ce_ib": 14.550654411315918, + "ce_orig": 1.1409695148468018, + "epoch": 0.11647134948594436, + "kl_loss": 0.16501018404960632, + "loss_ib": 0.003105167066678405, + "step": 405 + }, + { + "ce_ib": 12.881452560424805, + "ce_orig": 1.0264896154403687, + "epoch": 0.11647134948594436, + "kl_loss": 0.18386085331439972, + "loss_ib": 0.003126753494143486, + "step": 405 + }, + { + "ce_ib": 15.17376708984375, + "ce_orig": 1.3239004611968994, + "epoch": 0.11647134948594436, + "kl_loss": 0.18682563304901123, + "loss_ib": 0.0033856327645480633, + "step": 405 + }, + { + "ce_ib": 14.622434616088867, + "ce_orig": 1.3146438598632812, + "epoch": 0.116758933064922, + "kl_loss": 0.18558424711227417, + "loss_ib": 0.003318085800856352, + "step": 406 + }, + { + "ce_ib": 8.00043773651123, + "ce_orig": 0.7650782465934753, + "epoch": 0.116758933064922, + "kl_loss": 0.14243070781230927, + "loss_ib": 0.0022243508137762547, + "step": 406 + }, + { + "ce_ib": 11.638957977294922, + "ce_orig": 0.927288830280304, + "epoch": 0.116758933064922, + "kl_loss": 0.14190661907196045, + "loss_ib": 0.0025829619262367487, + "step": 406 + }, + { + "ce_ib": 8.698803901672363, + "ce_orig": 0.4740663766860962, + "epoch": 0.116758933064922, + "kl_loss": 0.18497580289840698, + "loss_ib": 0.0027196381706744432, + "step": 406 + }, + { + "ce_ib": 11.999215126037598, + "ce_orig": 0.5628153681755066, + "epoch": 0.11704651664389963, + "kl_loss": 0.261201411485672, + "loss_ib": 0.0038119356613606215, + "step": 407 + }, + { + "ce_ib": 14.865316390991211, + "ce_orig": 1.6626498699188232, + "epoch": 0.11704651664389963, + "kl_loss": 0.17202956974506378, + "loss_ib": 0.003206827212125063, + "step": 407 + }, + { + "ce_ib": 11.803093910217285, + "ce_orig": 0.8193854093551636, + "epoch": 0.11704651664389963, + "kl_loss": 0.2457035779953003, + "loss_ib": 0.003637345042079687, + "step": 407 + }, + { + "ce_ib": 14.206315994262695, + "ce_orig": 1.1790286302566528, + "epoch": 0.11704651664389963, + "kl_loss": 0.21022199094295502, + "loss_ib": 0.0035228515043854713, + "step": 407 + }, + { + "ce_ib": 9.615798950195312, + "ce_orig": 0.5693954229354858, + "epoch": 0.11733410022287727, + "kl_loss": 0.20928806066513062, + "loss_ib": 0.003054460510611534, + "step": 408 + }, + { + "ce_ib": 10.448309898376465, + "ce_orig": 0.7434052228927612, + "epoch": 0.11733410022287727, + "kl_loss": 0.18974988162517548, + "loss_ib": 0.002942329505458474, + "step": 408 + }, + { + "ce_ib": 12.322173118591309, + "ce_orig": 0.7398363351821899, + "epoch": 0.11733410022287727, + "kl_loss": 0.32504746317863464, + "loss_ib": 0.004482691641896963, + "step": 408 + }, + { + "ce_ib": 11.393105506896973, + "ce_orig": 0.2980586290359497, + "epoch": 0.11733410022287727, + "kl_loss": 0.2762417793273926, + "loss_ib": 0.003901728196069598, + "step": 408 + }, + { + "ce_ib": 9.13760757446289, + "ce_orig": 0.48705342411994934, + "epoch": 0.11762168380185492, + "kl_loss": 0.2090642750263214, + "loss_ib": 0.003004403319209814, + "step": 409 + }, + { + "ce_ib": 13.404657363891602, + "ce_orig": 1.3054029941558838, + "epoch": 0.11762168380185492, + "kl_loss": 0.15996284782886505, + "loss_ib": 0.0029400940984487534, + "step": 409 + }, + { + "ce_ib": 14.58011245727539, + "ce_orig": 0.6604776978492737, + "epoch": 0.11762168380185492, + "kl_loss": 0.17515218257904053, + "loss_ib": 0.0032095329370349646, + "step": 409 + }, + { + "ce_ib": 12.46037769317627, + "ce_orig": 0.969291090965271, + "epoch": 0.11762168380185492, + "kl_loss": 0.12114151567220688, + "loss_ib": 0.002457452705129981, + "step": 409 + }, + { + "epoch": 0.11790926738083256, + "grad_norm": 0.08972907811403275, + "learning_rate": 4.9990243242710764e-05, + "loss": 0.8016, + "step": 410 + }, + { + "ce_ib": 14.017788887023926, + "ce_orig": 1.2842930555343628, + "epoch": 0.11790926738083256, + "kl_loss": 0.20839394629001617, + "loss_ib": 0.0034857182763516903, + "step": 410 + }, + { + "ce_ib": 12.625293731689453, + "ce_orig": 0.331617534160614, + "epoch": 0.11790926738083256, + "kl_loss": 0.23530715703964233, + "loss_ib": 0.0036156009882688522, + "step": 410 + }, + { + "ce_ib": 13.452978134155273, + "ce_orig": 0.9680318832397461, + "epoch": 0.11790926738083256, + "kl_loss": 0.17226648330688477, + "loss_ib": 0.003067962359637022, + "step": 410 + }, + { + "ce_ib": 13.609016418457031, + "ce_orig": 1.1668546199798584, + "epoch": 0.11790926738083256, + "kl_loss": 0.15326163172721863, + "loss_ib": 0.0028935179580003023, + "step": 410 + }, + { + "ce_ib": 11.401368141174316, + "ce_orig": 0.7574223279953003, + "epoch": 0.1181968509598102, + "kl_loss": 0.17012295126914978, + "loss_ib": 0.002841366222128272, + "step": 411 + }, + { + "ce_ib": 13.586700439453125, + "ce_orig": 1.1896238327026367, + "epoch": 0.1181968509598102, + "kl_loss": 0.2817670702934265, + "loss_ib": 0.004176340531557798, + "step": 411 + }, + { + "ce_ib": 16.24236488342285, + "ce_orig": 1.1911630630493164, + "epoch": 0.1181968509598102, + "kl_loss": 0.19016054272651672, + "loss_ib": 0.003525841748341918, + "step": 411 + }, + { + "ce_ib": 13.229630470275879, + "ce_orig": 0.9971056580543518, + "epoch": 0.1181968509598102, + "kl_loss": 0.1399802565574646, + "loss_ib": 0.0027227657847106457, + "step": 411 + }, + { + "ce_ib": 10.67956829071045, + "ce_orig": 0.8728124499320984, + "epoch": 0.11848443453878783, + "kl_loss": 0.15304341912269592, + "loss_ib": 0.0025983911473304033, + "step": 412 + }, + { + "ce_ib": 7.926272869110107, + "ce_orig": 0.4618622958660126, + "epoch": 0.11848443453878783, + "kl_loss": 0.08525725454092026, + "loss_ib": 0.0016451997216790915, + "step": 412 + }, + { + "ce_ib": 12.342477798461914, + "ce_orig": 0.6581653952598572, + "epoch": 0.11848443453878783, + "kl_loss": 0.20672234892845154, + "loss_ib": 0.0033014710061252117, + "step": 412 + }, + { + "ce_ib": 12.10908317565918, + "ce_orig": 0.8213714957237244, + "epoch": 0.11848443453878783, + "kl_loss": 0.23801177740097046, + "loss_ib": 0.00359102594666183, + "step": 412 + }, + { + "ce_ib": 11.018802642822266, + "ce_orig": 0.5446355938911438, + "epoch": 0.11877201811776547, + "kl_loss": 0.26025596261024475, + "loss_ib": 0.0037044398486614227, + "step": 413 + }, + { + "ce_ib": 12.422916412353516, + "ce_orig": 0.9991008043289185, + "epoch": 0.11877201811776547, + "kl_loss": 0.37930434942245483, + "loss_ib": 0.00503533473238349, + "step": 413 + }, + { + "ce_ib": 11.024622917175293, + "ce_orig": 0.8313679099082947, + "epoch": 0.11877201811776547, + "kl_loss": 0.18216568231582642, + "loss_ib": 0.002924119122326374, + "step": 413 + }, + { + "ce_ib": 12.971683502197266, + "ce_orig": 0.9980677366256714, + "epoch": 0.11877201811776547, + "kl_loss": 0.28354763984680176, + "loss_ib": 0.004132644273340702, + "step": 413 + }, + { + "ce_ib": 10.75366497039795, + "ce_orig": 1.0008810758590698, + "epoch": 0.11905960169674312, + "kl_loss": 0.21501143276691437, + "loss_ib": 0.00322548090480268, + "step": 414 + }, + { + "ce_ib": 15.019447326660156, + "ce_orig": 1.0870994329452515, + "epoch": 0.11905960169674312, + "kl_loss": 0.18550065159797668, + "loss_ib": 0.003356951056048274, + "step": 414 + }, + { + "ce_ib": 13.59741497039795, + "ce_orig": 1.1491084098815918, + "epoch": 0.11905960169674312, + "kl_loss": 0.31042391061782837, + "loss_ib": 0.004463980905711651, + "step": 414 + }, + { + "ce_ib": 11.30091667175293, + "ce_orig": 0.7725252509117126, + "epoch": 0.11905960169674312, + "kl_loss": 0.19686946272850037, + "loss_ib": 0.003098786110058427, + "step": 414 + }, + { + "epoch": 0.11934718527572076, + "grad_norm": 0.0853128507733345, + "learning_rate": 4.998912912587444e-05, + "loss": 0.8496, + "step": 415 + }, + { + "ce_ib": 10.824992179870605, + "ce_orig": 1.003801703453064, + "epoch": 0.11934718527572076, + "kl_loss": 0.11075378954410553, + "loss_ib": 0.0021900369320064783, + "step": 415 + }, + { + "ce_ib": 14.990334510803223, + "ce_orig": 1.1157439947128296, + "epoch": 0.11934718527572076, + "kl_loss": 0.18331551551818848, + "loss_ib": 0.0033321885857731104, + "step": 415 + }, + { + "ce_ib": 10.030242919921875, + "ce_orig": 0.7444831728935242, + "epoch": 0.11934718527572076, + "kl_loss": 0.2142019271850586, + "loss_ib": 0.0031450435053557158, + "step": 415 + }, + { + "ce_ib": 7.975912570953369, + "ce_orig": 0.713485062122345, + "epoch": 0.11934718527572076, + "kl_loss": 0.20920607447624207, + "loss_ib": 0.00288965180516243, + "step": 415 + }, + { + "ce_ib": 12.238396644592285, + "ce_orig": 1.1776320934295654, + "epoch": 0.1196347688546984, + "kl_loss": 0.21955502033233643, + "loss_ib": 0.0034193897154182196, + "step": 416 + }, + { + "ce_ib": 13.989873886108398, + "ce_orig": 1.128517746925354, + "epoch": 0.1196347688546984, + "kl_loss": 0.2628554701805115, + "loss_ib": 0.0040275417268276215, + "step": 416 + }, + { + "ce_ib": 15.87778091430664, + "ce_orig": 1.3535970449447632, + "epoch": 0.1196347688546984, + "kl_loss": 0.197160542011261, + "loss_ib": 0.0035593833308666945, + "step": 416 + }, + { + "ce_ib": 8.849984169006348, + "ce_orig": 0.872184157371521, + "epoch": 0.1196347688546984, + "kl_loss": 0.1935926228761673, + "loss_ib": 0.0028209243901073933, + "step": 416 + }, + { + "ce_ib": 9.946513175964355, + "ce_orig": 0.5356448292732239, + "epoch": 0.11992235243367604, + "kl_loss": 0.2691783308982849, + "loss_ib": 0.0036864345893263817, + "step": 417 + }, + { + "ce_ib": 13.770381927490234, + "ce_orig": 1.427547812461853, + "epoch": 0.11992235243367604, + "kl_loss": 0.1527450531721115, + "loss_ib": 0.002904488705098629, + "step": 417 + }, + { + "ce_ib": 12.310346603393555, + "ce_orig": 1.485551118850708, + "epoch": 0.11992235243367604, + "kl_loss": 0.1050770953297615, + "loss_ib": 0.0022818055003881454, + "step": 417 + }, + { + "ce_ib": 14.205962181091309, + "ce_orig": 1.5815967321395874, + "epoch": 0.11992235243367604, + "kl_loss": 0.26177799701690674, + "loss_ib": 0.004038376267999411, + "step": 417 + }, + { + "ce_ib": 9.17827320098877, + "ce_orig": 0.5437095165252686, + "epoch": 0.12020993601265367, + "kl_loss": 0.17721109092235565, + "loss_ib": 0.0026899382937699556, + "step": 418 + }, + { + "ce_ib": 15.857308387756348, + "ce_orig": 0.8890218138694763, + "epoch": 0.12020993601265367, + "kl_loss": 0.35342293977737427, + "loss_ib": 0.0051199602894485, + "step": 418 + }, + { + "ce_ib": 12.519335746765137, + "ce_orig": 0.40189069509506226, + "epoch": 0.12020993601265367, + "kl_loss": 0.19997818768024445, + "loss_ib": 0.0032517153304070234, + "step": 418 + }, + { + "ce_ib": 10.383864402770996, + "ce_orig": 0.9198765754699707, + "epoch": 0.12020993601265367, + "kl_loss": 0.1422748863697052, + "loss_ib": 0.0024611353874206543, + "step": 418 + }, + { + "ce_ib": 4.41956090927124, + "ce_orig": 0.15882746875286102, + "epoch": 0.12049751959163132, + "kl_loss": 0.42377138137817383, + "loss_ib": 0.004679669626057148, + "step": 419 + }, + { + "ce_ib": 8.898272514343262, + "ce_orig": 0.6479420065879822, + "epoch": 0.12049751959163132, + "kl_loss": 0.14590340852737427, + "loss_ib": 0.002348861424252391, + "step": 419 + }, + { + "ce_ib": 11.02161693572998, + "ce_orig": 0.6498445272445679, + "epoch": 0.12049751959163132, + "kl_loss": 0.18951579928398132, + "loss_ib": 0.0029973196797072887, + "step": 419 + }, + { + "ce_ib": 13.597489356994629, + "ce_orig": 1.474985122680664, + "epoch": 0.12049751959163132, + "kl_loss": 0.2165094017982483, + "loss_ib": 0.0035248426720499992, + "step": 419 + }, + { + "epoch": 0.12078510317060896, + "grad_norm": 0.09395145624876022, + "learning_rate": 4.998795480442595e-05, + "loss": 0.919, + "step": 420 + }, + { + "ce_ib": 13.930419921875, + "ce_orig": 1.4253751039505005, + "epoch": 0.12078510317060896, + "kl_loss": 0.25621849298477173, + "loss_ib": 0.003955226857215166, + "step": 420 + }, + { + "ce_ib": 13.867815017700195, + "ce_orig": 1.0413322448730469, + "epoch": 0.12078510317060896, + "kl_loss": 0.1656143069267273, + "loss_ib": 0.003042924450710416, + "step": 420 + }, + { + "ce_ib": 9.55808162689209, + "ce_orig": 0.8998080492019653, + "epoch": 0.12078510317060896, + "kl_loss": 0.2003626972436905, + "loss_ib": 0.002959434874355793, + "step": 420 + }, + { + "ce_ib": 6.539864540100098, + "ce_orig": 0.4415733814239502, + "epoch": 0.12078510317060896, + "kl_loss": 0.16704899072647095, + "loss_ib": 0.0023244761396199465, + "step": 420 + }, + { + "ce_ib": 13.084672927856445, + "ce_orig": 0.9477989077568054, + "epoch": 0.1210726867495866, + "kl_loss": 0.1585165113210678, + "loss_ib": 0.0028936322778463364, + "step": 421 + }, + { + "ce_ib": 10.002495765686035, + "ce_orig": 0.7920336127281189, + "epoch": 0.1210726867495866, + "kl_loss": 0.23921585083007812, + "loss_ib": 0.0033924079034477472, + "step": 421 + }, + { + "ce_ib": 9.44317626953125, + "ce_orig": 0.7052544355392456, + "epoch": 0.1210726867495866, + "kl_loss": 0.1775376945734024, + "loss_ib": 0.002719694282859564, + "step": 421 + }, + { + "ce_ib": 6.67018985748291, + "ce_orig": 0.35514646768569946, + "epoch": 0.1210726867495866, + "kl_loss": 0.27639925479888916, + "loss_ib": 0.0034310114569962025, + "step": 421 + }, + { + "ce_ib": 9.008115768432617, + "ce_orig": 1.0835165977478027, + "epoch": 0.12136027032856424, + "kl_loss": 0.1306457817554474, + "loss_ib": 0.002207269426435232, + "step": 422 + }, + { + "ce_ib": 9.596372604370117, + "ce_orig": 0.6315584182739258, + "epoch": 0.12136027032856424, + "kl_loss": 0.16708403825759888, + "loss_ib": 0.002630477538332343, + "step": 422 + }, + { + "ce_ib": 10.36826229095459, + "ce_orig": 0.8352252840995789, + "epoch": 0.12136027032856424, + "kl_loss": 0.4174191355705261, + "loss_ib": 0.005211017560213804, + "step": 422 + }, + { + "ce_ib": 6.275569438934326, + "ce_orig": 0.6971433758735657, + "epoch": 0.12136027032856424, + "kl_loss": 0.10864199697971344, + "loss_ib": 0.001713976846076548, + "step": 422 + }, + { + "ce_ib": 11.055356979370117, + "ce_orig": 0.711353600025177, + "epoch": 0.12164785390754188, + "kl_loss": 0.223836749792099, + "loss_ib": 0.0033439029939472675, + "step": 423 + }, + { + "ce_ib": 12.989297866821289, + "ce_orig": 0.8257631063461304, + "epoch": 0.12164785390754188, + "kl_loss": 0.21258409321308136, + "loss_ib": 0.003424770664423704, + "step": 423 + }, + { + "ce_ib": 7.487967014312744, + "ce_orig": 0.5072060227394104, + "epoch": 0.12164785390754188, + "kl_loss": 0.15735791623592377, + "loss_ib": 0.002322375774383545, + "step": 423 + }, + { + "ce_ib": 13.437426567077637, + "ce_orig": 0.9563009142875671, + "epoch": 0.12164785390754188, + "kl_loss": 0.1478041410446167, + "loss_ib": 0.0028217840008437634, + "step": 423 + }, + { + "ce_ib": 12.20171070098877, + "ce_orig": 0.7322474718093872, + "epoch": 0.12193543748651951, + "kl_loss": 0.2593429684638977, + "loss_ib": 0.003813600866124034, + "step": 424 + }, + { + "ce_ib": 10.579559326171875, + "ce_orig": 0.5674452781677246, + "epoch": 0.12193543748651951, + "kl_loss": 0.24632222950458527, + "loss_ib": 0.0035211783833801746, + "step": 424 + }, + { + "ce_ib": 12.876055717468262, + "ce_orig": 1.14524245262146, + "epoch": 0.12193543748651951, + "kl_loss": 0.21795441210269928, + "loss_ib": 0.003467149566859007, + "step": 424 + }, + { + "ce_ib": 8.761207580566406, + "ce_orig": 0.9090970754623413, + "epoch": 0.12193543748651951, + "kl_loss": 0.15418817102909088, + "loss_ib": 0.0024180023465305567, + "step": 424 + }, + { + "epoch": 0.12222302106549716, + "grad_norm": 0.07512833178043365, + "learning_rate": 4.9986720281194496e-05, + "loss": 0.8761, + "step": 425 + }, + { + "ce_ib": 7.6993608474731445, + "ce_orig": 0.40163472294807434, + "epoch": 0.12222302106549716, + "kl_loss": 0.20606671273708344, + "loss_ib": 0.0028306031599640846, + "step": 425 + }, + { + "ce_ib": 10.682936668395996, + "ce_orig": 0.752029538154602, + "epoch": 0.12222302106549716, + "kl_loss": 0.17551946640014648, + "loss_ib": 0.0028234883211553097, + "step": 425 + }, + { + "ce_ib": 10.21149730682373, + "ce_orig": 0.8434242010116577, + "epoch": 0.12222302106549716, + "kl_loss": 0.19770103693008423, + "loss_ib": 0.0029981599655002356, + "step": 425 + }, + { + "ce_ib": 9.649618148803711, + "ce_orig": 0.6008751392364502, + "epoch": 0.12222302106549716, + "kl_loss": 0.2020394206047058, + "loss_ib": 0.0029853556770831347, + "step": 425 + }, + { + "ce_ib": 11.479440689086914, + "ce_orig": 0.9273905158042908, + "epoch": 0.1225106046444748, + "kl_loss": 0.15146127343177795, + "loss_ib": 0.0026625567115843296, + "step": 426 + }, + { + "ce_ib": 7.9811859130859375, + "ce_orig": 0.25573351979255676, + "epoch": 0.1225106046444748, + "kl_loss": 0.23791244626045227, + "loss_ib": 0.0031772428192198277, + "step": 426 + }, + { + "ce_ib": 10.76065444946289, + "ce_orig": 0.8275227546691895, + "epoch": 0.1225106046444748, + "kl_loss": 0.2022540271282196, + "loss_ib": 0.0030986058991402388, + "step": 426 + }, + { + "ce_ib": 10.59325885772705, + "ce_orig": 0.6065911650657654, + "epoch": 0.1225106046444748, + "kl_loss": 0.2510051429271698, + "loss_ib": 0.003569377353414893, + "step": 426 + }, + { + "ce_ib": 9.989922523498535, + "ce_orig": 0.6457223296165466, + "epoch": 0.12279818822345244, + "kl_loss": 0.41983091831207275, + "loss_ib": 0.005197301506996155, + "step": 427 + }, + { + "ce_ib": 14.872665405273438, + "ce_orig": 1.3183645009994507, + "epoch": 0.12279818822345244, + "kl_loss": 0.25291332602500916, + "loss_ib": 0.004016399849206209, + "step": 427 + }, + { + "ce_ib": 14.173356056213379, + "ce_orig": 0.860676646232605, + "epoch": 0.12279818822345244, + "kl_loss": 0.22176355123519897, + "loss_ib": 0.0036349711008369923, + "step": 427 + }, + { + "ce_ib": 12.447295188903809, + "ce_orig": 1.0465346574783325, + "epoch": 0.12279818822345244, + "kl_loss": 0.15125788748264313, + "loss_ib": 0.00275730830617249, + "step": 427 + }, + { + "ce_ib": 11.742615699768066, + "ce_orig": 0.8324576616287231, + "epoch": 0.12308577180243008, + "kl_loss": 0.26873379945755005, + "loss_ib": 0.0038615993689745665, + "step": 428 + }, + { + "ce_ib": 15.946845054626465, + "ce_orig": 1.4635050296783447, + "epoch": 0.12308577180243008, + "kl_loss": 0.3334965407848358, + "loss_ib": 0.004929649643599987, + "step": 428 + }, + { + "ce_ib": 11.777470588684082, + "ce_orig": 0.8487641215324402, + "epoch": 0.12308577180243008, + "kl_loss": 0.18009008467197418, + "loss_ib": 0.0029786478262394667, + "step": 428 + }, + { + "ce_ib": 15.157903671264648, + "ce_orig": 1.4350470304489136, + "epoch": 0.12308577180243008, + "kl_loss": 0.16165342926979065, + "loss_ib": 0.003132324665784836, + "step": 428 + }, + { + "ce_ib": 6.574849605560303, + "ce_orig": 0.35640019178390503, + "epoch": 0.12337335538140771, + "kl_loss": 0.16976439952850342, + "loss_ib": 0.0023551289923489094, + "step": 429 + }, + { + "ce_ib": 11.606804847717285, + "ce_orig": 0.7963778972625732, + "epoch": 0.12337335538140771, + "kl_loss": 0.17389288544654846, + "loss_ib": 0.002899609040468931, + "step": 429 + }, + { + "ce_ib": 11.487293243408203, + "ce_orig": 1.072171926498413, + "epoch": 0.12337335538140771, + "kl_loss": 0.19142760336399078, + "loss_ib": 0.0030630051624029875, + "step": 429 + }, + { + "ce_ib": 7.667901992797852, + "ce_orig": 0.5443602204322815, + "epoch": 0.12337335538140771, + "kl_loss": 0.1836308240890503, + "loss_ib": 0.0026030982844531536, + "step": 429 + }, + { + "epoch": 0.12366093896038537, + "grad_norm": 0.08391211926937103, + "learning_rate": 4.998542555915435e-05, + "loss": 0.9167, + "step": 430 + }, + { + "ce_ib": 11.157709121704102, + "ce_orig": 0.8726559281349182, + "epoch": 0.12366093896038537, + "kl_loss": 0.19528821110725403, + "loss_ib": 0.0030686529353260994, + "step": 430 + }, + { + "ce_ib": 12.455047607421875, + "ce_orig": 1.3305251598358154, + "epoch": 0.12366093896038537, + "kl_loss": 0.11633329838514328, + "loss_ib": 0.002408837666735053, + "step": 430 + }, + { + "ce_ib": 17.702678680419922, + "ce_orig": 1.9683383703231812, + "epoch": 0.12366093896038537, + "kl_loss": 0.21032744646072388, + "loss_ib": 0.003873542184010148, + "step": 430 + }, + { + "ce_ib": 11.364348411560059, + "ce_orig": 1.2615947723388672, + "epoch": 0.12366093896038537, + "kl_loss": 0.15659835934638977, + "loss_ib": 0.0027024184819310904, + "step": 430 + }, + { + "ce_ib": 10.107429504394531, + "ce_orig": 0.8148799538612366, + "epoch": 0.123948522539363, + "kl_loss": 0.13077875971794128, + "loss_ib": 0.0023185305763036013, + "step": 431 + }, + { + "ce_ib": 11.587873458862305, + "ce_orig": 1.074462890625, + "epoch": 0.123948522539363, + "kl_loss": 0.24695566296577454, + "loss_ib": 0.0036283438093960285, + "step": 431 + }, + { + "ce_ib": 10.614514350891113, + "ce_orig": 0.9438521862030029, + "epoch": 0.123948522539363, + "kl_loss": 0.2546341121196747, + "loss_ib": 0.0036077925469726324, + "step": 431 + }, + { + "ce_ib": 10.189565658569336, + "ce_orig": 0.468057245016098, + "epoch": 0.123948522539363, + "kl_loss": 0.21918027102947235, + "loss_ib": 0.003210759023204446, + "step": 431 + }, + { + "ce_ib": 16.234272003173828, + "ce_orig": 1.4394607543945312, + "epoch": 0.12423610611834064, + "kl_loss": 0.16391563415527344, + "loss_ib": 0.0032625836320221424, + "step": 432 + }, + { + "ce_ib": 13.389922142028809, + "ce_orig": 1.0571950674057007, + "epoch": 0.12423610611834064, + "kl_loss": 0.25300243496894836, + "loss_ib": 0.003869016421958804, + "step": 432 + }, + { + "ce_ib": 11.05131721496582, + "ce_orig": 0.8905650973320007, + "epoch": 0.12423610611834064, + "kl_loss": 0.1906907856464386, + "loss_ib": 0.0030120396986603737, + "step": 432 + }, + { + "ce_ib": 8.993093490600586, + "ce_orig": 0.7904607653617859, + "epoch": 0.12423610611834064, + "kl_loss": 0.18500834703445435, + "loss_ib": 0.002749392529949546, + "step": 432 + }, + { + "ce_ib": 13.643828392028809, + "ce_orig": 0.8764438629150391, + "epoch": 0.12452368969731828, + "kl_loss": 0.25997138023376465, + "loss_ib": 0.003964096307754517, + "step": 433 + }, + { + "ce_ib": 6.452097415924072, + "ce_orig": 0.5795682072639465, + "epoch": 0.12452368969731828, + "kl_loss": 0.1325344741344452, + "loss_ib": 0.001970554469153285, + "step": 433 + }, + { + "ce_ib": 10.294032096862793, + "ce_orig": 0.34366491436958313, + "epoch": 0.12452368969731828, + "kl_loss": 0.12770652770996094, + "loss_ib": 0.0023064683191478252, + "step": 433 + }, + { + "ce_ib": 8.313075065612793, + "ce_orig": 0.6022379398345947, + "epoch": 0.12452368969731828, + "kl_loss": 0.11339238286018372, + "loss_ib": 0.0019652312621474266, + "step": 433 + }, + { + "ce_ib": 14.330761909484863, + "ce_orig": 0.8713304996490479, + "epoch": 0.12481127327629592, + "kl_loss": 0.22767247259616852, + "loss_ib": 0.003709800774231553, + "step": 434 + }, + { + "ce_ib": 9.519655227661133, + "ce_orig": 0.5508330464363098, + "epoch": 0.12481127327629592, + "kl_loss": 0.1265774965286255, + "loss_ib": 0.002217740286141634, + "step": 434 + }, + { + "ce_ib": 12.286112785339355, + "ce_orig": 0.8811700344085693, + "epoch": 0.12481127327629592, + "kl_loss": 0.17079584300518036, + "loss_ib": 0.0029365697409957647, + "step": 434 + }, + { + "ce_ib": 8.997398376464844, + "ce_orig": 0.48554953932762146, + "epoch": 0.12481127327629592, + "kl_loss": 0.13161113858222961, + "loss_ib": 0.0022158510982990265, + "step": 434 + }, + { + "epoch": 0.12509885685527355, + "grad_norm": 0.08008844405412674, + "learning_rate": 4.99840706414248e-05, + "loss": 0.8499, + "step": 435 + }, + { + "ce_ib": 12.028959274291992, + "ce_orig": 0.7656774520874023, + "epoch": 0.12509885685527355, + "kl_loss": 0.18438570201396942, + "loss_ib": 0.0030467526521533728, + "step": 435 + }, + { + "ce_ib": 9.531744003295898, + "ce_orig": 0.7000839710235596, + "epoch": 0.12509885685527355, + "kl_loss": 0.1339460015296936, + "loss_ib": 0.002292634453624487, + "step": 435 + }, + { + "ce_ib": 7.86661958694458, + "ce_orig": 0.47849422693252563, + "epoch": 0.12509885685527355, + "kl_loss": 0.20920854806900024, + "loss_ib": 0.0028787474147975445, + "step": 435 + }, + { + "ce_ib": 10.296324729919434, + "ce_orig": 1.1329785585403442, + "epoch": 0.12509885685527355, + "kl_loss": 0.17465677857398987, + "loss_ib": 0.002776200184598565, + "step": 435 + }, + { + "ce_ib": 8.947026252746582, + "ce_orig": 0.9858295321464539, + "epoch": 0.1253864404342512, + "kl_loss": 0.22594591975212097, + "loss_ib": 0.0031541618518531322, + "step": 436 + }, + { + "ce_ib": 10.008584022521973, + "ce_orig": 0.7601190805435181, + "epoch": 0.1253864404342512, + "kl_loss": 0.18392398953437805, + "loss_ib": 0.0028400979936122894, + "step": 436 + }, + { + "ce_ib": 18.1694393157959, + "ce_orig": 1.7608329057693481, + "epoch": 0.1253864404342512, + "kl_loss": 0.3656727969646454, + "loss_ib": 0.005473671946674585, + "step": 436 + }, + { + "ce_ib": 10.617599487304688, + "ce_orig": 0.7405425906181335, + "epoch": 0.1253864404342512, + "kl_loss": 0.23880283534526825, + "loss_ib": 0.003449788084253669, + "step": 436 + }, + { + "ce_ib": 7.292166233062744, + "ce_orig": 0.321105033159256, + "epoch": 0.12567402401322886, + "kl_loss": 0.573823094367981, + "loss_ib": 0.006467447150498629, + "step": 437 + }, + { + "ce_ib": 12.345170021057129, + "ce_orig": 1.1237767934799194, + "epoch": 0.12567402401322886, + "kl_loss": 0.12964360415935516, + "loss_ib": 0.0025309529155492783, + "step": 437 + }, + { + "ce_ib": 9.116506576538086, + "ce_orig": 0.351901113986969, + "epoch": 0.12567402401322886, + "kl_loss": 0.1449497938156128, + "loss_ib": 0.0023611485958099365, + "step": 437 + }, + { + "ce_ib": 9.928841590881348, + "ce_orig": 0.5612432956695557, + "epoch": 0.12567402401322886, + "kl_loss": 0.17729271948337555, + "loss_ib": 0.0027658112812787294, + "step": 437 + }, + { + "ce_ib": 8.56289005279541, + "ce_orig": 0.5716758966445923, + "epoch": 0.12596160759220648, + "kl_loss": 0.1342535763978958, + "loss_ib": 0.0021988246589899063, + "step": 438 + }, + { + "ce_ib": 13.346972465515137, + "ce_orig": 0.7096478939056396, + "epoch": 0.12596160759220648, + "kl_loss": 0.1945207715034485, + "loss_ib": 0.0032799046020954847, + "step": 438 + }, + { + "ce_ib": 5.58247709274292, + "ce_orig": 0.38993921875953674, + "epoch": 0.12596160759220648, + "kl_loss": 0.3592712879180908, + "loss_ib": 0.004150960128754377, + "step": 438 + }, + { + "ce_ib": 10.52940845489502, + "ce_orig": 0.3933676481246948, + "epoch": 0.12596160759220648, + "kl_loss": 0.13906532526016235, + "loss_ib": 0.0024435939267277718, + "step": 438 + }, + { + "ce_ib": 10.115212440490723, + "ce_orig": 0.6660525798797607, + "epoch": 0.12624919117118413, + "kl_loss": 0.28316277265548706, + "loss_ib": 0.003843148937448859, + "step": 439 + }, + { + "ce_ib": 11.30185317993164, + "ce_orig": 0.8098467588424683, + "epoch": 0.12624919117118413, + "kl_loss": 0.16112932562828064, + "loss_ib": 0.002741478616371751, + "step": 439 + }, + { + "ce_ib": 7.795021057128906, + "ce_orig": 0.6182560324668884, + "epoch": 0.12624919117118413, + "kl_loss": 0.3065950274467468, + "loss_ib": 0.003845452331006527, + "step": 439 + }, + { + "ce_ib": 13.468255996704102, + "ce_orig": 0.9566720128059387, + "epoch": 0.12624919117118413, + "kl_loss": 0.2001672387123108, + "loss_ib": 0.003348497673869133, + "step": 439 + }, + { + "epoch": 0.12653677475016176, + "grad_norm": 0.08590603619813919, + "learning_rate": 4.998265553127013e-05, + "loss": 0.8382, + "step": 440 + }, + { + "ce_ib": 11.135417938232422, + "ce_orig": 0.9260854125022888, + "epoch": 0.12653677475016176, + "kl_loss": 0.15384265780448914, + "loss_ib": 0.0026519682724028826, + "step": 440 + }, + { + "ce_ib": 11.6417818069458, + "ce_orig": 0.7246519923210144, + "epoch": 0.12653677475016176, + "kl_loss": 0.18312156200408936, + "loss_ib": 0.002995393704622984, + "step": 440 + }, + { + "ce_ib": 11.070989608764648, + "ce_orig": 1.1319186687469482, + "epoch": 0.12653677475016176, + "kl_loss": 0.18624573945999146, + "loss_ib": 0.002969556488096714, + "step": 440 + }, + { + "ce_ib": 15.00080394744873, + "ce_orig": 1.5474122762680054, + "epoch": 0.12653677475016176, + "kl_loss": 0.17769742012023926, + "loss_ib": 0.003277054289355874, + "step": 440 + }, + { + "ce_ib": 12.565924644470215, + "ce_orig": 0.9930309057235718, + "epoch": 0.1268243583291394, + "kl_loss": 0.22812071442604065, + "loss_ib": 0.003537799697369337, + "step": 441 + }, + { + "ce_ib": 8.496469497680664, + "ce_orig": 0.7013565301895142, + "epoch": 0.1268243583291394, + "kl_loss": 0.1900731325149536, + "loss_ib": 0.0027503781020641327, + "step": 441 + }, + { + "ce_ib": 9.66351318359375, + "ce_orig": 0.984688401222229, + "epoch": 0.1268243583291394, + "kl_loss": 0.11712448298931122, + "loss_ib": 0.00213759602047503, + "step": 441 + }, + { + "ce_ib": 11.531122207641602, + "ce_orig": 0.8578464388847351, + "epoch": 0.1268243583291394, + "kl_loss": 0.22207866609096527, + "loss_ib": 0.003373898798599839, + "step": 441 + }, + { + "ce_ib": 11.552083015441895, + "ce_orig": 1.012474775314331, + "epoch": 0.12711194190811706, + "kl_loss": 0.19562029838562012, + "loss_ib": 0.003111411351710558, + "step": 442 + }, + { + "ce_ib": 8.36318588256836, + "ce_orig": 0.7492285966873169, + "epoch": 0.12711194190811706, + "kl_loss": 0.16260814666748047, + "loss_ib": 0.0024623998906463385, + "step": 442 + }, + { + "ce_ib": 9.527094841003418, + "ce_orig": 0.7202103137969971, + "epoch": 0.12711194190811706, + "kl_loss": 0.128434419631958, + "loss_ib": 0.0022370535880327225, + "step": 442 + }, + { + "ce_ib": 11.373018264770508, + "ce_orig": 0.8286006450653076, + "epoch": 0.12711194190811706, + "kl_loss": 0.22360824048519135, + "loss_ib": 0.0033733840100467205, + "step": 442 + }, + { + "ce_ib": 12.500737190246582, + "ce_orig": 1.1852082014083862, + "epoch": 0.12739952548709468, + "kl_loss": 0.17222145199775696, + "loss_ib": 0.0029722880572080612, + "step": 443 + }, + { + "ce_ib": 14.716822624206543, + "ce_orig": 1.8037943840026855, + "epoch": 0.12739952548709468, + "kl_loss": 0.22250494360923767, + "loss_ib": 0.003696731524541974, + "step": 443 + }, + { + "ce_ib": 13.993595123291016, + "ce_orig": 1.002560019493103, + "epoch": 0.12739952548709468, + "kl_loss": 0.1844272017478943, + "loss_ib": 0.0032436316832900047, + "step": 443 + }, + { + "ce_ib": 9.316429138183594, + "ce_orig": 0.4433915317058563, + "epoch": 0.12739952548709468, + "kl_loss": 0.3312546908855438, + "loss_ib": 0.0042441897094249725, + "step": 443 + }, + { + "ce_ib": 10.669767379760742, + "ce_orig": 0.8024405837059021, + "epoch": 0.12768710906607234, + "kl_loss": 0.18377812206745148, + "loss_ib": 0.002904757857322693, + "step": 444 + }, + { + "ce_ib": 8.749341011047363, + "ce_orig": 0.44320985674858093, + "epoch": 0.12768710906607234, + "kl_loss": 0.2103624790906906, + "loss_ib": 0.002978558884933591, + "step": 444 + }, + { + "ce_ib": 6.491762161254883, + "ce_orig": 0.7110791802406311, + "epoch": 0.12768710906607234, + "kl_loss": 0.12732303142547607, + "loss_ib": 0.0019224064890295267, + "step": 444 + }, + { + "ce_ib": 14.327024459838867, + "ce_orig": 1.2938189506530762, + "epoch": 0.12768710906607234, + "kl_loss": 0.1754641830921173, + "loss_ib": 0.003187343943864107, + "step": 444 + }, + { + "epoch": 0.12797469264504996, + "grad_norm": 0.07503530383110046, + "learning_rate": 4.99811802320997e-05, + "loss": 0.8966, + "step": 445 + }, + { + "ce_ib": 12.345433235168457, + "ce_orig": 0.8386138677597046, + "epoch": 0.12797469264504996, + "kl_loss": 0.31317782402038574, + "loss_ib": 0.004366321489214897, + "step": 445 + }, + { + "ce_ib": 11.949541091918945, + "ce_orig": 0.5236607193946838, + "epoch": 0.12797469264504996, + "kl_loss": 0.19711901247501373, + "loss_ib": 0.0031661440152674913, + "step": 445 + }, + { + "ce_ib": 10.233663558959961, + "ce_orig": 0.6846522688865662, + "epoch": 0.12797469264504996, + "kl_loss": 0.16328191757202148, + "loss_ib": 0.002656185533851385, + "step": 445 + }, + { + "ce_ib": 8.310503959655762, + "ce_orig": 0.7847996950149536, + "epoch": 0.12797469264504996, + "kl_loss": 0.137738898396492, + "loss_ib": 0.002208439400419593, + "step": 445 + }, + { + "ce_ib": 6.889098167419434, + "ce_orig": 0.5118075609207153, + "epoch": 0.1282622762240276, + "kl_loss": 0.16013775765895844, + "loss_ib": 0.0022902872879058123, + "step": 446 + }, + { + "ce_ib": 14.948297500610352, + "ce_orig": 1.007593035697937, + "epoch": 0.1282622762240276, + "kl_loss": 0.17850220203399658, + "loss_ib": 0.0032798515167087317, + "step": 446 + }, + { + "ce_ib": 10.017216682434082, + "ce_orig": 1.0469541549682617, + "epoch": 0.1282622762240276, + "kl_loss": 0.15087240934371948, + "loss_ib": 0.0025104456581175327, + "step": 446 + }, + { + "ce_ib": 9.104086875915527, + "ce_orig": 0.5955268144607544, + "epoch": 0.1282622762240276, + "kl_loss": 0.2088235318660736, + "loss_ib": 0.002998644020408392, + "step": 446 + }, + { + "ce_ib": 7.966549396514893, + "ce_orig": 0.7193945646286011, + "epoch": 0.12854985980300526, + "kl_loss": 0.17041301727294922, + "loss_ib": 0.0025007850490510464, + "step": 447 + }, + { + "ce_ib": 12.620382308959961, + "ce_orig": 0.6646131873130798, + "epoch": 0.12854985980300526, + "kl_loss": 0.21500855684280396, + "loss_ib": 0.0034121237695217133, + "step": 447 + }, + { + "ce_ib": 5.8810319900512695, + "ce_orig": 0.6042389869689941, + "epoch": 0.12854985980300526, + "kl_loss": 0.16328245401382446, + "loss_ib": 0.0022209277376532555, + "step": 447 + }, + { + "ce_ib": 13.061161994934082, + "ce_orig": 0.9521631598472595, + "epoch": 0.12854985980300526, + "kl_loss": 0.17168429493904114, + "loss_ib": 0.0030229592230170965, + "step": 447 + }, + { + "ce_ib": 8.089184761047363, + "ce_orig": 0.7481786608695984, + "epoch": 0.12883744338198289, + "kl_loss": 0.15286147594451904, + "loss_ib": 0.0023375332821160555, + "step": 448 + }, + { + "ce_ib": 7.247897624969482, + "ce_orig": 0.7153653502464294, + "epoch": 0.12883744338198289, + "kl_loss": 0.16917859017848969, + "loss_ib": 0.0024165755603462458, + "step": 448 + }, + { + "ce_ib": 9.982969284057617, + "ce_orig": 0.8300044536590576, + "epoch": 0.12883744338198289, + "kl_loss": 0.1288834512233734, + "loss_ib": 0.002287131268531084, + "step": 448 + }, + { + "ce_ib": 12.154969215393066, + "ce_orig": 0.8374365568161011, + "epoch": 0.12883744338198289, + "kl_loss": 0.2313854843378067, + "loss_ib": 0.003529351670295, + "step": 448 + }, + { + "ce_ib": 7.137256622314453, + "ce_orig": 0.24174675345420837, + "epoch": 0.12912502696096054, + "kl_loss": 0.4337605834007263, + "loss_ib": 0.005051331594586372, + "step": 449 + }, + { + "ce_ib": 13.23520565032959, + "ce_orig": 0.9527087807655334, + "epoch": 0.12912502696096054, + "kl_loss": 0.14814800024032593, + "loss_ib": 0.002805000403895974, + "step": 449 + }, + { + "ce_ib": 9.36728572845459, + "ce_orig": 0.4500534236431122, + "epoch": 0.12912502696096054, + "kl_loss": 0.12568199634552002, + "loss_ib": 0.002193548483774066, + "step": 449 + }, + { + "ce_ib": 8.504453659057617, + "ce_orig": 0.9163048267364502, + "epoch": 0.12912502696096054, + "kl_loss": 0.15279194712638855, + "loss_ib": 0.0023783647920936346, + "step": 449 + }, + { + "epoch": 0.12941261053993816, + "grad_norm": 0.09120236337184906, + "learning_rate": 4.9979644747467835e-05, + "loss": 0.8569, + "step": 450 + }, + { + "ce_ib": 12.404295921325684, + "ce_orig": 0.7500115036964417, + "epoch": 0.12941261053993816, + "kl_loss": 0.18148070573806763, + "loss_ib": 0.0030552365351468325, + "step": 450 + }, + { + "ce_ib": 9.151787757873535, + "ce_orig": 0.4986724555492401, + "epoch": 0.12941261053993816, + "kl_loss": 0.1752292811870575, + "loss_ib": 0.0026674713008105755, + "step": 450 + }, + { + "ce_ib": 8.871505737304688, + "ce_orig": 0.6981923580169678, + "epoch": 0.12941261053993816, + "kl_loss": 0.14069266617298126, + "loss_ib": 0.00229407730512321, + "step": 450 + }, + { + "ce_ib": 8.211197853088379, + "ce_orig": 0.7622634172439575, + "epoch": 0.12941261053993816, + "kl_loss": 0.13836929202079773, + "loss_ib": 0.00220481283031404, + "step": 450 + }, + { + "ce_ib": 7.645545482635498, + "ce_orig": 0.5680274963378906, + "epoch": 0.1297001941189158, + "kl_loss": 0.18493801355361938, + "loss_ib": 0.0026139344554394484, + "step": 451 + }, + { + "ce_ib": 9.379469871520996, + "ce_orig": 0.5945746302604675, + "epoch": 0.1297001941189158, + "kl_loss": 0.15481063723564148, + "loss_ib": 0.0024860533885657787, + "step": 451 + }, + { + "ce_ib": 13.473487854003906, + "ce_orig": 0.9739643931388855, + "epoch": 0.1297001941189158, + "kl_loss": 0.2487793266773224, + "loss_ib": 0.0038351418916136026, + "step": 451 + }, + { + "ce_ib": 9.095867156982422, + "ce_orig": 0.7306302785873413, + "epoch": 0.1297001941189158, + "kl_loss": 0.1404043734073639, + "loss_ib": 0.002313630422577262, + "step": 451 + }, + { + "ce_ib": 8.908297538757324, + "ce_orig": 0.548682451248169, + "epoch": 0.12998777769789346, + "kl_loss": 0.18304413557052612, + "loss_ib": 0.002721270779147744, + "step": 452 + }, + { + "ce_ib": 12.341330528259277, + "ce_orig": 1.0068693161010742, + "epoch": 0.12998777769789346, + "kl_loss": 0.21775811910629272, + "loss_ib": 0.0034117139875888824, + "step": 452 + }, + { + "ce_ib": 7.933249473571777, + "ce_orig": 0.6957258582115173, + "epoch": 0.12998777769789346, + "kl_loss": 0.12882256507873535, + "loss_ib": 0.002081550657749176, + "step": 452 + }, + { + "ce_ib": 11.709693908691406, + "ce_orig": 0.7259184718132019, + "epoch": 0.12998777769789346, + "kl_loss": 0.20683181285858154, + "loss_ib": 0.003239287296310067, + "step": 452 + }, + { + "ce_ib": 9.15219783782959, + "ce_orig": 0.521969199180603, + "epoch": 0.1302753612768711, + "kl_loss": 0.16149017214775085, + "loss_ib": 0.0025301214773207903, + "step": 453 + }, + { + "ce_ib": 10.897204399108887, + "ce_orig": 0.9679385423660278, + "epoch": 0.1302753612768711, + "kl_loss": 0.1327183097600937, + "loss_ib": 0.0024169033858925104, + "step": 453 + }, + { + "ce_ib": 11.597270965576172, + "ce_orig": 0.9538221955299377, + "epoch": 0.1302753612768711, + "kl_loss": 0.13329055905342102, + "loss_ib": 0.002492632484063506, + "step": 453 + }, + { + "ce_ib": 14.650552749633789, + "ce_orig": 0.9307949542999268, + "epoch": 0.1302753612768711, + "kl_loss": 0.3576693534851074, + "loss_ib": 0.005041748750954866, + "step": 453 + }, + { + "ce_ib": 9.383811950683594, + "ce_orig": 0.7557501196861267, + "epoch": 0.13056294485584874, + "kl_loss": 0.1699744164943695, + "loss_ib": 0.0026381253264844418, + "step": 454 + }, + { + "ce_ib": 16.616901397705078, + "ce_orig": 1.1251425743103027, + "epoch": 0.13056294485584874, + "kl_loss": 0.260105699300766, + "loss_ib": 0.0042627472430467606, + "step": 454 + }, + { + "ce_ib": 9.158967018127441, + "ce_orig": 0.602447509765625, + "epoch": 0.13056294485584874, + "kl_loss": 0.12641242146492004, + "loss_ib": 0.0021800207905471325, + "step": 454 + }, + { + "ce_ib": 12.393420219421387, + "ce_orig": 1.04123055934906, + "epoch": 0.13056294485584874, + "kl_loss": 0.21238833665847778, + "loss_ib": 0.0033632253762334585, + "step": 454 + }, + { + "epoch": 0.13085052843482636, + "grad_norm": 0.09227544069290161, + "learning_rate": 4.997804908107387e-05, + "loss": 0.8765, + "step": 455 + }, + { + "ce_ib": 9.478934288024902, + "ce_orig": 0.8604238033294678, + "epoch": 0.13085052843482636, + "kl_loss": 0.09945785254240036, + "loss_ib": 0.0019424718338996172, + "step": 455 + }, + { + "ce_ib": 7.167737007141113, + "ce_orig": 0.7395135164260864, + "epoch": 0.13085052843482636, + "kl_loss": 0.18323373794555664, + "loss_ib": 0.0025491111446172, + "step": 455 + }, + { + "ce_ib": 9.015175819396973, + "ce_orig": 0.5233743786811829, + "epoch": 0.13085052843482636, + "kl_loss": 0.22085565328598022, + "loss_ib": 0.00311007397249341, + "step": 455 + }, + { + "ce_ib": 16.64781951904297, + "ce_orig": 1.744786024093628, + "epoch": 0.13085052843482636, + "kl_loss": 0.17802797257900238, + "loss_ib": 0.0034450613893568516, + "step": 455 + }, + { + "ce_ib": 10.087674140930176, + "ce_orig": 0.6486227512359619, + "epoch": 0.13113811201380401, + "kl_loss": 0.23665431141853333, + "loss_ib": 0.0033753104507923126, + "step": 456 + }, + { + "ce_ib": 12.518637657165527, + "ce_orig": 1.2168270349502563, + "epoch": 0.13113811201380401, + "kl_loss": 0.15110589563846588, + "loss_ib": 0.002762922551482916, + "step": 456 + }, + { + "ce_ib": 13.409689903259277, + "ce_orig": 1.1832727193832397, + "epoch": 0.13113811201380401, + "kl_loss": 0.14149300754070282, + "loss_ib": 0.0027558987494558096, + "step": 456 + }, + { + "ce_ib": 7.571767807006836, + "ce_orig": 0.4468522071838379, + "epoch": 0.13113811201380401, + "kl_loss": 0.2032560110092163, + "loss_ib": 0.002789736958220601, + "step": 456 + }, + { + "ce_ib": 16.250171661376953, + "ce_orig": 1.4707938432693481, + "epoch": 0.13142569559278164, + "kl_loss": 0.15541328489780426, + "loss_ib": 0.0031791499350219965, + "step": 457 + }, + { + "ce_ib": 10.158995628356934, + "ce_orig": 0.7189385890960693, + "epoch": 0.13142569559278164, + "kl_loss": 0.13689836859703064, + "loss_ib": 0.0023848831187933683, + "step": 457 + }, + { + "ce_ib": 13.75023078918457, + "ce_orig": 0.9671751856803894, + "epoch": 0.13142569559278164, + "kl_loss": 0.20036864280700684, + "loss_ib": 0.0033787095453590155, + "step": 457 + }, + { + "ce_ib": 12.164261817932129, + "ce_orig": 0.6848416328430176, + "epoch": 0.13142569559278164, + "kl_loss": 0.18254505097866058, + "loss_ib": 0.0030418764799833298, + "step": 457 + }, + { + "ce_ib": 9.728108406066895, + "ce_orig": 0.6200440526008606, + "epoch": 0.1317132791717593, + "kl_loss": 0.15019859373569489, + "loss_ib": 0.002474796725437045, + "step": 458 + }, + { + "ce_ib": 13.638520240783691, + "ce_orig": 1.041512370109558, + "epoch": 0.1317132791717593, + "kl_loss": 0.19027158617973328, + "loss_ib": 0.0032665678299963474, + "step": 458 + }, + { + "ce_ib": 12.803059577941895, + "ce_orig": 0.5324247479438782, + "epoch": 0.1317132791717593, + "kl_loss": 0.18024376034736633, + "loss_ib": 0.003082743613049388, + "step": 458 + }, + { + "ce_ib": 10.689647674560547, + "ce_orig": 0.8073751926422119, + "epoch": 0.1317132791717593, + "kl_loss": 0.12346737831830978, + "loss_ib": 0.002303638495504856, + "step": 458 + }, + { + "ce_ib": 5.675622463226318, + "ce_orig": 0.3405647575855255, + "epoch": 0.13200086275073694, + "kl_loss": 0.31278595328330994, + "loss_ib": 0.0036954216193407774, + "step": 459 + }, + { + "ce_ib": 15.82054615020752, + "ce_orig": 1.3749570846557617, + "epoch": 0.13200086275073694, + "kl_loss": 0.21644842624664307, + "loss_ib": 0.00374653865583241, + "step": 459 + }, + { + "ce_ib": 10.457240104675293, + "ce_orig": 0.5686371326446533, + "epoch": 0.13200086275073694, + "kl_loss": 0.19664248824119568, + "loss_ib": 0.0030121486634016037, + "step": 459 + }, + { + "ce_ib": 9.230670928955078, + "ce_orig": 0.669769823551178, + "epoch": 0.13200086275073694, + "kl_loss": 0.15697245299816132, + "loss_ib": 0.0024927917402237654, + "step": 459 + }, + { + "epoch": 0.13228844632971457, + "grad_norm": 0.08651373535394669, + "learning_rate": 4.997639323676214e-05, + "loss": 0.7999, + "step": 460 + }, + { + "ce_ib": 10.430649757385254, + "ce_orig": 0.5384361147880554, + "epoch": 0.13228844632971457, + "kl_loss": 0.240619957447052, + "loss_ib": 0.0034492644481360912, + "step": 460 + }, + { + "ce_ib": 11.414108276367188, + "ce_orig": 0.2244451642036438, + "epoch": 0.13228844632971457, + "kl_loss": 0.333604633808136, + "loss_ib": 0.004477457143366337, + "step": 460 + }, + { + "ce_ib": 8.57000732421875, + "ce_orig": 0.6421215534210205, + "epoch": 0.13228844632971457, + "kl_loss": 0.11957961320877075, + "loss_ib": 0.002052796771749854, + "step": 460 + }, + { + "ce_ib": 9.772636413574219, + "ce_orig": 0.7227917909622192, + "epoch": 0.13228844632971457, + "kl_loss": 0.1671256273984909, + "loss_ib": 0.002648519817739725, + "step": 460 + }, + { + "ce_ib": 6.091513156890869, + "ce_orig": 0.5446451902389526, + "epoch": 0.13257602990869222, + "kl_loss": 0.15295615792274475, + "loss_ib": 0.0021387129090726376, + "step": 461 + }, + { + "ce_ib": 10.357866287231445, + "ce_orig": 0.7494820356369019, + "epoch": 0.13257602990869222, + "kl_loss": 0.16398490965366364, + "loss_ib": 0.002675635740160942, + "step": 461 + }, + { + "ce_ib": 9.155532836914062, + "ce_orig": 0.6777340173721313, + "epoch": 0.13257602990869222, + "kl_loss": 0.16127201914787292, + "loss_ib": 0.0025282735005021095, + "step": 461 + }, + { + "ce_ib": 10.313591003417969, + "ce_orig": 0.7135111689567566, + "epoch": 0.13257602990869222, + "kl_loss": 0.22754691541194916, + "loss_ib": 0.0033068279735744, + "step": 461 + }, + { + "ce_ib": 8.072470664978027, + "ce_orig": 0.6382450461387634, + "epoch": 0.13286361348766984, + "kl_loss": 0.12815770506858826, + "loss_ib": 0.0020888240542262793, + "step": 462 + }, + { + "ce_ib": 11.419739723205566, + "ce_orig": 1.4712454080581665, + "epoch": 0.13286361348766984, + "kl_loss": 0.15876120328903198, + "loss_ib": 0.0027295860927551985, + "step": 462 + }, + { + "ce_ib": 11.649603843688965, + "ce_orig": 0.5335864424705505, + "epoch": 0.13286361348766984, + "kl_loss": 0.23220552504062653, + "loss_ib": 0.0034870156086981297, + "step": 462 + }, + { + "ce_ib": 13.856771469116211, + "ce_orig": 1.503601312637329, + "epoch": 0.13286361348766984, + "kl_loss": 0.22454500198364258, + "loss_ib": 0.003631127066910267, + "step": 462 + }, + { + "ce_ib": 11.232563018798828, + "ce_orig": 0.8185619115829468, + "epoch": 0.1331511970666475, + "kl_loss": 0.14658993482589722, + "loss_ib": 0.002589155687019229, + "step": 463 + }, + { + "ce_ib": 8.680917739868164, + "ce_orig": 0.897162914276123, + "epoch": 0.1331511970666475, + "kl_loss": 0.16236066818237305, + "loss_ib": 0.002491698367521167, + "step": 463 + }, + { + "ce_ib": 13.455824851989746, + "ce_orig": 0.7123657464981079, + "epoch": 0.1331511970666475, + "kl_loss": 0.1669284999370575, + "loss_ib": 0.0030148671939969063, + "step": 463 + }, + { + "ce_ib": 11.438149452209473, + "ce_orig": 0.6367284059524536, + "epoch": 0.1331511970666475, + "kl_loss": 0.17310284078121185, + "loss_ib": 0.0028748433105647564, + "step": 463 + }, + { + "ce_ib": 9.650132179260254, + "ce_orig": 0.877562403678894, + "epoch": 0.13343878064562514, + "kl_loss": 0.131558358669281, + "loss_ib": 0.0022805966436862946, + "step": 464 + }, + { + "ce_ib": 10.920985221862793, + "ce_orig": 0.9254859089851379, + "epoch": 0.13343878064562514, + "kl_loss": 0.15900883078575134, + "loss_ib": 0.002682186895981431, + "step": 464 + }, + { + "ce_ib": 7.144834518432617, + "ce_orig": 0.9159427285194397, + "epoch": 0.13343878064562514, + "kl_loss": 0.12562786042690277, + "loss_ib": 0.0019707619212567806, + "step": 464 + }, + { + "ce_ib": 7.252218723297119, + "ce_orig": 0.5822017192840576, + "epoch": 0.13343878064562514, + "kl_loss": 0.23755145072937012, + "loss_ib": 0.0031007362995296717, + "step": 464 + }, + { + "epoch": 0.13372636422460277, + "grad_norm": 0.09323103725910187, + "learning_rate": 4.997467721852196e-05, + "loss": 0.8438, + "step": 465 + }, + { + "ce_ib": 13.914019584655762, + "ce_orig": 1.2358434200286865, + "epoch": 0.13372636422460277, + "kl_loss": 0.20358413457870483, + "loss_ib": 0.0034272430930286646, + "step": 465 + }, + { + "ce_ib": 6.4653425216674805, + "ce_orig": 0.5584582090377808, + "epoch": 0.13372636422460277, + "kl_loss": 0.0890372097492218, + "loss_ib": 0.0015369063476100564, + "step": 465 + }, + { + "ce_ib": 10.534329414367676, + "ce_orig": 0.4327813684940338, + "epoch": 0.13372636422460277, + "kl_loss": 0.17528124153614044, + "loss_ib": 0.0028062453493475914, + "step": 465 + }, + { + "ce_ib": 13.77268123626709, + "ce_orig": 0.8556481599807739, + "epoch": 0.13372636422460277, + "kl_loss": 0.20487025380134583, + "loss_ib": 0.003425970673561096, + "step": 465 + }, + { + "ce_ib": 9.667628288269043, + "ce_orig": 0.7900619506835938, + "epoch": 0.13401394780358042, + "kl_loss": 0.16862963140010834, + "loss_ib": 0.002653059083968401, + "step": 466 + }, + { + "ce_ib": 11.5745210647583, + "ce_orig": 0.5225064158439636, + "epoch": 0.13401394780358042, + "kl_loss": 0.219425767660141, + "loss_ib": 0.0033517098054289818, + "step": 466 + }, + { + "ce_ib": 10.800277709960938, + "ce_orig": 0.9123367667198181, + "epoch": 0.13401394780358042, + "kl_loss": 0.2272198349237442, + "loss_ib": 0.0033522259909659624, + "step": 466 + }, + { + "ce_ib": 8.781864166259766, + "ce_orig": 0.7642026543617249, + "epoch": 0.13401394780358042, + "kl_loss": 0.17208728194236755, + "loss_ib": 0.00259905937127769, + "step": 466 + }, + { + "ce_ib": 12.304344177246094, + "ce_orig": 0.7450142502784729, + "epoch": 0.13430153138255804, + "kl_loss": 0.1326742172241211, + "loss_ib": 0.0025571766309440136, + "step": 467 + }, + { + "ce_ib": 9.575723648071289, + "ce_orig": 0.648908793926239, + "epoch": 0.13430153138255804, + "kl_loss": 0.19128626585006714, + "loss_ib": 0.0028704351279884577, + "step": 467 + }, + { + "ce_ib": 13.457468032836914, + "ce_orig": 1.4314584732055664, + "epoch": 0.13430153138255804, + "kl_loss": 0.20292793214321136, + "loss_ib": 0.0033750259317457676, + "step": 467 + }, + { + "ce_ib": 8.486967086791992, + "ce_orig": 0.6459015011787415, + "epoch": 0.13430153138255804, + "kl_loss": 0.16141179203987122, + "loss_ib": 0.002462814562022686, + "step": 467 + }, + { + "ce_ib": 9.87436580657959, + "ce_orig": 1.0700461864471436, + "epoch": 0.1345891149615357, + "kl_loss": 0.24746514856815338, + "loss_ib": 0.0034620880614966154, + "step": 468 + }, + { + "ce_ib": 10.845966339111328, + "ce_orig": 0.6945490837097168, + "epoch": 0.1345891149615357, + "kl_loss": 0.1763351410627365, + "loss_ib": 0.0028479481115937233, + "step": 468 + }, + { + "ce_ib": 11.760831832885742, + "ce_orig": 0.9512259364128113, + "epoch": 0.1345891149615357, + "kl_loss": 0.15363314747810364, + "loss_ib": 0.002712414599955082, + "step": 468 + }, + { + "ce_ib": 9.594245910644531, + "ce_orig": 0.6800144910812378, + "epoch": 0.1345891149615357, + "kl_loss": 0.2686789929866791, + "loss_ib": 0.0036462144926190376, + "step": 468 + }, + { + "ce_ib": 9.218502044677734, + "ce_orig": 0.45141276717185974, + "epoch": 0.13487669854051335, + "kl_loss": 0.1673453450202942, + "loss_ib": 0.0025953035801649094, + "step": 469 + }, + { + "ce_ib": 8.04023265838623, + "ce_orig": 0.626139223575592, + "epoch": 0.13487669854051335, + "kl_loss": 0.18813760578632355, + "loss_ib": 0.0026853992603719234, + "step": 469 + }, + { + "ce_ib": 9.180455207824707, + "ce_orig": 0.4675959348678589, + "epoch": 0.13487669854051335, + "kl_loss": 0.19254645705223083, + "loss_ib": 0.0028435098938643932, + "step": 469 + }, + { + "ce_ib": 10.131449699401855, + "ce_orig": 0.6631660461425781, + "epoch": 0.13487669854051335, + "kl_loss": 0.14070668816566467, + "loss_ib": 0.0024202116765081882, + "step": 469 + }, + { + "epoch": 0.13516428211949097, + "grad_norm": 0.08323580771684647, + "learning_rate": 4.9972901030487616e-05, + "loss": 0.8432, + "step": 470 + }, + { + "ce_ib": 9.376252174377441, + "ce_orig": 0.6429654955863953, + "epoch": 0.13516428211949097, + "kl_loss": 0.15271279215812683, + "loss_ib": 0.002464753109961748, + "step": 470 + }, + { + "ce_ib": 7.387055397033691, + "ce_orig": 0.4410795569419861, + "epoch": 0.13516428211949097, + "kl_loss": 0.13434870541095734, + "loss_ib": 0.0020821925718337297, + "step": 470 + }, + { + "ce_ib": 12.794949531555176, + "ce_orig": 0.8125112056732178, + "epoch": 0.13516428211949097, + "kl_loss": 0.5913262963294983, + "loss_ib": 0.007192757446318865, + "step": 470 + }, + { + "ce_ib": 7.509584903717041, + "ce_orig": 0.8456118106842041, + "epoch": 0.13516428211949097, + "kl_loss": 0.13973768055438995, + "loss_ib": 0.0021483353339135647, + "step": 470 + }, + { + "ce_ib": 7.9404191970825195, + "ce_orig": 0.7081736326217651, + "epoch": 0.13545186569846862, + "kl_loss": 0.12523691356182098, + "loss_ib": 0.0020464109256863594, + "step": 471 + }, + { + "ce_ib": 10.213021278381348, + "ce_orig": 0.7080956101417542, + "epoch": 0.13545186569846862, + "kl_loss": 0.2242983877658844, + "loss_ib": 0.003264285856857896, + "step": 471 + }, + { + "ce_ib": 9.126559257507324, + "ce_orig": 0.7322322726249695, + "epoch": 0.13545186569846862, + "kl_loss": 0.1383284032344818, + "loss_ib": 0.002295939950272441, + "step": 471 + }, + { + "ce_ib": 6.414395809173584, + "ce_orig": 0.6884047985076904, + "epoch": 0.13545186569846862, + "kl_loss": 0.11249984800815582, + "loss_ib": 0.0017664380138739944, + "step": 471 + }, + { + "ce_ib": 5.14065408706665, + "ce_orig": 0.2586210370063782, + "epoch": 0.13573944927744624, + "kl_loss": 0.3770146667957306, + "loss_ib": 0.004284211900085211, + "step": 472 + }, + { + "ce_ib": 14.23343276977539, + "ce_orig": 1.3532360792160034, + "epoch": 0.13573944927744624, + "kl_loss": 0.18985393643379211, + "loss_ib": 0.003321882337331772, + "step": 472 + }, + { + "ce_ib": 14.564502716064453, + "ce_orig": 1.749810814857483, + "epoch": 0.13573944927744624, + "kl_loss": 0.15691399574279785, + "loss_ib": 0.0030255902092903852, + "step": 472 + }, + { + "ce_ib": 8.07909870147705, + "ce_orig": 0.5683416724205017, + "epoch": 0.13573944927744624, + "kl_loss": 0.20622625946998596, + "loss_ib": 0.0028701722621917725, + "step": 472 + }, + { + "ce_ib": 13.582554817199707, + "ce_orig": 1.6216903924942017, + "epoch": 0.1360270328564239, + "kl_loss": 0.1717289686203003, + "loss_ib": 0.0030755449552088976, + "step": 473 + }, + { + "ce_ib": 12.75936222076416, + "ce_orig": 1.1129322052001953, + "epoch": 0.1360270328564239, + "kl_loss": 0.18489691615104675, + "loss_ib": 0.003124905051663518, + "step": 473 + }, + { + "ce_ib": 8.29798412322998, + "ce_orig": 0.4912956953048706, + "epoch": 0.1360270328564239, + "kl_loss": 0.13187135756015778, + "loss_ib": 0.002148512052372098, + "step": 473 + }, + { + "ce_ib": 6.32996129989624, + "ce_orig": 0.5624181628227234, + "epoch": 0.1360270328564239, + "kl_loss": 0.15103286504745483, + "loss_ib": 0.0021433248184621334, + "step": 473 + }, + { + "ce_ib": 9.895087242126465, + "ce_orig": 0.9939194321632385, + "epoch": 0.13631461643540155, + "kl_loss": 0.14027726650238037, + "loss_ib": 0.00239228131249547, + "step": 474 + }, + { + "ce_ib": 10.561649322509766, + "ce_orig": 0.8449274897575378, + "epoch": 0.13631461643540155, + "kl_loss": 0.16884073615074158, + "loss_ib": 0.002744572004303336, + "step": 474 + }, + { + "ce_ib": 10.697015762329102, + "ce_orig": 0.6244873404502869, + "epoch": 0.13631461643540155, + "kl_loss": 0.16410231590270996, + "loss_ib": 0.002710724715143442, + "step": 474 + }, + { + "ce_ib": 10.975496292114258, + "ce_orig": 0.6514372825622559, + "epoch": 0.13631461643540155, + "kl_loss": 0.20907053351402283, + "loss_ib": 0.003188254777342081, + "step": 474 + }, + { + "epoch": 0.13660220001437917, + "grad_norm": 0.09574563801288605, + "learning_rate": 4.997106467693835e-05, + "loss": 0.8712, + "step": 475 + }, + { + "ce_ib": 8.283028602600098, + "ce_orig": 0.6631106734275818, + "epoch": 0.13660220001437917, + "kl_loss": 0.19269996881484985, + "loss_ib": 0.002755302470177412, + "step": 475 + }, + { + "ce_ib": 9.683073997497559, + "ce_orig": 0.7057353258132935, + "epoch": 0.13660220001437917, + "kl_loss": 0.22878339886665344, + "loss_ib": 0.0032561414409428835, + "step": 475 + }, + { + "ce_ib": 8.93950366973877, + "ce_orig": 0.7505787014961243, + "epoch": 0.13660220001437917, + "kl_loss": 0.1079094409942627, + "loss_ib": 0.001973044592887163, + "step": 475 + }, + { + "ce_ib": 6.951815128326416, + "ce_orig": 0.5303117632865906, + "epoch": 0.13660220001437917, + "kl_loss": 0.11965961754322052, + "loss_ib": 0.0018917776178568602, + "step": 475 + }, + { + "ce_ib": 14.84595775604248, + "ce_orig": 1.473997950553894, + "epoch": 0.13688978359335682, + "kl_loss": 0.16360431909561157, + "loss_ib": 0.0031206386629492044, + "step": 476 + }, + { + "ce_ib": 13.593896865844727, + "ce_orig": 1.2449053525924683, + "epoch": 0.13688978359335682, + "kl_loss": 0.16457515954971313, + "loss_ib": 0.003005141159519553, + "step": 476 + }, + { + "ce_ib": 9.549323081970215, + "ce_orig": 0.7047984004020691, + "epoch": 0.13688978359335682, + "kl_loss": 0.2452981024980545, + "loss_ib": 0.0034079132601618767, + "step": 476 + }, + { + "ce_ib": 10.544528007507324, + "ce_orig": 0.7549718022346497, + "epoch": 0.13688978359335682, + "kl_loss": 0.1296602040529251, + "loss_ib": 0.0023510546889156103, + "step": 476 + }, + { + "ce_ib": 7.901725769042969, + "ce_orig": 0.8333771824836731, + "epoch": 0.13717736717233445, + "kl_loss": 0.11247368156909943, + "loss_ib": 0.001914909458719194, + "step": 477 + }, + { + "ce_ib": 6.190486431121826, + "ce_orig": 0.6102291345596313, + "epoch": 0.13717736717233445, + "kl_loss": 0.10293813049793243, + "loss_ib": 0.0016484298976138234, + "step": 477 + }, + { + "ce_ib": 8.575281143188477, + "ce_orig": 0.701132595539093, + "epoch": 0.13717736717233445, + "kl_loss": 0.12163711339235306, + "loss_ib": 0.002073899144306779, + "step": 477 + }, + { + "ce_ib": 9.477543830871582, + "ce_orig": 0.6797293424606323, + "epoch": 0.13717736717233445, + "kl_loss": 0.13846732676029205, + "loss_ib": 0.0023324275389313698, + "step": 477 + }, + { + "ce_ib": 8.02422046661377, + "ce_orig": 0.7941074371337891, + "epoch": 0.1374649507513121, + "kl_loss": 0.11544310301542282, + "loss_ib": 0.001956852851435542, + "step": 478 + }, + { + "ce_ib": 10.08478832244873, + "ce_orig": 0.5088381171226501, + "epoch": 0.1374649507513121, + "kl_loss": 0.26347583532333374, + "loss_ib": 0.003643237054347992, + "step": 478 + }, + { + "ce_ib": 10.609148025512695, + "ce_orig": 1.1123254299163818, + "epoch": 0.1374649507513121, + "kl_loss": 0.18334609270095825, + "loss_ib": 0.0028943754732608795, + "step": 478 + }, + { + "ce_ib": 8.124927520751953, + "ce_orig": 0.911295473575592, + "epoch": 0.1374649507513121, + "kl_loss": 0.1195707842707634, + "loss_ib": 0.0020082006230950356, + "step": 478 + }, + { + "ce_ib": 9.455392837524414, + "ce_orig": 0.46617555618286133, + "epoch": 0.13775253433028975, + "kl_loss": 0.2140653133392334, + "loss_ib": 0.003086192300543189, + "step": 479 + }, + { + "ce_ib": 12.214388847351074, + "ce_orig": 0.7688418030738831, + "epoch": 0.13775253433028975, + "kl_loss": 0.20302480459213257, + "loss_ib": 0.0032516869250684977, + "step": 479 + }, + { + "ce_ib": 10.966753005981445, + "ce_orig": 0.7571495175361633, + "epoch": 0.13775253433028975, + "kl_loss": 0.17398342490196228, + "loss_ib": 0.0028365093749016523, + "step": 479 + }, + { + "ce_ib": 7.949142932891846, + "ce_orig": 0.6712113618850708, + "epoch": 0.13775253433028975, + "kl_loss": 0.16949275135993958, + "loss_ib": 0.002489841775968671, + "step": 479 + }, + { + "epoch": 0.13804011790926737, + "grad_norm": 0.11252865940332413, + "learning_rate": 4.996916816229837e-05, + "loss": 0.8761, + "step": 480 + }, + { + "ce_ib": 11.602306365966797, + "ce_orig": 0.8134323358535767, + "epoch": 0.13804011790926737, + "kl_loss": 0.2448122501373291, + "loss_ib": 0.0036083529703319073, + "step": 480 + }, + { + "ce_ib": 11.314531326293945, + "ce_orig": 0.7367826700210571, + "epoch": 0.13804011790926737, + "kl_loss": 0.15320701897144318, + "loss_ib": 0.002663523191586137, + "step": 480 + }, + { + "ce_ib": 12.606677055358887, + "ce_orig": 1.1588752269744873, + "epoch": 0.13804011790926737, + "kl_loss": 0.15930581092834473, + "loss_ib": 0.002853725804015994, + "step": 480 + }, + { + "ce_ib": 11.054275512695312, + "ce_orig": 1.0181690454483032, + "epoch": 0.13804011790926737, + "kl_loss": 0.11396267265081406, + "loss_ib": 0.0022450541146099567, + "step": 480 + }, + { + "ce_ib": 14.066468238830566, + "ce_orig": 1.4801671504974365, + "epoch": 0.13832770148824503, + "kl_loss": 0.20112600922584534, + "loss_ib": 0.0034179065842181444, + "step": 481 + }, + { + "ce_ib": 8.212839126586914, + "ce_orig": 0.7344709038734436, + "epoch": 0.13832770148824503, + "kl_loss": 0.16998212039470673, + "loss_ib": 0.002521105110645294, + "step": 481 + }, + { + "ce_ib": 12.749667167663574, + "ce_orig": 1.22507905960083, + "epoch": 0.13832770148824503, + "kl_loss": 0.16252657771110535, + "loss_ib": 0.0029002325609326363, + "step": 481 + }, + { + "ce_ib": 9.067646026611328, + "ce_orig": 0.8712934851646423, + "epoch": 0.13832770148824503, + "kl_loss": 0.14971376955509186, + "loss_ib": 0.0024039021227508783, + "step": 481 + }, + { + "ce_ib": 7.4774956703186035, + "ce_orig": 0.6922398805618286, + "epoch": 0.13861528506722265, + "kl_loss": 0.17292888462543488, + "loss_ib": 0.0024770384188741446, + "step": 482 + }, + { + "ce_ib": 12.176875114440918, + "ce_orig": 0.8391960859298706, + "epoch": 0.13861528506722265, + "kl_loss": 0.28599101305007935, + "loss_ib": 0.00407759752124548, + "step": 482 + }, + { + "ce_ib": 14.910351753234863, + "ce_orig": 1.4995876550674438, + "epoch": 0.13861528506722265, + "kl_loss": 0.20326115190982819, + "loss_ib": 0.0035236466210335493, + "step": 482 + }, + { + "ce_ib": 14.353828430175781, + "ce_orig": 1.3694660663604736, + "epoch": 0.13861528506722265, + "kl_loss": 0.2608376145362854, + "loss_ib": 0.004043758846819401, + "step": 482 + }, + { + "ce_ib": 10.655997276306152, + "ce_orig": 0.795907735824585, + "epoch": 0.1389028686462003, + "kl_loss": 0.17903940379619598, + "loss_ib": 0.0028559938073158264, + "step": 483 + }, + { + "ce_ib": 8.001945495605469, + "ce_orig": 0.6496250629425049, + "epoch": 0.1389028686462003, + "kl_loss": 0.22284536063671112, + "loss_ib": 0.003028648206964135, + "step": 483 + }, + { + "ce_ib": 15.7087984085083, + "ce_orig": 1.539727807044983, + "epoch": 0.1389028686462003, + "kl_loss": 0.1869555413722992, + "loss_ib": 0.003440435044467449, + "step": 483 + }, + { + "ce_ib": 11.261712074279785, + "ce_orig": 0.7162747383117676, + "epoch": 0.1389028686462003, + "kl_loss": 0.15583762526512146, + "loss_ib": 0.002684547333046794, + "step": 483 + }, + { + "ce_ib": 12.349994659423828, + "ce_orig": 0.5102917551994324, + "epoch": 0.13919045222517795, + "kl_loss": 0.21702654659748077, + "loss_ib": 0.003405264811590314, + "step": 484 + }, + { + "ce_ib": 14.703964233398438, + "ce_orig": 1.2549042701721191, + "epoch": 0.13919045222517795, + "kl_loss": 0.28059592843055725, + "loss_ib": 0.004276355262845755, + "step": 484 + }, + { + "ce_ib": 14.395584106445312, + "ce_orig": 1.6029713153839111, + "epoch": 0.13919045222517795, + "kl_loss": 0.1588822305202484, + "loss_ib": 0.0030283809173852205, + "step": 484 + }, + { + "ce_ib": 12.572036743164062, + "ce_orig": 1.4179061651229858, + "epoch": 0.13919045222517795, + "kl_loss": 0.23325228691101074, + "loss_ib": 0.0035897265188395977, + "step": 484 + }, + { + "epoch": 0.13947803580415558, + "grad_norm": 0.09978976100683212, + "learning_rate": 4.996721149113682e-05, + "loss": 0.9298, + "step": 485 + }, + { + "ce_ib": 15.488334655761719, + "ce_orig": 1.308066964149475, + "epoch": 0.13947803580415558, + "kl_loss": 0.17892438173294067, + "loss_ib": 0.0033380771055817604, + "step": 485 + }, + { + "ce_ib": 10.043342590332031, + "ce_orig": 0.9190800786018372, + "epoch": 0.13947803580415558, + "kl_loss": 0.14057381451129913, + "loss_ib": 0.0024100723676383495, + "step": 485 + }, + { + "ce_ib": 10.405566215515137, + "ce_orig": 0.41310566663742065, + "epoch": 0.13947803580415558, + "kl_loss": 0.08804390579462051, + "loss_ib": 0.001920995651744306, + "step": 485 + }, + { + "ce_ib": 12.68533706665039, + "ce_orig": 0.9749146103858948, + "epoch": 0.13947803580415558, + "kl_loss": 0.17658352851867676, + "loss_ib": 0.003034368623048067, + "step": 485 + }, + { + "ce_ib": 14.109472274780273, + "ce_orig": 0.93257075548172, + "epoch": 0.13976561938313323, + "kl_loss": 0.15330049395561218, + "loss_ib": 0.002943952102214098, + "step": 486 + }, + { + "ce_ib": 7.845019340515137, + "ce_orig": 0.7454962730407715, + "epoch": 0.13976561938313323, + "kl_loss": 0.18838530778884888, + "loss_ib": 0.0026683551259338856, + "step": 486 + }, + { + "ce_ib": 9.430821418762207, + "ce_orig": 1.0630453824996948, + "epoch": 0.13976561938313323, + "kl_loss": 0.15098202228546143, + "loss_ib": 0.00245290226303041, + "step": 486 + }, + { + "ce_ib": 6.5644049644470215, + "ce_orig": 0.5489628314971924, + "epoch": 0.13976561938313323, + "kl_loss": 0.08801446855068207, + "loss_ib": 0.001536585041321814, + "step": 486 + }, + { + "ce_ib": 11.013578414916992, + "ce_orig": 0.7610940933227539, + "epoch": 0.14005320296211085, + "kl_loss": 0.22202414274215698, + "loss_ib": 0.0033215992152690887, + "step": 487 + }, + { + "ce_ib": 13.75318717956543, + "ce_orig": 0.7965566515922546, + "epoch": 0.14005320296211085, + "kl_loss": 0.22268138825893402, + "loss_ib": 0.003602132434025407, + "step": 487 + }, + { + "ce_ib": 11.45129680633545, + "ce_orig": 1.317887306213379, + "epoch": 0.14005320296211085, + "kl_loss": 0.1648291051387787, + "loss_ib": 0.002793420571833849, + "step": 487 + }, + { + "ce_ib": 9.824847221374512, + "ce_orig": 0.6467515230178833, + "epoch": 0.14005320296211085, + "kl_loss": 0.21943452954292297, + "loss_ib": 0.0031768297776579857, + "step": 487 + }, + { + "ce_ib": 9.987610816955566, + "ce_orig": 0.7534988522529602, + "epoch": 0.1403407865410885, + "kl_loss": 0.1998450607061386, + "loss_ib": 0.0029972116462886333, + "step": 488 + }, + { + "ce_ib": 14.38665771484375, + "ce_orig": 1.332573413848877, + "epoch": 0.1403407865410885, + "kl_loss": 0.23315776884555817, + "loss_ib": 0.003770243376493454, + "step": 488 + }, + { + "ce_ib": 6.623453140258789, + "ce_orig": 0.42733436822891235, + "epoch": 0.1403407865410885, + "kl_loss": 0.14417804777622223, + "loss_ib": 0.0021041256841272116, + "step": 488 + }, + { + "ce_ib": 12.093878746032715, + "ce_orig": 1.1107982397079468, + "epoch": 0.1403407865410885, + "kl_loss": 0.1123935654759407, + "loss_ib": 0.00233332347124815, + "step": 488 + }, + { + "ce_ib": 11.833623886108398, + "ce_orig": 0.9317137598991394, + "epoch": 0.14062837012006615, + "kl_loss": 0.4279143214225769, + "loss_ib": 0.005462505854666233, + "step": 489 + }, + { + "ce_ib": 6.175386428833008, + "ce_orig": 0.2635171413421631, + "epoch": 0.14062837012006615, + "kl_loss": 0.17456625401973724, + "loss_ib": 0.0023632009979337454, + "step": 489 + }, + { + "ce_ib": 9.941847801208496, + "ce_orig": 0.6531252861022949, + "epoch": 0.14062837012006615, + "kl_loss": 0.17595279216766357, + "loss_ib": 0.0027537124697118998, + "step": 489 + }, + { + "ce_ib": 7.677126407623291, + "ce_orig": 0.688906192779541, + "epoch": 0.14062837012006615, + "kl_loss": 0.18364465236663818, + "loss_ib": 0.0026041590608656406, + "step": 489 + }, + { + "epoch": 0.14091595369904378, + "grad_norm": 0.0886775329709053, + "learning_rate": 4.996519466816778e-05, + "loss": 0.9075, + "step": 490 + }, + { + "ce_ib": 8.481292724609375, + "ce_orig": 0.5066956281661987, + "epoch": 0.14091595369904378, + "kl_loss": 0.14637017250061035, + "loss_ib": 0.002311830874532461, + "step": 490 + }, + { + "ce_ib": 14.51606559753418, + "ce_orig": 1.596727967262268, + "epoch": 0.14091595369904378, + "kl_loss": 0.19026514887809753, + "loss_ib": 0.0033542579039931297, + "step": 490 + }, + { + "ce_ib": 10.839821815490723, + "ce_orig": 1.242689609527588, + "epoch": 0.14091595369904378, + "kl_loss": 0.21338334679603577, + "loss_ib": 0.0032178156543523073, + "step": 490 + }, + { + "ce_ib": 7.8446831703186035, + "ce_orig": 0.968952476978302, + "epoch": 0.14091595369904378, + "kl_loss": 0.2997243404388428, + "loss_ib": 0.003781711682677269, + "step": 490 + }, + { + "ce_ib": 11.051322937011719, + "ce_orig": 1.4200416803359985, + "epoch": 0.14120353727802143, + "kl_loss": 0.17061398923397064, + "loss_ib": 0.0028112721629440784, + "step": 491 + }, + { + "ce_ib": 11.496525764465332, + "ce_orig": 1.1261836290359497, + "epoch": 0.14120353727802143, + "kl_loss": 0.1715063452720642, + "loss_ib": 0.002864715876057744, + "step": 491 + }, + { + "ce_ib": 15.705190658569336, + "ce_orig": 1.4249088764190674, + "epoch": 0.14120353727802143, + "kl_loss": 0.19172680377960205, + "loss_ib": 0.0034877872094511986, + "step": 491 + }, + { + "ce_ib": 10.472744941711426, + "ce_orig": 0.9399302005767822, + "epoch": 0.14120353727802143, + "kl_loss": 0.15676361322402954, + "loss_ib": 0.0026149104814976454, + "step": 491 + }, + { + "ce_ib": 7.808863639831543, + "ce_orig": 0.8644165396690369, + "epoch": 0.14149112085699905, + "kl_loss": 0.09857909381389618, + "loss_ib": 0.0017666771309450269, + "step": 492 + }, + { + "ce_ib": 9.123184204101562, + "ce_orig": 0.736179530620575, + "epoch": 0.14149112085699905, + "kl_loss": 0.1718408167362213, + "loss_ib": 0.0026307266671210527, + "step": 492 + }, + { + "ce_ib": 16.71449851989746, + "ce_orig": 1.8901475667953491, + "epoch": 0.14149112085699905, + "kl_loss": 0.21999290585517883, + "loss_ib": 0.0038713787216693163, + "step": 492 + }, + { + "ce_ib": 14.769342422485352, + "ce_orig": 1.0042953491210938, + "epoch": 0.14149112085699905, + "kl_loss": 0.19468256831169128, + "loss_ib": 0.0034237599465996027, + "step": 492 + }, + { + "ce_ib": 7.843172073364258, + "ce_orig": 0.2853364646434784, + "epoch": 0.1417787044359767, + "kl_loss": 0.13801902532577515, + "loss_ib": 0.002164507517591119, + "step": 493 + }, + { + "ce_ib": 10.78830337524414, + "ce_orig": 1.1020339727401733, + "epoch": 0.1417787044359767, + "kl_loss": 0.21507477760314941, + "loss_ib": 0.003229578025639057, + "step": 493 + }, + { + "ce_ib": 7.485945224761963, + "ce_orig": 0.6249547004699707, + "epoch": 0.1417787044359767, + "kl_loss": 0.0884731262922287, + "loss_ib": 0.0016333258245140314, + "step": 493 + }, + { + "ce_ib": 10.592840194702148, + "ce_orig": 0.6959387063980103, + "epoch": 0.1417787044359767, + "kl_loss": 0.3251839876174927, + "loss_ib": 0.004311123862862587, + "step": 493 + }, + { + "ce_ib": 6.683375358581543, + "ce_orig": 0.47197264432907104, + "epoch": 0.14206628801495436, + "kl_loss": 0.1459130495786667, + "loss_ib": 0.0021274678874760866, + "step": 494 + }, + { + "ce_ib": 8.309039115905762, + "ce_orig": 0.34105879068374634, + "epoch": 0.14206628801495436, + "kl_loss": 0.15694405138492584, + "loss_ib": 0.0024003442376852036, + "step": 494 + }, + { + "ce_ib": 13.072383880615234, + "ce_orig": 1.3826266527175903, + "epoch": 0.14206628801495436, + "kl_loss": 0.19439606368541718, + "loss_ib": 0.003251199144870043, + "step": 494 + }, + { + "ce_ib": 8.67587947845459, + "ce_orig": 0.5467540621757507, + "epoch": 0.14206628801495436, + "kl_loss": 0.22117763757705688, + "loss_ib": 0.0030793643090873957, + "step": 494 + }, + { + "epoch": 0.14235387159393198, + "grad_norm": 0.11509755253791809, + "learning_rate": 4.996311769825024e-05, + "loss": 0.8795, + "step": 495 + }, + { + "ce_ib": 7.835722923278809, + "ce_orig": 0.5985096096992493, + "epoch": 0.14235387159393198, + "kl_loss": 0.11672006547451019, + "loss_ib": 0.0019507729448378086, + "step": 495 + }, + { + "ce_ib": 10.796977996826172, + "ce_orig": 1.2538580894470215, + "epoch": 0.14235387159393198, + "kl_loss": 0.17189006507396698, + "loss_ib": 0.0027985982596874237, + "step": 495 + }, + { + "ce_ib": 11.224424362182617, + "ce_orig": 0.8003069758415222, + "epoch": 0.14235387159393198, + "kl_loss": 0.11536049842834473, + "loss_ib": 0.0022760473657399416, + "step": 495 + }, + { + "ce_ib": 9.976873397827148, + "ce_orig": 0.9437066316604614, + "epoch": 0.14235387159393198, + "kl_loss": 0.14582431316375732, + "loss_ib": 0.002455930458381772, + "step": 495 + }, + { + "ce_ib": 12.75202751159668, + "ce_orig": 1.2885056734085083, + "epoch": 0.14264145517290963, + "kl_loss": 0.17118601500988007, + "loss_ib": 0.002987062791362405, + "step": 496 + }, + { + "ce_ib": 11.649064064025879, + "ce_orig": 0.8830009698867798, + "epoch": 0.14264145517290963, + "kl_loss": 0.1887287050485611, + "loss_ib": 0.0030521934386342764, + "step": 496 + }, + { + "ce_ib": 14.029234886169434, + "ce_orig": 0.9412078261375427, + "epoch": 0.14264145517290963, + "kl_loss": 0.14774635434150696, + "loss_ib": 0.002880387008190155, + "step": 496 + }, + { + "ce_ib": 16.335601806640625, + "ce_orig": 2.0776398181915283, + "epoch": 0.14264145517290963, + "kl_loss": 0.1809537559747696, + "loss_ib": 0.003443097695708275, + "step": 496 + }, + { + "ce_ib": 9.790528297424316, + "ce_orig": 0.9552518129348755, + "epoch": 0.14292903875188726, + "kl_loss": 0.1889423429965973, + "loss_ib": 0.002868476090952754, + "step": 497 + }, + { + "ce_ib": 9.869925498962402, + "ce_orig": 0.8539248704910278, + "epoch": 0.14292903875188726, + "kl_loss": 0.19081233441829681, + "loss_ib": 0.002895115874707699, + "step": 497 + }, + { + "ce_ib": 9.595001220703125, + "ce_orig": 0.5288217663764954, + "epoch": 0.14292903875188726, + "kl_loss": 0.19318905472755432, + "loss_ib": 0.002891390584409237, + "step": 497 + }, + { + "ce_ib": 9.586201667785645, + "ce_orig": 1.1783746480941772, + "epoch": 0.14292903875188726, + "kl_loss": 0.18821683526039124, + "loss_ib": 0.002840788336470723, + "step": 497 + }, + { + "ce_ib": 12.379876136779785, + "ce_orig": 1.2077107429504395, + "epoch": 0.1432166223308649, + "kl_loss": 0.14908835291862488, + "loss_ib": 0.0027288710698485374, + "step": 498 + }, + { + "ce_ib": 11.757010459899902, + "ce_orig": 0.6566261649131775, + "epoch": 0.1432166223308649, + "kl_loss": 0.1946725696325302, + "loss_ib": 0.003122426802292466, + "step": 498 + }, + { + "ce_ib": 8.702943801879883, + "ce_orig": 0.4927518963813782, + "epoch": 0.1432166223308649, + "kl_loss": 0.10925129801034927, + "loss_ib": 0.001962807262316346, + "step": 498 + }, + { + "ce_ib": 10.411445617675781, + "ce_orig": 0.706779956817627, + "epoch": 0.1432166223308649, + "kl_loss": 0.17433921992778778, + "loss_ib": 0.002784536685794592, + "step": 498 + }, + { + "ce_ib": 14.122117042541504, + "ce_orig": 1.4039651155471802, + "epoch": 0.14350420590984256, + "kl_loss": 0.16670997440814972, + "loss_ib": 0.0030793112237006426, + "step": 499 + }, + { + "ce_ib": 10.383757591247559, + "ce_orig": 1.0631924867630005, + "epoch": 0.14350420590984256, + "kl_loss": 0.16225400567054749, + "loss_ib": 0.002660915721207857, + "step": 499 + }, + { + "ce_ib": 6.402273654937744, + "ce_orig": 0.6313638091087341, + "epoch": 0.14350420590984256, + "kl_loss": 0.18505465984344482, + "loss_ib": 0.0024907737970352173, + "step": 499 + }, + { + "ce_ib": 8.23951244354248, + "ce_orig": 0.8967234492301941, + "epoch": 0.14350420590984256, + "kl_loss": 0.13677600026130676, + "loss_ib": 0.0021917112171649933, + "step": 499 + }, + { + "epoch": 0.14379178948882018, + "grad_norm": 0.09538047015666962, + "learning_rate": 4.996098058638809e-05, + "loss": 0.8901, + "step": 500 + }, + { + "ce_ib": 11.12409496307373, + "ce_orig": 1.0636703968048096, + "epoch": 0.14379178948882018, + "kl_loss": 0.19292375445365906, + "loss_ib": 0.003041646908968687, + "step": 500 + }, + { + "ce_ib": 9.371049880981445, + "ce_orig": 1.0397700071334839, + "epoch": 0.14379178948882018, + "kl_loss": 0.2242284119129181, + "loss_ib": 0.003179389052093029, + "step": 500 + }, + { + "ce_ib": 9.47309684753418, + "ce_orig": 0.7427978515625, + "epoch": 0.14379178948882018, + "kl_loss": 0.13901641964912415, + "loss_ib": 0.0023374739103019238, + "step": 500 + }, + { + "ce_ib": 9.967049598693848, + "ce_orig": 0.44743821024894714, + "epoch": 0.14379178948882018, + "kl_loss": 0.1941380798816681, + "loss_ib": 0.002938085701316595, + "step": 500 + }, + { + "ce_ib": 6.8732428550720215, + "ce_orig": 0.6810420751571655, + "epoch": 0.14407937306779783, + "kl_loss": 0.14418122172355652, + "loss_ib": 0.0021291363518685102, + "step": 501 + }, + { + "ce_ib": 11.470809936523438, + "ce_orig": 1.31697416305542, + "epoch": 0.14407937306779783, + "kl_loss": 0.17208820581436157, + "loss_ib": 0.002867962932214141, + "step": 501 + }, + { + "ce_ib": 10.819042205810547, + "ce_orig": 1.1678673028945923, + "epoch": 0.14407937306779783, + "kl_loss": 0.19393761456012726, + "loss_ib": 0.003021280048415065, + "step": 501 + }, + { + "ce_ib": 6.877528190612793, + "ce_orig": 0.7564640641212463, + "epoch": 0.14407937306779783, + "kl_loss": 0.12551426887512207, + "loss_ib": 0.0019428954692557454, + "step": 501 + }, + { + "ce_ib": 7.815924644470215, + "ce_orig": 0.32971468567848206, + "epoch": 0.14436695664677546, + "kl_loss": 0.42823344469070435, + "loss_ib": 0.005063927266746759, + "step": 502 + }, + { + "ce_ib": 12.163249969482422, + "ce_orig": 0.8130381107330322, + "epoch": 0.14436695664677546, + "kl_loss": 0.15447859466075897, + "loss_ib": 0.002761110896244645, + "step": 502 + }, + { + "ce_ib": 8.399581909179688, + "ce_orig": 0.6190077066421509, + "epoch": 0.14436695664677546, + "kl_loss": 0.14217016100883484, + "loss_ib": 0.0022616598289459944, + "step": 502 + }, + { + "ce_ib": 6.57456636428833, + "ce_orig": 0.7635507583618164, + "epoch": 0.14436695664677546, + "kl_loss": 0.1617724746465683, + "loss_ib": 0.002275181468576193, + "step": 502 + }, + { + "ce_ib": 10.540924072265625, + "ce_orig": 0.5798451900482178, + "epoch": 0.1446545402257531, + "kl_loss": 0.21290099620819092, + "loss_ib": 0.003183102235198021, + "step": 503 + }, + { + "ce_ib": 8.164572715759277, + "ce_orig": 0.5676815509796143, + "epoch": 0.1446545402257531, + "kl_loss": 0.38908153772354126, + "loss_ib": 0.004707272630184889, + "step": 503 + }, + { + "ce_ib": 10.497174263000488, + "ce_orig": 0.8416476249694824, + "epoch": 0.1446545402257531, + "kl_loss": 0.09391873329877853, + "loss_ib": 0.001988904783502221, + "step": 503 + }, + { + "ce_ib": 11.39775562286377, + "ce_orig": 0.9236753582954407, + "epoch": 0.1446545402257531, + "kl_loss": 0.19317620992660522, + "loss_ib": 0.003071537474170327, + "step": 503 + }, + { + "ce_ib": 13.18823528289795, + "ce_orig": 1.4070528745651245, + "epoch": 0.14494212380473076, + "kl_loss": 0.16827702522277832, + "loss_ib": 0.0030015939846634865, + "step": 504 + }, + { + "ce_ib": 12.251890182495117, + "ce_orig": 1.0019031763076782, + "epoch": 0.14494212380473076, + "kl_loss": 0.15187132358551025, + "loss_ib": 0.002743902150541544, + "step": 504 + }, + { + "ce_ib": 11.316810607910156, + "ce_orig": 0.949166476726532, + "epoch": 0.14494212380473076, + "kl_loss": 0.18364247679710388, + "loss_ib": 0.0029681057203561068, + "step": 504 + }, + { + "ce_ib": 10.85471248626709, + "ce_orig": 1.003252387046814, + "epoch": 0.14494212380473076, + "kl_loss": 0.1248200461268425, + "loss_ib": 0.002333671785891056, + "step": 504 + }, + { + "epoch": 0.14522970738370838, + "grad_norm": 0.09246399253606796, + "learning_rate": 4.9958783337730156e-05, + "loss": 0.8941, + "step": 505 + }, + { + "ce_ib": 7.710818767547607, + "ce_orig": 0.6707054972648621, + "epoch": 0.14522970738370838, + "kl_loss": 0.12212786078453064, + "loss_ib": 0.001992360455915332, + "step": 505 + }, + { + "ce_ib": 11.13107967376709, + "ce_orig": 0.5834399461746216, + "epoch": 0.14522970738370838, + "kl_loss": 0.26416948437690735, + "loss_ib": 0.0037548027466982603, + "step": 505 + }, + { + "ce_ib": 10.07874870300293, + "ce_orig": 0.4818477928638458, + "epoch": 0.14522970738370838, + "kl_loss": 0.20180988311767578, + "loss_ib": 0.003025973681360483, + "step": 505 + }, + { + "ce_ib": 7.720142364501953, + "ce_orig": 0.48857802152633667, + "epoch": 0.14522970738370838, + "kl_loss": 0.23724249005317688, + "loss_ib": 0.0031444390770047903, + "step": 505 + }, + { + "ce_ib": 11.317874908447266, + "ce_orig": 0.878321647644043, + "epoch": 0.14551729096268604, + "kl_loss": 0.22631272673606873, + "loss_ib": 0.0033949147909879684, + "step": 506 + }, + { + "ce_ib": 6.088842868804932, + "ce_orig": 0.44260281324386597, + "epoch": 0.14551729096268604, + "kl_loss": 0.1061149537563324, + "loss_ib": 0.0016700337873771787, + "step": 506 + }, + { + "ce_ib": 10.132776260375977, + "ce_orig": 0.45138809084892273, + "epoch": 0.14551729096268604, + "kl_loss": 0.12615957856178284, + "loss_ib": 0.002274873433634639, + "step": 506 + }, + { + "ce_ib": 9.908495903015137, + "ce_orig": 0.6264781951904297, + "epoch": 0.14551729096268604, + "kl_loss": 0.15342764556407928, + "loss_ib": 0.0025251260958611965, + "step": 506 + }, + { + "ce_ib": 15.370584487915039, + "ce_orig": 1.6317614316940308, + "epoch": 0.14580487454166366, + "kl_loss": 0.368154913187027, + "loss_ib": 0.005218606907874346, + "step": 507 + }, + { + "ce_ib": 9.250166893005371, + "ce_orig": 0.9209775924682617, + "epoch": 0.14580487454166366, + "kl_loss": 0.16472969949245453, + "loss_ib": 0.002572313416749239, + "step": 507 + }, + { + "ce_ib": 14.066780090332031, + "ce_orig": 0.879586935043335, + "epoch": 0.14580487454166366, + "kl_loss": 0.5569726228713989, + "loss_ib": 0.0069764042273163795, + "step": 507 + }, + { + "ce_ib": 9.828339576721191, + "ce_orig": 0.7276045083999634, + "epoch": 0.14580487454166366, + "kl_loss": 0.20243659615516663, + "loss_ib": 0.003007199615240097, + "step": 507 + }, + { + "ce_ib": 8.405064582824707, + "ce_orig": 0.5828406810760498, + "epoch": 0.1460924581206413, + "kl_loss": 0.16031748056411743, + "loss_ib": 0.002443681238219142, + "step": 508 + }, + { + "ce_ib": 12.821935653686523, + "ce_orig": 0.7830954194068909, + "epoch": 0.1460924581206413, + "kl_loss": 0.1861943006515503, + "loss_ib": 0.003144136629998684, + "step": 508 + }, + { + "ce_ib": 13.270180702209473, + "ce_orig": 1.4049288034439087, + "epoch": 0.1460924581206413, + "kl_loss": 0.26943105459213257, + "loss_ib": 0.004021328408271074, + "step": 508 + }, + { + "ce_ib": 8.444483757019043, + "ce_orig": 0.9969488382339478, + "epoch": 0.1460924581206413, + "kl_loss": 0.17717388272285461, + "loss_ib": 0.002616187324747443, + "step": 508 + }, + { + "ce_ib": 14.19354248046875, + "ce_orig": 1.728639006614685, + "epoch": 0.14638004169961896, + "kl_loss": 0.12366104125976562, + "loss_ib": 0.002655964344739914, + "step": 509 + }, + { + "ce_ib": 12.799410820007324, + "ce_orig": 0.962722897529602, + "epoch": 0.14638004169961896, + "kl_loss": 0.2079516500234604, + "loss_ib": 0.003359457477927208, + "step": 509 + }, + { + "ce_ib": 7.642533779144287, + "ce_orig": 0.570946216583252, + "epoch": 0.14638004169961896, + "kl_loss": 0.15292105078697205, + "loss_ib": 0.002293463796377182, + "step": 509 + }, + { + "ce_ib": 10.93984603881836, + "ce_orig": 1.3312052488327026, + "epoch": 0.14638004169961896, + "kl_loss": 0.1654568910598755, + "loss_ib": 0.0027485534083098173, + "step": 509 + }, + { + "epoch": 0.1466676252785966, + "grad_norm": 0.09280460327863693, + "learning_rate": 4.9956525957570086e-05, + "loss": 0.8336, + "step": 510 + }, + { + "ce_ib": 7.373950004577637, + "ce_orig": 0.7074568271636963, + "epoch": 0.1466676252785966, + "kl_loss": 0.12326858937740326, + "loss_ib": 0.001970080891624093, + "step": 510 + }, + { + "ce_ib": 13.963235855102539, + "ce_orig": 1.2645924091339111, + "epoch": 0.1466676252785966, + "kl_loss": 0.1571410596370697, + "loss_ib": 0.002967734355479479, + "step": 510 + }, + { + "ce_ib": 16.279865264892578, + "ce_orig": 1.9305559396743774, + "epoch": 0.1466676252785966, + "kl_loss": 0.2673254609107971, + "loss_ib": 0.004301241133362055, + "step": 510 + }, + { + "ce_ib": 11.21214485168457, + "ce_orig": 0.6908549666404724, + "epoch": 0.1466676252785966, + "kl_loss": 0.18891718983650208, + "loss_ib": 0.003010386135429144, + "step": 510 + }, + { + "ce_ib": 13.738448143005371, + "ce_orig": 1.3909226655960083, + "epoch": 0.14695520885757424, + "kl_loss": 0.2021740972995758, + "loss_ib": 0.0033955855760723352, + "step": 511 + }, + { + "ce_ib": 8.647909164428711, + "ce_orig": 0.6839855313301086, + "epoch": 0.14695520885757424, + "kl_loss": 0.13517390191555023, + "loss_ib": 0.0022165297996252775, + "step": 511 + }, + { + "ce_ib": 12.71828556060791, + "ce_orig": 1.1863548755645752, + "epoch": 0.14695520885757424, + "kl_loss": 0.3870590329170227, + "loss_ib": 0.005142418667674065, + "step": 511 + }, + { + "ce_ib": 7.004892826080322, + "ce_orig": 0.6125016808509827, + "epoch": 0.14695520885757424, + "kl_loss": 0.11418376863002777, + "loss_ib": 0.0018423269502818584, + "step": 511 + }, + { + "ce_ib": 12.221985816955566, + "ce_orig": 1.3097161054611206, + "epoch": 0.14724279243655186, + "kl_loss": 0.1954774260520935, + "loss_ib": 0.003176972735673189, + "step": 512 + }, + { + "ce_ib": 7.531097412109375, + "ce_orig": 0.5426865816116333, + "epoch": 0.14724279243655186, + "kl_loss": 0.1673842817544937, + "loss_ib": 0.002426952589303255, + "step": 512 + }, + { + "ce_ib": 9.53439712524414, + "ce_orig": 0.8523163795471191, + "epoch": 0.14724279243655186, + "kl_loss": 0.16582007706165314, + "loss_ib": 0.0026116403751075268, + "step": 512 + }, + { + "ce_ib": 9.802694320678711, + "ce_orig": 0.5798234939575195, + "epoch": 0.14724279243655186, + "kl_loss": 0.31143057346343994, + "loss_ib": 0.0040945750661194324, + "step": 512 + }, + { + "ce_ib": 16.203046798706055, + "ce_orig": 1.5306854248046875, + "epoch": 0.1475303760155295, + "kl_loss": 0.17325828969478607, + "loss_ib": 0.003352887462824583, + "step": 513 + }, + { + "ce_ib": 8.417235374450684, + "ce_orig": 0.896914005279541, + "epoch": 0.1475303760155295, + "kl_loss": 0.19780506193637848, + "loss_ib": 0.0028197739738970995, + "step": 513 + }, + { + "ce_ib": 6.960491180419922, + "ce_orig": 0.528160572052002, + "epoch": 0.1475303760155295, + "kl_loss": 0.23184965550899506, + "loss_ib": 0.0030145456548780203, + "step": 513 + }, + { + "ce_ib": 6.913288116455078, + "ce_orig": 0.4217478930950165, + "epoch": 0.1475303760155295, + "kl_loss": 0.12475548684597015, + "loss_ib": 0.0019388835644349456, + "step": 513 + }, + { + "ce_ib": 13.182520866394043, + "ce_orig": 1.2185357809066772, + "epoch": 0.14781795959450716, + "kl_loss": 0.2229577898979187, + "loss_ib": 0.003547829808667302, + "step": 514 + }, + { + "ce_ib": 13.908792495727539, + "ce_orig": 1.1175830364227295, + "epoch": 0.14781795959450716, + "kl_loss": 0.28653332591056824, + "loss_ib": 0.004256212152540684, + "step": 514 + }, + { + "ce_ib": 11.028960227966309, + "ce_orig": 1.2024520635604858, + "epoch": 0.14781795959450716, + "kl_loss": 0.21963676810264587, + "loss_ib": 0.0032992635387927294, + "step": 514 + }, + { + "ce_ib": 9.403878211975098, + "ce_orig": 0.7816042304039001, + "epoch": 0.14781795959450716, + "kl_loss": 0.11819127202033997, + "loss_ib": 0.0021223004441708326, + "step": 514 + }, + { + "epoch": 0.1481055431734848, + "grad_norm": 0.09141584485769272, + "learning_rate": 4.9954208451346465e-05, + "loss": 0.8628, + "step": 515 + }, + { + "ce_ib": 8.093498229980469, + "ce_orig": 0.5340498089790344, + "epoch": 0.1481055431734848, + "kl_loss": 0.1474526822566986, + "loss_ib": 0.0022838765289634466, + "step": 515 + }, + { + "ce_ib": 11.310858726501465, + "ce_orig": 1.0567339658737183, + "epoch": 0.1481055431734848, + "kl_loss": 0.1496482789516449, + "loss_ib": 0.0026275685522705317, + "step": 515 + }, + { + "ce_ib": 8.765999794006348, + "ce_orig": 0.8822218775749207, + "epoch": 0.1481055431734848, + "kl_loss": 0.14761213958263397, + "loss_ib": 0.0023527212906628847, + "step": 515 + }, + { + "ce_ib": 9.370061874389648, + "ce_orig": 0.7221859097480774, + "epoch": 0.1481055431734848, + "kl_loss": 0.21021240949630737, + "loss_ib": 0.0030391302425414324, + "step": 515 + }, + { + "ce_ib": 10.952759742736816, + "ce_orig": 1.2016615867614746, + "epoch": 0.14839312675246244, + "kl_loss": 0.12589505314826965, + "loss_ib": 0.0023542263079434633, + "step": 516 + }, + { + "ce_ib": 11.208597183227539, + "ce_orig": 1.2649309635162354, + "epoch": 0.14839312675246244, + "kl_loss": 0.1935414969921112, + "loss_ib": 0.0030562744941562414, + "step": 516 + }, + { + "ce_ib": 14.064299583435059, + "ce_orig": 1.3251948356628418, + "epoch": 0.14839312675246244, + "kl_loss": 0.2095833718776703, + "loss_ib": 0.003502263454720378, + "step": 516 + }, + { + "ce_ib": 6.933436870574951, + "ce_orig": 0.6950163841247559, + "epoch": 0.14839312675246244, + "kl_loss": 0.1774298995733261, + "loss_ib": 0.0024676427710801363, + "step": 516 + }, + { + "ce_ib": 6.997844219207764, + "ce_orig": 0.7801376581192017, + "epoch": 0.14868071033144006, + "kl_loss": 0.14632141590118408, + "loss_ib": 0.002162998542189598, + "step": 517 + }, + { + "ce_ib": 11.151383399963379, + "ce_orig": 1.1578229665756226, + "epoch": 0.14868071033144006, + "kl_loss": 0.31972575187683105, + "loss_ib": 0.004312395583838224, + "step": 517 + }, + { + "ce_ib": 12.593860626220703, + "ce_orig": 1.288727045059204, + "epoch": 0.14868071033144006, + "kl_loss": 0.172956645488739, + "loss_ib": 0.0029889524448662996, + "step": 517 + }, + { + "ce_ib": 11.15422248840332, + "ce_orig": 0.7159664630889893, + "epoch": 0.14868071033144006, + "kl_loss": 0.20476844906806946, + "loss_ib": 0.0031631067395210266, + "step": 517 + }, + { + "ce_ib": 10.187602996826172, + "ce_orig": 0.8133834600448608, + "epoch": 0.14896829391041772, + "kl_loss": 0.15150345861911774, + "loss_ib": 0.0025337948463857174, + "step": 518 + }, + { + "ce_ib": 8.426726341247559, + "ce_orig": 0.8027427196502686, + "epoch": 0.14896829391041772, + "kl_loss": 0.33559098839759827, + "loss_ib": 0.004198582377284765, + "step": 518 + }, + { + "ce_ib": 10.157910346984863, + "ce_orig": 0.7894662618637085, + "epoch": 0.14896829391041772, + "kl_loss": 0.1754056215286255, + "loss_ib": 0.0027698471676558256, + "step": 518 + }, + { + "ce_ib": 12.347294807434082, + "ce_orig": 0.8127198219299316, + "epoch": 0.14896829391041772, + "kl_loss": 0.16092827916145325, + "loss_ib": 0.0028440123423933983, + "step": 518 + }, + { + "ce_ib": 12.081820487976074, + "ce_orig": 1.1218383312225342, + "epoch": 0.14925587748939537, + "kl_loss": 0.1572212278842926, + "loss_ib": 0.0027803941629827023, + "step": 519 + }, + { + "ce_ib": 8.355681419372559, + "ce_orig": 0.4457217752933502, + "epoch": 0.14925587748939537, + "kl_loss": 0.15767526626586914, + "loss_ib": 0.002412320813164115, + "step": 519 + }, + { + "ce_ib": 15.299323081970215, + "ce_orig": 1.5671056509017944, + "epoch": 0.14925587748939537, + "kl_loss": 0.22422994673252106, + "loss_ib": 0.003772231750190258, + "step": 519 + }, + { + "ce_ib": 6.267134189605713, + "ce_orig": 0.5929120779037476, + "epoch": 0.14925587748939537, + "kl_loss": 0.1314193606376648, + "loss_ib": 0.0019409068627282977, + "step": 519 + }, + { + "epoch": 0.149543461068373, + "grad_norm": 0.12853504717350006, + "learning_rate": 4.995183082464269e-05, + "loss": 0.8526, + "step": 520 + }, + { + "ce_ib": 14.093266487121582, + "ce_orig": 1.7895437479019165, + "epoch": 0.149543461068373, + "kl_loss": 0.18939438462257385, + "loss_ib": 0.003303270321339369, + "step": 520 + }, + { + "ce_ib": 9.622370719909668, + "ce_orig": 0.6922176480293274, + "epoch": 0.149543461068373, + "kl_loss": 0.21940672397613525, + "loss_ib": 0.003156304359436035, + "step": 520 + }, + { + "ce_ib": 10.755894660949707, + "ce_orig": 0.4960995316505432, + "epoch": 0.149543461068373, + "kl_loss": 0.19118964672088623, + "loss_ib": 0.002987485844641924, + "step": 520 + }, + { + "ce_ib": 12.176746368408203, + "ce_orig": 0.7688639163970947, + "epoch": 0.149543461068373, + "kl_loss": 0.1497233808040619, + "loss_ib": 0.0027149084489792585, + "step": 520 + }, + { + "ce_ib": 8.148961067199707, + "ce_orig": 0.9524043798446655, + "epoch": 0.14983104464735064, + "kl_loss": 0.1963375210762024, + "loss_ib": 0.00277827144600451, + "step": 521 + }, + { + "ce_ib": 7.612722396850586, + "ce_orig": 0.5829215049743652, + "epoch": 0.14983104464735064, + "kl_loss": 0.13818615674972534, + "loss_ib": 0.0021431338973343372, + "step": 521 + }, + { + "ce_ib": 6.743089199066162, + "ce_orig": 0.6650915741920471, + "epoch": 0.14983104464735064, + "kl_loss": 0.15762126445770264, + "loss_ib": 0.002250521443784237, + "step": 521 + }, + { + "ce_ib": 7.84657621383667, + "ce_orig": 0.5749149918556213, + "epoch": 0.14983104464735064, + "kl_loss": 0.1294727921485901, + "loss_ib": 0.0020793855655938387, + "step": 521 + }, + { + "ce_ib": 10.302209854125977, + "ce_orig": 0.7592967748641968, + "epoch": 0.15011862822632827, + "kl_loss": 0.16181603074073792, + "loss_ib": 0.002648381283506751, + "step": 522 + }, + { + "ce_ib": 11.727582931518555, + "ce_orig": 1.4059052467346191, + "epoch": 0.15011862822632827, + "kl_loss": 0.17755615711212158, + "loss_ib": 0.002948319772258401, + "step": 522 + }, + { + "ce_ib": 5.496983528137207, + "ce_orig": 0.5452262163162231, + "epoch": 0.15011862822632827, + "kl_loss": 0.09833450615406036, + "loss_ib": 0.0015330433379858732, + "step": 522 + }, + { + "ce_ib": 12.549999237060547, + "ce_orig": 0.8813621997833252, + "epoch": 0.15011862822632827, + "kl_loss": 0.19512861967086792, + "loss_ib": 0.0032062861137092113, + "step": 522 + }, + { + "ce_ib": 13.78541374206543, + "ce_orig": 1.6297649145126343, + "epoch": 0.15040621180530592, + "kl_loss": 0.17818473279476166, + "loss_ib": 0.0031603884417563677, + "step": 523 + }, + { + "ce_ib": 11.36571979522705, + "ce_orig": 0.8598288893699646, + "epoch": 0.15040621180530592, + "kl_loss": 0.13574492931365967, + "loss_ib": 0.0024940213188529015, + "step": 523 + }, + { + "ce_ib": 7.622193813323975, + "ce_orig": 0.7786977291107178, + "epoch": 0.15040621180530592, + "kl_loss": 0.14550796151161194, + "loss_ib": 0.00221729907207191, + "step": 523 + }, + { + "ce_ib": 11.014781951904297, + "ce_orig": 1.0243315696716309, + "epoch": 0.15040621180530592, + "kl_loss": 0.2482243776321411, + "loss_ib": 0.003583722049370408, + "step": 523 + }, + { + "ce_ib": 10.487326622009277, + "ce_orig": 0.8381717205047607, + "epoch": 0.15069379538428357, + "kl_loss": 0.1882508099079132, + "loss_ib": 0.002931240713223815, + "step": 524 + }, + { + "ce_ib": 5.229416847229004, + "ce_orig": 0.4120478630065918, + "epoch": 0.15069379538428357, + "kl_loss": 0.09162455052137375, + "loss_ib": 0.0014391872100532055, + "step": 524 + }, + { + "ce_ib": 10.072685241699219, + "ce_orig": 0.5915430188179016, + "epoch": 0.15069379538428357, + "kl_loss": 0.25207147002220154, + "loss_ib": 0.0035279830917716026, + "step": 524 + }, + { + "ce_ib": 9.47887134552002, + "ce_orig": 0.8751776218414307, + "epoch": 0.15069379538428357, + "kl_loss": 0.18856096267700195, + "loss_ib": 0.0028334965463727713, + "step": 524 + }, + { + "epoch": 0.1509813789632612, + "grad_norm": 0.09292690455913544, + "learning_rate": 4.9949393083187005e-05, + "loss": 0.9051, + "step": 525 + }, + { + "ce_ib": 16.163028717041016, + "ce_orig": 1.7853000164031982, + "epoch": 0.1509813789632612, + "kl_loss": 0.2081843614578247, + "loss_ib": 0.003698146203532815, + "step": 525 + }, + { + "ce_ib": 9.814764022827148, + "ce_orig": 0.8537163734436035, + "epoch": 0.1509813789632612, + "kl_loss": 0.2146293818950653, + "loss_ib": 0.003127770032733679, + "step": 525 + }, + { + "ce_ib": 6.847357749938965, + "ce_orig": 0.7610385417938232, + "epoch": 0.1509813789632612, + "kl_loss": 0.10232152044773102, + "loss_ib": 0.0017079509561881423, + "step": 525 + }, + { + "ce_ib": 9.158605575561523, + "ce_orig": 0.7744526863098145, + "epoch": 0.1509813789632612, + "kl_loss": 0.16445057094097137, + "loss_ib": 0.0025603664107620716, + "step": 525 + }, + { + "ce_ib": 10.982367515563965, + "ce_orig": 0.6184618473052979, + "epoch": 0.15126896254223884, + "kl_loss": 0.12473335862159729, + "loss_ib": 0.0023455703631043434, + "step": 526 + }, + { + "ce_ib": 9.526459693908691, + "ce_orig": 1.034873366355896, + "epoch": 0.15126896254223884, + "kl_loss": 0.18319007754325867, + "loss_ib": 0.0027845464646816254, + "step": 526 + }, + { + "ce_ib": 8.941856384277344, + "ce_orig": 0.8572098612785339, + "epoch": 0.15126896254223884, + "kl_loss": 0.14592421054840088, + "loss_ib": 0.0023534276988357306, + "step": 526 + }, + { + "ce_ib": 8.852437973022461, + "ce_orig": 0.6226751208305359, + "epoch": 0.15126896254223884, + "kl_loss": 0.10193414986133575, + "loss_ib": 0.001904585282318294, + "step": 526 + }, + { + "ce_ib": 9.570630073547363, + "ce_orig": 0.6968982219696045, + "epoch": 0.15155654612121647, + "kl_loss": 0.14039430022239685, + "loss_ib": 0.0023610058706253767, + "step": 527 + }, + { + "ce_ib": 7.009479522705078, + "ce_orig": 0.7059912085533142, + "epoch": 0.15155654612121647, + "kl_loss": 0.20908552408218384, + "loss_ib": 0.0027918030973523855, + "step": 527 + }, + { + "ce_ib": 7.056645393371582, + "ce_orig": 0.8603862524032593, + "epoch": 0.15155654612121647, + "kl_loss": 0.23444432020187378, + "loss_ib": 0.0030501075088977814, + "step": 527 + }, + { + "ce_ib": 8.10662841796875, + "ce_orig": 0.5832763910293579, + "epoch": 0.15155654612121647, + "kl_loss": 0.19913099706172943, + "loss_ib": 0.0028019726742058992, + "step": 527 + }, + { + "ce_ib": 12.294358253479004, + "ce_orig": 1.3011188507080078, + "epoch": 0.15184412970019412, + "kl_loss": 0.22973008453845978, + "loss_ib": 0.00352673651650548, + "step": 528 + }, + { + "ce_ib": 11.455277442932129, + "ce_orig": 0.8678830862045288, + "epoch": 0.15184412970019412, + "kl_loss": 0.20876343548297882, + "loss_ib": 0.003233161987736821, + "step": 528 + }, + { + "ce_ib": 13.364859580993652, + "ce_orig": 0.4887239933013916, + "epoch": 0.15184412970019412, + "kl_loss": 0.29823386669158936, + "loss_ib": 0.004318824503570795, + "step": 528 + }, + { + "ce_ib": 7.953736305236816, + "ce_orig": 0.8171699643135071, + "epoch": 0.15184412970019412, + "kl_loss": 0.095934197306633, + "loss_ib": 0.0017547155730426311, + "step": 528 + }, + { + "ce_ib": 6.75135612487793, + "ce_orig": 0.6152947545051575, + "epoch": 0.15213171327917177, + "kl_loss": 0.13159394264221191, + "loss_ib": 0.001991074997931719, + "step": 529 + }, + { + "ce_ib": 7.845587730407715, + "ce_orig": 0.5412665009498596, + "epoch": 0.15213171327917177, + "kl_loss": 0.19593513011932373, + "loss_ib": 0.0027439098339527845, + "step": 529 + }, + { + "ce_ib": 11.808723449707031, + "ce_orig": 1.4632874727249146, + "epoch": 0.15213171327917177, + "kl_loss": 0.1801619529724121, + "loss_ib": 0.002982491860166192, + "step": 529 + }, + { + "ce_ib": 9.44063949584961, + "ce_orig": 0.7242243885993958, + "epoch": 0.15213171327917177, + "kl_loss": 0.11666402220726013, + "loss_ib": 0.002110704081133008, + "step": 529 + }, + { + "epoch": 0.1524192968581494, + "grad_norm": 0.09365444630384445, + "learning_rate": 4.994689523285251e-05, + "loss": 0.8573, + "step": 530 + }, + { + "ce_ib": 13.775089263916016, + "ce_orig": 1.116524577140808, + "epoch": 0.1524192968581494, + "kl_loss": 0.19737480580806732, + "loss_ib": 0.0033512567169964314, + "step": 530 + }, + { + "ce_ib": 8.636777877807617, + "ce_orig": 1.3055970668792725, + "epoch": 0.1524192968581494, + "kl_loss": 0.10996636748313904, + "loss_ib": 0.001963341375812888, + "step": 530 + }, + { + "ce_ib": 9.965653419494629, + "ce_orig": 0.690579891204834, + "epoch": 0.1524192968581494, + "kl_loss": 0.19870160520076752, + "loss_ib": 0.0029835812747478485, + "step": 530 + }, + { + "ce_ib": 10.97655963897705, + "ce_orig": 0.7595181465148926, + "epoch": 0.1524192968581494, + "kl_loss": 0.18271636962890625, + "loss_ib": 0.002924819476902485, + "step": 530 + }, + { + "ce_ib": 8.113723754882812, + "ce_orig": 0.8315019011497498, + "epoch": 0.15270688043712705, + "kl_loss": 0.1101025640964508, + "loss_ib": 0.0019123980309814215, + "step": 531 + }, + { + "ce_ib": 13.18765640258789, + "ce_orig": 0.8172896504402161, + "epoch": 0.15270688043712705, + "kl_loss": 0.18894284963607788, + "loss_ib": 0.003208193928003311, + "step": 531 + }, + { + "ce_ib": 4.153471946716309, + "ce_orig": 0.1576905995607376, + "epoch": 0.15270688043712705, + "kl_loss": 0.40544354915618896, + "loss_ib": 0.004469782579690218, + "step": 531 + }, + { + "ce_ib": 7.5165839195251465, + "ce_orig": 0.8319332003593445, + "epoch": 0.15270688043712705, + "kl_loss": 0.1594739556312561, + "loss_ib": 0.0023463978432118893, + "step": 531 + }, + { + "ce_ib": 12.242563247680664, + "ce_orig": 1.217457890510559, + "epoch": 0.15299446401610467, + "kl_loss": 0.40689927339553833, + "loss_ib": 0.005293248686939478, + "step": 532 + }, + { + "ce_ib": 12.046343803405762, + "ce_orig": 1.403741717338562, + "epoch": 0.15299446401610467, + "kl_loss": 0.1668795347213745, + "loss_ib": 0.00287342956289649, + "step": 532 + }, + { + "ce_ib": 10.049248695373535, + "ce_orig": 1.009892225265503, + "epoch": 0.15299446401610467, + "kl_loss": 0.1594667136669159, + "loss_ib": 0.0025995918549597263, + "step": 532 + }, + { + "ce_ib": 8.571545600891113, + "ce_orig": 0.45953336358070374, + "epoch": 0.15299446401610467, + "kl_loss": 0.17538738250732422, + "loss_ib": 0.0026110284961760044, + "step": 532 + }, + { + "ce_ib": 7.9551005363464355, + "ce_orig": 0.6499494910240173, + "epoch": 0.15328204759508232, + "kl_loss": 0.17974281311035156, + "loss_ib": 0.0025929382536560297, + "step": 533 + }, + { + "ce_ib": 8.36906909942627, + "ce_orig": 0.6839005351066589, + "epoch": 0.15328204759508232, + "kl_loss": 0.13957883417606354, + "loss_ib": 0.0022326952312141657, + "step": 533 + }, + { + "ce_ib": 11.703327178955078, + "ce_orig": 1.262555718421936, + "epoch": 0.15328204759508232, + "kl_loss": 0.20418298244476318, + "loss_ib": 0.0032121625263243914, + "step": 533 + }, + { + "ce_ib": 11.333956718444824, + "ce_orig": 0.4284483790397644, + "epoch": 0.15328204759508232, + "kl_loss": 0.2264348566532135, + "loss_ib": 0.0033977441489696503, + "step": 533 + }, + { + "ce_ib": 9.996318817138672, + "ce_orig": 0.7071578502655029, + "epoch": 0.15356963117405997, + "kl_loss": 0.15454252064228058, + "loss_ib": 0.002545056864619255, + "step": 534 + }, + { + "ce_ib": 12.299304008483887, + "ce_orig": 1.0114151239395142, + "epoch": 0.15356963117405997, + "kl_loss": 0.19390329718589783, + "loss_ib": 0.0031689631287008524, + "step": 534 + }, + { + "ce_ib": 11.23180103302002, + "ce_orig": 1.1782814264297485, + "epoch": 0.15356963117405997, + "kl_loss": 0.20649601519107819, + "loss_ib": 0.0031881402246654034, + "step": 534 + }, + { + "ce_ib": 10.936742782592773, + "ce_orig": 1.050248384475708, + "epoch": 0.15356963117405997, + "kl_loss": 0.10441295057535172, + "loss_ib": 0.002137803705409169, + "step": 534 + }, + { + "epoch": 0.1538572147530376, + "grad_norm": 0.09265723824501038, + "learning_rate": 4.9944337279657106e-05, + "loss": 0.9042, + "step": 535 + }, + { + "ce_ib": 14.606983184814453, + "ce_orig": 1.221248745918274, + "epoch": 0.1538572147530376, + "kl_loss": 0.2094915807247162, + "loss_ib": 0.003555614035576582, + "step": 535 + }, + { + "ce_ib": 9.798521995544434, + "ce_orig": 0.5161023139953613, + "epoch": 0.1538572147530376, + "kl_loss": 0.1310487538576126, + "loss_ib": 0.0022903396748006344, + "step": 535 + }, + { + "ce_ib": 8.770511627197266, + "ce_orig": 0.7841663956642151, + "epoch": 0.1538572147530376, + "kl_loss": 0.18124917149543762, + "loss_ib": 0.0026895427145063877, + "step": 535 + }, + { + "ce_ib": 7.555400371551514, + "ce_orig": 0.7624974250793457, + "epoch": 0.1538572147530376, + "kl_loss": 0.17534580826759338, + "loss_ib": 0.002508997917175293, + "step": 535 + }, + { + "ce_ib": 10.383445739746094, + "ce_orig": 1.285834550857544, + "epoch": 0.15414479833201525, + "kl_loss": 0.14239218831062317, + "loss_ib": 0.002462266245856881, + "step": 536 + }, + { + "ce_ib": 8.89057445526123, + "ce_orig": 0.5952100157737732, + "epoch": 0.15414479833201525, + "kl_loss": 0.16057077050209045, + "loss_ib": 0.0024947652127593756, + "step": 536 + }, + { + "ce_ib": 7.21333122253418, + "ce_orig": 0.49197930097579956, + "epoch": 0.15414479833201525, + "kl_loss": 0.12823858857154846, + "loss_ib": 0.002003718866035342, + "step": 536 + }, + { + "ce_ib": 13.193877220153809, + "ce_orig": 0.9760612845420837, + "epoch": 0.15414479833201525, + "kl_loss": 0.294974148273468, + "loss_ib": 0.004269129130989313, + "step": 536 + }, + { + "ce_ib": 12.644906997680664, + "ce_orig": 1.145738124847412, + "epoch": 0.15443238191099287, + "kl_loss": 0.22196374833583832, + "loss_ib": 0.0034841280430555344, + "step": 537 + }, + { + "ce_ib": 8.502153396606445, + "ce_orig": 1.0564385652542114, + "epoch": 0.15443238191099287, + "kl_loss": 0.10582996904850006, + "loss_ib": 0.001908514997921884, + "step": 537 + }, + { + "ce_ib": 13.864537239074707, + "ce_orig": 1.2629547119140625, + "epoch": 0.15443238191099287, + "kl_loss": 0.21262601017951965, + "loss_ib": 0.003512713825330138, + "step": 537 + }, + { + "ce_ib": 12.024038314819336, + "ce_orig": 1.3225494623184204, + "epoch": 0.15443238191099287, + "kl_loss": 0.18519270420074463, + "loss_ib": 0.003054330823943019, + "step": 537 + }, + { + "ce_ib": 12.25508975982666, + "ce_orig": 1.3482215404510498, + "epoch": 0.15471996548997052, + "kl_loss": 0.2573168873786926, + "loss_ib": 0.003798677818849683, + "step": 538 + }, + { + "ce_ib": 18.197988510131836, + "ce_orig": 1.977699875831604, + "epoch": 0.15471996548997052, + "kl_loss": 0.1906663030385971, + "loss_ib": 0.0037264616694301367, + "step": 538 + }, + { + "ce_ib": 7.995527744293213, + "ce_orig": 1.0472140312194824, + "epoch": 0.15471996548997052, + "kl_loss": 0.08826296776533127, + "loss_ib": 0.0016821823082864285, + "step": 538 + }, + { + "ce_ib": 8.727664947509766, + "ce_orig": 0.9827343821525574, + "epoch": 0.15471996548997052, + "kl_loss": 0.16956618428230286, + "loss_ib": 0.002568428171798587, + "step": 538 + }, + { + "ce_ib": 9.891521453857422, + "ce_orig": 0.603715717792511, + "epoch": 0.15500754906894817, + "kl_loss": 0.1278199702501297, + "loss_ib": 0.002267351606860757, + "step": 539 + }, + { + "ce_ib": 10.724482536315918, + "ce_orig": 0.7409098148345947, + "epoch": 0.15500754906894817, + "kl_loss": 0.2033136785030365, + "loss_ib": 0.0031055849976837635, + "step": 539 + }, + { + "ce_ib": 9.865452766418457, + "ce_orig": 0.5195972323417664, + "epoch": 0.15500754906894817, + "kl_loss": 0.18522371351718903, + "loss_ib": 0.0028387822676450014, + "step": 539 + }, + { + "ce_ib": 11.253691673278809, + "ce_orig": 0.8413724303245544, + "epoch": 0.15500754906894817, + "kl_loss": 0.1541779339313507, + "loss_ib": 0.0026671483647078276, + "step": 539 + }, + { + "epoch": 0.1552951326479258, + "grad_norm": 0.08690934628248215, + "learning_rate": 4.994171922976348e-05, + "loss": 0.8932, + "step": 540 + }, + { + "ce_ib": 12.854255676269531, + "ce_orig": 0.9279084801673889, + "epoch": 0.1552951326479258, + "kl_loss": 0.14839236438274384, + "loss_ib": 0.00276934914290905, + "step": 540 + }, + { + "ce_ib": 13.400528907775879, + "ce_orig": 1.5644118785858154, + "epoch": 0.1552951326479258, + "kl_loss": 0.17688438296318054, + "loss_ib": 0.0031088965479284525, + "step": 540 + }, + { + "ce_ib": 7.6631035804748535, + "ce_orig": 0.9725773930549622, + "epoch": 0.1552951326479258, + "kl_loss": 0.14389821887016296, + "loss_ib": 0.002205292461439967, + "step": 540 + }, + { + "ce_ib": 10.149072647094727, + "ce_orig": 0.8826848268508911, + "epoch": 0.1552951326479258, + "kl_loss": 0.18272389471530914, + "loss_ib": 0.0028421462047845125, + "step": 540 + }, + { + "ce_ib": 10.487343788146973, + "ce_orig": 0.9640787243843079, + "epoch": 0.15558271622690345, + "kl_loss": 0.16164128482341766, + "loss_ib": 0.0026651471853256226, + "step": 541 + }, + { + "ce_ib": 10.549164772033691, + "ce_orig": 0.8183526992797852, + "epoch": 0.15558271622690345, + "kl_loss": 0.11300608515739441, + "loss_ib": 0.002184977289289236, + "step": 541 + }, + { + "ce_ib": 11.219377517700195, + "ce_orig": 0.6499941349029541, + "epoch": 0.15558271622690345, + "kl_loss": 0.22048121690750122, + "loss_ib": 0.003326749661937356, + "step": 541 + }, + { + "ce_ib": 8.821683883666992, + "ce_orig": 0.6309160590171814, + "epoch": 0.15558271622690345, + "kl_loss": 0.14700010418891907, + "loss_ib": 0.002352169482037425, + "step": 541 + }, + { + "ce_ib": 8.223409652709961, + "ce_orig": 0.6411111950874329, + "epoch": 0.15587029980588107, + "kl_loss": 0.15190641582012177, + "loss_ib": 0.0023414050228893757, + "step": 542 + }, + { + "ce_ib": 11.807840347290039, + "ce_orig": 1.247244119644165, + "epoch": 0.15587029980588107, + "kl_loss": 0.1326785683631897, + "loss_ib": 0.00250756973400712, + "step": 542 + }, + { + "ce_ib": 9.101454734802246, + "ce_orig": 0.7491029500961304, + "epoch": 0.15587029980588107, + "kl_loss": 0.1894010752439499, + "loss_ib": 0.0028041561599820852, + "step": 542 + }, + { + "ce_ib": 7.774291038513184, + "ce_orig": 0.7008493542671204, + "epoch": 0.15587029980588107, + "kl_loss": 0.10578468441963196, + "loss_ib": 0.0018352757906541228, + "step": 542 + }, + { + "ce_ib": 8.337699890136719, + "ce_orig": 0.8569279909133911, + "epoch": 0.15615788338485873, + "kl_loss": 0.16004885733127594, + "loss_ib": 0.002434258349239826, + "step": 543 + }, + { + "ce_ib": 11.937819480895996, + "ce_orig": 1.1342954635620117, + "epoch": 0.15615788338485873, + "kl_loss": 0.17315535247325897, + "loss_ib": 0.002925335429608822, + "step": 543 + }, + { + "ce_ib": 9.676532745361328, + "ce_orig": 0.4862201511859894, + "epoch": 0.15615788338485873, + "kl_loss": 0.1526506096124649, + "loss_ib": 0.002494159387424588, + "step": 543 + }, + { + "ce_ib": 10.160820007324219, + "ce_orig": 1.0899734497070312, + "epoch": 0.15615788338485873, + "kl_loss": 0.14247693121433258, + "loss_ib": 0.002440851181745529, + "step": 543 + }, + { + "ce_ib": 9.082582473754883, + "ce_orig": 0.4083137810230255, + "epoch": 0.15644546696383638, + "kl_loss": 0.2051030397415161, + "loss_ib": 0.0029592886567115784, + "step": 544 + }, + { + "ce_ib": 8.451061248779297, + "ce_orig": 0.3469814360141754, + "epoch": 0.15644546696383638, + "kl_loss": 0.15002912282943726, + "loss_ib": 0.0023453973699361086, + "step": 544 + }, + { + "ce_ib": 11.451112747192383, + "ce_orig": 1.2380187511444092, + "epoch": 0.15644546696383638, + "kl_loss": 0.15902701020240784, + "loss_ib": 0.0027353812474757433, + "step": 544 + }, + { + "ce_ib": 15.22938060760498, + "ce_orig": 1.9276436567306519, + "epoch": 0.15644546696383638, + "kl_loss": 0.2923920750617981, + "loss_ib": 0.004446858540177345, + "step": 544 + }, + { + "epoch": 0.156733050542814, + "grad_norm": 0.09769053012132645, + "learning_rate": 4.993904108947914e-05, + "loss": 0.8847, + "step": 545 + }, + { + "ce_ib": 7.582347393035889, + "ce_orig": 0.5396762490272522, + "epoch": 0.156733050542814, + "kl_loss": 0.1266171634197235, + "loss_ib": 0.002024406334385276, + "step": 545 + }, + { + "ce_ib": 13.711177825927734, + "ce_orig": 1.1446130275726318, + "epoch": 0.156733050542814, + "kl_loss": 0.28241363167762756, + "loss_ib": 0.0041952538304030895, + "step": 545 + }, + { + "ce_ib": 13.14343547821045, + "ce_orig": 1.172808051109314, + "epoch": 0.156733050542814, + "kl_loss": 0.21370941400527954, + "loss_ib": 0.0034514376893639565, + "step": 545 + }, + { + "ce_ib": 11.734895706176758, + "ce_orig": 1.2079064846038818, + "epoch": 0.156733050542814, + "kl_loss": 0.18663063645362854, + "loss_ib": 0.003039795672520995, + "step": 545 + }, + { + "ce_ib": 9.871030807495117, + "ce_orig": 1.0754120349884033, + "epoch": 0.15702063412179165, + "kl_loss": 0.13395808637142181, + "loss_ib": 0.0023266838397830725, + "step": 546 + }, + { + "ce_ib": 10.360943794250488, + "ce_orig": 0.7201725244522095, + "epoch": 0.15702063412179165, + "kl_loss": 0.24039188027381897, + "loss_ib": 0.0034400131553411484, + "step": 546 + }, + { + "ce_ib": 8.925966262817383, + "ce_orig": 0.5887497663497925, + "epoch": 0.15702063412179165, + "kl_loss": 0.2111162692308426, + "loss_ib": 0.0030037593096494675, + "step": 546 + }, + { + "ce_ib": 10.003677368164062, + "ce_orig": 0.8970090746879578, + "epoch": 0.15702063412179165, + "kl_loss": 0.15961842238903046, + "loss_ib": 0.0025965517852455378, + "step": 546 + }, + { + "ce_ib": 8.268735885620117, + "ce_orig": 0.6968417167663574, + "epoch": 0.15730821770076928, + "kl_loss": 0.14865007996559143, + "loss_ib": 0.0023133743088692427, + "step": 547 + }, + { + "ce_ib": 10.061677932739258, + "ce_orig": 1.0893551111221313, + "epoch": 0.15730821770076928, + "kl_loss": 0.14944544434547424, + "loss_ib": 0.0025006223004311323, + "step": 547 + }, + { + "ce_ib": 9.257678985595703, + "ce_orig": 0.8900756239891052, + "epoch": 0.15730821770076928, + "kl_loss": 0.13304127752780914, + "loss_ib": 0.002256180625408888, + "step": 547 + }, + { + "ce_ib": 8.690975189208984, + "ce_orig": 0.846500813961029, + "epoch": 0.15730821770076928, + "kl_loss": 0.21669834852218628, + "loss_ib": 0.0030360808596014977, + "step": 547 + }, + { + "ce_ib": 12.489033699035645, + "ce_orig": 0.8086367845535278, + "epoch": 0.15759580127974693, + "kl_loss": 0.15377330780029297, + "loss_ib": 0.0027866363525390625, + "step": 548 + }, + { + "ce_ib": 5.675604343414307, + "ce_orig": 0.6320753693580627, + "epoch": 0.15759580127974693, + "kl_loss": 0.13638901710510254, + "loss_ib": 0.0019314506789669394, + "step": 548 + }, + { + "ce_ib": 10.726299285888672, + "ce_orig": 0.5647706389427185, + "epoch": 0.15759580127974693, + "kl_loss": 0.18386539816856384, + "loss_ib": 0.002911283867433667, + "step": 548 + }, + { + "ce_ib": 7.901782512664795, + "ce_orig": 0.583336591720581, + "epoch": 0.15759580127974693, + "kl_loss": 0.13014139235019684, + "loss_ib": 0.00209159217774868, + "step": 548 + }, + { + "ce_ib": 8.100556373596191, + "ce_orig": 0.802794337272644, + "epoch": 0.15788338485872458, + "kl_loss": 0.14330238103866577, + "loss_ib": 0.0022430792450904846, + "step": 549 + }, + { + "ce_ib": 11.29372787475586, + "ce_orig": 0.885117769241333, + "epoch": 0.15788338485872458, + "kl_loss": 0.10478469729423523, + "loss_ib": 0.002177219605073333, + "step": 549 + }, + { + "ce_ib": 10.886632919311523, + "ce_orig": 1.232460856437683, + "epoch": 0.15788338485872458, + "kl_loss": 0.18609371781349182, + "loss_ib": 0.002949600340798497, + "step": 549 + }, + { + "ce_ib": 4.600982666015625, + "ce_orig": 0.1705351322889328, + "epoch": 0.15788338485872458, + "kl_loss": 0.3744945526123047, + "loss_ib": 0.004205043893307447, + "step": 549 + }, + { + "epoch": 0.1581709684377022, + "grad_norm": 0.09734965115785599, + "learning_rate": 4.993630286525634e-05, + "loss": 0.8445, + "step": 550 + }, + { + "ce_ib": 10.561800003051758, + "ce_orig": 1.2681158781051636, + "epoch": 0.1581709684377022, + "kl_loss": 0.16903841495513916, + "loss_ib": 0.0027465641032904387, + "step": 550 + }, + { + "ce_ib": 8.956363677978516, + "ce_orig": 1.0898432731628418, + "epoch": 0.1581709684377022, + "kl_loss": 0.1274310052394867, + "loss_ib": 0.002169946441426873, + "step": 550 + }, + { + "ce_ib": 4.494284152984619, + "ce_orig": 0.46590328216552734, + "epoch": 0.1581709684377022, + "kl_loss": 0.1029190719127655, + "loss_ib": 0.0014786190586164594, + "step": 550 + }, + { + "ce_ib": 13.822484016418457, + "ce_orig": 0.9805088639259338, + "epoch": 0.1581709684377022, + "kl_loss": 0.14357160031795502, + "loss_ib": 0.0028179644141346216, + "step": 550 + }, + { + "ce_ib": 8.873933792114258, + "ce_orig": 0.5876795053482056, + "epoch": 0.15845855201667985, + "kl_loss": 0.16282954812049866, + "loss_ib": 0.0025156887713819742, + "step": 551 + }, + { + "ce_ib": 11.708837509155273, + "ce_orig": 1.4527268409729004, + "epoch": 0.15845855201667985, + "kl_loss": 0.1705905795097351, + "loss_ib": 0.002876789541915059, + "step": 551 + }, + { + "ce_ib": 9.84598159790039, + "ce_orig": 0.6224023699760437, + "epoch": 0.15845855201667985, + "kl_loss": 0.17640681564807892, + "loss_ib": 0.0027486663311719894, + "step": 551 + }, + { + "ce_ib": 11.687630653381348, + "ce_orig": 1.245397686958313, + "epoch": 0.15845855201667985, + "kl_loss": 0.1673552244901657, + "loss_ib": 0.002842315472662449, + "step": 551 + }, + { + "ce_ib": 12.031242370605469, + "ce_orig": 1.0912039279937744, + "epoch": 0.15874613559565748, + "kl_loss": 0.1191263347864151, + "loss_ib": 0.002394387498497963, + "step": 552 + }, + { + "ce_ib": 7.358980655670166, + "ce_orig": 0.4738483726978302, + "epoch": 0.15874613559565748, + "kl_loss": 0.16353383660316467, + "loss_ib": 0.002371236216276884, + "step": 552 + }, + { + "ce_ib": 11.342854499816895, + "ce_orig": 0.9162889122962952, + "epoch": 0.15874613559565748, + "kl_loss": 0.24299943447113037, + "loss_ib": 0.0035642795264720917, + "step": 552 + }, + { + "ce_ib": 9.668421745300293, + "ce_orig": 0.8884750008583069, + "epoch": 0.15874613559565748, + "kl_loss": 0.15223366022109985, + "loss_ib": 0.0024891786742955446, + "step": 552 + }, + { + "ce_ib": 9.163921356201172, + "ce_orig": 0.5614813566207886, + "epoch": 0.15903371917463513, + "kl_loss": 0.20447899401187897, + "loss_ib": 0.0029611820355057716, + "step": 553 + }, + { + "ce_ib": 14.240976333618164, + "ce_orig": 1.3581258058547974, + "epoch": 0.15903371917463513, + "kl_loss": 0.15795043110847473, + "loss_ib": 0.0030036019161343575, + "step": 553 + }, + { + "ce_ib": 11.746164321899414, + "ce_orig": 0.9120945930480957, + "epoch": 0.15903371917463513, + "kl_loss": 0.21569553017616272, + "loss_ib": 0.0033315718173980713, + "step": 553 + }, + { + "ce_ib": 9.069586753845215, + "ce_orig": 0.7589283585548401, + "epoch": 0.15903371917463513, + "kl_loss": 0.13429264724254608, + "loss_ib": 0.002249885117635131, + "step": 553 + }, + { + "ce_ib": 4.418727874755859, + "ce_orig": 0.4682615101337433, + "epoch": 0.15932130275361278, + "kl_loss": 0.08103372156620026, + "loss_ib": 0.0012522100005298853, + "step": 554 + }, + { + "ce_ib": 9.614441871643066, + "ce_orig": 0.7101583480834961, + "epoch": 0.15932130275361278, + "kl_loss": 0.18348637223243713, + "loss_ib": 0.002796307671815157, + "step": 554 + }, + { + "ce_ib": 12.66514778137207, + "ce_orig": 1.1605197191238403, + "epoch": 0.15932130275361278, + "kl_loss": 0.17392480373382568, + "loss_ib": 0.0030057625845074654, + "step": 554 + }, + { + "ce_ib": 11.166543960571289, + "ce_orig": 0.8878234028816223, + "epoch": 0.15932130275361278, + "kl_loss": 0.18384350836277008, + "loss_ib": 0.002955089556053281, + "step": 554 + }, + { + "epoch": 0.1596088863325904, + "grad_norm": 0.11484308540821075, + "learning_rate": 4.99335045636921e-05, + "loss": 0.943, + "step": 555 + }, + { + "ce_ib": 11.496370315551758, + "ce_orig": 0.8270056247711182, + "epoch": 0.1596088863325904, + "kl_loss": 0.1401461362838745, + "loss_ib": 0.0025510983541607857, + "step": 555 + }, + { + "ce_ib": 8.691624641418457, + "ce_orig": 0.7428332567214966, + "epoch": 0.1596088863325904, + "kl_loss": 0.1531578004360199, + "loss_ib": 0.002400740282610059, + "step": 555 + }, + { + "ce_ib": 10.219917297363281, + "ce_orig": 0.6447824239730835, + "epoch": 0.1596088863325904, + "kl_loss": 0.1406973898410797, + "loss_ib": 0.002428965410217643, + "step": 555 + }, + { + "ce_ib": 12.348773002624512, + "ce_orig": 0.9984573125839233, + "epoch": 0.1596088863325904, + "kl_loss": 0.14746885001659393, + "loss_ib": 0.002709565684199333, + "step": 555 + }, + { + "ce_ib": 16.282346725463867, + "ce_orig": 1.6398210525512695, + "epoch": 0.15989646991156806, + "kl_loss": 0.21631492674350739, + "loss_ib": 0.003791383933275938, + "step": 556 + }, + { + "ce_ib": 6.016524791717529, + "ce_orig": 0.5652951002120972, + "epoch": 0.15989646991156806, + "kl_loss": 0.11779481917619705, + "loss_ib": 0.0017796006286516786, + "step": 556 + }, + { + "ce_ib": 10.406888961791992, + "ce_orig": 1.0634095668792725, + "epoch": 0.15989646991156806, + "kl_loss": 0.2169257253408432, + "loss_ib": 0.0032099459785968065, + "step": 556 + }, + { + "ce_ib": 6.763169288635254, + "ce_orig": 0.5677881240844727, + "epoch": 0.15989646991156806, + "kl_loss": 0.142292320728302, + "loss_ib": 0.0020992399659007788, + "step": 556 + }, + { + "ce_ib": 4.530671119689941, + "ce_orig": 0.26608189940452576, + "epoch": 0.16018405349054568, + "kl_loss": 0.33508527278900146, + "loss_ib": 0.003803919767960906, + "step": 557 + }, + { + "ce_ib": 6.98599910736084, + "ce_orig": 0.8139922618865967, + "epoch": 0.16018405349054568, + "kl_loss": 0.16635608673095703, + "loss_ib": 0.0023621607106179, + "step": 557 + }, + { + "ce_ib": 11.501959800720215, + "ce_orig": 1.0473116636276245, + "epoch": 0.16018405349054568, + "kl_loss": 0.16634799540042877, + "loss_ib": 0.002813675906509161, + "step": 557 + }, + { + "ce_ib": 11.507184982299805, + "ce_orig": 1.2152619361877441, + "epoch": 0.16018405349054568, + "kl_loss": 0.20226159691810608, + "loss_ib": 0.003173334524035454, + "step": 557 + }, + { + "ce_ib": 7.310940742492676, + "ce_orig": 0.6393885016441345, + "epoch": 0.16047163706952333, + "kl_loss": 0.18821844458580017, + "loss_ib": 0.0026132785715162754, + "step": 558 + }, + { + "ce_ib": 10.590526580810547, + "ce_orig": 1.0444893836975098, + "epoch": 0.16047163706952333, + "kl_loss": 0.10557543486356735, + "loss_ib": 0.0021148070227354765, + "step": 558 + }, + { + "ce_ib": 9.154651641845703, + "ce_orig": 0.9614023566246033, + "epoch": 0.16047163706952333, + "kl_loss": 0.10559763014316559, + "loss_ib": 0.0019714415539056063, + "step": 558 + }, + { + "ce_ib": 8.871302604675293, + "ce_orig": 0.7604251503944397, + "epoch": 0.16047163706952333, + "kl_loss": 0.10212661325931549, + "loss_ib": 0.0019083964871242642, + "step": 558 + }, + { + "ce_ib": 8.032217025756836, + "ce_orig": 0.5321059823036194, + "epoch": 0.16075922064850098, + "kl_loss": 0.14546947181224823, + "loss_ib": 0.0022579163778573275, + "step": 559 + }, + { + "ce_ib": 6.739596366882324, + "ce_orig": 0.5902585983276367, + "epoch": 0.16075922064850098, + "kl_loss": 0.10357312858104706, + "loss_ib": 0.0017096908995881677, + "step": 559 + }, + { + "ce_ib": 11.034686088562012, + "ce_orig": 1.1962263584136963, + "epoch": 0.16075922064850098, + "kl_loss": 0.11951439082622528, + "loss_ib": 0.0022986126132309437, + "step": 559 + }, + { + "ce_ib": 8.0606107711792, + "ce_orig": 0.7243085503578186, + "epoch": 0.16075922064850098, + "kl_loss": 0.544711709022522, + "loss_ib": 0.0062531777657568455, + "step": 559 + }, + { + "epoch": 0.1610468042274786, + "grad_norm": 0.1068345308303833, + "learning_rate": 4.9930646191528175e-05, + "loss": 0.8331, + "step": 560 + }, + { + "ce_ib": 6.733529090881348, + "ce_orig": 0.5895476341247559, + "epoch": 0.1610468042274786, + "kl_loss": 0.16663867235183716, + "loss_ib": 0.0023397395852953196, + "step": 560 + }, + { + "ce_ib": 6.515050888061523, + "ce_orig": 0.739066481590271, + "epoch": 0.1610468042274786, + "kl_loss": 0.5483835935592651, + "loss_ib": 0.00613534078001976, + "step": 560 + }, + { + "ce_ib": 10.440169334411621, + "ce_orig": 0.4615348279476166, + "epoch": 0.1610468042274786, + "kl_loss": 0.2002372294664383, + "loss_ib": 0.003046389203518629, + "step": 560 + }, + { + "ce_ib": 8.762824058532715, + "ce_orig": 0.8410115838050842, + "epoch": 0.1610468042274786, + "kl_loss": 0.21117277443408966, + "loss_ib": 0.0029880099464207888, + "step": 560 + }, + { + "ce_ib": 8.918501853942871, + "ce_orig": 0.9338309168815613, + "epoch": 0.16133438780645626, + "kl_loss": 0.1465751826763153, + "loss_ib": 0.002357601886615157, + "step": 561 + }, + { + "ce_ib": 11.092143058776855, + "ce_orig": 1.3103547096252441, + "epoch": 0.16133438780645626, + "kl_loss": 0.15548178553581238, + "loss_ib": 0.0026640319265425205, + "step": 561 + }, + { + "ce_ib": 9.238221168518066, + "ce_orig": 0.5433566570281982, + "epoch": 0.16133438780645626, + "kl_loss": 0.18088671565055847, + "loss_ib": 0.0027326892595738173, + "step": 561 + }, + { + "ce_ib": 8.788311004638672, + "ce_orig": 0.6767505407333374, + "epoch": 0.16133438780645626, + "kl_loss": 0.16330577433109283, + "loss_ib": 0.0025118887424468994, + "step": 561 + }, + { + "ce_ib": 12.871294975280762, + "ce_orig": 0.97724848985672, + "epoch": 0.16162197138543388, + "kl_loss": 0.22364787757396698, + "loss_ib": 0.00352360843680799, + "step": 562 + }, + { + "ce_ib": 8.219422340393066, + "ce_orig": 1.127738118171692, + "epoch": 0.16162197138543388, + "kl_loss": 0.1269611120223999, + "loss_ib": 0.00209155329503119, + "step": 562 + }, + { + "ce_ib": 8.829179763793945, + "ce_orig": 0.815184473991394, + "epoch": 0.16162197138543388, + "kl_loss": 0.15598227083683014, + "loss_ib": 0.0024427406024187803, + "step": 562 + }, + { + "ce_ib": 11.565589904785156, + "ce_orig": 1.25294029712677, + "epoch": 0.16162197138543388, + "kl_loss": 0.1817988157272339, + "loss_ib": 0.002974546980112791, + "step": 562 + }, + { + "ce_ib": 7.771119117736816, + "ce_orig": 0.641653299331665, + "epoch": 0.16190955496441153, + "kl_loss": 0.15232746303081512, + "loss_ib": 0.002300386317074299, + "step": 563 + }, + { + "ce_ib": 11.970166206359863, + "ce_orig": 1.0346808433532715, + "epoch": 0.16190955496441153, + "kl_loss": 0.18096312880516052, + "loss_ib": 0.0030066478066146374, + "step": 563 + }, + { + "ce_ib": 9.624247550964355, + "ce_orig": 0.8522278666496277, + "epoch": 0.16190955496441153, + "kl_loss": 0.14167995750904083, + "loss_ib": 0.002379224169999361, + "step": 563 + }, + { + "ce_ib": 10.173232078552246, + "ce_orig": 0.7698571681976318, + "epoch": 0.16190955496441153, + "kl_loss": 0.14464448392391205, + "loss_ib": 0.0024637680035084486, + "step": 563 + }, + { + "ce_ib": 9.11231803894043, + "ce_orig": 0.5458751916885376, + "epoch": 0.16219713854338919, + "kl_loss": 0.2736416459083557, + "loss_ib": 0.0036476480308920145, + "step": 564 + }, + { + "ce_ib": 7.692645072937012, + "ce_orig": 0.6927090287208557, + "epoch": 0.16219713854338919, + "kl_loss": 0.10034587234258652, + "loss_ib": 0.0017727231606841087, + "step": 564 + }, + { + "ce_ib": 9.02833080291748, + "ce_orig": 0.8556505441665649, + "epoch": 0.16219713854338919, + "kl_loss": 0.18506044149398804, + "loss_ib": 0.0027534374967217445, + "step": 564 + }, + { + "ce_ib": 7.497312068939209, + "ce_orig": 0.626701831817627, + "epoch": 0.16219713854338919, + "kl_loss": 0.21034158766269684, + "loss_ib": 0.0028531469870358706, + "step": 564 + }, + { + "epoch": 0.1624847221223668, + "grad_norm": 0.09401144832372665, + "learning_rate": 4.992772775565104e-05, + "loss": 0.8074, + "step": 565 + }, + { + "ce_ib": 11.46445369720459, + "ce_orig": 0.9820400476455688, + "epoch": 0.1624847221223668, + "kl_loss": 0.21540969610214233, + "loss_ib": 0.0033005422446876764, + "step": 565 + }, + { + "ce_ib": 11.646088600158691, + "ce_orig": 1.372199535369873, + "epoch": 0.1624847221223668, + "kl_loss": 0.15993696451187134, + "loss_ib": 0.002763978438451886, + "step": 565 + }, + { + "ce_ib": 7.149011135101318, + "ce_orig": 0.6144888997077942, + "epoch": 0.1624847221223668, + "kl_loss": 0.13073797523975372, + "loss_ib": 0.0020222808234393597, + "step": 565 + }, + { + "ce_ib": 8.267902374267578, + "ce_orig": 0.7729185819625854, + "epoch": 0.1624847221223668, + "kl_loss": 0.20054320991039276, + "loss_ib": 0.0028322222642600536, + "step": 565 + }, + { + "ce_ib": 12.370036125183105, + "ce_orig": 1.2028396129608154, + "epoch": 0.16277230570134446, + "kl_loss": 0.13463753461837769, + "loss_ib": 0.002583378693088889, + "step": 566 + }, + { + "ce_ib": 10.583022117614746, + "ce_orig": 1.0920214653015137, + "epoch": 0.16277230570134446, + "kl_loss": 0.1449476182460785, + "loss_ib": 0.002507778350263834, + "step": 566 + }, + { + "ce_ib": 8.324437141418457, + "ce_orig": 0.4909367859363556, + "epoch": 0.16277230570134446, + "kl_loss": 0.12846431136131287, + "loss_ib": 0.0021170866675674915, + "step": 566 + }, + { + "ce_ib": 8.053367614746094, + "ce_orig": 0.7633367776870728, + "epoch": 0.16277230570134446, + "kl_loss": 0.14153558015823364, + "loss_ib": 0.002220692578703165, + "step": 566 + }, + { + "ce_ib": 9.400287628173828, + "ce_orig": 0.7323331832885742, + "epoch": 0.16305988928032208, + "kl_loss": 0.11990717798471451, + "loss_ib": 0.0021391003392636776, + "step": 567 + }, + { + "ce_ib": 8.591780662536621, + "ce_orig": 0.3860359787940979, + "epoch": 0.16305988928032208, + "kl_loss": 0.2477714717388153, + "loss_ib": 0.003336892696097493, + "step": 567 + }, + { + "ce_ib": 8.714170455932617, + "ce_orig": 0.5352444052696228, + "epoch": 0.16305988928032208, + "kl_loss": 0.12474939227104187, + "loss_ib": 0.0021189108956605196, + "step": 567 + }, + { + "ce_ib": 12.906991958618164, + "ce_orig": 0.9715366363525391, + "epoch": 0.16305988928032208, + "kl_loss": 0.21474242210388184, + "loss_ib": 0.0034381235018372536, + "step": 567 + }, + { + "ce_ib": 8.508112907409668, + "ce_orig": 0.91560298204422, + "epoch": 0.16334747285929974, + "kl_loss": 0.14688250422477722, + "loss_ib": 0.0023196362890303135, + "step": 568 + }, + { + "ce_ib": 14.228510856628418, + "ce_orig": 1.5433235168457031, + "epoch": 0.16334747285929974, + "kl_loss": 0.12242799997329712, + "loss_ib": 0.0026471309829503298, + "step": 568 + }, + { + "ce_ib": 9.558116912841797, + "ce_orig": 0.8016780018806458, + "epoch": 0.16334747285929974, + "kl_loss": 0.2184457778930664, + "loss_ib": 0.003140269545838237, + "step": 568 + }, + { + "ce_ib": 9.04499340057373, + "ce_orig": 0.6228697299957275, + "epoch": 0.16334747285929974, + "kl_loss": 0.16972365975379944, + "loss_ib": 0.002601735759526491, + "step": 568 + }, + { + "ce_ib": 8.427020072937012, + "ce_orig": 0.8732650279998779, + "epoch": 0.1636350564382774, + "kl_loss": 0.14395001530647278, + "loss_ib": 0.0022822022438049316, + "step": 569 + }, + { + "ce_ib": 7.884949684143066, + "ce_orig": 0.8752006888389587, + "epoch": 0.1636350564382774, + "kl_loss": 0.10892467200756073, + "loss_ib": 0.0018777416553348303, + "step": 569 + }, + { + "ce_ib": 7.760122299194336, + "ce_orig": 0.6656043529510498, + "epoch": 0.1636350564382774, + "kl_loss": 0.1181686520576477, + "loss_ib": 0.0019576987251639366, + "step": 569 + }, + { + "ce_ib": 10.271071434020996, + "ce_orig": 0.9393675923347473, + "epoch": 0.1636350564382774, + "kl_loss": 0.15882378816604614, + "loss_ib": 0.002615345176309347, + "step": 569 + }, + { + "epoch": 0.163922640017255, + "grad_norm": 0.09945710003376007, + "learning_rate": 4.992474926309191e-05, + "loss": 0.8445, + "step": 570 + }, + { + "ce_ib": 7.904441833496094, + "ce_orig": 0.7863525748252869, + "epoch": 0.163922640017255, + "kl_loss": 0.10601916909217834, + "loss_ib": 0.0018506358610466123, + "step": 570 + }, + { + "ce_ib": 10.904396057128906, + "ce_orig": 0.9058529734611511, + "epoch": 0.163922640017255, + "kl_loss": 0.178889200091362, + "loss_ib": 0.002879331586882472, + "step": 570 + }, + { + "ce_ib": 8.751618385314941, + "ce_orig": 0.9822079539299011, + "epoch": 0.163922640017255, + "kl_loss": 0.4031105637550354, + "loss_ib": 0.0049062673933804035, + "step": 570 + }, + { + "ce_ib": 11.65134334564209, + "ce_orig": 1.064084529876709, + "epoch": 0.163922640017255, + "kl_loss": 0.13623788952827454, + "loss_ib": 0.002527513075619936, + "step": 570 + }, + { + "ce_ib": 14.585232734680176, + "ce_orig": 1.6336134672164917, + "epoch": 0.16421022359623266, + "kl_loss": 0.18193870782852173, + "loss_ib": 0.0032779101748019457, + "step": 571 + }, + { + "ce_ib": 9.184663772583008, + "ce_orig": 0.8301358819007874, + "epoch": 0.16421022359623266, + "kl_loss": 0.19080528616905212, + "loss_ib": 0.0028265193104743958, + "step": 571 + }, + { + "ce_ib": 8.895779609680176, + "ce_orig": 0.5664294362068176, + "epoch": 0.16421022359623266, + "kl_loss": 0.11969694495201111, + "loss_ib": 0.002086547203361988, + "step": 571 + }, + { + "ce_ib": 10.437873840332031, + "ce_orig": 0.7510645985603333, + "epoch": 0.16421022359623266, + "kl_loss": 0.3105461597442627, + "loss_ib": 0.004149248823523521, + "step": 571 + }, + { + "ce_ib": 12.008666038513184, + "ce_orig": 0.41471701860427856, + "epoch": 0.1644978071752103, + "kl_loss": 0.4446406066417694, + "loss_ib": 0.00564727233722806, + "step": 572 + }, + { + "ce_ib": 8.411678314208984, + "ce_orig": 0.5035253167152405, + "epoch": 0.1644978071752103, + "kl_loss": 0.1964578628540039, + "loss_ib": 0.002805746393278241, + "step": 572 + }, + { + "ce_ib": 13.450571060180664, + "ce_orig": 1.2887715101242065, + "epoch": 0.1644978071752103, + "kl_loss": 0.13696449995040894, + "loss_ib": 0.0027147019281983376, + "step": 572 + }, + { + "ce_ib": 10.915575981140137, + "ce_orig": 1.13957941532135, + "epoch": 0.1644978071752103, + "kl_loss": 0.18452590703964233, + "loss_ib": 0.0029368167743086815, + "step": 572 + }, + { + "ce_ib": 9.278141975402832, + "ce_orig": 1.0592403411865234, + "epoch": 0.16478539075418794, + "kl_loss": 0.12091828882694244, + "loss_ib": 0.0021369969472289085, + "step": 573 + }, + { + "ce_ib": 9.828802108764648, + "ce_orig": 0.9190781116485596, + "epoch": 0.16478539075418794, + "kl_loss": 0.17288747429847717, + "loss_ib": 0.0027117549907416105, + "step": 573 + }, + { + "ce_ib": 8.335225105285645, + "ce_orig": 0.6787893772125244, + "epoch": 0.16478539075418794, + "kl_loss": 0.1160748228430748, + "loss_ib": 0.001994270598515868, + "step": 573 + }, + { + "ce_ib": 8.189950942993164, + "ce_orig": 0.8531936407089233, + "epoch": 0.16478539075418794, + "kl_loss": 0.09948378056287766, + "loss_ib": 0.0018138327868655324, + "step": 573 + }, + { + "ce_ib": 6.455183506011963, + "ce_orig": 0.596420168876648, + "epoch": 0.1650729743331656, + "kl_loss": 0.11498283594846725, + "loss_ib": 0.0017953467322513461, + "step": 574 + }, + { + "ce_ib": 9.777907371520996, + "ce_orig": 0.8681027293205261, + "epoch": 0.1650729743331656, + "kl_loss": 0.1523522138595581, + "loss_ib": 0.002501312643289566, + "step": 574 + }, + { + "ce_ib": 9.882269859313965, + "ce_orig": 0.9420934319496155, + "epoch": 0.1650729743331656, + "kl_loss": 0.16510173678398132, + "loss_ib": 0.0026392440777271986, + "step": 574 + }, + { + "ce_ib": 12.674947738647461, + "ce_orig": 1.0848475694656372, + "epoch": 0.1650729743331656, + "kl_loss": 0.16495954990386963, + "loss_ib": 0.0029170899651944637, + "step": 574 + }, + { + "epoch": 0.1653605579121432, + "grad_norm": 0.08536024391651154, + "learning_rate": 4.992171072102663e-05, + "loss": 0.8644, + "step": 575 + }, + { + "ce_ib": 8.744396209716797, + "ce_orig": 0.7920838594436646, + "epoch": 0.1653605579121432, + "kl_loss": 0.15692180395126343, + "loss_ib": 0.002443657722324133, + "step": 575 + }, + { + "ce_ib": 10.282676696777344, + "ce_orig": 0.8695278763771057, + "epoch": 0.1653605579121432, + "kl_loss": 0.16592364013195038, + "loss_ib": 0.0026875040493905544, + "step": 575 + }, + { + "ce_ib": 11.398494720458984, + "ce_orig": 1.4248392581939697, + "epoch": 0.1653605579121432, + "kl_loss": 0.21238285303115845, + "loss_ib": 0.0032636779360473156, + "step": 575 + }, + { + "ce_ib": 6.761519908905029, + "ce_orig": 0.5061066746711731, + "epoch": 0.1653605579121432, + "kl_loss": 0.09173807501792908, + "loss_ib": 0.0015935326227918267, + "step": 575 + }, + { + "ce_ib": 10.953373908996582, + "ce_orig": 0.8243492245674133, + "epoch": 0.16564814149112086, + "kl_loss": 0.21726994216442108, + "loss_ib": 0.0032680367585271597, + "step": 576 + }, + { + "ce_ib": 8.85136890411377, + "ce_orig": 0.8645575642585754, + "epoch": 0.16564814149112086, + "kl_loss": 0.1265760213136673, + "loss_ib": 0.0021508969366550446, + "step": 576 + }, + { + "ce_ib": 4.987844944000244, + "ce_orig": 0.2835332453250885, + "epoch": 0.16564814149112086, + "kl_loss": 0.32002195715904236, + "loss_ib": 0.003699003951624036, + "step": 576 + }, + { + "ce_ib": 10.408366203308105, + "ce_orig": 0.5282058715820312, + "epoch": 0.16564814149112086, + "kl_loss": 0.1558026671409607, + "loss_ib": 0.0025988630950450897, + "step": 576 + }, + { + "ce_ib": 6.840135097503662, + "ce_orig": 0.7790481448173523, + "epoch": 0.1659357250700985, + "kl_loss": 0.17018580436706543, + "loss_ib": 0.002385871484875679, + "step": 577 + }, + { + "ce_ib": 5.964718341827393, + "ce_orig": 0.4317297339439392, + "epoch": 0.1659357250700985, + "kl_loss": 0.12732765078544617, + "loss_ib": 0.0018697483465075493, + "step": 577 + }, + { + "ce_ib": 8.891576766967773, + "ce_orig": 0.5287486910820007, + "epoch": 0.1659357250700985, + "kl_loss": 0.12389977276325226, + "loss_ib": 0.0021281554363667965, + "step": 577 + }, + { + "ce_ib": 4.9557671546936035, + "ce_orig": 0.6066608428955078, + "epoch": 0.1659357250700985, + "kl_loss": 0.12630987167358398, + "loss_ib": 0.0017586754402145743, + "step": 577 + }, + { + "ce_ib": 11.605189323425293, + "ce_orig": 1.0188590288162231, + "epoch": 0.16622330864907614, + "kl_loss": 0.18720872700214386, + "loss_ib": 0.0030326060950756073, + "step": 578 + }, + { + "ce_ib": 5.411064147949219, + "ce_orig": 0.283112108707428, + "epoch": 0.16622330864907614, + "kl_loss": 0.42558926343917847, + "loss_ib": 0.004796999040991068, + "step": 578 + }, + { + "ce_ib": 10.837127685546875, + "ce_orig": 0.8412641286849976, + "epoch": 0.16622330864907614, + "kl_loss": 0.1550775170326233, + "loss_ib": 0.0026344875805079937, + "step": 578 + }, + { + "ce_ib": 11.859156608581543, + "ce_orig": 1.3071043491363525, + "epoch": 0.16622330864907614, + "kl_loss": 0.08775961399078369, + "loss_ib": 0.002063511637970805, + "step": 578 + }, + { + "ce_ib": 12.27880573272705, + "ce_orig": 1.50951087474823, + "epoch": 0.1665108922280538, + "kl_loss": 0.17748884856700897, + "loss_ib": 0.0030027690809220076, + "step": 579 + }, + { + "ce_ib": 10.481730461120605, + "ce_orig": 1.193427562713623, + "epoch": 0.1665108922280538, + "kl_loss": 0.19660136103630066, + "loss_ib": 0.003014186630025506, + "step": 579 + }, + { + "ce_ib": 13.807245254516602, + "ce_orig": 1.1526756286621094, + "epoch": 0.1665108922280538, + "kl_loss": 0.23192673921585083, + "loss_ib": 0.0036999916192144156, + "step": 579 + }, + { + "ce_ib": 12.139798164367676, + "ce_orig": 1.2501667737960815, + "epoch": 0.1665108922280538, + "kl_loss": 0.14804279804229736, + "loss_ib": 0.0026944077108055353, + "step": 579 + }, + { + "epoch": 0.16679847580703142, + "grad_norm": 0.07876303791999817, + "learning_rate": 4.9918612136775776e-05, + "loss": 0.8655, + "step": 580 + }, + { + "ce_ib": 11.737598419189453, + "ce_orig": 1.0697919130325317, + "epoch": 0.16679847580703142, + "kl_loss": 0.17866802215576172, + "loss_ib": 0.002960439771413803, + "step": 580 + }, + { + "ce_ib": 5.683050632476807, + "ce_orig": 0.48447874188423157, + "epoch": 0.16679847580703142, + "kl_loss": 0.09303843230009079, + "loss_ib": 0.0014986892929300666, + "step": 580 + }, + { + "ce_ib": 12.669607162475586, + "ce_orig": 0.8514668345451355, + "epoch": 0.16679847580703142, + "kl_loss": 0.208790585398674, + "loss_ib": 0.003354866523295641, + "step": 580 + }, + { + "ce_ib": 11.108747482299805, + "ce_orig": 0.9369009137153625, + "epoch": 0.16679847580703142, + "kl_loss": 0.1635299026966095, + "loss_ib": 0.002746173646301031, + "step": 580 + }, + { + "ce_ib": 8.074356079101562, + "ce_orig": 0.7309190630912781, + "epoch": 0.16708605938600907, + "kl_loss": 0.2102489322423935, + "loss_ib": 0.0029099248349666595, + "step": 581 + }, + { + "ce_ib": 7.948696136474609, + "ce_orig": 0.6363977193832397, + "epoch": 0.16708605938600907, + "kl_loss": 0.45359325408935547, + "loss_ib": 0.0053308019414544106, + "step": 581 + }, + { + "ce_ib": 11.585594177246094, + "ce_orig": 1.2507925033569336, + "epoch": 0.16708605938600907, + "kl_loss": 0.14326362311840057, + "loss_ib": 0.002591195749118924, + "step": 581 + }, + { + "ce_ib": 12.229244232177734, + "ce_orig": 1.052150845527649, + "epoch": 0.16708605938600907, + "kl_loss": 0.14200204610824585, + "loss_ib": 0.002642944687977433, + "step": 581 + }, + { + "ce_ib": 7.478880405426025, + "ce_orig": 0.7154788970947266, + "epoch": 0.1673736429649867, + "kl_loss": 0.16189737617969513, + "loss_ib": 0.0023668615613132715, + "step": 582 + }, + { + "ce_ib": 12.277364730834961, + "ce_orig": 1.1993218660354614, + "epoch": 0.1673736429649867, + "kl_loss": 0.16127103567123413, + "loss_ib": 0.002840446773916483, + "step": 582 + }, + { + "ce_ib": 10.120058059692383, + "ce_orig": 0.7597600817680359, + "epoch": 0.1673736429649867, + "kl_loss": 0.13978613913059235, + "loss_ib": 0.0024098672438412905, + "step": 582 + }, + { + "ce_ib": 7.654234409332275, + "ce_orig": 0.6932991147041321, + "epoch": 0.1673736429649867, + "kl_loss": 0.09757931530475616, + "loss_ib": 0.0017412164015695453, + "step": 582 + }, + { + "ce_ib": 7.6219096183776855, + "ce_orig": 0.6601145267486572, + "epoch": 0.16766122654396434, + "kl_loss": 0.1617378145456314, + "loss_ib": 0.0023795689921826124, + "step": 583 + }, + { + "ce_ib": 7.920563220977783, + "ce_orig": 0.8288545608520508, + "epoch": 0.16766122654396434, + "kl_loss": 0.2300298511981964, + "loss_ib": 0.0030923548620194197, + "step": 583 + }, + { + "ce_ib": 9.533792495727539, + "ce_orig": 0.7653409838676453, + "epoch": 0.16766122654396434, + "kl_loss": 0.2119666188955307, + "loss_ib": 0.0030730452854186296, + "step": 583 + }, + { + "ce_ib": 7.689798831939697, + "ce_orig": 0.7431104779243469, + "epoch": 0.16766122654396434, + "kl_loss": 0.1498323678970337, + "loss_ib": 0.0022673034109175205, + "step": 583 + }, + { + "ce_ib": 10.659350395202637, + "ce_orig": 1.0661674737930298, + "epoch": 0.167948810122942, + "kl_loss": 0.15681301057338715, + "loss_ib": 0.002634064992889762, + "step": 584 + }, + { + "ce_ib": 8.092489242553711, + "ce_orig": 0.7272496819496155, + "epoch": 0.167948810122942, + "kl_loss": 0.17958983778953552, + "loss_ib": 0.0026051471941173077, + "step": 584 + }, + { + "ce_ib": 10.62472915649414, + "ce_orig": 0.8471878170967102, + "epoch": 0.167948810122942, + "kl_loss": 0.1516703963279724, + "loss_ib": 0.0025791770312935114, + "step": 584 + }, + { + "ce_ib": 13.692408561706543, + "ce_orig": 1.3621617555618286, + "epoch": 0.167948810122942, + "kl_loss": 0.2163066267967224, + "loss_ib": 0.0035323069896548986, + "step": 584 + }, + { + "epoch": 0.16823639370191962, + "grad_norm": 0.09431323409080505, + "learning_rate": 4.9915453517804554e-05, + "loss": 0.8551, + "step": 585 + }, + { + "ce_ib": 9.730319023132324, + "ce_orig": 0.9177817106246948, + "epoch": 0.16823639370191962, + "kl_loss": 0.21307173371315002, + "loss_ib": 0.0031037491280585527, + "step": 585 + }, + { + "ce_ib": 8.93181037902832, + "ce_orig": 1.0497219562530518, + "epoch": 0.16823639370191962, + "kl_loss": 0.11000088602304459, + "loss_ib": 0.001993189798668027, + "step": 585 + }, + { + "ce_ib": 9.731582641601562, + "ce_orig": 1.101962924003601, + "epoch": 0.16823639370191962, + "kl_loss": 0.2055375874042511, + "loss_ib": 0.003028533887118101, + "step": 585 + }, + { + "ce_ib": 6.749084949493408, + "ce_orig": 0.8812076449394226, + "epoch": 0.16823639370191962, + "kl_loss": 0.09668911248445511, + "loss_ib": 0.0016417994629591703, + "step": 585 + }, + { + "ce_ib": 6.907016277313232, + "ce_orig": 0.538230299949646, + "epoch": 0.16852397728089727, + "kl_loss": 0.18287301063537598, + "loss_ib": 0.002519431756809354, + "step": 586 + }, + { + "ce_ib": 13.047144889831543, + "ce_orig": 0.990262508392334, + "epoch": 0.16852397728089727, + "kl_loss": 0.19800947606563568, + "loss_ib": 0.0032848091796040535, + "step": 586 + }, + { + "ce_ib": 9.177399635314941, + "ce_orig": 0.4944288730621338, + "epoch": 0.16852397728089727, + "kl_loss": 0.09137950837612152, + "loss_ib": 0.001831535017117858, + "step": 586 + }, + { + "ce_ib": 9.995772361755371, + "ce_orig": 0.7994273900985718, + "epoch": 0.16852397728089727, + "kl_loss": 0.14279597997665405, + "loss_ib": 0.0024275369942188263, + "step": 586 + }, + { + "ce_ib": 10.55152416229248, + "ce_orig": 0.9644479155540466, + "epoch": 0.1688115608598749, + "kl_loss": 0.1346493363380432, + "loss_ib": 0.0024016457609832287, + "step": 587 + }, + { + "ce_ib": 7.489251136779785, + "ce_orig": 0.4593333601951599, + "epoch": 0.1688115608598749, + "kl_loss": 0.22705069184303284, + "loss_ib": 0.0030194318387657404, + "step": 587 + }, + { + "ce_ib": 13.462209701538086, + "ce_orig": 1.549858808517456, + "epoch": 0.1688115608598749, + "kl_loss": 0.17547796666622162, + "loss_ib": 0.0031010007951408625, + "step": 587 + }, + { + "ce_ib": 8.43393611907959, + "ce_orig": 0.9473516941070557, + "epoch": 0.1688115608598749, + "kl_loss": 0.16294658184051514, + "loss_ib": 0.0024728593416512012, + "step": 587 + }, + { + "ce_ib": 11.13269329071045, + "ce_orig": 1.07948637008667, + "epoch": 0.16909914443885254, + "kl_loss": 0.14176622033119202, + "loss_ib": 0.002530931495130062, + "step": 588 + }, + { + "ce_ib": 6.587364196777344, + "ce_orig": 0.4034996032714844, + "epoch": 0.16909914443885254, + "kl_loss": 0.10913390666246414, + "loss_ib": 0.0017500754911452532, + "step": 588 + }, + { + "ce_ib": 6.319201469421387, + "ce_orig": 0.7516131401062012, + "epoch": 0.16909914443885254, + "kl_loss": 0.11739123612642288, + "loss_ib": 0.00180583237670362, + "step": 588 + }, + { + "ce_ib": 5.119848728179932, + "ce_orig": 0.48948171734809875, + "epoch": 0.16909914443885254, + "kl_loss": 0.0971326231956482, + "loss_ib": 0.0014833110617473722, + "step": 588 + }, + { + "ce_ib": 10.022903442382812, + "ce_orig": 1.0736076831817627, + "epoch": 0.16938672801783017, + "kl_loss": 0.12581218779087067, + "loss_ib": 0.002260412322357297, + "step": 589 + }, + { + "ce_ib": 11.274831771850586, + "ce_orig": 0.7393347024917603, + "epoch": 0.16938672801783017, + "kl_loss": 0.13805577158927917, + "loss_ib": 0.0025080409832298756, + "step": 589 + }, + { + "ce_ib": 10.370933532714844, + "ce_orig": 1.0629693269729614, + "epoch": 0.16938672801783017, + "kl_loss": 0.16156615316867828, + "loss_ib": 0.0026527547743171453, + "step": 589 + }, + { + "ce_ib": 9.891374588012695, + "ce_orig": 0.7961823344230652, + "epoch": 0.16938672801783017, + "kl_loss": 0.14303208887577057, + "loss_ib": 0.0024194582365453243, + "step": 589 + }, + { + "epoch": 0.16967431159680782, + "grad_norm": 0.09825263917446136, + "learning_rate": 4.9912234871722805e-05, + "loss": 0.8531, + "step": 590 + }, + { + "ce_ib": 7.040185451507568, + "ce_orig": 0.45819783210754395, + "epoch": 0.16967431159680782, + "kl_loss": 0.19058099389076233, + "loss_ib": 0.0026098282542079687, + "step": 590 + }, + { + "ce_ib": 8.853228569030762, + "ce_orig": 0.45481324195861816, + "epoch": 0.16967431159680782, + "kl_loss": 0.16712503135204315, + "loss_ib": 0.0025565731339156628, + "step": 590 + }, + { + "ce_ib": 4.791247844696045, + "ce_orig": 0.5837194323539734, + "epoch": 0.16967431159680782, + "kl_loss": 0.09282873570919037, + "loss_ib": 0.001407412113621831, + "step": 590 + }, + { + "ce_ib": 10.547879219055176, + "ce_orig": 0.5191910862922668, + "epoch": 0.16967431159680782, + "kl_loss": 0.21911896765232086, + "loss_ib": 0.003245977684855461, + "step": 590 + }, + { + "ce_ib": 13.081586837768555, + "ce_orig": 1.6712095737457275, + "epoch": 0.16996189517578547, + "kl_loss": 0.1372794508934021, + "loss_ib": 0.002680953126400709, + "step": 591 + }, + { + "ce_ib": 10.580743789672852, + "ce_orig": 1.0760875940322876, + "epoch": 0.16996189517578547, + "kl_loss": 0.14443005621433258, + "loss_ib": 0.002502374816685915, + "step": 591 + }, + { + "ce_ib": 11.404194831848145, + "ce_orig": 1.1433049440383911, + "epoch": 0.16996189517578547, + "kl_loss": 0.1749769151210785, + "loss_ib": 0.0028901887126266956, + "step": 591 + }, + { + "ce_ib": 6.110208511352539, + "ce_orig": 0.5183145403862, + "epoch": 0.16996189517578547, + "kl_loss": 0.2318723201751709, + "loss_ib": 0.002929744077846408, + "step": 591 + }, + { + "ce_ib": 9.481659889221191, + "ce_orig": 1.2535269260406494, + "epoch": 0.1702494787547631, + "kl_loss": 0.10323582589626312, + "loss_ib": 0.0019805242773145437, + "step": 592 + }, + { + "ce_ib": 7.891531467437744, + "ce_orig": 1.11729896068573, + "epoch": 0.1702494787547631, + "kl_loss": 0.22114473581314087, + "loss_ib": 0.0030006002634763718, + "step": 592 + }, + { + "ce_ib": 11.817340850830078, + "ce_orig": 1.0938154458999634, + "epoch": 0.1702494787547631, + "kl_loss": 0.14513513445854187, + "loss_ib": 0.0026330852415412664, + "step": 592 + }, + { + "ce_ib": 7.189747333526611, + "ce_orig": 0.3452516496181488, + "epoch": 0.1702494787547631, + "kl_loss": 0.14610087871551514, + "loss_ib": 0.002179983537644148, + "step": 592 + }, + { + "ce_ib": 11.470684051513672, + "ce_orig": 1.2777209281921387, + "epoch": 0.17053706233374075, + "kl_loss": 0.21245329082012177, + "loss_ib": 0.0032716011628508568, + "step": 593 + }, + { + "ce_ib": 8.021244049072266, + "ce_orig": 0.7062201499938965, + "epoch": 0.17053706233374075, + "kl_loss": 0.17156916856765747, + "loss_ib": 0.0025178161449730396, + "step": 593 + }, + { + "ce_ib": 7.824449062347412, + "ce_orig": 0.625266432762146, + "epoch": 0.17053706233374075, + "kl_loss": 0.144322469830513, + "loss_ib": 0.00222566956654191, + "step": 593 + }, + { + "ce_ib": 8.449918746948242, + "ce_orig": 0.7968825697898865, + "epoch": 0.17053706233374075, + "kl_loss": 0.13197359442710876, + "loss_ib": 0.0021647277753800154, + "step": 593 + }, + { + "ce_ib": 12.557650566101074, + "ce_orig": 1.2830119132995605, + "epoch": 0.17082464591271837, + "kl_loss": 0.13416330516338348, + "loss_ib": 0.0025973981246352196, + "step": 594 + }, + { + "ce_ib": 16.166818618774414, + "ce_orig": 1.5858261585235596, + "epoch": 0.17082464591271837, + "kl_loss": 0.13871753215789795, + "loss_ib": 0.003003857098519802, + "step": 594 + }, + { + "ce_ib": 9.042425155639648, + "ce_orig": 0.6032095551490784, + "epoch": 0.17082464591271837, + "kl_loss": 0.14524920284748077, + "loss_ib": 0.0023567345924675465, + "step": 594 + }, + { + "ce_ib": 6.781697750091553, + "ce_orig": 0.7230402231216431, + "epoch": 0.17082464591271837, + "kl_loss": 0.11699005216360092, + "loss_ib": 0.0018480703001841903, + "step": 594 + }, + { + "epoch": 0.17111222949169602, + "grad_norm": 0.11506392806768417, + "learning_rate": 4.9908956206285e-05, + "loss": 0.8102, + "step": 595 + }, + { + "ce_ib": 8.491539001464844, + "ce_orig": 0.799639105796814, + "epoch": 0.17111222949169602, + "kl_loss": 0.310641884803772, + "loss_ib": 0.003955572843551636, + "step": 595 + }, + { + "ce_ib": 9.903238296508789, + "ce_orig": 0.9372822046279907, + "epoch": 0.17111222949169602, + "kl_loss": 0.34602734446525574, + "loss_ib": 0.004450596868991852, + "step": 595 + }, + { + "ce_ib": 10.448668479919434, + "ce_orig": 0.6878861784934998, + "epoch": 0.17111222949169602, + "kl_loss": 0.18895582854747772, + "loss_ib": 0.002934425137937069, + "step": 595 + }, + { + "ce_ib": 7.248452663421631, + "ce_orig": 0.638181746006012, + "epoch": 0.17111222949169602, + "kl_loss": 0.20415529608726501, + "loss_ib": 0.002766398014500737, + "step": 595 + }, + { + "ce_ib": 12.064194679260254, + "ce_orig": 1.1723579168319702, + "epoch": 0.17139981307067367, + "kl_loss": 0.14018401503562927, + "loss_ib": 0.0026082596741616726, + "step": 596 + }, + { + "ce_ib": 8.019061088562012, + "ce_orig": 0.6501631140708923, + "epoch": 0.17139981307067367, + "kl_loss": 0.15329334139823914, + "loss_ib": 0.0023348394315689802, + "step": 596 + }, + { + "ce_ib": 8.00246810913086, + "ce_orig": 0.6622079014778137, + "epoch": 0.17139981307067367, + "kl_loss": 0.21607182919979095, + "loss_ib": 0.0029609650373458862, + "step": 596 + }, + { + "ce_ib": 11.230345726013184, + "ce_orig": 0.9562708139419556, + "epoch": 0.17139981307067367, + "kl_loss": 0.1696317195892334, + "loss_ib": 0.002819351851940155, + "step": 596 + }, + { + "ce_ib": 10.980791091918945, + "ce_orig": 0.7618129849433899, + "epoch": 0.1716873966496513, + "kl_loss": 0.14176014065742493, + "loss_ib": 0.0025156803894788027, + "step": 597 + }, + { + "ce_ib": 12.790441513061523, + "ce_orig": 1.0193257331848145, + "epoch": 0.1716873966496513, + "kl_loss": 0.16009655594825745, + "loss_ib": 0.002880009589716792, + "step": 597 + }, + { + "ce_ib": 9.670978546142578, + "ce_orig": 1.0576988458633423, + "epoch": 0.1716873966496513, + "kl_loss": 0.31038549542427063, + "loss_ib": 0.004070952534675598, + "step": 597 + }, + { + "ce_ib": 8.231335639953613, + "ce_orig": 0.7771991491317749, + "epoch": 0.1716873966496513, + "kl_loss": 0.2681956887245178, + "loss_ib": 0.003505090484395623, + "step": 597 + }, + { + "ce_ib": 14.043437957763672, + "ce_orig": 1.6835757493972778, + "epoch": 0.17197498022862895, + "kl_loss": 0.22146332263946533, + "loss_ib": 0.0036189770326018333, + "step": 598 + }, + { + "ce_ib": 9.480457305908203, + "ce_orig": 0.8885291218757629, + "epoch": 0.17197498022862895, + "kl_loss": 0.09636467695236206, + "loss_ib": 0.0019116924377158284, + "step": 598 + }, + { + "ce_ib": 7.293931484222412, + "ce_orig": 0.6654239296913147, + "epoch": 0.17197498022862895, + "kl_loss": 0.13003098964691162, + "loss_ib": 0.0020297029986977577, + "step": 598 + }, + { + "ce_ib": 6.692012310028076, + "ce_orig": 0.5753830671310425, + "epoch": 0.17197498022862895, + "kl_loss": 0.11845792084932327, + "loss_ib": 0.0018537803553044796, + "step": 598 + }, + { + "ce_ib": 8.007339477539062, + "ce_orig": 0.897793710231781, + "epoch": 0.17226256380760657, + "kl_loss": 0.11580362170934677, + "loss_ib": 0.0019587702117860317, + "step": 599 + }, + { + "ce_ib": 10.507881164550781, + "ce_orig": 1.1790790557861328, + "epoch": 0.17226256380760657, + "kl_loss": 0.15849515795707703, + "loss_ib": 0.002635739743709564, + "step": 599 + }, + { + "ce_ib": 12.296706199645996, + "ce_orig": 1.374943494796753, + "epoch": 0.17226256380760657, + "kl_loss": 0.16814401745796204, + "loss_ib": 0.0029111106414347887, + "step": 599 + }, + { + "ce_ib": 11.69453239440918, + "ce_orig": 1.4047762155532837, + "epoch": 0.17226256380760657, + "kl_loss": 0.1669248342514038, + "loss_ib": 0.0028387014754116535, + "step": 599 + }, + { + "epoch": 0.17255014738658422, + "grad_norm": 0.09025447815656662, + "learning_rate": 4.9905617529390203e-05, + "loss": 0.7996, + "step": 600 + }, + { + "ce_ib": 13.253778457641602, + "ce_orig": 1.4632776975631714, + "epoch": 0.17255014738658422, + "kl_loss": 0.10774030536413193, + "loss_ib": 0.0024027808103710413, + "step": 600 + }, + { + "ce_ib": 9.553170204162598, + "ce_orig": 0.6664531826972961, + "epoch": 0.17255014738658422, + "kl_loss": 0.167972594499588, + "loss_ib": 0.002635042881593108, + "step": 600 + }, + { + "ce_ib": 8.886030197143555, + "ce_orig": 0.6646020412445068, + "epoch": 0.17255014738658422, + "kl_loss": 0.16888710856437683, + "loss_ib": 0.002577474107965827, + "step": 600 + }, + { + "ce_ib": 7.923281669616699, + "ce_orig": 0.6328381299972534, + "epoch": 0.17255014738658422, + "kl_loss": 0.13492171466350555, + "loss_ib": 0.002141545293852687, + "step": 600 + }, + { + "ce_ib": 5.616130828857422, + "ce_orig": 0.5428784489631653, + "epoch": 0.17283773096556188, + "kl_loss": 0.1354779154062271, + "loss_ib": 0.0019163921242579818, + "step": 601 + }, + { + "ce_ib": 9.720524787902832, + "ce_orig": 0.8215920925140381, + "epoch": 0.17283773096556188, + "kl_loss": 0.14569802582263947, + "loss_ib": 0.002429032465443015, + "step": 601 + }, + { + "ce_ib": 8.02761173248291, + "ce_orig": 0.8096197843551636, + "epoch": 0.17283773096556188, + "kl_loss": 0.1350010186433792, + "loss_ib": 0.002152771223336458, + "step": 601 + }, + { + "ce_ib": 7.660145282745361, + "ce_orig": 0.6597212553024292, + "epoch": 0.17283773096556188, + "kl_loss": 0.1304370015859604, + "loss_ib": 0.0020703845657408237, + "step": 601 + }, + { + "ce_ib": 7.52744722366333, + "ce_orig": 0.507038414478302, + "epoch": 0.1731253145445395, + "kl_loss": 0.19013965129852295, + "loss_ib": 0.0026541410479694605, + "step": 602 + }, + { + "ce_ib": 10.766141891479492, + "ce_orig": 0.8578734397888184, + "epoch": 0.1731253145445395, + "kl_loss": 0.12946805357933044, + "loss_ib": 0.002371294656768441, + "step": 602 + }, + { + "ce_ib": 5.255903244018555, + "ce_orig": 0.6048762798309326, + "epoch": 0.1731253145445395, + "kl_loss": 0.14380118250846863, + "loss_ib": 0.0019636021461337805, + "step": 602 + }, + { + "ce_ib": 10.115198135375977, + "ce_orig": 0.9766972064971924, + "epoch": 0.1731253145445395, + "kl_loss": 0.16051560640335083, + "loss_ib": 0.002616675803437829, + "step": 602 + }, + { + "ce_ib": 6.0160980224609375, + "ce_orig": 0.5172838568687439, + "epoch": 0.17341289812351715, + "kl_loss": 0.1775059700012207, + "loss_ib": 0.0023766695521771908, + "step": 603 + }, + { + "ce_ib": 7.59116792678833, + "ce_orig": 0.806316077709198, + "epoch": 0.17341289812351715, + "kl_loss": 0.23788809776306152, + "loss_ib": 0.0031379975844174623, + "step": 603 + }, + { + "ce_ib": 6.338790416717529, + "ce_orig": 0.17640917003154755, + "epoch": 0.17341289812351715, + "kl_loss": 0.11507699638605118, + "loss_ib": 0.0017846488626673818, + "step": 603 + }, + { + "ce_ib": 10.649868965148926, + "ce_orig": 1.2555245161056519, + "epoch": 0.17341289812351715, + "kl_loss": 0.16883064806461334, + "loss_ib": 0.002753293374553323, + "step": 603 + }, + { + "ce_ib": 15.31544303894043, + "ce_orig": 1.5796380043029785, + "epoch": 0.17370048170249477, + "kl_loss": 0.3695501685142517, + "loss_ib": 0.005227046087384224, + "step": 604 + }, + { + "ce_ib": 11.3411865234375, + "ce_orig": 1.3669389486312866, + "epoch": 0.17370048170249477, + "kl_loss": 0.13643240928649902, + "loss_ib": 0.0024984427727758884, + "step": 604 + }, + { + "ce_ib": 9.672558784484863, + "ce_orig": 0.6192442178726196, + "epoch": 0.17370048170249477, + "kl_loss": 0.17595481872558594, + "loss_ib": 0.0027268039993941784, + "step": 604 + }, + { + "ce_ib": 7.121029376983643, + "ce_orig": 0.7190517783164978, + "epoch": 0.17370048170249477, + "kl_loss": 0.1759684681892395, + "loss_ib": 0.0024717876221984625, + "step": 604 + }, + { + "epoch": 0.17398806528147243, + "grad_norm": 0.10308549553155899, + "learning_rate": 4.990221884908206e-05, + "loss": 0.8495, + "step": 605 + }, + { + "ce_ib": 15.291545867919922, + "ce_orig": 1.6869012117385864, + "epoch": 0.17398806528147243, + "kl_loss": 0.1663864552974701, + "loss_ib": 0.0031930189579725266, + "step": 605 + }, + { + "ce_ib": 7.900699138641357, + "ce_orig": 1.0077180862426758, + "epoch": 0.17398806528147243, + "kl_loss": 0.1302233338356018, + "loss_ib": 0.002092303242534399, + "step": 605 + }, + { + "ce_ib": 8.34791088104248, + "ce_orig": 0.7704336047172546, + "epoch": 0.17398806528147243, + "kl_loss": 0.17671293020248413, + "loss_ib": 0.0026019203942269087, + "step": 605 + }, + { + "ce_ib": 8.685835838317871, + "ce_orig": 0.8194261193275452, + "epoch": 0.17398806528147243, + "kl_loss": 0.2805391252040863, + "loss_ib": 0.0036739748902618885, + "step": 605 + }, + { + "ce_ib": 14.333061218261719, + "ce_orig": 1.967085361480713, + "epoch": 0.17427564886045008, + "kl_loss": 0.18738757073879242, + "loss_ib": 0.0033071814104914665, + "step": 606 + }, + { + "ce_ib": 4.270995616912842, + "ce_orig": 0.5261480808258057, + "epoch": 0.17427564886045008, + "kl_loss": 0.1274331510066986, + "loss_ib": 0.0017014308832585812, + "step": 606 + }, + { + "ce_ib": 9.806127548217773, + "ce_orig": 1.3137513399124146, + "epoch": 0.17427564886045008, + "kl_loss": 0.18525034189224243, + "loss_ib": 0.002833116101101041, + "step": 606 + }, + { + "ce_ib": 12.188000679016113, + "ce_orig": 1.2606420516967773, + "epoch": 0.17427564886045008, + "kl_loss": 0.15507441759109497, + "loss_ib": 0.0027695440221577883, + "step": 606 + }, + { + "ce_ib": 7.470699787139893, + "ce_orig": 0.2429499328136444, + "epoch": 0.1745632324394277, + "kl_loss": 0.18981708586215973, + "loss_ib": 0.002645240630954504, + "step": 607 + }, + { + "ce_ib": 9.157522201538086, + "ce_orig": 0.8754900693893433, + "epoch": 0.1745632324394277, + "kl_loss": 0.21200796961784363, + "loss_ib": 0.0030358319636434317, + "step": 607 + }, + { + "ce_ib": 10.273916244506836, + "ce_orig": 0.7955263257026672, + "epoch": 0.1745632324394277, + "kl_loss": 0.14678597450256348, + "loss_ib": 0.002495251130312681, + "step": 607 + }, + { + "ce_ib": 8.007857322692871, + "ce_orig": 0.8106256127357483, + "epoch": 0.1745632324394277, + "kl_loss": 0.10579822957515717, + "loss_ib": 0.001858767936937511, + "step": 607 + }, + { + "ce_ib": 14.369596481323242, + "ce_orig": 1.0616765022277832, + "epoch": 0.17485081601840535, + "kl_loss": 0.44787994027137756, + "loss_ib": 0.00591575913131237, + "step": 608 + }, + { + "ce_ib": 12.925912857055664, + "ce_orig": 1.2854984998703003, + "epoch": 0.17485081601840535, + "kl_loss": 0.22356921434402466, + "loss_ib": 0.0035282832104712725, + "step": 608 + }, + { + "ce_ib": 5.677262306213379, + "ce_orig": 0.6853749752044678, + "epoch": 0.17485081601840535, + "kl_loss": 0.2728205621242523, + "loss_ib": 0.0032959317322820425, + "step": 608 + }, + { + "ce_ib": 9.354161262512207, + "ce_orig": 0.8821960091590881, + "epoch": 0.17485081601840535, + "kl_loss": 0.1578974723815918, + "loss_ib": 0.0025143909733742476, + "step": 608 + }, + { + "ce_ib": 15.807966232299805, + "ce_orig": 1.1312600374221802, + "epoch": 0.17513839959738298, + "kl_loss": 0.2390902042388916, + "loss_ib": 0.003971698693931103, + "step": 609 + }, + { + "ce_ib": 12.78138542175293, + "ce_orig": 0.9202042818069458, + "epoch": 0.17513839959738298, + "kl_loss": 0.20055478811264038, + "loss_ib": 0.003283686237409711, + "step": 609 + }, + { + "ce_ib": 10.475568771362305, + "ce_orig": 0.8941435217857361, + "epoch": 0.17513839959738298, + "kl_loss": 0.15872544050216675, + "loss_ib": 0.0026348114479333162, + "step": 609 + }, + { + "ce_ib": 10.035277366638184, + "ce_orig": 1.2712260484695435, + "epoch": 0.17513839959738298, + "kl_loss": 0.15426132082939148, + "loss_ib": 0.002546140691265464, + "step": 609 + }, + { + "epoch": 0.17542598317636063, + "grad_norm": 0.09014883637428284, + "learning_rate": 4.989876017354878e-05, + "loss": 0.9056, + "step": 610 + }, + { + "ce_ib": 9.086161613464355, + "ce_orig": 0.9555597305297852, + "epoch": 0.17542598317636063, + "kl_loss": 0.14901208877563477, + "loss_ib": 0.002398737007752061, + "step": 610 + }, + { + "ce_ib": 13.615242004394531, + "ce_orig": 1.6006946563720703, + "epoch": 0.17542598317636063, + "kl_loss": 0.14780299365520477, + "loss_ib": 0.0028395538683980703, + "step": 610 + }, + { + "ce_ib": 9.924784660339355, + "ce_orig": 0.732452929019928, + "epoch": 0.17542598317636063, + "kl_loss": 0.14724621176719666, + "loss_ib": 0.0024649405386298895, + "step": 610 + }, + { + "ce_ib": 6.761714458465576, + "ce_orig": 0.5315162539482117, + "epoch": 0.17542598317636063, + "kl_loss": 0.12623527646064758, + "loss_ib": 0.0019385241903364658, + "step": 610 + }, + { + "ce_ib": 8.44915771484375, + "ce_orig": 0.8193613290786743, + "epoch": 0.17571356675533828, + "kl_loss": 0.1713801920413971, + "loss_ib": 0.0025587177369743586, + "step": 611 + }, + { + "ce_ib": 8.449363708496094, + "ce_orig": 0.567879319190979, + "epoch": 0.17571356675533828, + "kl_loss": 0.2723478078842163, + "loss_ib": 0.0035684143658727407, + "step": 611 + }, + { + "ce_ib": 5.55802059173584, + "ce_orig": 0.4952395260334015, + "epoch": 0.17571356675533828, + "kl_loss": 0.1825639307498932, + "loss_ib": 0.002381441183388233, + "step": 611 + }, + { + "ce_ib": 14.0112886428833, + "ce_orig": 1.512209177017212, + "epoch": 0.17571356675533828, + "kl_loss": 0.17549076676368713, + "loss_ib": 0.0031560363713651896, + "step": 611 + }, + { + "ce_ib": 8.182499885559082, + "ce_orig": 0.914882481098175, + "epoch": 0.1760011503343159, + "kl_loss": 0.16630445420742035, + "loss_ib": 0.0024812945630401373, + "step": 612 + }, + { + "ce_ib": 12.355195999145508, + "ce_orig": 0.9773331880569458, + "epoch": 0.1760011503343159, + "kl_loss": 0.23119373619556427, + "loss_ib": 0.0035474568139761686, + "step": 612 + }, + { + "ce_ib": 7.4552083015441895, + "ce_orig": 0.7183859944343567, + "epoch": 0.1760011503343159, + "kl_loss": 0.15184658765792847, + "loss_ib": 0.002263986738398671, + "step": 612 + }, + { + "ce_ib": 12.164751052856445, + "ce_orig": 1.0548458099365234, + "epoch": 0.1760011503343159, + "kl_loss": 0.179172545671463, + "loss_ib": 0.003008200554177165, + "step": 612 + }, + { + "ce_ib": 11.297383308410645, + "ce_orig": 0.8856826424598694, + "epoch": 0.17628873391329355, + "kl_loss": 0.18683576583862305, + "loss_ib": 0.0029980959370732307, + "step": 613 + }, + { + "ce_ib": 10.6996488571167, + "ce_orig": 0.816612958908081, + "epoch": 0.17628873391329355, + "kl_loss": 0.19587890803813934, + "loss_ib": 0.003028753912076354, + "step": 613 + }, + { + "ce_ib": 7.265003204345703, + "ce_orig": 0.5818957686424255, + "epoch": 0.17628873391329355, + "kl_loss": 0.24078163504600525, + "loss_ib": 0.0031343167647719383, + "step": 613 + }, + { + "ce_ib": 13.445194244384766, + "ce_orig": 1.461010217666626, + "epoch": 0.17628873391329355, + "kl_loss": 0.1896296739578247, + "loss_ib": 0.003240815829485655, + "step": 613 + }, + { + "ce_ib": 8.329078674316406, + "ce_orig": 0.5632631778717041, + "epoch": 0.17657631749227118, + "kl_loss": 0.1660267412662506, + "loss_ib": 0.0024931752122938633, + "step": 614 + }, + { + "ce_ib": 8.5061616897583, + "ce_orig": 0.668455958366394, + "epoch": 0.17657631749227118, + "kl_loss": 0.09593428671360016, + "loss_ib": 0.0018099590670317411, + "step": 614 + }, + { + "ce_ib": 7.759790420532227, + "ce_orig": 0.7224739789962769, + "epoch": 0.17657631749227118, + "kl_loss": 0.13144069910049438, + "loss_ib": 0.0020903858821839094, + "step": 614 + }, + { + "ce_ib": 9.92943000793457, + "ce_orig": 0.5543064475059509, + "epoch": 0.17657631749227118, + "kl_loss": 0.20024323463439941, + "loss_ib": 0.0029953753110021353, + "step": 614 + }, + { + "epoch": 0.17686390107124883, + "grad_norm": 0.14787979423999786, + "learning_rate": 4.9895241511123114e-05, + "loss": 0.8549, + "step": 615 + }, + { + "ce_ib": 12.167133331298828, + "ce_orig": 1.0232415199279785, + "epoch": 0.17686390107124883, + "kl_loss": 0.15243038535118103, + "loss_ib": 0.0027410173788666725, + "step": 615 + }, + { + "ce_ib": 12.68350601196289, + "ce_orig": 0.8611997365951538, + "epoch": 0.17686390107124883, + "kl_loss": 0.1832544207572937, + "loss_ib": 0.0031008946243673563, + "step": 615 + }, + { + "ce_ib": 5.987475872039795, + "ce_orig": 0.6404716968536377, + "epoch": 0.17686390107124883, + "kl_loss": 0.15817669034004211, + "loss_ib": 0.0021805143915116787, + "step": 615 + }, + { + "ce_ib": 6.460445880889893, + "ce_orig": 0.3817240297794342, + "epoch": 0.17686390107124883, + "kl_loss": 0.16189801692962646, + "loss_ib": 0.00226502469740808, + "step": 615 + }, + { + "ce_ib": 10.936073303222656, + "ce_orig": 0.48395687341690063, + "epoch": 0.17715148465022648, + "kl_loss": 0.1969614326953888, + "loss_ib": 0.003063221462070942, + "step": 616 + }, + { + "ce_ib": 8.578774452209473, + "ce_orig": 0.6014773845672607, + "epoch": 0.17715148465022648, + "kl_loss": 0.1850200891494751, + "loss_ib": 0.0027080783620476723, + "step": 616 + }, + { + "ce_ib": 6.492056369781494, + "ce_orig": 0.6777470707893372, + "epoch": 0.17715148465022648, + "kl_loss": 0.16238093376159668, + "loss_ib": 0.0022730149794369936, + "step": 616 + }, + { + "ce_ib": 5.815036296844482, + "ce_orig": 0.7170777320861816, + "epoch": 0.17715148465022648, + "kl_loss": 0.12132446467876434, + "loss_ib": 0.0017947482410818338, + "step": 616 + }, + { + "ce_ib": 6.723383903503418, + "ce_orig": 0.5069630146026611, + "epoch": 0.1774390682292041, + "kl_loss": 0.14102785289287567, + "loss_ib": 0.0020826170220971107, + "step": 617 + }, + { + "ce_ib": 7.4365363121032715, + "ce_orig": 0.6629728674888611, + "epoch": 0.1774390682292041, + "kl_loss": 0.18020984530448914, + "loss_ib": 0.002545751864090562, + "step": 617 + }, + { + "ce_ib": 10.48121166229248, + "ce_orig": 0.9769195318222046, + "epoch": 0.1774390682292041, + "kl_loss": 0.14403533935546875, + "loss_ib": 0.0024884745944291353, + "step": 617 + }, + { + "ce_ib": 8.293168067932129, + "ce_orig": 1.2928009033203125, + "epoch": 0.1774390682292041, + "kl_loss": 0.10530447959899902, + "loss_ib": 0.0018823615973815322, + "step": 617 + }, + { + "ce_ib": 5.5370988845825195, + "ce_orig": 0.6263954043388367, + "epoch": 0.17772665180818176, + "kl_loss": 0.11039714515209198, + "loss_ib": 0.001657681306824088, + "step": 618 + }, + { + "ce_ib": 13.478799819946289, + "ce_orig": 1.3046756982803345, + "epoch": 0.17772665180818176, + "kl_loss": 0.16247433423995972, + "loss_ib": 0.002972623100504279, + "step": 618 + }, + { + "ce_ib": 6.596388816833496, + "ce_orig": 0.622149646282196, + "epoch": 0.17772665180818176, + "kl_loss": 0.1503526270389557, + "loss_ib": 0.0021631652489304543, + "step": 618 + }, + { + "ce_ib": 12.335912704467773, + "ce_orig": 1.3814923763275146, + "epoch": 0.17772665180818176, + "kl_loss": 0.14966334402561188, + "loss_ib": 0.002730224747210741, + "step": 618 + }, + { + "ce_ib": 10.754959106445312, + "ce_orig": 0.859503984451294, + "epoch": 0.17801423538715938, + "kl_loss": 0.12099392712116241, + "loss_ib": 0.0022854350972920656, + "step": 619 + }, + { + "ce_ib": 5.859959125518799, + "ce_orig": 0.5606276392936707, + "epoch": 0.17801423538715938, + "kl_loss": 0.1312570571899414, + "loss_ib": 0.0018985664937645197, + "step": 619 + }, + { + "ce_ib": 5.188699722290039, + "ce_orig": 0.7638875842094421, + "epoch": 0.17801423538715938, + "kl_loss": 0.1094013974070549, + "loss_ib": 0.0016128838760778308, + "step": 619 + }, + { + "ce_ib": 11.558479309082031, + "ce_orig": 0.7178393006324768, + "epoch": 0.17801423538715938, + "kl_loss": 0.1623765379190445, + "loss_ib": 0.0027796130161732435, + "step": 619 + }, + { + "epoch": 0.17830181896613703, + "grad_norm": 0.15338236093521118, + "learning_rate": 4.989166287028234e-05, + "loss": 0.8193, + "step": 620 + }, + { + "ce_ib": 6.948857307434082, + "ce_orig": 0.7019293308258057, + "epoch": 0.17830181896613703, + "kl_loss": 0.1282852739095688, + "loss_ib": 0.001977738458663225, + "step": 620 + }, + { + "ce_ib": 6.701162815093994, + "ce_orig": 0.45527055859565735, + "epoch": 0.17830181896613703, + "kl_loss": 0.30297911167144775, + "loss_ib": 0.003699907334521413, + "step": 620 + }, + { + "ce_ib": 14.506156921386719, + "ce_orig": 1.8138208389282227, + "epoch": 0.17830181896613703, + "kl_loss": 0.20029760897159576, + "loss_ib": 0.0034535916056483984, + "step": 620 + }, + { + "ce_ib": 8.688165664672852, + "ce_orig": 0.6975913047790527, + "epoch": 0.17830181896613703, + "kl_loss": 0.1677013337612152, + "loss_ib": 0.002545829862356186, + "step": 620 + }, + { + "ce_ib": 8.23354721069336, + "ce_orig": 0.8580948710441589, + "epoch": 0.17858940254511468, + "kl_loss": 0.17454084753990173, + "loss_ib": 0.0025687632150948048, + "step": 621 + }, + { + "ce_ib": 6.359945774078369, + "ce_orig": 0.6615402102470398, + "epoch": 0.17858940254511468, + "kl_loss": 0.16662156581878662, + "loss_ib": 0.0023022103123366833, + "step": 621 + }, + { + "ce_ib": 9.940476417541504, + "ce_orig": 0.8817598819732666, + "epoch": 0.17858940254511468, + "kl_loss": 0.14029280841350555, + "loss_ib": 0.0023969756439328194, + "step": 621 + }, + { + "ce_ib": 5.855399131774902, + "ce_orig": 0.2266225963830948, + "epoch": 0.17858940254511468, + "kl_loss": 0.11816604435443878, + "loss_ib": 0.0017672003014013171, + "step": 621 + }, + { + "ce_ib": 9.321126937866211, + "ce_orig": 0.6670013666152954, + "epoch": 0.1788769861240923, + "kl_loss": 0.1992078721523285, + "loss_ib": 0.0029241912998259068, + "step": 622 + }, + { + "ce_ib": 9.116394996643066, + "ce_orig": 0.8380889296531677, + "epoch": 0.1788769861240923, + "kl_loss": 0.13510224223136902, + "loss_ib": 0.0022626619320362806, + "step": 622 + }, + { + "ce_ib": 8.283426284790039, + "ce_orig": 0.7578662037849426, + "epoch": 0.1788769861240923, + "kl_loss": 0.0886252149939537, + "loss_ib": 0.0017145946621894836, + "step": 622 + }, + { + "ce_ib": 10.24531364440918, + "ce_orig": 1.1135846376419067, + "epoch": 0.1788769861240923, + "kl_loss": 0.1562294065952301, + "loss_ib": 0.002586825517937541, + "step": 622 + }, + { + "ce_ib": 8.487592697143555, + "ce_orig": 0.6697806715965271, + "epoch": 0.17916456970306996, + "kl_loss": 0.22156865894794464, + "loss_ib": 0.003064445685595274, + "step": 623 + }, + { + "ce_ib": 7.489628314971924, + "ce_orig": 0.9431294202804565, + "epoch": 0.17916456970306996, + "kl_loss": 0.131272092461586, + "loss_ib": 0.0020616836845874786, + "step": 623 + }, + { + "ce_ib": 10.724349021911621, + "ce_orig": 1.0055797100067139, + "epoch": 0.17916456970306996, + "kl_loss": 0.1610507071018219, + "loss_ib": 0.002682941732928157, + "step": 623 + }, + { + "ce_ib": 8.10354995727539, + "ce_orig": 0.8617219924926758, + "epoch": 0.17916456970306996, + "kl_loss": 0.2971377372741699, + "loss_ib": 0.0037817321717739105, + "step": 623 + }, + { + "ce_ib": 11.173288345336914, + "ce_orig": 1.3478788137435913, + "epoch": 0.17945215328204758, + "kl_loss": 0.14247748255729675, + "loss_ib": 0.0025421034079045057, + "step": 624 + }, + { + "ce_ib": 5.565175533294678, + "ce_orig": 0.36270689964294434, + "epoch": 0.17945215328204758, + "kl_loss": 0.16036799550056458, + "loss_ib": 0.0021601973567157984, + "step": 624 + }, + { + "ce_ib": 10.509783744812012, + "ce_orig": 1.0102475881576538, + "epoch": 0.17945215328204758, + "kl_loss": 0.17176342010498047, + "loss_ib": 0.002768612466752529, + "step": 624 + }, + { + "ce_ib": 12.00045108795166, + "ce_orig": 1.1874045133590698, + "epoch": 0.17945215328204758, + "kl_loss": 0.21322676539421082, + "loss_ib": 0.003332312684506178, + "step": 624 + }, + { + "epoch": 0.17973973686102523, + "grad_norm": 0.11537851393222809, + "learning_rate": 4.988802425964824e-05, + "loss": 0.8732, + "step": 625 + }, + { + "ce_ib": 7.465215682983398, + "ce_orig": 0.6246358752250671, + "epoch": 0.17973973686102523, + "kl_loss": 0.2165844887495041, + "loss_ib": 0.002912366297096014, + "step": 625 + }, + { + "ce_ib": 6.249822616577148, + "ce_orig": 0.5895569920539856, + "epoch": 0.17973973686102523, + "kl_loss": 0.1526128053665161, + "loss_ib": 0.0021511102095246315, + "step": 625 + }, + { + "ce_ib": 8.204090118408203, + "ce_orig": 0.8461520075798035, + "epoch": 0.17973973686102523, + "kl_loss": 0.1934261918067932, + "loss_ib": 0.002754670800641179, + "step": 625 + }, + { + "ce_ib": 6.963839054107666, + "ce_orig": 0.7485008835792542, + "epoch": 0.17973973686102523, + "kl_loss": 0.08876027911901474, + "loss_ib": 0.001583986566402018, + "step": 625 + }, + { + "ce_ib": 13.58116626739502, + "ce_orig": 1.2827461957931519, + "epoch": 0.18002732044000289, + "kl_loss": 0.19602948427200317, + "loss_ib": 0.00331841129809618, + "step": 626 + }, + { + "ce_ib": 13.048462867736816, + "ce_orig": 1.3438245058059692, + "epoch": 0.18002732044000289, + "kl_loss": 0.16691835224628448, + "loss_ib": 0.002974029630422592, + "step": 626 + }, + { + "ce_ib": 8.618809700012207, + "ce_orig": 0.4586450755596161, + "epoch": 0.18002732044000289, + "kl_loss": 0.17586086690425873, + "loss_ib": 0.0026204895693808794, + "step": 626 + }, + { + "ce_ib": 9.625340461730957, + "ce_orig": 1.0119637250900269, + "epoch": 0.18002732044000289, + "kl_loss": 0.2463323473930359, + "loss_ib": 0.0034258572850376368, + "step": 626 + }, + { + "ce_ib": 9.643866539001465, + "ce_orig": 0.5789271593093872, + "epoch": 0.1803149040189805, + "kl_loss": 0.16274522244930267, + "loss_ib": 0.002591838827356696, + "step": 627 + }, + { + "ce_ib": 7.565330505371094, + "ce_orig": 0.3938678801059723, + "epoch": 0.1803149040189805, + "kl_loss": 0.16621270775794983, + "loss_ib": 0.002418660093098879, + "step": 627 + }, + { + "ce_ib": 9.062137603759766, + "ce_orig": 0.7573468089103699, + "epoch": 0.1803149040189805, + "kl_loss": 0.15040776133537292, + "loss_ib": 0.002410291461274028, + "step": 627 + }, + { + "ce_ib": 7.801846981048584, + "ce_orig": 0.5504791736602783, + "epoch": 0.1803149040189805, + "kl_loss": 0.20418506860733032, + "loss_ib": 0.0028220354579389095, + "step": 627 + }, + { + "ce_ib": 12.088622093200684, + "ce_orig": 1.4907240867614746, + "epoch": 0.18060248759795816, + "kl_loss": 0.1632252186536789, + "loss_ib": 0.0028411142993718386, + "step": 628 + }, + { + "ce_ib": 10.344350814819336, + "ce_orig": 1.1456356048583984, + "epoch": 0.18060248759795816, + "kl_loss": 0.11406631767749786, + "loss_ib": 0.0021750980522483587, + "step": 628 + }, + { + "ce_ib": 11.513299942016602, + "ce_orig": 1.1756242513656616, + "epoch": 0.18060248759795816, + "kl_loss": 0.21336236596107483, + "loss_ib": 0.003284953534603119, + "step": 628 + }, + { + "ce_ib": 13.674637794494629, + "ce_orig": 1.8630796670913696, + "epoch": 0.18060248759795816, + "kl_loss": 0.15529105067253113, + "loss_ib": 0.002920374274253845, + "step": 628 + }, + { + "ce_ib": 7.227867603302002, + "ce_orig": 0.6919394135475159, + "epoch": 0.18089007117693578, + "kl_loss": 0.1787305772304535, + "loss_ib": 0.00251009245403111, + "step": 629 + }, + { + "ce_ib": 13.092966079711914, + "ce_orig": 1.2497636079788208, + "epoch": 0.18089007117693578, + "kl_loss": 0.11016056686639786, + "loss_ib": 0.0024109024088829756, + "step": 629 + }, + { + "ce_ib": 12.19546890258789, + "ce_orig": 1.26802396774292, + "epoch": 0.18089007117693578, + "kl_loss": 0.201836496591568, + "loss_ib": 0.00323791173286736, + "step": 629 + }, + { + "ce_ib": 8.523748397827148, + "ce_orig": 0.5664364695549011, + "epoch": 0.18089007117693578, + "kl_loss": 0.2126503586769104, + "loss_ib": 0.002978878328576684, + "step": 629 + }, + { + "epoch": 0.18117765475591344, + "grad_norm": 0.13004900515079498, + "learning_rate": 4.9884325687987056e-05, + "loss": 0.8905, + "step": 630 + }, + { + "ce_ib": 12.112406730651855, + "ce_orig": 1.2464693784713745, + "epoch": 0.18117765475591344, + "kl_loss": 0.16276949644088745, + "loss_ib": 0.0028389354702085257, + "step": 630 + }, + { + "ce_ib": 6.565924644470215, + "ce_orig": 0.48202767968177795, + "epoch": 0.18117765475591344, + "kl_loss": 0.16982322931289673, + "loss_ib": 0.0023548246826976538, + "step": 630 + }, + { + "ce_ib": 11.657485961914062, + "ce_orig": 1.4749407768249512, + "epoch": 0.18117765475591344, + "kl_loss": 0.16900035738945007, + "loss_ib": 0.0028557521291077137, + "step": 630 + }, + { + "ce_ib": 10.074145317077637, + "ce_orig": 1.1275204420089722, + "epoch": 0.18117765475591344, + "kl_loss": 0.1381954848766327, + "loss_ib": 0.002389369299635291, + "step": 630 + }, + { + "ce_ib": 10.129415512084961, + "ce_orig": 0.6778865456581116, + "epoch": 0.1814652383348911, + "kl_loss": 0.12124676257371902, + "loss_ib": 0.0022254090290516615, + "step": 631 + }, + { + "ce_ib": 9.381417274475098, + "ce_orig": 0.7912308573722839, + "epoch": 0.1814652383348911, + "kl_loss": 0.1806401014328003, + "loss_ib": 0.0027445426676422358, + "step": 631 + }, + { + "ce_ib": 6.85666561126709, + "ce_orig": 0.6336774230003357, + "epoch": 0.1814652383348911, + "kl_loss": 0.14535778760910034, + "loss_ib": 0.0021392442286014557, + "step": 631 + }, + { + "ce_ib": 7.080989837646484, + "ce_orig": 0.8866851329803467, + "epoch": 0.1814652383348911, + "kl_loss": 0.1062016636133194, + "loss_ib": 0.0017701154574751854, + "step": 631 + }, + { + "ce_ib": 6.399113655090332, + "ce_orig": 0.7909007668495178, + "epoch": 0.1817528219138687, + "kl_loss": 0.09706030040979385, + "loss_ib": 0.0016105143586173654, + "step": 632 + }, + { + "ce_ib": 6.583844184875488, + "ce_orig": 0.45864665508270264, + "epoch": 0.1817528219138687, + "kl_loss": 0.1002674251794815, + "loss_ib": 0.0016610586317256093, + "step": 632 + }, + { + "ce_ib": 14.482834815979004, + "ce_orig": 1.6091009378433228, + "epoch": 0.1817528219138687, + "kl_loss": 0.1842235028743744, + "loss_ib": 0.00329051841981709, + "step": 632 + }, + { + "ce_ib": 9.707879066467285, + "ce_orig": 0.8563067317008972, + "epoch": 0.1817528219138687, + "kl_loss": 0.15452846884727478, + "loss_ib": 0.002516072243452072, + "step": 632 + }, + { + "ce_ib": 7.918076515197754, + "ce_orig": 1.0354722738265991, + "epoch": 0.18204040549284636, + "kl_loss": 0.126227468252182, + "loss_ib": 0.002054082229733467, + "step": 633 + }, + { + "ce_ib": 9.942896842956543, + "ce_orig": 1.557401418685913, + "epoch": 0.18204040549284636, + "kl_loss": 0.14876790344715118, + "loss_ib": 0.0024819686077535152, + "step": 633 + }, + { + "ce_ib": 9.64987564086914, + "ce_orig": 1.0955207347869873, + "epoch": 0.18204040549284636, + "kl_loss": 0.12603306770324707, + "loss_ib": 0.0022253182251006365, + "step": 633 + }, + { + "ce_ib": 8.467226028442383, + "ce_orig": 0.3422728478908539, + "epoch": 0.18204040549284636, + "kl_loss": 0.30516770482063293, + "loss_ib": 0.003898399416357279, + "step": 633 + }, + { + "ce_ib": 8.792363166809082, + "ce_orig": 0.9992319345474243, + "epoch": 0.182327989071824, + "kl_loss": 0.14874675869941711, + "loss_ib": 0.002366703934967518, + "step": 634 + }, + { + "ce_ib": 8.27891731262207, + "ce_orig": 1.0733726024627686, + "epoch": 0.182327989071824, + "kl_loss": 0.13452918827533722, + "loss_ib": 0.002173183485865593, + "step": 634 + }, + { + "ce_ib": 9.549663543701172, + "ce_orig": 0.8865470290184021, + "epoch": 0.182327989071824, + "kl_loss": 0.21063677966594696, + "loss_ib": 0.0030613341368734837, + "step": 634 + }, + { + "ce_ib": 6.622999668121338, + "ce_orig": 0.46363383531570435, + "epoch": 0.182327989071824, + "kl_loss": 0.161158949136734, + "loss_ib": 0.0022738894913345575, + "step": 634 + }, + { + "epoch": 0.18261557265080164, + "grad_norm": 0.09408904612064362, + "learning_rate": 4.9880567164209515e-05, + "loss": 0.8971, + "step": 635 + }, + { + "ce_ib": 8.740588188171387, + "ce_orig": 0.7484884262084961, + "epoch": 0.18261557265080164, + "kl_loss": 0.1564946174621582, + "loss_ib": 0.0024390050675719976, + "step": 635 + }, + { + "ce_ib": 7.376012325286865, + "ce_orig": 0.48069027066230774, + "epoch": 0.18261557265080164, + "kl_loss": 0.20798180997371674, + "loss_ib": 0.0028174191247671843, + "step": 635 + }, + { + "ce_ib": 7.95230770111084, + "ce_orig": 0.4582425057888031, + "epoch": 0.18261557265080164, + "kl_loss": 0.11464007198810577, + "loss_ib": 0.0019416314316913486, + "step": 635 + }, + { + "ce_ib": 11.704083442687988, + "ce_orig": 0.9559797048568726, + "epoch": 0.18261557265080164, + "kl_loss": 0.14773112535476685, + "loss_ib": 0.0026477195788174868, + "step": 635 + }, + { + "ce_ib": 9.855140686035156, + "ce_orig": 1.0313355922698975, + "epoch": 0.1829031562297793, + "kl_loss": 0.15524601936340332, + "loss_ib": 0.0025379741564393044, + "step": 636 + }, + { + "ce_ib": 8.928131103515625, + "ce_orig": 0.6653744578361511, + "epoch": 0.1829031562297793, + "kl_loss": 0.235584557056427, + "loss_ib": 0.0032486587297171354, + "step": 636 + }, + { + "ce_ib": 7.960230827331543, + "ce_orig": 0.4329434633255005, + "epoch": 0.1829031562297793, + "kl_loss": 0.13450872898101807, + "loss_ib": 0.0021411103662103415, + "step": 636 + }, + { + "ce_ib": 8.45853042602539, + "ce_orig": 0.3819558918476105, + "epoch": 0.1829031562297793, + "kl_loss": 0.18511459231376648, + "loss_ib": 0.0026969988830387592, + "step": 636 + }, + { + "ce_ib": 13.116471290588379, + "ce_orig": 1.6339662075042725, + "epoch": 0.1831907398087569, + "kl_loss": 0.1558864712715149, + "loss_ib": 0.0028705119621008635, + "step": 637 + }, + { + "ce_ib": 15.931092262268066, + "ce_orig": 1.6720495223999023, + "epoch": 0.1831907398087569, + "kl_loss": 0.15220539271831512, + "loss_ib": 0.0031151631847023964, + "step": 637 + }, + { + "ce_ib": 11.66745662689209, + "ce_orig": 1.4903631210327148, + "epoch": 0.1831907398087569, + "kl_loss": 0.29781395196914673, + "loss_ib": 0.004144885111600161, + "step": 637 + }, + { + "ce_ib": 8.845624923706055, + "ce_orig": 0.9857800602912903, + "epoch": 0.1831907398087569, + "kl_loss": 0.19395799934864044, + "loss_ib": 0.002824142575263977, + "step": 637 + }, + { + "ce_ib": 5.086018085479736, + "ce_orig": 0.40799444913864136, + "epoch": 0.18347832338773457, + "kl_loss": 0.13657420873641968, + "loss_ib": 0.0018743438413366675, + "step": 638 + }, + { + "ce_ib": 6.492795944213867, + "ce_orig": 0.6550372838973999, + "epoch": 0.18347832338773457, + "kl_loss": 0.14029884338378906, + "loss_ib": 0.002052268013358116, + "step": 638 + }, + { + "ce_ib": 11.760429382324219, + "ce_orig": 1.4298399686813354, + "epoch": 0.18347832338773457, + "kl_loss": 0.14298929274082184, + "loss_ib": 0.0026059357915073633, + "step": 638 + }, + { + "ce_ib": 10.418862342834473, + "ce_orig": 1.169357419013977, + "epoch": 0.18347832338773457, + "kl_loss": 0.13526105880737305, + "loss_ib": 0.0023944966960698366, + "step": 638 + }, + { + "ce_ib": 4.184670448303223, + "ce_orig": 0.1795201301574707, + "epoch": 0.1837659069667122, + "kl_loss": 0.3021865487098694, + "loss_ib": 0.003440332366153598, + "step": 639 + }, + { + "ce_ib": 10.151659965515137, + "ce_orig": 0.8202506899833679, + "epoch": 0.1837659069667122, + "kl_loss": 0.1445043683052063, + "loss_ib": 0.0024602096527814865, + "step": 639 + }, + { + "ce_ib": 7.495885372161865, + "ce_orig": 0.7958588004112244, + "epoch": 0.1837659069667122, + "kl_loss": 0.11683303862810135, + "loss_ib": 0.001917918911203742, + "step": 639 + }, + { + "ce_ib": 12.701330184936523, + "ce_orig": 1.1551557779312134, + "epoch": 0.1837659069667122, + "kl_loss": 0.1668703556060791, + "loss_ib": 0.002938836347311735, + "step": 639 + }, + { + "epoch": 0.18405349054568984, + "grad_norm": 0.09154196828603745, + "learning_rate": 4.987674869737077e-05, + "loss": 0.8505, + "step": 640 + }, + { + "ce_ib": 6.805731296539307, + "ce_orig": 0.8624812364578247, + "epoch": 0.18405349054568984, + "kl_loss": 0.08888162672519684, + "loss_ib": 0.0015693893656134605, + "step": 640 + }, + { + "ce_ib": 7.028994083404541, + "ce_orig": 0.5640987157821655, + "epoch": 0.18405349054568984, + "kl_loss": 0.1612461507320404, + "loss_ib": 0.0023153608199208975, + "step": 640 + }, + { + "ce_ib": 9.196657180786133, + "ce_orig": 0.7501866221427917, + "epoch": 0.18405349054568984, + "kl_loss": 0.15376678109169006, + "loss_ib": 0.0024573334958404303, + "step": 640 + }, + { + "ce_ib": 10.042610168457031, + "ce_orig": 1.2460449934005737, + "epoch": 0.18405349054568984, + "kl_loss": 0.13962647318840027, + "loss_ib": 0.0024005258455872536, + "step": 640 + }, + { + "ce_ib": 11.13807201385498, + "ce_orig": 1.2203491926193237, + "epoch": 0.1843410741246675, + "kl_loss": 0.2092892825603485, + "loss_ib": 0.003206700086593628, + "step": 641 + }, + { + "ce_ib": 6.8926472663879395, + "ce_orig": 0.9371761083602905, + "epoch": 0.1843410741246675, + "kl_loss": 0.1569281369447708, + "loss_ib": 0.0022585459519177675, + "step": 641 + }, + { + "ce_ib": 8.300681114196777, + "ce_orig": 0.8907142877578735, + "epoch": 0.1843410741246675, + "kl_loss": 0.11810189485549927, + "loss_ib": 0.002011086791753769, + "step": 641 + }, + { + "ce_ib": 9.104215621948242, + "ce_orig": 1.2735546827316284, + "epoch": 0.1843410741246675, + "kl_loss": 0.20481525361537933, + "loss_ib": 0.002958573866635561, + "step": 641 + }, + { + "ce_ib": 5.902524948120117, + "ce_orig": 0.6442005634307861, + "epoch": 0.18462865770364512, + "kl_loss": 0.06047610938549042, + "loss_ib": 0.0011950135231018066, + "step": 642 + }, + { + "ce_ib": 8.438175201416016, + "ce_orig": 0.8223277926445007, + "epoch": 0.18462865770364512, + "kl_loss": 0.1526065617799759, + "loss_ib": 0.002369883004575968, + "step": 642 + }, + { + "ce_ib": 7.181520938873291, + "ce_orig": 0.6614299416542053, + "epoch": 0.18462865770364512, + "kl_loss": 0.13312123715877533, + "loss_ib": 0.0020493643824011087, + "step": 642 + }, + { + "ce_ib": 6.217692852020264, + "ce_orig": 0.9638420939445496, + "epoch": 0.18462865770364512, + "kl_loss": 0.12894636392593384, + "loss_ib": 0.0019112328300252557, + "step": 642 + }, + { + "ce_ib": 9.330130577087402, + "ce_orig": 0.8886315226554871, + "epoch": 0.18491624128262277, + "kl_loss": 0.18079149723052979, + "loss_ib": 0.0027409279718995094, + "step": 643 + }, + { + "ce_ib": 7.036296844482422, + "ce_orig": 0.7276414632797241, + "epoch": 0.18491624128262277, + "kl_loss": 0.1510542333126068, + "loss_ib": 0.0022141719236969948, + "step": 643 + }, + { + "ce_ib": 8.81930160522461, + "ce_orig": 0.961742103099823, + "epoch": 0.18491624128262277, + "kl_loss": 0.11654697358608246, + "loss_ib": 0.0020473997574299574, + "step": 643 + }, + { + "ce_ib": 13.541308403015137, + "ce_orig": 1.0693286657333374, + "epoch": 0.18491624128262277, + "kl_loss": 0.1634470522403717, + "loss_ib": 0.0029886013362556696, + "step": 643 + }, + { + "ce_ib": 8.250872611999512, + "ce_orig": 0.7872787117958069, + "epoch": 0.1852038248616004, + "kl_loss": 0.15854009985923767, + "loss_ib": 0.0024104882031679153, + "step": 644 + }, + { + "ce_ib": 9.81741714477539, + "ce_orig": 0.981521725654602, + "epoch": 0.1852038248616004, + "kl_loss": 0.1492426097393036, + "loss_ib": 0.002474167849868536, + "step": 644 + }, + { + "ce_ib": 8.002400398254395, + "ce_orig": 0.9716108441352844, + "epoch": 0.1852038248616004, + "kl_loss": 0.2515189051628113, + "loss_ib": 0.0033154289703816175, + "step": 644 + }, + { + "ce_ib": 11.485904693603516, + "ce_orig": 1.4057406187057495, + "epoch": 0.1852038248616004, + "kl_loss": 0.1574161797761917, + "loss_ib": 0.00272275204770267, + "step": 644 + }, + { + "epoch": 0.18549140844057804, + "grad_norm": 0.10694620758295059, + "learning_rate": 4.98728702966704e-05, + "loss": 0.8809, + "step": 645 + }, + { + "ce_ib": 14.281476974487305, + "ce_orig": 0.44360876083374023, + "epoch": 0.18549140844057804, + "kl_loss": 0.16904665529727936, + "loss_ib": 0.003118614200502634, + "step": 645 + }, + { + "ce_ib": 6.123544692993164, + "ce_orig": 0.8242998719215393, + "epoch": 0.18549140844057804, + "kl_loss": 0.11964607238769531, + "loss_ib": 0.0018088150536641479, + "step": 645 + }, + { + "ce_ib": 7.845162391662598, + "ce_orig": 1.008780598640442, + "epoch": 0.18549140844057804, + "kl_loss": 0.07876063883304596, + "loss_ib": 0.0015721225645393133, + "step": 645 + }, + { + "ce_ib": 7.595258712768555, + "ce_orig": 0.41073235869407654, + "epoch": 0.18549140844057804, + "kl_loss": 0.14960895478725433, + "loss_ib": 0.0022556153126060963, + "step": 645 + }, + { + "ce_ib": 8.565896034240723, + "ce_orig": 0.8796496391296387, + "epoch": 0.1857789920195557, + "kl_loss": 0.17831774055957794, + "loss_ib": 0.0026397667825222015, + "step": 646 + }, + { + "ce_ib": 7.068782329559326, + "ce_orig": 0.5573152899742126, + "epoch": 0.1857789920195557, + "kl_loss": 0.13099712133407593, + "loss_ib": 0.002016849583014846, + "step": 646 + }, + { + "ce_ib": 7.277317047119141, + "ce_orig": 0.6383960843086243, + "epoch": 0.1857789920195557, + "kl_loss": 0.1211213618516922, + "loss_ib": 0.0019389452645555139, + "step": 646 + }, + { + "ce_ib": 6.5935797691345215, + "ce_orig": 0.6593443155288696, + "epoch": 0.1857789920195557, + "kl_loss": 0.12146840989589691, + "loss_ib": 0.0018740420928224921, + "step": 646 + }, + { + "ce_ib": 9.572060585021973, + "ce_orig": 0.9575638175010681, + "epoch": 0.18606657559853332, + "kl_loss": 0.14028745889663696, + "loss_ib": 0.0023600806016474962, + "step": 647 + }, + { + "ce_ib": 9.048948287963867, + "ce_orig": 0.7253939509391785, + "epoch": 0.18606657559853332, + "kl_loss": 0.11062663793563843, + "loss_ib": 0.0020111610647290945, + "step": 647 + }, + { + "ce_ib": 7.138680458068848, + "ce_orig": 0.7415602803230286, + "epoch": 0.18606657559853332, + "kl_loss": 0.1857338845729828, + "loss_ib": 0.0025712070055305958, + "step": 647 + }, + { + "ce_ib": 10.374600410461426, + "ce_orig": 0.7181751132011414, + "epoch": 0.18606657559853332, + "kl_loss": 0.23679107427597046, + "loss_ib": 0.0034053707495331764, + "step": 647 + }, + { + "ce_ib": 9.705562591552734, + "ce_orig": 1.087332010269165, + "epoch": 0.18635415917751097, + "kl_loss": 0.16214729845523834, + "loss_ib": 0.002592029282823205, + "step": 648 + }, + { + "ce_ib": 4.748839855194092, + "ce_orig": 0.2743741571903229, + "epoch": 0.18635415917751097, + "kl_loss": 0.2661653161048889, + "loss_ib": 0.0031365370377898216, + "step": 648 + }, + { + "ce_ib": 9.06843090057373, + "ce_orig": 0.987123966217041, + "epoch": 0.18635415917751097, + "kl_loss": 0.12213167548179626, + "loss_ib": 0.002128159860149026, + "step": 648 + }, + { + "ce_ib": 11.009614944458008, + "ce_orig": 1.2969286441802979, + "epoch": 0.18635415917751097, + "kl_loss": 0.1587267965078354, + "loss_ib": 0.0026882293168455362, + "step": 648 + }, + { + "ce_ib": 7.768403053283691, + "ce_orig": 0.9510530233383179, + "epoch": 0.1866417427564886, + "kl_loss": 0.144602969288826, + "loss_ib": 0.002222870010882616, + "step": 649 + }, + { + "ce_ib": 9.275205612182617, + "ce_orig": 0.5832199454307556, + "epoch": 0.1866417427564886, + "kl_loss": 0.18980665504932404, + "loss_ib": 0.0028255870565772057, + "step": 649 + }, + { + "ce_ib": 6.574225902557373, + "ce_orig": 0.6462238430976868, + "epoch": 0.1866417427564886, + "kl_loss": 0.156391441822052, + "loss_ib": 0.0022213368210941553, + "step": 649 + }, + { + "ce_ib": 4.824717044830322, + "ce_orig": 0.37470337748527527, + "epoch": 0.1866417427564886, + "kl_loss": 0.17615503072738647, + "loss_ib": 0.002244021976366639, + "step": 649 + }, + { + "epoch": 0.18692932633546624, + "grad_norm": 0.11466971039772034, + "learning_rate": 4.986893197145237e-05, + "loss": 0.8451, + "step": 650 + }, + { + "ce_ib": 11.404997825622559, + "ce_orig": 1.0702717304229736, + "epoch": 0.18692932633546624, + "kl_loss": 0.16459330916404724, + "loss_ib": 0.0027864326257258654, + "step": 650 + }, + { + "ce_ib": 8.943628311157227, + "ce_orig": 1.3345712423324585, + "epoch": 0.18692932633546624, + "kl_loss": 0.24026933312416077, + "loss_ib": 0.0032970558386296034, + "step": 650 + }, + { + "ce_ib": 9.607685089111328, + "ce_orig": 0.7295346856117249, + "epoch": 0.18692932633546624, + "kl_loss": 0.23726195096969604, + "loss_ib": 0.0033333878964185715, + "step": 650 + }, + { + "ce_ib": 4.072868824005127, + "ce_orig": 0.21132893860340118, + "epoch": 0.18692932633546624, + "kl_loss": 0.13498154282569885, + "loss_ib": 0.001757102319970727, + "step": 650 + }, + { + "ce_ib": 9.069856643676758, + "ce_orig": 0.3936823606491089, + "epoch": 0.1872169099144439, + "kl_loss": 0.21264250576496124, + "loss_ib": 0.0030334105249494314, + "step": 651 + }, + { + "ce_ib": 8.176159858703613, + "ce_orig": 0.7149850726127625, + "epoch": 0.1872169099144439, + "kl_loss": 0.11887285113334656, + "loss_ib": 0.002006344497203827, + "step": 651 + }, + { + "ce_ib": 6.622856140136719, + "ce_orig": 0.4089096188545227, + "epoch": 0.1872169099144439, + "kl_loss": 0.11365403234958649, + "loss_ib": 0.001798825804144144, + "step": 651 + }, + { + "ce_ib": 6.9014081954956055, + "ce_orig": 0.43414247035980225, + "epoch": 0.1872169099144439, + "kl_loss": 0.1480683982372284, + "loss_ib": 0.0021708246786147356, + "step": 651 + }, + { + "ce_ib": 9.448698043823242, + "ce_orig": 0.7146212458610535, + "epoch": 0.18750449349342152, + "kl_loss": 0.1836353838443756, + "loss_ib": 0.0027812235057353973, + "step": 652 + }, + { + "ce_ib": 7.287289619445801, + "ce_orig": 0.7478001713752747, + "epoch": 0.18750449349342152, + "kl_loss": 0.1715984046459198, + "loss_ib": 0.0024447129108011723, + "step": 652 + }, + { + "ce_ib": 9.546160697937012, + "ce_orig": 1.0961014032363892, + "epoch": 0.18750449349342152, + "kl_loss": 0.16767415404319763, + "loss_ib": 0.0026313576381653547, + "step": 652 + }, + { + "ce_ib": 10.555652618408203, + "ce_orig": 1.3851077556610107, + "epoch": 0.18750449349342152, + "kl_loss": 0.35710737109184265, + "loss_ib": 0.004626638721674681, + "step": 652 + }, + { + "ce_ib": 5.79029655456543, + "ce_orig": 0.8410673141479492, + "epoch": 0.18779207707239917, + "kl_loss": 0.12491299211978912, + "loss_ib": 0.001828159554861486, + "step": 653 + }, + { + "ce_ib": 10.17906665802002, + "ce_orig": 0.971206545829773, + "epoch": 0.18779207707239917, + "kl_loss": 0.12395337969064713, + "loss_ib": 0.0022574402391910553, + "step": 653 + }, + { + "ce_ib": 11.249361991882324, + "ce_orig": 1.02909255027771, + "epoch": 0.18779207707239917, + "kl_loss": 0.13359886407852173, + "loss_ib": 0.0024609246756881475, + "step": 653 + }, + { + "ce_ib": 7.954268932342529, + "ce_orig": 0.742697536945343, + "epoch": 0.18779207707239917, + "kl_loss": 0.14216434955596924, + "loss_ib": 0.002217070432379842, + "step": 653 + }, + { + "ce_ib": 12.005528450012207, + "ce_orig": 1.5412335395812988, + "epoch": 0.1880796606513768, + "kl_loss": 0.14288672804832458, + "loss_ib": 0.0026294200215488672, + "step": 654 + }, + { + "ce_ib": 5.368361949920654, + "ce_orig": 0.598798394203186, + "epoch": 0.1880796606513768, + "kl_loss": 0.1084536612033844, + "loss_ib": 0.001621372764930129, + "step": 654 + }, + { + "ce_ib": 8.782524108886719, + "ce_orig": 0.9397916197776794, + "epoch": 0.1880796606513768, + "kl_loss": 0.09554749727249146, + "loss_ib": 0.0018337273504585028, + "step": 654 + }, + { + "ce_ib": 9.918803215026855, + "ce_orig": 0.6324499249458313, + "epoch": 0.1880796606513768, + "kl_loss": 0.16397729516029358, + "loss_ib": 0.0026316531002521515, + "step": 654 + }, + { + "epoch": 0.18836724423035445, + "grad_norm": 0.10257323831319809, + "learning_rate": 4.986493373120502e-05, + "loss": 0.8898, + "step": 655 + }, + { + "ce_ib": 10.346714973449707, + "ce_orig": 0.493673712015152, + "epoch": 0.18836724423035445, + "kl_loss": 0.255359947681427, + "loss_ib": 0.0035882708616554737, + "step": 655 + }, + { + "ce_ib": 7.25407075881958, + "ce_orig": 0.8773781061172485, + "epoch": 0.18836724423035445, + "kl_loss": 0.1956409215927124, + "loss_ib": 0.002681816229596734, + "step": 655 + }, + { + "ce_ib": 11.828226089477539, + "ce_orig": 1.498853325843811, + "epoch": 0.18836724423035445, + "kl_loss": 0.18627619743347168, + "loss_ib": 0.0030455845408141613, + "step": 655 + }, + { + "ce_ib": 6.772419452667236, + "ce_orig": 0.41002458333969116, + "epoch": 0.18836724423035445, + "kl_loss": 0.10855728387832642, + "loss_ib": 0.0017628148198127747, + "step": 655 + }, + { + "ce_ib": 10.052515983581543, + "ce_orig": 0.9797083735466003, + "epoch": 0.1886548278093321, + "kl_loss": 0.16761581599712372, + "loss_ib": 0.0026814097072929144, + "step": 656 + }, + { + "ce_ib": 9.526687622070312, + "ce_orig": 0.9061644077301025, + "epoch": 0.1886548278093321, + "kl_loss": 0.131792351603508, + "loss_ib": 0.0022705921437591314, + "step": 656 + }, + { + "ce_ib": 5.809006690979004, + "ce_orig": 0.8629180788993835, + "epoch": 0.1886548278093321, + "kl_loss": 0.13694535195827484, + "loss_ib": 0.0019503540825098753, + "step": 656 + }, + { + "ce_ib": 7.719155311584473, + "ce_orig": 0.8281040787696838, + "epoch": 0.1886548278093321, + "kl_loss": 0.1683327555656433, + "loss_ib": 0.002455243142321706, + "step": 656 + }, + { + "ce_ib": 11.467092514038086, + "ce_orig": 0.7774757146835327, + "epoch": 0.18894241138830972, + "kl_loss": 0.17787596583366394, + "loss_ib": 0.0029254688415676355, + "step": 657 + }, + { + "ce_ib": 9.500710487365723, + "ce_orig": 0.6401790976524353, + "epoch": 0.18894241138830972, + "kl_loss": 0.20965366065502167, + "loss_ib": 0.0030466075986623764, + "step": 657 + }, + { + "ce_ib": 9.324553489685059, + "ce_orig": 0.6780283451080322, + "epoch": 0.18894241138830972, + "kl_loss": 0.18367739021778107, + "loss_ib": 0.0027692292351275682, + "step": 657 + }, + { + "ce_ib": 9.310490608215332, + "ce_orig": 0.8386406302452087, + "epoch": 0.18894241138830972, + "kl_loss": 0.18684491515159607, + "loss_ib": 0.002799498150125146, + "step": 657 + }, + { + "ce_ib": 10.932252883911133, + "ce_orig": 1.1578938961029053, + "epoch": 0.18922999496728737, + "kl_loss": 0.17252197861671448, + "loss_ib": 0.002818444976583123, + "step": 658 + }, + { + "ce_ib": 9.024280548095703, + "ce_orig": 0.5365549325942993, + "epoch": 0.18922999496728737, + "kl_loss": 0.12610819935798645, + "loss_ib": 0.0021635100711137056, + "step": 658 + }, + { + "ce_ib": 8.524632453918457, + "ce_orig": 0.6664254665374756, + "epoch": 0.18922999496728737, + "kl_loss": 0.08303587883710861, + "loss_ib": 0.0016828221268951893, + "step": 658 + }, + { + "ce_ib": 9.70861530303955, + "ce_orig": 1.0630748271942139, + "epoch": 0.18922999496728737, + "kl_loss": 0.17968884110450745, + "loss_ib": 0.002767750062048435, + "step": 658 + }, + { + "ce_ib": 8.12465763092041, + "ce_orig": 1.105127215385437, + "epoch": 0.189517578546265, + "kl_loss": 0.13839149475097656, + "loss_ib": 0.0021963806357234716, + "step": 659 + }, + { + "ce_ib": 10.226935386657715, + "ce_orig": 0.6290950775146484, + "epoch": 0.189517578546265, + "kl_loss": 0.20230096578598022, + "loss_ib": 0.003045703051611781, + "step": 659 + }, + { + "ce_ib": 11.315914154052734, + "ce_orig": 1.3234468698501587, + "epoch": 0.189517578546265, + "kl_loss": 0.15705978870391846, + "loss_ib": 0.002702189376577735, + "step": 659 + }, + { + "ce_ib": 13.434019088745117, + "ce_orig": 1.325426459312439, + "epoch": 0.189517578546265, + "kl_loss": 0.2508019208908081, + "loss_ib": 0.0038514207117259502, + "step": 659 + }, + { + "epoch": 0.18980516212524265, + "grad_norm": 0.08787354081869125, + "learning_rate": 4.986087558556104e-05, + "loss": 0.8634, + "step": 660 + }, + { + "ce_ib": 8.652778625488281, + "ce_orig": 0.8134385347366333, + "epoch": 0.18980516212524265, + "kl_loss": 0.147006094455719, + "loss_ib": 0.0023353388532996178, + "step": 660 + }, + { + "ce_ib": 9.470593452453613, + "ce_orig": 0.833583652973175, + "epoch": 0.18980516212524265, + "kl_loss": 0.1043887585401535, + "loss_ib": 0.0019909467082470655, + "step": 660 + }, + { + "ce_ib": 9.23963737487793, + "ce_orig": 0.939871609210968, + "epoch": 0.18980516212524265, + "kl_loss": 0.12045232951641083, + "loss_ib": 0.0021284869872033596, + "step": 660 + }, + { + "ce_ib": 8.225679397583008, + "ce_orig": 0.6521763801574707, + "epoch": 0.18980516212524265, + "kl_loss": 0.1941739022731781, + "loss_ib": 0.002764306962490082, + "step": 660 + }, + { + "ce_ib": 8.555021286010742, + "ce_orig": 0.6295510530471802, + "epoch": 0.1900927457042203, + "kl_loss": 0.13717862963676453, + "loss_ib": 0.0022272884380072355, + "step": 661 + }, + { + "ce_ib": 11.997432708740234, + "ce_orig": 1.630189299583435, + "epoch": 0.1900927457042203, + "kl_loss": 0.1967429369688034, + "loss_ib": 0.003167172661051154, + "step": 661 + }, + { + "ce_ib": 8.604825019836426, + "ce_orig": 0.9493278861045837, + "epoch": 0.1900927457042203, + "kl_loss": 0.1191963478922844, + "loss_ib": 0.0020524458959698677, + "step": 661 + }, + { + "ce_ib": 8.901871681213379, + "ce_orig": 0.6020686626434326, + "epoch": 0.1900927457042203, + "kl_loss": 0.1835116744041443, + "loss_ib": 0.002725303638726473, + "step": 661 + }, + { + "ce_ib": 4.333002090454102, + "ce_orig": 0.5005955100059509, + "epoch": 0.19038032928319792, + "kl_loss": 0.1462855488061905, + "loss_ib": 0.0018961557652801275, + "step": 662 + }, + { + "ce_ib": 10.557404518127441, + "ce_orig": 0.5266630053520203, + "epoch": 0.19038032928319792, + "kl_loss": 0.2895011901855469, + "loss_ib": 0.003950752317905426, + "step": 662 + }, + { + "ce_ib": 9.212621688842773, + "ce_orig": 0.936205267906189, + "epoch": 0.19038032928319792, + "kl_loss": 0.1948644369840622, + "loss_ib": 0.0028699063695967197, + "step": 662 + }, + { + "ce_ib": 8.389484405517578, + "ce_orig": 1.0228606462478638, + "epoch": 0.19038032928319792, + "kl_loss": 0.1212107315659523, + "loss_ib": 0.0020510556641966105, + "step": 662 + }, + { + "ce_ib": 7.66838264465332, + "ce_orig": 0.8763972520828247, + "epoch": 0.19066791286217558, + "kl_loss": 0.13818049430847168, + "loss_ib": 0.002148643136024475, + "step": 663 + }, + { + "ce_ib": 7.3809638023376465, + "ce_orig": 0.8760581612586975, + "epoch": 0.19066791286217558, + "kl_loss": 0.16190586984157562, + "loss_ib": 0.0023571550846099854, + "step": 663 + }, + { + "ce_ib": 12.378966331481934, + "ce_orig": 1.522248387336731, + "epoch": 0.19066791286217558, + "kl_loss": 0.27928757667541504, + "loss_ib": 0.0040307724848389626, + "step": 663 + }, + { + "ce_ib": 6.554606914520264, + "ce_orig": 0.6486589312553406, + "epoch": 0.19066791286217558, + "kl_loss": 0.16327285766601562, + "loss_ib": 0.0022881892509758472, + "step": 663 + }, + { + "ce_ib": 6.294197082519531, + "ce_orig": 0.5333172082901001, + "epoch": 0.1909554964411532, + "kl_loss": 0.10702653229236603, + "loss_ib": 0.0016996850026771426, + "step": 664 + }, + { + "ce_ib": 10.409896850585938, + "ce_orig": 1.274090051651001, + "epoch": 0.1909554964411532, + "kl_loss": 0.09604034572839737, + "loss_ib": 0.0020013931207358837, + "step": 664 + }, + { + "ce_ib": 6.934921741485596, + "ce_orig": 0.5752331614494324, + "epoch": 0.1909554964411532, + "kl_loss": 0.10759134590625763, + "loss_ib": 0.0017694055568426847, + "step": 664 + }, + { + "ce_ib": 8.656540870666504, + "ce_orig": 0.9213974475860596, + "epoch": 0.1909554964411532, + "kl_loss": 0.0822703018784523, + "loss_ib": 0.001688356976956129, + "step": 664 + }, + { + "epoch": 0.19124308002013085, + "grad_norm": 0.10957484692335129, + "learning_rate": 4.985675754429744e-05, + "loss": 0.8824, + "step": 665 + }, + { + "ce_ib": 7.545868873596191, + "ce_orig": 0.7198473811149597, + "epoch": 0.19124308002013085, + "kl_loss": 0.07544055581092834, + "loss_ib": 0.0015089923981577158, + "step": 665 + }, + { + "ce_ib": 5.871424674987793, + "ce_orig": 0.5256696939468384, + "epoch": 0.19124308002013085, + "kl_loss": 0.11832289397716522, + "loss_ib": 0.001770371338352561, + "step": 665 + }, + { + "ce_ib": 8.345460891723633, + "ce_orig": 0.8390303254127502, + "epoch": 0.19124308002013085, + "kl_loss": 0.13851025700569153, + "loss_ib": 0.002219648566097021, + "step": 665 + }, + { + "ce_ib": 10.029878616333008, + "ce_orig": 0.7570701837539673, + "epoch": 0.19124308002013085, + "kl_loss": 0.1960536539554596, + "loss_ib": 0.0029635243117809296, + "step": 665 + }, + { + "ce_ib": 12.230276107788086, + "ce_orig": 1.078134298324585, + "epoch": 0.1915306635991085, + "kl_loss": 0.17366138100624084, + "loss_ib": 0.0029596411623060703, + "step": 666 + }, + { + "ce_ib": 5.4964823722839355, + "ce_orig": 0.30615153908729553, + "epoch": 0.1915306635991085, + "kl_loss": 0.11125549674034119, + "loss_ib": 0.0016622032271698117, + "step": 666 + }, + { + "ce_ib": 11.66408634185791, + "ce_orig": 1.4393538236618042, + "epoch": 0.1915306635991085, + "kl_loss": 0.16732355952262878, + "loss_ib": 0.002839644206687808, + "step": 666 + }, + { + "ce_ib": 8.240169525146484, + "ce_orig": 0.8983179926872253, + "epoch": 0.1915306635991085, + "kl_loss": 0.13795308768749237, + "loss_ib": 0.0022035478614270687, + "step": 666 + }, + { + "ce_ib": 9.5233793258667, + "ce_orig": 1.325028657913208, + "epoch": 0.19181824717808613, + "kl_loss": 0.2247726023197174, + "loss_ib": 0.0032000639475882053, + "step": 667 + }, + { + "ce_ib": 7.075375556945801, + "ce_orig": 0.6608232855796814, + "epoch": 0.19181824717808613, + "kl_loss": 0.13226626813411713, + "loss_ib": 0.0020302000921219587, + "step": 667 + }, + { + "ce_ib": 12.767308235168457, + "ce_orig": 1.7473585605621338, + "epoch": 0.19181824717808613, + "kl_loss": 0.19319532811641693, + "loss_ib": 0.0032086840365082026, + "step": 667 + }, + { + "ce_ib": 13.47103500366211, + "ce_orig": 1.497598648071289, + "epoch": 0.19181824717808613, + "kl_loss": 0.14107322692871094, + "loss_ib": 0.002757835667580366, + "step": 667 + }, + { + "ce_ib": 8.687394142150879, + "ce_orig": 0.9108020663261414, + "epoch": 0.19210583075706378, + "kl_loss": 0.11285087466239929, + "loss_ib": 0.0019972482696175575, + "step": 668 + }, + { + "ce_ib": 9.960009574890137, + "ce_orig": 1.163490653038025, + "epoch": 0.19210583075706378, + "kl_loss": 0.13729822635650635, + "loss_ib": 0.0023689831141382456, + "step": 668 + }, + { + "ce_ib": 9.403281211853027, + "ce_orig": 0.7250685691833496, + "epoch": 0.19210583075706378, + "kl_loss": 0.18609736859798431, + "loss_ib": 0.002801301656290889, + "step": 668 + }, + { + "ce_ib": 7.637380599975586, + "ce_orig": 0.7920053601264954, + "epoch": 0.19210583075706378, + "kl_loss": 0.30280235409736633, + "loss_ib": 0.003791761351749301, + "step": 668 + }, + { + "ce_ib": 7.223052978515625, + "ce_orig": 0.5900572538375854, + "epoch": 0.1923934143360414, + "kl_loss": 0.11546684801578522, + "loss_ib": 0.0018769737798720598, + "step": 669 + }, + { + "ce_ib": 8.318660736083984, + "ce_orig": 0.772509753704071, + "epoch": 0.1923934143360414, + "kl_loss": 0.1792832911014557, + "loss_ib": 0.0026246989145874977, + "step": 669 + }, + { + "ce_ib": 5.634905815124512, + "ce_orig": 0.7107937932014465, + "epoch": 0.1923934143360414, + "kl_loss": 0.10171617567539215, + "loss_ib": 0.0015806523151695728, + "step": 669 + }, + { + "ce_ib": 9.892749786376953, + "ce_orig": 0.6184588670730591, + "epoch": 0.1923934143360414, + "kl_loss": 0.20480774343013763, + "loss_ib": 0.003037352580577135, + "step": 669 + }, + { + "epoch": 0.19268099791501905, + "grad_norm": 0.0848066508769989, + "learning_rate": 4.985257961733553e-05, + "loss": 0.8125, + "step": 670 + }, + { + "ce_ib": 6.28206205368042, + "ce_orig": 0.525664210319519, + "epoch": 0.19268099791501905, + "kl_loss": 0.21931828558444977, + "loss_ib": 0.0028213891200721264, + "step": 670 + }, + { + "ce_ib": 8.594695091247559, + "ce_orig": 1.0051332712173462, + "epoch": 0.19268099791501905, + "kl_loss": 0.20822405815124512, + "loss_ib": 0.002941709943115711, + "step": 670 + }, + { + "ce_ib": 6.8549370765686035, + "ce_orig": 0.5342727899551392, + "epoch": 0.19268099791501905, + "kl_loss": 0.15061280131340027, + "loss_ib": 0.0021916215773671865, + "step": 670 + }, + { + "ce_ib": 12.691079139709473, + "ce_orig": 1.5980875492095947, + "epoch": 0.19268099791501905, + "kl_loss": 0.18997497856616974, + "loss_ib": 0.003168857656419277, + "step": 670 + }, + { + "ce_ib": 9.243292808532715, + "ce_orig": 0.7765840291976929, + "epoch": 0.1929685814939967, + "kl_loss": 0.134027898311615, + "loss_ib": 0.0022646081633865833, + "step": 671 + }, + { + "ce_ib": 7.516940593719482, + "ce_orig": 0.7874254584312439, + "epoch": 0.1929685814939967, + "kl_loss": 0.17721888422966003, + "loss_ib": 0.0025238830130547285, + "step": 671 + }, + { + "ce_ib": 10.570497512817383, + "ce_orig": 1.05923593044281, + "epoch": 0.1929685814939967, + "kl_loss": 0.11501814424991608, + "loss_ib": 0.002207231242209673, + "step": 671 + }, + { + "ce_ib": 12.406679153442383, + "ce_orig": 1.1707953214645386, + "epoch": 0.1929685814939967, + "kl_loss": 0.22512602806091309, + "loss_ib": 0.0034919281024485826, + "step": 671 + }, + { + "ce_ib": 7.20285701751709, + "ce_orig": 0.9570423364639282, + "epoch": 0.19325616507297433, + "kl_loss": 0.17423811554908752, + "loss_ib": 0.0024626669473946095, + "step": 672 + }, + { + "ce_ib": 9.27402400970459, + "ce_orig": 0.7955689430236816, + "epoch": 0.19325616507297433, + "kl_loss": 0.20847541093826294, + "loss_ib": 0.0030121563468128443, + "step": 672 + }, + { + "ce_ib": 7.827235698699951, + "ce_orig": 0.6374052166938782, + "epoch": 0.19325616507297433, + "kl_loss": 0.18218199908733368, + "loss_ib": 0.002604543464258313, + "step": 672 + }, + { + "ce_ib": 9.849845886230469, + "ce_orig": 0.8688628673553467, + "epoch": 0.19325616507297433, + "kl_loss": 0.12743963301181793, + "loss_ib": 0.0022593808826059103, + "step": 672 + }, + { + "ce_ib": 7.564920425415039, + "ce_orig": 0.7611903548240662, + "epoch": 0.19354374865195198, + "kl_loss": 0.1504932940006256, + "loss_ib": 0.0022614249028265476, + "step": 673 + }, + { + "ce_ib": 9.132747650146484, + "ce_orig": 0.8816638588905334, + "epoch": 0.19354374865195198, + "kl_loss": 0.10706878453493118, + "loss_ib": 0.001983962720260024, + "step": 673 + }, + { + "ce_ib": 9.165678977966309, + "ce_orig": 0.9871057868003845, + "epoch": 0.19354374865195198, + "kl_loss": 0.5221493244171143, + "loss_ib": 0.006138061173260212, + "step": 673 + }, + { + "ce_ib": 11.87661361694336, + "ce_orig": 1.1763601303100586, + "epoch": 0.19354374865195198, + "kl_loss": 0.17384997010231018, + "loss_ib": 0.002926160814240575, + "step": 673 + }, + { + "ce_ib": 8.544137954711914, + "ce_orig": 0.5830659866333008, + "epoch": 0.1938313322309296, + "kl_loss": 0.2833155393600464, + "loss_ib": 0.003687569173052907, + "step": 674 + }, + { + "ce_ib": 5.531364440917969, + "ce_orig": 0.6243769526481628, + "epoch": 0.1938313322309296, + "kl_loss": 0.15741299092769623, + "loss_ib": 0.0021272662561386824, + "step": 674 + }, + { + "ce_ib": 8.059561729431152, + "ce_orig": 0.5852807760238647, + "epoch": 0.1938313322309296, + "kl_loss": 0.11351338028907776, + "loss_ib": 0.0019410898676142097, + "step": 674 + }, + { + "ce_ib": 4.790435314178467, + "ce_orig": 0.24659956991672516, + "epoch": 0.1938313322309296, + "kl_loss": 0.10444021970033646, + "loss_ib": 0.0015234457096084952, + "step": 674 + }, + { + "epoch": 0.19411891580990726, + "grad_norm": 0.0875202864408493, + "learning_rate": 4.984834181474093e-05, + "loss": 0.8311, + "step": 675 + }, + { + "ce_ib": 7.58983039855957, + "ce_orig": 0.4601333737373352, + "epoch": 0.19411891580990726, + "kl_loss": 0.30331283807754517, + "loss_ib": 0.0037921112962067127, + "step": 675 + }, + { + "ce_ib": 12.82439136505127, + "ce_orig": 1.5256766080856323, + "epoch": 0.19411891580990726, + "kl_loss": 0.13840830326080322, + "loss_ib": 0.0026665222831070423, + "step": 675 + }, + { + "ce_ib": 9.367082595825195, + "ce_orig": 0.7142963409423828, + "epoch": 0.19411891580990726, + "kl_loss": 0.11720157414674759, + "loss_ib": 0.002108723856508732, + "step": 675 + }, + { + "ce_ib": 8.170831680297852, + "ce_orig": 0.8159179091453552, + "epoch": 0.19411891580990726, + "kl_loss": 0.1447296142578125, + "loss_ib": 0.0022643792908638716, + "step": 675 + }, + { + "ce_ib": 7.056220054626465, + "ce_orig": 0.4294247627258301, + "epoch": 0.1944064993888849, + "kl_loss": 0.11858707666397095, + "loss_ib": 0.0018914927495643497, + "step": 676 + }, + { + "ce_ib": 6.837160110473633, + "ce_orig": 0.4128968417644501, + "epoch": 0.1944064993888849, + "kl_loss": 0.12182068079710007, + "loss_ib": 0.0019019227474927902, + "step": 676 + }, + { + "ce_ib": 6.321423530578613, + "ce_orig": 0.5308006405830383, + "epoch": 0.1944064993888849, + "kl_loss": 0.13625647127628326, + "loss_ib": 0.0019947069231420755, + "step": 676 + }, + { + "ce_ib": 8.114527702331543, + "ce_orig": 0.7963853478431702, + "epoch": 0.1944064993888849, + "kl_loss": 0.14647287130355835, + "loss_ib": 0.0022761814761906862, + "step": 676 + }, + { + "ce_ib": 6.881494998931885, + "ce_orig": 0.49466657638549805, + "epoch": 0.19469408296786253, + "kl_loss": 0.11938363313674927, + "loss_ib": 0.0018819858087226748, + "step": 677 + }, + { + "ce_ib": 11.58544921875, + "ce_orig": 0.9181330800056458, + "epoch": 0.19469408296786253, + "kl_loss": 0.21157556772232056, + "loss_ib": 0.0032743006013333797, + "step": 677 + }, + { + "ce_ib": 8.724087715148926, + "ce_orig": 0.7483495473861694, + "epoch": 0.19469408296786253, + "kl_loss": 0.24710629880428314, + "loss_ib": 0.0033434717915952206, + "step": 677 + }, + { + "ce_ib": 10.569550514221191, + "ce_orig": 1.092028260231018, + "epoch": 0.19469408296786253, + "kl_loss": 0.1760406792163849, + "loss_ib": 0.0028173618484288454, + "step": 677 + }, + { + "ce_ib": 9.511533737182617, + "ce_orig": 1.0603342056274414, + "epoch": 0.19498166654684018, + "kl_loss": 0.15394163131713867, + "loss_ib": 0.002490569604560733, + "step": 678 + }, + { + "ce_ib": 11.275010108947754, + "ce_orig": 0.7746595144271851, + "epoch": 0.19498166654684018, + "kl_loss": 0.15000641345977783, + "loss_ib": 0.0026275652926415205, + "step": 678 + }, + { + "ce_ib": 8.671473503112793, + "ce_orig": 1.1143134832382202, + "epoch": 0.19498166654684018, + "kl_loss": 0.10246935486793518, + "loss_ib": 0.0018918408313766122, + "step": 678 + }, + { + "ce_ib": 6.801086902618408, + "ce_orig": 0.7952865362167358, + "epoch": 0.19498166654684018, + "kl_loss": 0.21663819253444672, + "loss_ib": 0.00284649059176445, + "step": 678 + }, + { + "ce_ib": 3.812221050262451, + "ce_orig": 0.2622551918029785, + "epoch": 0.1952692501258178, + "kl_loss": 0.39648619294166565, + "loss_ib": 0.004346083849668503, + "step": 679 + }, + { + "ce_ib": 9.213861465454102, + "ce_orig": 0.8610155582427979, + "epoch": 0.1952692501258178, + "kl_loss": 0.14125597476959229, + "loss_ib": 0.0023339458275586367, + "step": 679 + }, + { + "ce_ib": 9.094021797180176, + "ce_orig": 0.7715781927108765, + "epoch": 0.1952692501258178, + "kl_loss": 0.16748535633087158, + "loss_ib": 0.0025842555332928896, + "step": 679 + }, + { + "ce_ib": 7.93590784072876, + "ce_orig": 0.555151104927063, + "epoch": 0.1952692501258178, + "kl_loss": 0.14854934811592102, + "loss_ib": 0.002279084175825119, + "step": 679 + }, + { + "epoch": 0.19555683370479546, + "grad_norm": 0.08938612043857574, + "learning_rate": 4.984404414672346e-05, + "loss": 0.8338, + "step": 680 + }, + { + "ce_ib": 5.116811752319336, + "ce_orig": 0.43150871992111206, + "epoch": 0.19555683370479546, + "kl_loss": 0.3063853979110718, + "loss_ib": 0.003575535025447607, + "step": 680 + }, + { + "ce_ib": 13.268800735473633, + "ce_orig": 1.4352538585662842, + "epoch": 0.19555683370479546, + "kl_loss": 0.17058660089969635, + "loss_ib": 0.0030327460262924433, + "step": 680 + }, + { + "ce_ib": 7.152599334716797, + "ce_orig": 0.5593920946121216, + "epoch": 0.19555683370479546, + "kl_loss": 0.09605440497398376, + "loss_ib": 0.0016758039128035307, + "step": 680 + }, + { + "ce_ib": 7.339946269989014, + "ce_orig": 0.482102632522583, + "epoch": 0.19555683370479546, + "kl_loss": 0.16910995543003082, + "loss_ib": 0.00242509413510561, + "step": 680 + }, + { + "ce_ib": 12.716341972351074, + "ce_orig": 1.5933265686035156, + "epoch": 0.1958444172837731, + "kl_loss": 0.19038161635398865, + "loss_ib": 0.0031754502560943365, + "step": 681 + }, + { + "ce_ib": 6.732904434204102, + "ce_orig": 0.5515825748443604, + "epoch": 0.1958444172837731, + "kl_loss": 0.0904855728149414, + "loss_ib": 0.0015781461261212826, + "step": 681 + }, + { + "ce_ib": 6.44156551361084, + "ce_orig": 0.720815122127533, + "epoch": 0.1958444172837731, + "kl_loss": 0.13166779279708862, + "loss_ib": 0.001960834488272667, + "step": 681 + }, + { + "ce_ib": 5.354030132293701, + "ce_orig": 0.7398959398269653, + "epoch": 0.1958444172837731, + "kl_loss": 0.07886020839214325, + "loss_ib": 0.0013240050757303834, + "step": 681 + }, + { + "ce_ib": 9.835302352905273, + "ce_orig": 0.5789269804954529, + "epoch": 0.19613200086275073, + "kl_loss": 0.12779417634010315, + "loss_ib": 0.0022614719346165657, + "step": 682 + }, + { + "ce_ib": 7.039217948913574, + "ce_orig": 0.9762406945228577, + "epoch": 0.19613200086275073, + "kl_loss": 0.07770641148090363, + "loss_ib": 0.0014809858985245228, + "step": 682 + }, + { + "ce_ib": 9.24921703338623, + "ce_orig": 0.9393744468688965, + "epoch": 0.19613200086275073, + "kl_loss": 0.1423812359571457, + "loss_ib": 0.002348734065890312, + "step": 682 + }, + { + "ce_ib": 10.166736602783203, + "ce_orig": 1.2005715370178223, + "epoch": 0.19613200086275073, + "kl_loss": 0.1362869143486023, + "loss_ib": 0.002379542915150523, + "step": 682 + }, + { + "ce_ib": 10.488746643066406, + "ce_orig": 1.114490032196045, + "epoch": 0.19641958444172838, + "kl_loss": 0.1684313416481018, + "loss_ib": 0.002733187982812524, + "step": 683 + }, + { + "ce_ib": 8.479165077209473, + "ce_orig": 1.0899877548217773, + "epoch": 0.19641958444172838, + "kl_loss": 0.17926128208637238, + "loss_ib": 0.002640529302880168, + "step": 683 + }, + { + "ce_ib": 7.596517562866211, + "ce_orig": 0.8943977355957031, + "epoch": 0.19641958444172838, + "kl_loss": 0.15933018922805786, + "loss_ib": 0.0023529534228146076, + "step": 683 + }, + { + "ce_ib": 12.160412788391113, + "ce_orig": 1.4548228979110718, + "epoch": 0.19641958444172838, + "kl_loss": 0.16472113132476807, + "loss_ib": 0.002863252302631736, + "step": 683 + }, + { + "ce_ib": 8.374470710754395, + "ce_orig": 0.7274401783943176, + "epoch": 0.196707168020706, + "kl_loss": 0.27311208844184875, + "loss_ib": 0.0035685678012669086, + "step": 684 + }, + { + "ce_ib": 8.078079223632812, + "ce_orig": 0.6676459908485413, + "epoch": 0.196707168020706, + "kl_loss": 0.210984006524086, + "loss_ib": 0.0029176478274166584, + "step": 684 + }, + { + "ce_ib": 8.356644630432129, + "ce_orig": 0.8512493371963501, + "epoch": 0.196707168020706, + "kl_loss": 0.12694287300109863, + "loss_ib": 0.0021050930954515934, + "step": 684 + }, + { + "ce_ib": 7.5951457023620605, + "ce_orig": 0.7244792580604553, + "epoch": 0.196707168020706, + "kl_loss": 0.08705386519432068, + "loss_ib": 0.0016300531569868326, + "step": 684 + }, + { + "epoch": 0.19699475159968366, + "grad_norm": 0.08720671385526657, + "learning_rate": 4.983968662363723e-05, + "loss": 0.8391, + "step": 685 + }, + { + "ce_ib": 10.551007270812988, + "ce_orig": 1.4780113697052002, + "epoch": 0.19699475159968366, + "kl_loss": 0.1139870285987854, + "loss_ib": 0.0021949708461761475, + "step": 685 + }, + { + "ce_ib": 11.811184883117676, + "ce_orig": 1.1642088890075684, + "epoch": 0.19699475159968366, + "kl_loss": 0.1284218281507492, + "loss_ib": 0.0024653368163853884, + "step": 685 + }, + { + "ce_ib": 9.679302215576172, + "ce_orig": 1.2183902263641357, + "epoch": 0.19699475159968366, + "kl_loss": 0.09832706302404404, + "loss_ib": 0.0019512007711455226, + "step": 685 + }, + { + "ce_ib": 11.119817733764648, + "ce_orig": 1.329836368560791, + "epoch": 0.19699475159968366, + "kl_loss": 0.15665268898010254, + "loss_ib": 0.002678508637472987, + "step": 685 + }, + { + "ce_ib": 6.385128974914551, + "ce_orig": 0.7773191332817078, + "epoch": 0.1972823351786613, + "kl_loss": 0.10081027448177338, + "loss_ib": 0.0016466155648231506, + "step": 686 + }, + { + "ce_ib": 9.239928245544434, + "ce_orig": 1.1652233600616455, + "epoch": 0.1972823351786613, + "kl_loss": 0.12052545696496964, + "loss_ib": 0.0021292471792548895, + "step": 686 + }, + { + "ce_ib": 7.90826416015625, + "ce_orig": 0.6415975093841553, + "epoch": 0.1972823351786613, + "kl_loss": 0.12905505299568176, + "loss_ib": 0.0020813769660890102, + "step": 686 + }, + { + "ce_ib": 7.7638397216796875, + "ce_orig": 1.3246078491210938, + "epoch": 0.1972823351786613, + "kl_loss": 0.10447216033935547, + "loss_ib": 0.0018211054848507047, + "step": 686 + }, + { + "ce_ib": 11.250504493713379, + "ce_orig": 1.2069780826568604, + "epoch": 0.19756991875763893, + "kl_loss": 0.0673050582408905, + "loss_ib": 0.0017981010023504496, + "step": 687 + }, + { + "ce_ib": 7.798498153686523, + "ce_orig": 0.8994898200035095, + "epoch": 0.19756991875763893, + "kl_loss": 0.11936768144369125, + "loss_ib": 0.0019735265523195267, + "step": 687 + }, + { + "ce_ib": 8.09422779083252, + "ce_orig": 1.0519702434539795, + "epoch": 0.19756991875763893, + "kl_loss": 0.17095480859279633, + "loss_ib": 0.002518970984965563, + "step": 687 + }, + { + "ce_ib": 11.222230911254883, + "ce_orig": 0.9225847125053406, + "epoch": 0.19756991875763893, + "kl_loss": 0.16072265803813934, + "loss_ib": 0.0027294494211673737, + "step": 687 + }, + { + "ce_ib": 7.485930919647217, + "ce_orig": 0.771294891834259, + "epoch": 0.1978575023366166, + "kl_loss": 0.13400742411613464, + "loss_ib": 0.0020886673592031, + "step": 688 + }, + { + "ce_ib": 7.196287631988525, + "ce_orig": 0.5827687382698059, + "epoch": 0.1978575023366166, + "kl_loss": 0.16821786761283875, + "loss_ib": 0.0024018073454499245, + "step": 688 + }, + { + "ce_ib": 11.187541007995605, + "ce_orig": 0.7954636812210083, + "epoch": 0.1978575023366166, + "kl_loss": 0.1373216211795807, + "loss_ib": 0.0024919703137129545, + "step": 688 + }, + { + "ce_ib": 14.164189338684082, + "ce_orig": 1.0269262790679932, + "epoch": 0.1978575023366166, + "kl_loss": 0.17505072057247162, + "loss_ib": 0.003166925860568881, + "step": 688 + }, + { + "ce_ib": 8.692153930664062, + "ce_orig": 0.7542197108268738, + "epoch": 0.1981450859155942, + "kl_loss": 0.13267484307289124, + "loss_ib": 0.002195963868871331, + "step": 689 + }, + { + "ce_ib": 7.100341796875, + "ce_orig": 0.8673600554466248, + "epoch": 0.1981450859155942, + "kl_loss": 0.17913147807121277, + "loss_ib": 0.0025013487320393324, + "step": 689 + }, + { + "ce_ib": 7.591789245605469, + "ce_orig": 0.6358543634414673, + "epoch": 0.1981450859155942, + "kl_loss": 0.1959693729877472, + "loss_ib": 0.0027188726235181093, + "step": 689 + }, + { + "ce_ib": 10.430386543273926, + "ce_orig": 1.0774213075637817, + "epoch": 0.1981450859155942, + "kl_loss": 0.1533883512020111, + "loss_ib": 0.0025769220665097237, + "step": 689 + }, + { + "epoch": 0.19843266949457186, + "grad_norm": 0.10669101029634476, + "learning_rate": 4.98352692559805e-05, + "loss": 0.8342, + "step": 690 + }, + { + "ce_ib": 8.840359687805176, + "ce_orig": 0.95009446144104, + "epoch": 0.19843266949457186, + "kl_loss": 0.26050540804862976, + "loss_ib": 0.0034890901297330856, + "step": 690 + }, + { + "ce_ib": 8.28864860534668, + "ce_orig": 0.6670023202896118, + "epoch": 0.19843266949457186, + "kl_loss": 0.2274959236383438, + "loss_ib": 0.0031038240995258093, + "step": 690 + }, + { + "ce_ib": 6.7845330238342285, + "ce_orig": 1.0058348178863525, + "epoch": 0.19843266949457186, + "kl_loss": 0.11773978173732758, + "loss_ib": 0.0018558510346338153, + "step": 690 + }, + { + "ce_ib": 11.961586952209473, + "ce_orig": 1.3419430255889893, + "epoch": 0.19843266949457186, + "kl_loss": 0.1497437208890915, + "loss_ib": 0.0026935958303511143, + "step": 690 + }, + { + "ce_ib": 8.472604751586914, + "ce_orig": 0.4991995096206665, + "epoch": 0.1987202530735495, + "kl_loss": 0.20153102278709412, + "loss_ib": 0.0028625705745071173, + "step": 691 + }, + { + "ce_ib": 9.45351791381836, + "ce_orig": 1.0036801099777222, + "epoch": 0.1987202530735495, + "kl_loss": 0.1353033483028412, + "loss_ib": 0.002298385137692094, + "step": 691 + }, + { + "ce_ib": 7.785329341888428, + "ce_orig": 0.7653356790542603, + "epoch": 0.1987202530735495, + "kl_loss": 0.15528883039951324, + "loss_ib": 0.0023314212448894978, + "step": 691 + }, + { + "ce_ib": 5.879989147186279, + "ce_orig": 0.363296777009964, + "epoch": 0.1987202530735495, + "kl_loss": 0.13487836718559265, + "loss_ib": 0.001936782500706613, + "step": 691 + }, + { + "ce_ib": 11.383331298828125, + "ce_orig": 1.4687296152114868, + "epoch": 0.19900783665252714, + "kl_loss": 0.13896168768405914, + "loss_ib": 0.0025279498659074306, + "step": 692 + }, + { + "ce_ib": 9.530930519104004, + "ce_orig": 1.2369105815887451, + "epoch": 0.19900783665252714, + "kl_loss": 0.14428474009037018, + "loss_ib": 0.0023959404788911343, + "step": 692 + }, + { + "ce_ib": 8.91688346862793, + "ce_orig": 1.2907278537750244, + "epoch": 0.19900783665252714, + "kl_loss": 0.1218222826719284, + "loss_ib": 0.002109911059960723, + "step": 692 + }, + { + "ce_ib": 6.032763957977295, + "ce_orig": 0.49460887908935547, + "epoch": 0.19900783665252714, + "kl_loss": 0.16958531737327576, + "loss_ib": 0.0022991294972598553, + "step": 692 + }, + { + "ce_ib": 8.140901565551758, + "ce_orig": 1.1833499670028687, + "epoch": 0.1992954202315048, + "kl_loss": 0.08373439311981201, + "loss_ib": 0.0016514339949935675, + "step": 693 + }, + { + "ce_ib": 6.6644673347473145, + "ce_orig": 0.7210538387298584, + "epoch": 0.1992954202315048, + "kl_loss": 0.135453999042511, + "loss_ib": 0.002020986517891288, + "step": 693 + }, + { + "ce_ib": 9.980579376220703, + "ce_orig": 1.094584345817566, + "epoch": 0.1992954202315048, + "kl_loss": 0.1299094706773758, + "loss_ib": 0.0022971525322645903, + "step": 693 + }, + { + "ce_ib": 9.847793579101562, + "ce_orig": 0.7237510681152344, + "epoch": 0.1992954202315048, + "kl_loss": 0.26058220863342285, + "loss_ib": 0.0035906012635678053, + "step": 693 + }, + { + "ce_ib": 7.582713603973389, + "ce_orig": 0.5019081234931946, + "epoch": 0.1995830038104824, + "kl_loss": 0.16329146921634674, + "loss_ib": 0.0023911860771477222, + "step": 694 + }, + { + "ce_ib": 10.57071304321289, + "ce_orig": 1.4766757488250732, + "epoch": 0.1995830038104824, + "kl_loss": 0.15818722546100616, + "loss_ib": 0.0026389434933662415, + "step": 694 + }, + { + "ce_ib": 7.770899772644043, + "ce_orig": 0.9018514156341553, + "epoch": 0.1995830038104824, + "kl_loss": 0.08791860938072205, + "loss_ib": 0.0016562759410589933, + "step": 694 + }, + { + "ce_ib": 6.5115766525268555, + "ce_orig": 0.567577600479126, + "epoch": 0.1995830038104824, + "kl_loss": 0.19944868981838226, + "loss_ib": 0.0026456445921212435, + "step": 694 + }, + { + "epoch": 0.19987058738946006, + "grad_norm": 0.09094507992267609, + "learning_rate": 4.983079205439574e-05, + "loss": 0.8932, + "step": 695 + }, + { + "ce_ib": 8.481407165527344, + "ce_orig": 0.986234724521637, + "epoch": 0.19987058738946006, + "kl_loss": 0.2988152503967285, + "loss_ib": 0.003836293239146471, + "step": 695 + }, + { + "ce_ib": 10.597221374511719, + "ce_orig": 1.2706291675567627, + "epoch": 0.19987058738946006, + "kl_loss": 0.13932810723781586, + "loss_ib": 0.002453003078699112, + "step": 695 + }, + { + "ce_ib": 9.393781661987305, + "ce_orig": 0.7206296324729919, + "epoch": 0.19987058738946006, + "kl_loss": 0.12034575641155243, + "loss_ib": 0.0021428356412798166, + "step": 695 + }, + { + "ce_ib": 8.420135498046875, + "ce_orig": 0.6943502426147461, + "epoch": 0.19987058738946006, + "kl_loss": 0.16418200731277466, + "loss_ib": 0.0024838335812091827, + "step": 695 + }, + { + "ce_ib": 9.8673095703125, + "ce_orig": 0.5838234424591064, + "epoch": 0.20015817096843772, + "kl_loss": 0.23933324217796326, + "loss_ib": 0.0033800629898905754, + "step": 696 + }, + { + "ce_ib": 7.822383880615234, + "ce_orig": 1.0413511991500854, + "epoch": 0.20015817096843772, + "kl_loss": 0.08770239353179932, + "loss_ib": 0.0016592623433098197, + "step": 696 + }, + { + "ce_ib": 5.614956855773926, + "ce_orig": 0.6229404211044312, + "epoch": 0.20015817096843772, + "kl_loss": 0.15624278783798218, + "loss_ib": 0.0021239235065877438, + "step": 696 + }, + { + "ce_ib": 7.636229038238525, + "ce_orig": 0.7482424378395081, + "epoch": 0.20015817096843772, + "kl_loss": 0.2800358831882477, + "loss_ib": 0.0035639815032482147, + "step": 696 + }, + { + "ce_ib": 13.990978240966797, + "ce_orig": 1.7175135612487793, + "epoch": 0.20044575454741534, + "kl_loss": 0.20113369822502136, + "loss_ib": 0.0034104345832020044, + "step": 697 + }, + { + "ce_ib": 5.339763641357422, + "ce_orig": 0.4271097779273987, + "epoch": 0.20044575454741534, + "kl_loss": 0.20965386927127838, + "loss_ib": 0.0026305150240659714, + "step": 697 + }, + { + "ce_ib": 8.591266632080078, + "ce_orig": 0.9749876260757446, + "epoch": 0.20044575454741534, + "kl_loss": 0.14603781700134277, + "loss_ib": 0.0023195049725472927, + "step": 697 + }, + { + "ce_ib": 11.095532417297363, + "ce_orig": 0.9422957301139832, + "epoch": 0.20044575454741534, + "kl_loss": 0.18376457691192627, + "loss_ib": 0.002947198925539851, + "step": 697 + }, + { + "ce_ib": 10.03927230834961, + "ce_orig": 1.1190671920776367, + "epoch": 0.200733338126393, + "kl_loss": 0.1347736120223999, + "loss_ib": 0.0023516633082181215, + "step": 698 + }, + { + "ce_ib": 9.63382625579834, + "ce_orig": 0.9594607353210449, + "epoch": 0.200733338126393, + "kl_loss": 0.1937919557094574, + "loss_ib": 0.0029013019520789385, + "step": 698 + }, + { + "ce_ib": 10.83333683013916, + "ce_orig": 0.5495674014091492, + "epoch": 0.200733338126393, + "kl_loss": 0.21553227305412292, + "loss_ib": 0.003238656558096409, + "step": 698 + }, + { + "ce_ib": 5.4021687507629395, + "ce_orig": 0.49759823083877563, + "epoch": 0.200733338126393, + "kl_loss": 0.11563403159379959, + "loss_ib": 0.0016965570393949747, + "step": 698 + }, + { + "ce_ib": 12.474681854248047, + "ce_orig": 0.9798773527145386, + "epoch": 0.20102092170537061, + "kl_loss": 0.19496232271194458, + "loss_ib": 0.0031970911659300327, + "step": 699 + }, + { + "ce_ib": 4.9287333488464355, + "ce_orig": 0.5070614814758301, + "epoch": 0.20102092170537061, + "kl_loss": 0.11845901608467102, + "loss_ib": 0.0016774634132161736, + "step": 699 + }, + { + "ce_ib": 6.752041816711426, + "ce_orig": 0.7083439826965332, + "epoch": 0.20102092170537061, + "kl_loss": 0.13689608871936798, + "loss_ib": 0.002044165041297674, + "step": 699 + }, + { + "ce_ib": 8.316426277160645, + "ce_orig": 0.6727461218833923, + "epoch": 0.20102092170537061, + "kl_loss": 0.1803036332130432, + "loss_ib": 0.002634678967297077, + "step": 699 + }, + { + "epoch": 0.20130850528434827, + "grad_norm": 0.09928663074970245, + "learning_rate": 4.9826255029669577e-05, + "loss": 0.8094, + "step": 700 + }, + { + "ce_ib": 5.444150924682617, + "ce_orig": 0.5673038363456726, + "epoch": 0.20130850528434827, + "kl_loss": 0.09558887034654617, + "loss_ib": 0.0015003037406131625, + "step": 700 + }, + { + "ce_ib": 8.76500129699707, + "ce_orig": 0.9259908199310303, + "epoch": 0.20130850528434827, + "kl_loss": 0.1608695387840271, + "loss_ib": 0.0024851954076439142, + "step": 700 + }, + { + "ce_ib": 10.177801132202148, + "ce_orig": 1.1493618488311768, + "epoch": 0.20130850528434827, + "kl_loss": 0.21496880054473877, + "loss_ib": 0.0031674678903073072, + "step": 700 + }, + { + "ce_ib": 8.045758247375488, + "ce_orig": 1.1165003776550293, + "epoch": 0.20130850528434827, + "kl_loss": 0.14270451664924622, + "loss_ib": 0.0022316209506243467, + "step": 700 + }, + { + "ce_ib": 7.650059223175049, + "ce_orig": 1.008382797241211, + "epoch": 0.20159608886332592, + "kl_loss": 0.12849614024162292, + "loss_ib": 0.0020499674137681723, + "step": 701 + }, + { + "ce_ib": 7.221034526824951, + "ce_orig": 0.6420146226882935, + "epoch": 0.20159608886332592, + "kl_loss": 0.09210291504859924, + "loss_ib": 0.0016431325348094106, + "step": 701 + }, + { + "ce_ib": 11.927567481994629, + "ce_orig": 1.6603496074676514, + "epoch": 0.20159608886332592, + "kl_loss": 0.15888632833957672, + "loss_ib": 0.00278162001632154, + "step": 701 + }, + { + "ce_ib": 8.415051460266113, + "ce_orig": 1.0107392072677612, + "epoch": 0.20159608886332592, + "kl_loss": 0.15115907788276672, + "loss_ib": 0.002353095915168524, + "step": 701 + }, + { + "ce_ib": 11.7966947555542, + "ce_orig": 0.8676571846008301, + "epoch": 0.20188367244230354, + "kl_loss": 0.13981035351753235, + "loss_ib": 0.0025777730625122786, + "step": 702 + }, + { + "ce_ib": 9.650755882263184, + "ce_orig": 1.0907669067382812, + "epoch": 0.20188367244230354, + "kl_loss": 0.21402806043624878, + "loss_ib": 0.0031053561251610518, + "step": 702 + }, + { + "ce_ib": 10.13062572479248, + "ce_orig": 0.6553956270217896, + "epoch": 0.20188367244230354, + "kl_loss": 0.16557368636131287, + "loss_ib": 0.002668799366801977, + "step": 702 + }, + { + "ce_ib": 7.759829521179199, + "ce_orig": 0.5733931660652161, + "epoch": 0.20188367244230354, + "kl_loss": 0.14907249808311462, + "loss_ib": 0.0022667080629616976, + "step": 702 + }, + { + "ce_ib": 11.757640838623047, + "ce_orig": 1.1416308879852295, + "epoch": 0.2021712560212812, + "kl_loss": 0.23711419105529785, + "loss_ib": 0.0035469059366732836, + "step": 703 + }, + { + "ce_ib": 6.699312210083008, + "ce_orig": 0.5058441758155823, + "epoch": 0.2021712560212812, + "kl_loss": 0.11143745481967926, + "loss_ib": 0.001784305670298636, + "step": 703 + }, + { + "ce_ib": 8.576489448547363, + "ce_orig": 0.918267011642456, + "epoch": 0.2021712560212812, + "kl_loss": 0.18565413355827332, + "loss_ib": 0.0027141901664435863, + "step": 703 + }, + { + "ce_ib": 9.381869316101074, + "ce_orig": 1.0155972242355347, + "epoch": 0.2021712560212812, + "kl_loss": 0.21349982917308807, + "loss_ib": 0.003073184983804822, + "step": 703 + }, + { + "ce_ib": 13.443440437316895, + "ce_orig": 1.5300159454345703, + "epoch": 0.20245883960025882, + "kl_loss": 0.1552499383687973, + "loss_ib": 0.0028968434780836105, + "step": 704 + }, + { + "ce_ib": 9.67573356628418, + "ce_orig": 0.6580417156219482, + "epoch": 0.20245883960025882, + "kl_loss": 0.23093733191490173, + "loss_ib": 0.0032769464887678623, + "step": 704 + }, + { + "ce_ib": 4.894504070281982, + "ce_orig": 0.5909029841423035, + "epoch": 0.20245883960025882, + "kl_loss": 0.0857822597026825, + "loss_ib": 0.0013472730061039329, + "step": 704 + }, + { + "ce_ib": 9.48438549041748, + "ce_orig": 1.0949956178665161, + "epoch": 0.20245883960025882, + "kl_loss": 0.1445428431034088, + "loss_ib": 0.002393866889178753, + "step": 704 + }, + { + "epoch": 0.20274642317923647, + "grad_norm": 0.08958058804273605, + "learning_rate": 4.982165819273275e-05, + "loss": 0.8698, + "step": 705 + }, + { + "ce_ib": 8.077401161193848, + "ce_orig": 0.7715499401092529, + "epoch": 0.20274642317923647, + "kl_loss": 0.34654679894447327, + "loss_ib": 0.004273207858204842, + "step": 705 + }, + { + "ce_ib": 6.711842060089111, + "ce_orig": 0.5991529226303101, + "epoch": 0.20274642317923647, + "kl_loss": 0.12920328974723816, + "loss_ib": 0.001963217044249177, + "step": 705 + }, + { + "ce_ib": 8.242565155029297, + "ce_orig": 0.6560894846916199, + "epoch": 0.20274642317923647, + "kl_loss": 0.1585426926612854, + "loss_ib": 0.002409683307632804, + "step": 705 + }, + { + "ce_ib": 7.23456335067749, + "ce_orig": 0.7808642387390137, + "epoch": 0.20274642317923647, + "kl_loss": 0.10972153395414352, + "loss_ib": 0.0018206714885309339, + "step": 705 + }, + { + "ce_ib": 7.769526958465576, + "ce_orig": 1.1062254905700684, + "epoch": 0.20303400675821412, + "kl_loss": 0.07121115922927856, + "loss_ib": 0.0014890641905367374, + "step": 706 + }, + { + "ce_ib": 9.15722370147705, + "ce_orig": 0.7969109416007996, + "epoch": 0.20303400675821412, + "kl_loss": 0.154433012008667, + "loss_ib": 0.0024600522592663765, + "step": 706 + }, + { + "ce_ib": 6.588254451751709, + "ce_orig": 0.43084779381752014, + "epoch": 0.20303400675821412, + "kl_loss": 0.07921120524406433, + "loss_ib": 0.0014509373577311635, + "step": 706 + }, + { + "ce_ib": 8.812955856323242, + "ce_orig": 0.728909969329834, + "epoch": 0.20303400675821412, + "kl_loss": 0.1336958110332489, + "loss_ib": 0.0022182536777108908, + "step": 706 + }, + { + "ce_ib": 13.082417488098145, + "ce_orig": 1.2153578996658325, + "epoch": 0.20332159033719174, + "kl_loss": 0.1605956256389618, + "loss_ib": 0.002914197975769639, + "step": 707 + }, + { + "ce_ib": 10.410371780395508, + "ce_orig": 1.198205590248108, + "epoch": 0.20332159033719174, + "kl_loss": 0.08825291693210602, + "loss_ib": 0.0019235662184655666, + "step": 707 + }, + { + "ce_ib": 11.29565715789795, + "ce_orig": 1.242211103439331, + "epoch": 0.20332159033719174, + "kl_loss": 0.19409918785095215, + "loss_ib": 0.0030705577228218317, + "step": 707 + }, + { + "ce_ib": 8.208379745483398, + "ce_orig": 0.872199296951294, + "epoch": 0.20332159033719174, + "kl_loss": 0.1233471930027008, + "loss_ib": 0.002054309705272317, + "step": 707 + }, + { + "ce_ib": 7.685192584991455, + "ce_orig": 0.9802998304367065, + "epoch": 0.2036091739161694, + "kl_loss": 0.10023842006921768, + "loss_ib": 0.001770903472788632, + "step": 708 + }, + { + "ce_ib": 7.994464874267578, + "ce_orig": 0.7009384632110596, + "epoch": 0.2036091739161694, + "kl_loss": 0.1141389012336731, + "loss_ib": 0.001940835383720696, + "step": 708 + }, + { + "ce_ib": 8.558679580688477, + "ce_orig": 0.6219755411148071, + "epoch": 0.2036091739161694, + "kl_loss": 0.20445235073566437, + "loss_ib": 0.002900391351431608, + "step": 708 + }, + { + "ce_ib": 4.708154678344727, + "ce_orig": 0.8312263488769531, + "epoch": 0.2036091739161694, + "kl_loss": 0.07935391366481781, + "loss_ib": 0.001264354563318193, + "step": 708 + }, + { + "ce_ib": 17.41877555847168, + "ce_orig": 2.1723456382751465, + "epoch": 0.20389675749514702, + "kl_loss": 0.20338605344295502, + "loss_ib": 0.003775737714022398, + "step": 709 + }, + { + "ce_ib": 7.36018705368042, + "ce_orig": 0.9659443497657776, + "epoch": 0.20389675749514702, + "kl_loss": 0.18725061416625977, + "loss_ib": 0.0026085248682647943, + "step": 709 + }, + { + "ce_ib": 9.507402420043945, + "ce_orig": 1.092591404914856, + "epoch": 0.20389675749514702, + "kl_loss": 0.15672272443771362, + "loss_ib": 0.0025179674848914146, + "step": 709 + }, + { + "ce_ib": 9.41507625579834, + "ce_orig": 1.0811327695846558, + "epoch": 0.20389675749514702, + "kl_loss": 0.1266251802444458, + "loss_ib": 0.00220775930210948, + "step": 709 + }, + { + "epoch": 0.20418434107412467, + "grad_norm": 0.11347930133342743, + "learning_rate": 4.98170015546601e-05, + "loss": 0.8764, + "step": 710 + }, + { + "ce_ib": 11.422625541687012, + "ce_orig": 0.9313300251960754, + "epoch": 0.20418434107412467, + "kl_loss": 0.13721315562725067, + "loss_ib": 0.0025143937673419714, + "step": 710 + }, + { + "ce_ib": 6.7661237716674805, + "ce_orig": 0.7199991941452026, + "epoch": 0.20418434107412467, + "kl_loss": 0.17893491685390472, + "loss_ib": 0.0024659615010023117, + "step": 710 + }, + { + "ce_ib": 5.129522323608398, + "ce_orig": 0.4815012812614441, + "epoch": 0.20418434107412467, + "kl_loss": 0.17363528907299042, + "loss_ib": 0.002249305136501789, + "step": 710 + }, + { + "ce_ib": 6.5166168212890625, + "ce_orig": 0.7667418718338013, + "epoch": 0.20418434107412467, + "kl_loss": 0.0964181199669838, + "loss_ib": 0.001615842804312706, + "step": 710 + }, + { + "ce_ib": 5.599911212921143, + "ce_orig": 0.5180696249008179, + "epoch": 0.20447192465310232, + "kl_loss": 0.13735494017601013, + "loss_ib": 0.0019335405668243766, + "step": 711 + }, + { + "ce_ib": 13.007081031799316, + "ce_orig": 1.5647224187850952, + "epoch": 0.20447192465310232, + "kl_loss": 0.12819761037826538, + "loss_ib": 0.0025826841592788696, + "step": 711 + }, + { + "ce_ib": 11.807796478271484, + "ce_orig": 1.4334594011306763, + "epoch": 0.20447192465310232, + "kl_loss": 0.17374610900878906, + "loss_ib": 0.002918240614235401, + "step": 711 + }, + { + "ce_ib": 7.3837714195251465, + "ce_orig": 0.7142208814620972, + "epoch": 0.20447192465310232, + "kl_loss": 0.1720019280910492, + "loss_ib": 0.00245839636772871, + "step": 711 + }, + { + "ce_ib": 7.580549716949463, + "ce_orig": 0.7085816264152527, + "epoch": 0.20475950823207995, + "kl_loss": 0.14903539419174194, + "loss_ib": 0.0022484087385237217, + "step": 712 + }, + { + "ce_ib": 5.8234477043151855, + "ce_orig": 0.5772075057029724, + "epoch": 0.20475950823207995, + "kl_loss": 0.13270236551761627, + "loss_ib": 0.0019093683222308755, + "step": 712 + }, + { + "ce_ib": 6.38820743560791, + "ce_orig": 0.6470109224319458, + "epoch": 0.20475950823207995, + "kl_loss": 0.10315969586372375, + "loss_ib": 0.001670417725108564, + "step": 712 + }, + { + "ce_ib": 8.948882102966309, + "ce_orig": 0.9020084738731384, + "epoch": 0.20475950823207995, + "kl_loss": 0.18468233942985535, + "loss_ib": 0.0027417116798460484, + "step": 712 + }, + { + "ce_ib": 8.562841415405273, + "ce_orig": 0.8737965226173401, + "epoch": 0.2050470918110576, + "kl_loss": 0.1370054930448532, + "loss_ib": 0.002226338954642415, + "step": 713 + }, + { + "ce_ib": 8.57533073425293, + "ce_orig": 1.3104912042617798, + "epoch": 0.2050470918110576, + "kl_loss": 0.18775838613510132, + "loss_ib": 0.002735116984695196, + "step": 713 + }, + { + "ce_ib": 6.767639636993408, + "ce_orig": 0.7026990056037903, + "epoch": 0.2050470918110576, + "kl_loss": 0.1764390766620636, + "loss_ib": 0.00244115456007421, + "step": 713 + }, + { + "ce_ib": 10.467775344848633, + "ce_orig": 0.9951248168945312, + "epoch": 0.2050470918110576, + "kl_loss": 0.14137201011180878, + "loss_ib": 0.0024604976642876863, + "step": 713 + }, + { + "ce_ib": 12.358627319335938, + "ce_orig": 1.6978172063827515, + "epoch": 0.20533467539003522, + "kl_loss": 0.13740137219429016, + "loss_ib": 0.002609876450151205, + "step": 714 + }, + { + "ce_ib": 9.153459548950195, + "ce_orig": 0.7250450849533081, + "epoch": 0.20533467539003522, + "kl_loss": 0.14596766233444214, + "loss_ib": 0.0023750225082039833, + "step": 714 + }, + { + "ce_ib": 6.154027462005615, + "ce_orig": 0.6910421848297119, + "epoch": 0.20533467539003522, + "kl_loss": 0.1430780291557312, + "loss_ib": 0.0020461829844862223, + "step": 714 + }, + { + "ce_ib": 8.212672233581543, + "ce_orig": 0.8080992698669434, + "epoch": 0.20533467539003522, + "kl_loss": 0.15016669034957886, + "loss_ib": 0.002322934102267027, + "step": 714 + }, + { + "epoch": 0.20562225896901287, + "grad_norm": 0.09610775858163834, + "learning_rate": 4.981228512667057e-05, + "loss": 0.8553, + "step": 715 + }, + { + "ce_ib": 7.444067478179932, + "ce_orig": 0.8634759783744812, + "epoch": 0.20562225896901287, + "kl_loss": 0.11479885876178741, + "loss_ib": 0.0018923953175544739, + "step": 715 + }, + { + "ce_ib": 6.535699367523193, + "ce_orig": 0.7095260620117188, + "epoch": 0.20562225896901287, + "kl_loss": 0.08906295150518417, + "loss_ib": 0.0015441994182765484, + "step": 715 + }, + { + "ce_ib": 9.0155029296875, + "ce_orig": 0.5211726427078247, + "epoch": 0.20562225896901287, + "kl_loss": 0.2069450318813324, + "loss_ib": 0.0029710005037486553, + "step": 715 + }, + { + "ce_ib": 7.391125679016113, + "ce_orig": 1.0465866327285767, + "epoch": 0.20562225896901287, + "kl_loss": 0.14382626116275787, + "loss_ib": 0.0021773751359432936, + "step": 715 + }, + { + "ce_ib": 5.707086086273193, + "ce_orig": 0.7374852895736694, + "epoch": 0.2059098425479905, + "kl_loss": 0.1332034170627594, + "loss_ib": 0.0019027426606044173, + "step": 716 + }, + { + "ce_ib": 5.903783798217773, + "ce_orig": 0.7245094180107117, + "epoch": 0.2059098425479905, + "kl_loss": 0.09125680476427078, + "loss_ib": 0.0015029464848339558, + "step": 716 + }, + { + "ce_ib": 9.94698715209961, + "ce_orig": 1.195637583732605, + "epoch": 0.2059098425479905, + "kl_loss": 0.13582003116607666, + "loss_ib": 0.0023528989404439926, + "step": 716 + }, + { + "ce_ib": 7.605693340301514, + "ce_orig": 0.8307034969329834, + "epoch": 0.2059098425479905, + "kl_loss": 0.09922278672456741, + "loss_ib": 0.001752797164954245, + "step": 716 + }, + { + "ce_ib": 9.15215015411377, + "ce_orig": 0.9594472646713257, + "epoch": 0.20619742612696815, + "kl_loss": 0.21580404043197632, + "loss_ib": 0.0030732552986592054, + "step": 717 + }, + { + "ce_ib": 8.207721710205078, + "ce_orig": 0.5845073461532593, + "epoch": 0.20619742612696815, + "kl_loss": 0.3212279975414276, + "loss_ib": 0.004033051896840334, + "step": 717 + }, + { + "ce_ib": 12.13454818725586, + "ce_orig": 1.3720506429672241, + "epoch": 0.20619742612696815, + "kl_loss": 0.17790672183036804, + "loss_ib": 0.002992521971464157, + "step": 717 + }, + { + "ce_ib": 4.388705253601074, + "ce_orig": 0.47980645298957825, + "epoch": 0.20619742612696815, + "kl_loss": 0.12024472653865814, + "loss_ib": 0.0016413177363574505, + "step": 717 + }, + { + "ce_ib": 11.236103057861328, + "ce_orig": 0.76035475730896, + "epoch": 0.2064850097059458, + "kl_loss": 0.1305321604013443, + "loss_ib": 0.0024289318826049566, + "step": 718 + }, + { + "ce_ib": 9.612162590026855, + "ce_orig": 0.898193895816803, + "epoch": 0.2064850097059458, + "kl_loss": 0.15393592417240143, + "loss_ib": 0.002500575501471758, + "step": 718 + }, + { + "ce_ib": 9.815557479858398, + "ce_orig": 1.1573611497879028, + "epoch": 0.2064850097059458, + "kl_loss": 0.23672373592853546, + "loss_ib": 0.003348792903125286, + "step": 718 + }, + { + "ce_ib": 8.44405746459961, + "ce_orig": 0.4511740803718567, + "epoch": 0.2064850097059458, + "kl_loss": 0.21269428730010986, + "loss_ib": 0.002971348585560918, + "step": 718 + }, + { + "ce_ib": 12.143657684326172, + "ce_orig": 1.3069207668304443, + "epoch": 0.20677259328492342, + "kl_loss": 0.13651582598686218, + "loss_ib": 0.0025795239489525557, + "step": 719 + }, + { + "ce_ib": 11.6182279586792, + "ce_orig": 1.6872526407241821, + "epoch": 0.20677259328492342, + "kl_loss": 0.14566782116889954, + "loss_ib": 0.002618500730022788, + "step": 719 + }, + { + "ce_ib": 5.154110431671143, + "ce_orig": 0.5514587759971619, + "epoch": 0.20677259328492342, + "kl_loss": 0.0836012214422226, + "loss_ib": 0.001351423212327063, + "step": 719 + }, + { + "ce_ib": 9.787772178649902, + "ce_orig": 0.9540395736694336, + "epoch": 0.20677259328492342, + "kl_loss": 0.1440960168838501, + "loss_ib": 0.0024197371676564217, + "step": 719 + }, + { + "epoch": 0.20706017686390107, + "grad_norm": 0.09736236929893494, + "learning_rate": 4.980750892012711e-05, + "loss": 0.8556, + "step": 720 + }, + { + "ce_ib": 6.094231605529785, + "ce_orig": 0.432678759098053, + "epoch": 0.20706017686390107, + "kl_loss": 0.07415103167295456, + "loss_ib": 0.0013509334530681372, + "step": 720 + }, + { + "ce_ib": 11.028846740722656, + "ce_orig": 1.3781291246414185, + "epoch": 0.20706017686390107, + "kl_loss": 0.13459554314613342, + "loss_ib": 0.0024488400667905807, + "step": 720 + }, + { + "ce_ib": 7.907367706298828, + "ce_orig": 0.524237871170044, + "epoch": 0.20706017686390107, + "kl_loss": 0.11885979026556015, + "loss_ib": 0.0019793345127254725, + "step": 720 + }, + { + "ce_ib": 6.396885395050049, + "ce_orig": 0.748169481754303, + "epoch": 0.20706017686390107, + "kl_loss": 0.10846064984798431, + "loss_ib": 0.001724294968880713, + "step": 720 + }, + { + "ce_ib": 7.320477485656738, + "ce_orig": 0.5779712796211243, + "epoch": 0.2073477604428787, + "kl_loss": 0.1938854306936264, + "loss_ib": 0.0026709020603448153, + "step": 721 + }, + { + "ce_ib": 8.144287109375, + "ce_orig": 0.6583978533744812, + "epoch": 0.2073477604428787, + "kl_loss": 0.1584765613079071, + "loss_ib": 0.002399194287136197, + "step": 721 + }, + { + "ce_ib": 8.83871078491211, + "ce_orig": 1.5272163152694702, + "epoch": 0.2073477604428787, + "kl_loss": 0.12005112320184708, + "loss_ib": 0.0020843823440372944, + "step": 721 + }, + { + "ce_ib": 11.102349281311035, + "ce_orig": 1.1282752752304077, + "epoch": 0.2073477604428787, + "kl_loss": 0.14990922808647156, + "loss_ib": 0.002609326969832182, + "step": 721 + }, + { + "ce_ib": 6.203333854675293, + "ce_orig": 0.46742182970046997, + "epoch": 0.20763534402185635, + "kl_loss": 0.1615336537361145, + "loss_ib": 0.0022356698755174875, + "step": 722 + }, + { + "ce_ib": 7.080376148223877, + "ce_orig": 0.8276212215423584, + "epoch": 0.20763534402185635, + "kl_loss": 0.0906413346529007, + "loss_ib": 0.0016144509427249432, + "step": 722 + }, + { + "ce_ib": 4.1770477294921875, + "ce_orig": 0.48553168773651123, + "epoch": 0.20763534402185635, + "kl_loss": 0.08101706951856613, + "loss_ib": 0.0012278754729777575, + "step": 722 + }, + { + "ce_ib": 4.8663129806518555, + "ce_orig": 0.35782018303871155, + "epoch": 0.20763534402185635, + "kl_loss": 0.10124047100543976, + "loss_ib": 0.0014990359777584672, + "step": 722 + }, + { + "ce_ib": 6.2184247970581055, + "ce_orig": 0.5856457948684692, + "epoch": 0.207922927600834, + "kl_loss": 0.10668499767780304, + "loss_ib": 0.0016886923694983125, + "step": 723 + }, + { + "ce_ib": 8.754106521606445, + "ce_orig": 0.7017520070075989, + "epoch": 0.207922927600834, + "kl_loss": 0.1465778797864914, + "loss_ib": 0.002341189421713352, + "step": 723 + }, + { + "ce_ib": 7.906350612640381, + "ce_orig": 0.8050028085708618, + "epoch": 0.207922927600834, + "kl_loss": 0.19620084762573242, + "loss_ib": 0.002752643544226885, + "step": 723 + }, + { + "ce_ib": 4.550933361053467, + "ce_orig": 0.5352786779403687, + "epoch": 0.207922927600834, + "kl_loss": 0.12208560854196548, + "loss_ib": 0.0016759493155404925, + "step": 723 + }, + { + "ce_ib": 9.424193382263184, + "ce_orig": 0.30670666694641113, + "epoch": 0.20821051117981162, + "kl_loss": 0.4818356931209564, + "loss_ib": 0.005760776344686747, + "step": 724 + }, + { + "ce_ib": 11.714197158813477, + "ce_orig": 0.9081876277923584, + "epoch": 0.20821051117981162, + "kl_loss": 0.18002018332481384, + "loss_ib": 0.00297162146307528, + "step": 724 + }, + { + "ce_ib": 3.9292664527893066, + "ce_orig": 0.1352614313364029, + "epoch": 0.20821051117981162, + "kl_loss": 0.4230746030807495, + "loss_ib": 0.00462367245927453, + "step": 724 + }, + { + "ce_ib": 10.647526741027832, + "ce_orig": 1.4817237854003906, + "epoch": 0.20821051117981162, + "kl_loss": 0.2258502095937729, + "loss_ib": 0.003323254408314824, + "step": 724 + }, + { + "epoch": 0.20849809475878928, + "grad_norm": 0.08137502521276474, + "learning_rate": 4.980267294653671e-05, + "loss": 0.8851, + "step": 725 + }, + { + "ce_ib": 6.349156856536865, + "ce_orig": 0.7236438393592834, + "epoch": 0.20849809475878928, + "kl_loss": 0.12380913645029068, + "loss_ib": 0.0018730070441961288, + "step": 725 + }, + { + "ce_ib": 6.632828235626221, + "ce_orig": 0.9744963049888611, + "epoch": 0.20849809475878928, + "kl_loss": 0.08201053738594055, + "loss_ib": 0.0014833882451057434, + "step": 725 + }, + { + "ce_ib": 7.85797643661499, + "ce_orig": 0.7217217683792114, + "epoch": 0.20849809475878928, + "kl_loss": 0.10760220885276794, + "loss_ib": 0.0018618195317685604, + "step": 725 + }, + { + "ce_ib": 5.568864822387695, + "ce_orig": 0.5582032799720764, + "epoch": 0.20849809475878928, + "kl_loss": 0.10348623245954514, + "loss_ib": 0.0015917486744001508, + "step": 725 + }, + { + "ce_ib": 6.322106838226318, + "ce_orig": 0.6340025663375854, + "epoch": 0.2087856783377669, + "kl_loss": 0.1141437366604805, + "loss_ib": 0.001773647964000702, + "step": 726 + }, + { + "ce_ib": 9.839473724365234, + "ce_orig": 0.8133766651153564, + "epoch": 0.2087856783377669, + "kl_loss": 0.1660975217819214, + "loss_ib": 0.002644922584295273, + "step": 726 + }, + { + "ce_ib": 11.004530906677246, + "ce_orig": 1.3718172311782837, + "epoch": 0.2087856783377669, + "kl_loss": 0.1735047996044159, + "loss_ib": 0.0028355009853839874, + "step": 726 + }, + { + "ce_ib": 12.823655128479004, + "ce_orig": 1.5520806312561035, + "epoch": 0.2087856783377669, + "kl_loss": 0.15558293461799622, + "loss_ib": 0.0028381948359310627, + "step": 726 + }, + { + "ce_ib": 9.323108673095703, + "ce_orig": 0.8158657550811768, + "epoch": 0.20907326191674455, + "kl_loss": 0.2026049792766571, + "loss_ib": 0.002958360593765974, + "step": 727 + }, + { + "ce_ib": 10.1639404296875, + "ce_orig": 0.658790111541748, + "epoch": 0.20907326191674455, + "kl_loss": 0.11903582513332367, + "loss_ib": 0.002206752309575677, + "step": 727 + }, + { + "ce_ib": 8.439207077026367, + "ce_orig": 0.5992903113365173, + "epoch": 0.20907326191674455, + "kl_loss": 0.12204043567180634, + "loss_ib": 0.002064324915409088, + "step": 727 + }, + { + "ce_ib": 5.409970760345459, + "ce_orig": 0.5538503527641296, + "epoch": 0.20907326191674455, + "kl_loss": 0.09466078877449036, + "loss_ib": 0.0014876049244776368, + "step": 727 + }, + { + "ce_ib": 6.850286960601807, + "ce_orig": 0.5898058414459229, + "epoch": 0.2093608454957222, + "kl_loss": 0.0770832896232605, + "loss_ib": 0.001455861609429121, + "step": 728 + }, + { + "ce_ib": 7.669926643371582, + "ce_orig": 0.7111138105392456, + "epoch": 0.2093608454957222, + "kl_loss": 0.1397348940372467, + "loss_ib": 0.0021643415093421936, + "step": 728 + }, + { + "ce_ib": 8.357053756713867, + "ce_orig": 0.866073727607727, + "epoch": 0.2093608454957222, + "kl_loss": 0.12581852078437805, + "loss_ib": 0.0020938904490321875, + "step": 728 + }, + { + "ce_ib": 9.84300422668457, + "ce_orig": 0.9218365550041199, + "epoch": 0.2093608454957222, + "kl_loss": 0.20025497674942017, + "loss_ib": 0.002986849984154105, + "step": 728 + }, + { + "ce_ib": 7.854262828826904, + "ce_orig": 0.6762682199478149, + "epoch": 0.20964842907469983, + "kl_loss": 0.1409384310245514, + "loss_ib": 0.00219481042586267, + "step": 729 + }, + { + "ce_ib": 8.109271049499512, + "ce_orig": 0.639650821685791, + "epoch": 0.20964842907469983, + "kl_loss": 0.15809345245361328, + "loss_ib": 0.002391861518844962, + "step": 729 + }, + { + "ce_ib": 7.235316276550293, + "ce_orig": 0.2508554458618164, + "epoch": 0.20964842907469983, + "kl_loss": 0.2572559714317322, + "loss_ib": 0.0032960912212729454, + "step": 729 + }, + { + "ce_ib": 7.926023960113525, + "ce_orig": 0.7422289848327637, + "epoch": 0.20964842907469983, + "kl_loss": 0.16561353206634521, + "loss_ib": 0.002448737621307373, + "step": 729 + }, + { + "epoch": 0.20993601265367748, + "grad_norm": 0.08178116381168365, + "learning_rate": 4.9797777217550367e-05, + "loss": 0.861, + "step": 730 + }, + { + "ce_ib": 8.730643272399902, + "ce_orig": 0.867591142654419, + "epoch": 0.20993601265367748, + "kl_loss": 0.11806496232748032, + "loss_ib": 0.0020537138916552067, + "step": 730 + }, + { + "ce_ib": 9.545483589172363, + "ce_orig": 0.9179695844650269, + "epoch": 0.20993601265367748, + "kl_loss": 0.36148688197135925, + "loss_ib": 0.004569417331367731, + "step": 730 + }, + { + "ce_ib": 11.301115989685059, + "ce_orig": 1.146166443824768, + "epoch": 0.20993601265367748, + "kl_loss": 0.16140805184841156, + "loss_ib": 0.002744192024692893, + "step": 730 + }, + { + "ce_ib": 11.486992835998535, + "ce_orig": 1.2627525329589844, + "epoch": 0.20993601265367748, + "kl_loss": 0.16576717793941498, + "loss_ib": 0.002806370845064521, + "step": 730 + }, + { + "ce_ib": 9.185856819152832, + "ce_orig": 0.7200559377670288, + "epoch": 0.2102235962326551, + "kl_loss": 0.20296761393547058, + "loss_ib": 0.002948261797428131, + "step": 731 + }, + { + "ce_ib": 10.240104675292969, + "ce_orig": 0.6126185059547424, + "epoch": 0.2102235962326551, + "kl_loss": 0.16589286923408508, + "loss_ib": 0.0026829391717910767, + "step": 731 + }, + { + "ce_ib": 6.059067249298096, + "ce_orig": 0.5460329055786133, + "epoch": 0.2102235962326551, + "kl_loss": 0.11571324616670609, + "loss_ib": 0.0017630391521379352, + "step": 731 + }, + { + "ce_ib": 4.1030120849609375, + "ce_orig": 0.14751267433166504, + "epoch": 0.2102235962326551, + "kl_loss": 0.3347550630569458, + "loss_ib": 0.0037578516639769077, + "step": 731 + }, + { + "ce_ib": 5.566345691680908, + "ce_orig": 0.47219768166542053, + "epoch": 0.21051117981163275, + "kl_loss": 0.10999684035778046, + "loss_ib": 0.0016566028352826834, + "step": 732 + }, + { + "ce_ib": 6.692113876342773, + "ce_orig": 0.8962125778198242, + "epoch": 0.21051117981163275, + "kl_loss": 0.10143055766820908, + "loss_ib": 0.0016835168935358524, + "step": 732 + }, + { + "ce_ib": 10.237395286560059, + "ce_orig": 0.8566427826881409, + "epoch": 0.21051117981163275, + "kl_loss": 0.13674971461296082, + "loss_ib": 0.002391236601397395, + "step": 732 + }, + { + "ce_ib": 7.358321189880371, + "ce_orig": 0.7953360080718994, + "epoch": 0.21051117981163275, + "kl_loss": 0.11545369774103165, + "loss_ib": 0.0018903689924627542, + "step": 732 + }, + { + "ce_ib": 7.0491719245910645, + "ce_orig": 0.5768455862998962, + "epoch": 0.2107987633906104, + "kl_loss": 0.14890553057193756, + "loss_ib": 0.0021939724683761597, + "step": 733 + }, + { + "ce_ib": 7.68407678604126, + "ce_orig": 0.8678704500198364, + "epoch": 0.2107987633906104, + "kl_loss": 0.19429832696914673, + "loss_ib": 0.002711390843614936, + "step": 733 + }, + { + "ce_ib": 9.173813819885254, + "ce_orig": 0.9225053191184998, + "epoch": 0.2107987633906104, + "kl_loss": 0.3797130286693573, + "loss_ib": 0.004714511334896088, + "step": 733 + }, + { + "ce_ib": 9.308879852294922, + "ce_orig": 0.5995781421661377, + "epoch": 0.2107987633906104, + "kl_loss": 0.145217627286911, + "loss_ib": 0.0023830642458051443, + "step": 733 + }, + { + "ce_ib": 9.65007495880127, + "ce_orig": 0.7233938574790955, + "epoch": 0.21108634696958803, + "kl_loss": 0.2051672786474228, + "loss_ib": 0.0030166800133883953, + "step": 734 + }, + { + "ce_ib": 8.14127254486084, + "ce_orig": 0.6786958575248718, + "epoch": 0.21108634696958803, + "kl_loss": 0.16025173664093018, + "loss_ib": 0.002416644711047411, + "step": 734 + }, + { + "ce_ib": 9.92184066772461, + "ce_orig": 0.828167200088501, + "epoch": 0.21108634696958803, + "kl_loss": 0.1490582823753357, + "loss_ib": 0.0024827667511999607, + "step": 734 + }, + { + "ce_ib": 9.757160186767578, + "ce_orig": 1.001948595046997, + "epoch": 0.21108634696958803, + "kl_loss": 0.17845755815505981, + "loss_ib": 0.002760291565209627, + "step": 734 + }, + { + "epoch": 0.21137393054856568, + "grad_norm": 0.09134446084499359, + "learning_rate": 4.979282174496302e-05, + "loss": 0.883, + "step": 735 + }, + { + "ce_ib": 7.697344779968262, + "ce_orig": 0.613048255443573, + "epoch": 0.21137393054856568, + "kl_loss": 0.1581844836473465, + "loss_ib": 0.0023515792563557625, + "step": 735 + }, + { + "ce_ib": 9.774848937988281, + "ce_orig": 0.9369478821754456, + "epoch": 0.21137393054856568, + "kl_loss": 0.1405380368232727, + "loss_ib": 0.0023828651756048203, + "step": 735 + }, + { + "ce_ib": 10.879542350769043, + "ce_orig": 1.151315450668335, + "epoch": 0.21137393054856568, + "kl_loss": 0.14188644289970398, + "loss_ib": 0.0025068186223506927, + "step": 735 + }, + { + "ce_ib": 7.50467586517334, + "ce_orig": 0.8081782460212708, + "epoch": 0.21137393054856568, + "kl_loss": 0.13665321469306946, + "loss_ib": 0.002116999588906765, + "step": 735 + }, + { + "ce_ib": 8.551518440246582, + "ce_orig": 1.0012387037277222, + "epoch": 0.2116615141275433, + "kl_loss": 0.09218282997608185, + "loss_ib": 0.0017769801197573543, + "step": 736 + }, + { + "ce_ib": 12.103117942810059, + "ce_orig": 1.6258405447006226, + "epoch": 0.2116615141275433, + "kl_loss": 0.2883414626121521, + "loss_ib": 0.0040937261655926704, + "step": 736 + }, + { + "ce_ib": 9.884223937988281, + "ce_orig": 1.289268970489502, + "epoch": 0.2116615141275433, + "kl_loss": 0.11228608340024948, + "loss_ib": 0.002111283130943775, + "step": 736 + }, + { + "ce_ib": 12.174933433532715, + "ce_orig": 1.3357943296432495, + "epoch": 0.2116615141275433, + "kl_loss": 0.1535387933254242, + "loss_ib": 0.0027528812643140554, + "step": 736 + }, + { + "ce_ib": 5.271547317504883, + "ce_orig": 0.6658823490142822, + "epoch": 0.21194909770652096, + "kl_loss": 0.09715841710567474, + "loss_ib": 0.001498738769441843, + "step": 737 + }, + { + "ce_ib": 4.523626327514648, + "ce_orig": 0.43075889348983765, + "epoch": 0.21194909770652096, + "kl_loss": 0.11853388696908951, + "loss_ib": 0.0016377015272155404, + "step": 737 + }, + { + "ce_ib": 7.904093265533447, + "ce_orig": 0.9350758194923401, + "epoch": 0.21194909770652096, + "kl_loss": 0.11697270721197128, + "loss_ib": 0.001960136229172349, + "step": 737 + }, + { + "ce_ib": 8.9533109664917, + "ce_orig": 0.7697760462760925, + "epoch": 0.21194909770652096, + "kl_loss": 0.3157818913459778, + "loss_ib": 0.004053149838000536, + "step": 737 + }, + { + "ce_ib": 8.857293128967285, + "ce_orig": 0.7813129425048828, + "epoch": 0.2122366812854986, + "kl_loss": 0.16268154978752136, + "loss_ib": 0.0025125446263700724, + "step": 738 + }, + { + "ce_ib": 8.424586296081543, + "ce_orig": 0.9425109624862671, + "epoch": 0.2122366812854986, + "kl_loss": 0.12331412732601166, + "loss_ib": 0.0020755997393280268, + "step": 738 + }, + { + "ce_ib": 9.702607154846191, + "ce_orig": 1.580017328262329, + "epoch": 0.2122366812854986, + "kl_loss": 0.09408050775527954, + "loss_ib": 0.0019110658904537559, + "step": 738 + }, + { + "ce_ib": 7.1836347579956055, + "ce_orig": 0.4377358555793762, + "epoch": 0.2122366812854986, + "kl_loss": 0.14707301557064056, + "loss_ib": 0.002189093502238393, + "step": 738 + }, + { + "ce_ib": 3.4647040367126465, + "ce_orig": 0.35970985889434814, + "epoch": 0.21252426486447623, + "kl_loss": 0.2587149143218994, + "loss_ib": 0.0029336195439100266, + "step": 739 + }, + { + "ce_ib": 6.37571907043457, + "ce_orig": 0.5920301675796509, + "epoch": 0.21252426486447623, + "kl_loss": 0.11138466000556946, + "loss_ib": 0.0017514183418825269, + "step": 739 + }, + { + "ce_ib": 5.618171215057373, + "ce_orig": 0.9007509350776672, + "epoch": 0.21252426486447623, + "kl_loss": 0.11110688745975494, + "loss_ib": 0.0016728859627619386, + "step": 739 + }, + { + "ce_ib": 8.733031272888184, + "ce_orig": 1.0930041074752808, + "epoch": 0.21252426486447623, + "kl_loss": 0.11575108021497726, + "loss_ib": 0.0020308138336986303, + "step": 739 + }, + { + "epoch": 0.21281184844345388, + "grad_norm": 0.08747689425945282, + "learning_rate": 4.9787806540713546e-05, + "loss": 0.8554, + "step": 740 + }, + { + "ce_ib": 8.982237815856934, + "ce_orig": 0.755938708782196, + "epoch": 0.21281184844345388, + "kl_loss": 0.15475055575370789, + "loss_ib": 0.0024457292165607214, + "step": 740 + }, + { + "ce_ib": 7.661835193634033, + "ce_orig": 0.7428460121154785, + "epoch": 0.21281184844345388, + "kl_loss": 0.11066774278879166, + "loss_ib": 0.001872860942967236, + "step": 740 + }, + { + "ce_ib": 6.563145637512207, + "ce_orig": 0.6415489912033081, + "epoch": 0.21281184844345388, + "kl_loss": 0.08858776092529297, + "loss_ib": 0.0015421920688822865, + "step": 740 + }, + { + "ce_ib": 9.22264575958252, + "ce_orig": 1.1372004747390747, + "epoch": 0.21281184844345388, + "kl_loss": 0.1382768154144287, + "loss_ib": 0.002305032452568412, + "step": 740 + }, + { + "ce_ib": 5.523608207702637, + "ce_orig": 0.5463114976882935, + "epoch": 0.2130994320224315, + "kl_loss": 0.12450896203517914, + "loss_ib": 0.0017974504735320807, + "step": 741 + }, + { + "ce_ib": 6.9053497314453125, + "ce_orig": 0.566726803779602, + "epoch": 0.2130994320224315, + "kl_loss": 0.11424671858549118, + "loss_ib": 0.001833002083003521, + "step": 741 + }, + { + "ce_ib": 8.808889389038086, + "ce_orig": 0.666398823261261, + "epoch": 0.2130994320224315, + "kl_loss": 0.19304627180099487, + "loss_ib": 0.0028113515581935644, + "step": 741 + }, + { + "ce_ib": 9.8599214553833, + "ce_orig": 1.0360751152038574, + "epoch": 0.2130994320224315, + "kl_loss": 0.3881288170814514, + "loss_ib": 0.004867279902100563, + "step": 741 + }, + { + "ce_ib": 10.459798812866211, + "ce_orig": 1.267624855041504, + "epoch": 0.21338701560140916, + "kl_loss": 0.1840190589427948, + "loss_ib": 0.0028861702885478735, + "step": 742 + }, + { + "ce_ib": 9.469386100769043, + "ce_orig": 1.4054698944091797, + "epoch": 0.21338701560140916, + "kl_loss": 0.10489386320114136, + "loss_ib": 0.0019958773627877235, + "step": 742 + }, + { + "ce_ib": 7.87640380859375, + "ce_orig": 0.8942380547523499, + "epoch": 0.21338701560140916, + "kl_loss": 0.17778579890727997, + "loss_ib": 0.0025654982309788465, + "step": 742 + }, + { + "ce_ib": 7.7208356857299805, + "ce_orig": 0.5113922953605652, + "epoch": 0.21338701560140916, + "kl_loss": 0.1375960260629654, + "loss_ib": 0.00214804382994771, + "step": 742 + }, + { + "ce_ib": 9.136881828308105, + "ce_orig": 0.6699970960617065, + "epoch": 0.2136745991803868, + "kl_loss": 0.15580974519252777, + "loss_ib": 0.0024717855267226696, + "step": 743 + }, + { + "ce_ib": 4.8074541091918945, + "ce_orig": 0.4599434435367584, + "epoch": 0.2136745991803868, + "kl_loss": 0.08423975110054016, + "loss_ib": 0.0013231429038569331, + "step": 743 + }, + { + "ce_ib": 8.024765968322754, + "ce_orig": 0.4649258255958557, + "epoch": 0.2136745991803868, + "kl_loss": 0.1424439400434494, + "loss_ib": 0.0022269159089773893, + "step": 743 + }, + { + "ce_ib": 9.357501983642578, + "ce_orig": 0.8131682276725769, + "epoch": 0.2136745991803868, + "kl_loss": 0.13159029185771942, + "loss_ib": 0.0022516530007123947, + "step": 743 + }, + { + "ce_ib": 8.306589126586914, + "ce_orig": 0.8594993352890015, + "epoch": 0.21396218275936443, + "kl_loss": 0.19585707783699036, + "loss_ib": 0.0027892296202480793, + "step": 744 + }, + { + "ce_ib": 6.1273651123046875, + "ce_orig": 0.8473232388496399, + "epoch": 0.21396218275936443, + "kl_loss": 0.08322134613990784, + "loss_ib": 0.0014449498848989606, + "step": 744 + }, + { + "ce_ib": 7.272594451904297, + "ce_orig": 0.9642258882522583, + "epoch": 0.21396218275936443, + "kl_loss": 0.15547984838485718, + "loss_ib": 0.0022820578888058662, + "step": 744 + }, + { + "ce_ib": 11.65031909942627, + "ce_orig": 1.2790560722351074, + "epoch": 0.21396218275936443, + "kl_loss": 0.1697477102279663, + "loss_ib": 0.002862508874386549, + "step": 744 + }, + { + "epoch": 0.21424976633834208, + "grad_norm": 0.10272146761417389, + "learning_rate": 4.9782731616884736e-05, + "loss": 0.8185, + "step": 745 + }, + { + "ce_ib": 12.1670503616333, + "ce_orig": 0.6025158762931824, + "epoch": 0.21424976633834208, + "kl_loss": 0.17272210121154785, + "loss_ib": 0.002943925792351365, + "step": 745 + }, + { + "ce_ib": 8.464635848999023, + "ce_orig": 0.9119886755943298, + "epoch": 0.21424976633834208, + "kl_loss": 0.09585784375667572, + "loss_ib": 0.001805042033083737, + "step": 745 + }, + { + "ce_ib": 7.6008076667785645, + "ce_orig": 0.9770063161849976, + "epoch": 0.21424976633834208, + "kl_loss": 0.1800667643547058, + "loss_ib": 0.0025607484858483076, + "step": 745 + }, + { + "ce_ib": 9.016298294067383, + "ce_orig": 0.9819609522819519, + "epoch": 0.21424976633834208, + "kl_loss": 0.15516135096549988, + "loss_ib": 0.002453243127092719, + "step": 745 + }, + { + "ce_ib": 7.594570636749268, + "ce_orig": 0.8066179156303406, + "epoch": 0.2145373499173197, + "kl_loss": 0.16977733373641968, + "loss_ib": 0.0024572303518652916, + "step": 746 + }, + { + "ce_ib": 6.28651237487793, + "ce_orig": 0.46179917454719543, + "epoch": 0.2145373499173197, + "kl_loss": 0.1124720424413681, + "loss_ib": 0.001753371674567461, + "step": 746 + }, + { + "ce_ib": 6.5596394538879395, + "ce_orig": 0.6535757780075073, + "epoch": 0.2145373499173197, + "kl_loss": 0.14391300082206726, + "loss_ib": 0.0020950939506292343, + "step": 746 + }, + { + "ce_ib": 10.635780334472656, + "ce_orig": 1.0490766763687134, + "epoch": 0.2145373499173197, + "kl_loss": 0.11995188891887665, + "loss_ib": 0.002263096859678626, + "step": 746 + }, + { + "ce_ib": 7.497809886932373, + "ce_orig": 0.743564784526825, + "epoch": 0.21482493349629736, + "kl_loss": 0.15132027864456177, + "loss_ib": 0.0022629837039858103, + "step": 747 + }, + { + "ce_ib": 11.009923934936523, + "ce_orig": 1.1343742609024048, + "epoch": 0.21482493349629736, + "kl_loss": 0.13817675411701202, + "loss_ib": 0.0024827599991112947, + "step": 747 + }, + { + "ce_ib": 8.217153549194336, + "ce_orig": 0.9935986995697021, + "epoch": 0.21482493349629736, + "kl_loss": 0.08536257594823837, + "loss_ib": 0.0016753410454839468, + "step": 747 + }, + { + "ce_ib": 7.1405253410339355, + "ce_orig": 0.6493523716926575, + "epoch": 0.21482493349629736, + "kl_loss": 0.12277388572692871, + "loss_ib": 0.0019417913863435388, + "step": 747 + }, + { + "ce_ib": 8.954559326171875, + "ce_orig": 0.9859476089477539, + "epoch": 0.215112517075275, + "kl_loss": 0.09736071527004242, + "loss_ib": 0.0018690630095079541, + "step": 748 + }, + { + "ce_ib": 9.049851417541504, + "ce_orig": 0.5681316256523132, + "epoch": 0.215112517075275, + "kl_loss": 0.2084466814994812, + "loss_ib": 0.002989451866596937, + "step": 748 + }, + { + "ce_ib": 10.671403884887695, + "ce_orig": 1.2401421070098877, + "epoch": 0.215112517075275, + "kl_loss": 0.12268570065498352, + "loss_ib": 0.0022939974442124367, + "step": 748 + }, + { + "ce_ib": 9.870866775512695, + "ce_orig": 0.7144081592559814, + "epoch": 0.215112517075275, + "kl_loss": 0.1475132703781128, + "loss_ib": 0.002462219214066863, + "step": 748 + }, + { + "ce_ib": 7.921173095703125, + "ce_orig": 0.7684431076049805, + "epoch": 0.21540010065425264, + "kl_loss": 0.18796128034591675, + "loss_ib": 0.0026717297732830048, + "step": 749 + }, + { + "ce_ib": 5.337991714477539, + "ce_orig": 0.701056718826294, + "epoch": 0.21540010065425264, + "kl_loss": 0.13435563445091248, + "loss_ib": 0.0018773555057123303, + "step": 749 + }, + { + "ce_ib": 8.451627731323242, + "ce_orig": 1.0293208360671997, + "epoch": 0.21540010065425264, + "kl_loss": 0.13992534577846527, + "loss_ib": 0.002244416158646345, + "step": 749 + }, + { + "ce_ib": 9.440494537353516, + "ce_orig": 0.6093024015426636, + "epoch": 0.21540010065425264, + "kl_loss": 0.16520021855831146, + "loss_ib": 0.0025960516650229692, + "step": 749 + }, + { + "epoch": 0.2156876842332303, + "grad_norm": 0.0997624471783638, + "learning_rate": 4.977759698570328e-05, + "loss": 0.8982, + "step": 750 + }, + { + "ce_ib": 7.538696765899658, + "ce_orig": 0.8382879495620728, + "epoch": 0.2156876842332303, + "kl_loss": 0.13652510941028595, + "loss_ib": 0.0021191206760704517, + "step": 750 + }, + { + "ce_ib": 12.170858383178711, + "ce_orig": 0.9007084369659424, + "epoch": 0.2156876842332303, + "kl_loss": 0.1638849973678589, + "loss_ib": 0.002855935599654913, + "step": 750 + }, + { + "ce_ib": 11.650144577026367, + "ce_orig": 1.1218953132629395, + "epoch": 0.2156876842332303, + "kl_loss": 0.15194326639175415, + "loss_ib": 0.0026844472158700228, + "step": 750 + }, + { + "ce_ib": 10.354049682617188, + "ce_orig": 0.8704142570495605, + "epoch": 0.2156876842332303, + "kl_loss": 0.14096030592918396, + "loss_ib": 0.0024450079072266817, + "step": 750 + }, + { + "ce_ib": 8.408703804016113, + "ce_orig": 0.4949643015861511, + "epoch": 0.2159752678122079, + "kl_loss": 0.09109346568584442, + "loss_ib": 0.0017518049571663141, + "step": 751 + }, + { + "ce_ib": 9.992585182189941, + "ce_orig": 0.7481764554977417, + "epoch": 0.2159752678122079, + "kl_loss": 0.22328117489814758, + "loss_ib": 0.003232070244848728, + "step": 751 + }, + { + "ce_ib": 6.670741081237793, + "ce_orig": 0.8640801906585693, + "epoch": 0.2159752678122079, + "kl_loss": 0.16885803639888763, + "loss_ib": 0.002355654491111636, + "step": 751 + }, + { + "ce_ib": 8.197010040283203, + "ce_orig": 0.8291086554527283, + "epoch": 0.2159752678122079, + "kl_loss": 0.20890876650810242, + "loss_ib": 0.0029087886214256287, + "step": 751 + }, + { + "ce_ib": 9.433305740356445, + "ce_orig": 1.1036256551742554, + "epoch": 0.21626285139118556, + "kl_loss": 0.0969347208738327, + "loss_ib": 0.0019126776605844498, + "step": 752 + }, + { + "ce_ib": 10.305464744567871, + "ce_orig": 0.9490968585014343, + "epoch": 0.21626285139118556, + "kl_loss": 0.12411025166511536, + "loss_ib": 0.0022716489620506763, + "step": 752 + }, + { + "ce_ib": 6.960268497467041, + "ce_orig": 0.6807049512863159, + "epoch": 0.21626285139118556, + "kl_loss": 0.2037605196237564, + "loss_ib": 0.0027336319908499718, + "step": 752 + }, + { + "ce_ib": 14.218523025512695, + "ce_orig": 1.866547703742981, + "epoch": 0.21626285139118556, + "kl_loss": 0.1546570360660553, + "loss_ib": 0.0029684226028621197, + "step": 752 + }, + { + "ce_ib": 5.236147403717041, + "ce_orig": 0.6408563256263733, + "epoch": 0.2165504349701632, + "kl_loss": 0.11485590040683746, + "loss_ib": 0.0016721737338230014, + "step": 753 + }, + { + "ce_ib": 13.076562881469727, + "ce_orig": 1.6438502073287964, + "epoch": 0.2165504349701632, + "kl_loss": 0.18503537774085999, + "loss_ib": 0.0031580100767314434, + "step": 753 + }, + { + "ce_ib": 7.389747142791748, + "ce_orig": 1.122937798500061, + "epoch": 0.2165504349701632, + "kl_loss": 0.12777473032474518, + "loss_ib": 0.0020167219918221235, + "step": 753 + }, + { + "ce_ib": 9.260527610778809, + "ce_orig": 0.832629919052124, + "epoch": 0.2165504349701632, + "kl_loss": 0.17178930342197418, + "loss_ib": 0.002643945859745145, + "step": 753 + }, + { + "ce_ib": 8.497020721435547, + "ce_orig": 1.3572919368743896, + "epoch": 0.21683801854914084, + "kl_loss": 0.14210839569568634, + "loss_ib": 0.0022707858588546515, + "step": 754 + }, + { + "ce_ib": 8.139203071594238, + "ce_orig": 0.7181857824325562, + "epoch": 0.21683801854914084, + "kl_loss": 0.13708284497261047, + "loss_ib": 0.002184748649597168, + "step": 754 + }, + { + "ce_ib": 7.636706829071045, + "ce_orig": 1.157459020614624, + "epoch": 0.21683801854914084, + "kl_loss": 0.1138184517621994, + "loss_ib": 0.0019018551101908088, + "step": 754 + }, + { + "ce_ib": 7.350226879119873, + "ce_orig": 0.8966202735900879, + "epoch": 0.21683801854914084, + "kl_loss": 0.12013451755046844, + "loss_ib": 0.0019363677129149437, + "step": 754 + }, + { + "epoch": 0.2171256021281185, + "grad_norm": 0.10654988884925842, + "learning_rate": 4.9772402659539674e-05, + "loss": 0.9059, + "step": 755 + }, + { + "ce_ib": 6.863460540771484, + "ce_orig": 0.7471085786819458, + "epoch": 0.2171256021281185, + "kl_loss": 0.12676385045051575, + "loss_ib": 0.0019539843779057264, + "step": 755 + }, + { + "ce_ib": 5.62061882019043, + "ce_orig": 0.4919710159301758, + "epoch": 0.2171256021281185, + "kl_loss": 0.1688041090965271, + "loss_ib": 0.0022501028142869473, + "step": 755 + }, + { + "ce_ib": 7.917287349700928, + "ce_orig": 0.42383748292922974, + "epoch": 0.2171256021281185, + "kl_loss": 0.15159422159194946, + "loss_ib": 0.0023076708894222975, + "step": 755 + }, + { + "ce_ib": 7.0679731369018555, + "ce_orig": 0.9794975519180298, + "epoch": 0.2171256021281185, + "kl_loss": 0.12907591462135315, + "loss_ib": 0.0019975563045591116, + "step": 755 + }, + { + "ce_ib": 10.093253135681152, + "ce_orig": 0.6868664622306824, + "epoch": 0.2174131857070961, + "kl_loss": 0.1843118667602539, + "loss_ib": 0.0028524440713226795, + "step": 756 + }, + { + "ce_ib": 8.960195541381836, + "ce_orig": 0.9731432795524597, + "epoch": 0.2174131857070961, + "kl_loss": 0.15665888786315918, + "loss_ib": 0.0024626085069030523, + "step": 756 + }, + { + "ce_ib": 8.387116432189941, + "ce_orig": 0.6317304372787476, + "epoch": 0.2174131857070961, + "kl_loss": 0.1664927899837494, + "loss_ib": 0.002503639319911599, + "step": 756 + }, + { + "ce_ib": 7.766140937805176, + "ce_orig": 0.6035816073417664, + "epoch": 0.2174131857070961, + "kl_loss": 0.12087871134281158, + "loss_ib": 0.0019854011479765177, + "step": 756 + }, + { + "ce_ib": 9.264239311218262, + "ce_orig": 0.9195747375488281, + "epoch": 0.21770076928607376, + "kl_loss": 0.1662123203277588, + "loss_ib": 0.0025885470677167177, + "step": 757 + }, + { + "ce_ib": 10.044254302978516, + "ce_orig": 1.1064798831939697, + "epoch": 0.21770076928607376, + "kl_loss": 0.17693579196929932, + "loss_ib": 0.0027737831696867943, + "step": 757 + }, + { + "ce_ib": 6.537443161010742, + "ce_orig": 0.634574294090271, + "epoch": 0.21770076928607376, + "kl_loss": 0.2076174020767212, + "loss_ib": 0.0027299183420836926, + "step": 757 + }, + { + "ce_ib": 7.090516567230225, + "ce_orig": 0.8195549249649048, + "epoch": 0.21770076928607376, + "kl_loss": 0.0990576520562172, + "loss_ib": 0.001699628192000091, + "step": 757 + }, + { + "ce_ib": 7.096627712249756, + "ce_orig": 0.6805820465087891, + "epoch": 0.21798835286505142, + "kl_loss": 0.09520435333251953, + "loss_ib": 0.0016617062501609325, + "step": 758 + }, + { + "ce_ib": 6.252221584320068, + "ce_orig": 0.5069523453712463, + "epoch": 0.21798835286505142, + "kl_loss": 0.11282311379909515, + "loss_ib": 0.0017534532817080617, + "step": 758 + }, + { + "ce_ib": 9.68701171875, + "ce_orig": 1.14574134349823, + "epoch": 0.21798835286505142, + "kl_loss": 0.139566570520401, + "loss_ib": 0.0023643667809665203, + "step": 758 + }, + { + "ce_ib": 7.7025065422058105, + "ce_orig": 0.6887799501419067, + "epoch": 0.21798835286505142, + "kl_loss": 0.1532985270023346, + "loss_ib": 0.002303235698491335, + "step": 758 + }, + { + "ce_ib": 5.569612503051758, + "ce_orig": 0.7203543186187744, + "epoch": 0.21827593644402904, + "kl_loss": 0.17881646752357483, + "loss_ib": 0.002345125889405608, + "step": 759 + }, + { + "ce_ib": 7.569202899932861, + "ce_orig": 0.6448748707771301, + "epoch": 0.21827593644402904, + "kl_loss": 0.15759307146072388, + "loss_ib": 0.0023328508250415325, + "step": 759 + }, + { + "ce_ib": 7.562583923339844, + "ce_orig": 0.7065814733505249, + "epoch": 0.21827593644402904, + "kl_loss": 0.20230957865715027, + "loss_ib": 0.0027793541084975004, + "step": 759 + }, + { + "ce_ib": 7.415134429931641, + "ce_orig": 0.8418720960617065, + "epoch": 0.21827593644402904, + "kl_loss": 0.16802442073822021, + "loss_ib": 0.0024217574391514063, + "step": 759 + }, + { + "epoch": 0.2185635200230067, + "grad_norm": 0.09956327825784683, + "learning_rate": 4.976714865090827e-05, + "loss": 0.871, + "step": 760 + }, + { + "ce_ib": 8.476737976074219, + "ce_orig": 0.9896325469017029, + "epoch": 0.2185635200230067, + "kl_loss": 0.10956001281738281, + "loss_ib": 0.001943273819051683, + "step": 760 + }, + { + "ce_ib": 7.939772605895996, + "ce_orig": 0.4322037100791931, + "epoch": 0.2185635200230067, + "kl_loss": 0.19129762053489685, + "loss_ib": 0.0027069533243775368, + "step": 760 + }, + { + "ce_ib": 4.769078254699707, + "ce_orig": 0.621930718421936, + "epoch": 0.2185635200230067, + "kl_loss": 0.08950284123420715, + "loss_ib": 0.0013719361741095781, + "step": 760 + }, + { + "ce_ib": 11.311341285705566, + "ce_orig": 1.3912595510482788, + "epoch": 0.2185635200230067, + "kl_loss": 0.10796618461608887, + "loss_ib": 0.0022107958793640137, + "step": 760 + }, + { + "ce_ib": 11.38323974609375, + "ce_orig": 1.3958052396774292, + "epoch": 0.21885110360198431, + "kl_loss": 0.1116107627749443, + "loss_ib": 0.00225443160161376, + "step": 761 + }, + { + "ce_ib": 7.089112758636475, + "ce_orig": 0.39750197529792786, + "epoch": 0.21885110360198431, + "kl_loss": 0.22810953855514526, + "loss_ib": 0.0029900067020207644, + "step": 761 + }, + { + "ce_ib": 9.000521659851074, + "ce_orig": 0.47054076194763184, + "epoch": 0.21885110360198431, + "kl_loss": 0.1665971875190735, + "loss_ib": 0.002566023962572217, + "step": 761 + }, + { + "ce_ib": 7.182977676391602, + "ce_orig": 0.8364933133125305, + "epoch": 0.21885110360198431, + "kl_loss": 0.11331808567047119, + "loss_ib": 0.0018514784751459956, + "step": 761 + }, + { + "ce_ib": 12.852972030639648, + "ce_orig": 1.7782992124557495, + "epoch": 0.21913868718096197, + "kl_loss": 0.1779230237007141, + "loss_ib": 0.0030645274091511965, + "step": 762 + }, + { + "ce_ib": 12.079954147338867, + "ce_orig": 1.306889533996582, + "epoch": 0.21913868718096197, + "kl_loss": 0.15216562151908875, + "loss_ib": 0.002729651518166065, + "step": 762 + }, + { + "ce_ib": 7.572328567504883, + "ce_orig": 1.0523992776870728, + "epoch": 0.21913868718096197, + "kl_loss": 0.1251816600561142, + "loss_ib": 0.002009049290791154, + "step": 762 + }, + { + "ce_ib": 9.228711128234863, + "ce_orig": 0.7844505310058594, + "epoch": 0.21913868718096197, + "kl_loss": 0.18018165230751038, + "loss_ib": 0.002724687336012721, + "step": 762 + }, + { + "ce_ib": 11.388764381408691, + "ce_orig": 0.7564859390258789, + "epoch": 0.21942627075993962, + "kl_loss": 0.3528626263141632, + "loss_ib": 0.004667502362281084, + "step": 763 + }, + { + "ce_ib": 10.088685989379883, + "ce_orig": 1.3417243957519531, + "epoch": 0.21942627075993962, + "kl_loss": 0.1274024248123169, + "loss_ib": 0.002282892819494009, + "step": 763 + }, + { + "ce_ib": 5.146511554718018, + "ce_orig": 0.5599508285522461, + "epoch": 0.21942627075993962, + "kl_loss": 0.1776261031627655, + "loss_ib": 0.0022909119725227356, + "step": 763 + }, + { + "ce_ib": 7.239597797393799, + "ce_orig": 0.857480525970459, + "epoch": 0.21942627075993962, + "kl_loss": 0.11923994868993759, + "loss_ib": 0.0019163591787219048, + "step": 763 + }, + { + "ce_ib": 6.657400608062744, + "ce_orig": 0.642911970615387, + "epoch": 0.21971385433891724, + "kl_loss": 0.09522129595279694, + "loss_ib": 0.001617952948436141, + "step": 764 + }, + { + "ce_ib": 5.648870944976807, + "ce_orig": 0.5404942035675049, + "epoch": 0.21971385433891724, + "kl_loss": 0.2369777113199234, + "loss_ib": 0.002934664022177458, + "step": 764 + }, + { + "ce_ib": 5.368570327758789, + "ce_orig": 0.2739843726158142, + "epoch": 0.21971385433891724, + "kl_loss": 0.0845465362071991, + "loss_ib": 0.00138232228346169, + "step": 764 + }, + { + "ce_ib": 10.863349914550781, + "ce_orig": 0.7003340721130371, + "epoch": 0.21971385433891724, + "kl_loss": 0.4336280822753906, + "loss_ib": 0.00542261591181159, + "step": 764 + }, + { + "epoch": 0.2200014379178949, + "grad_norm": 0.08464247733354568, + "learning_rate": 4.9761834972467185e-05, + "loss": 0.8365, + "step": 765 + }, + { + "ce_ib": 9.576865196228027, + "ce_orig": 1.0863311290740967, + "epoch": 0.2200014379178949, + "kl_loss": 0.08513174206018448, + "loss_ib": 0.0018090038793161511, + "step": 765 + }, + { + "ce_ib": 4.838799953460693, + "ce_orig": 0.6935594081878662, + "epoch": 0.2200014379178949, + "kl_loss": 0.08847616612911224, + "loss_ib": 0.001368641504086554, + "step": 765 + }, + { + "ce_ib": 9.570419311523438, + "ce_orig": 1.0386337041854858, + "epoch": 0.2200014379178949, + "kl_loss": 0.16993722319602966, + "loss_ib": 0.002656414173543453, + "step": 765 + }, + { + "ce_ib": 6.40081262588501, + "ce_orig": 0.8035165667533875, + "epoch": 0.2200014379178949, + "kl_loss": 0.0860595852136612, + "loss_ib": 0.0015006770845502615, + "step": 765 + }, + { + "ce_ib": 5.312402248382568, + "ce_orig": 0.509066641330719, + "epoch": 0.22028902149687252, + "kl_loss": 0.09284783899784088, + "loss_ib": 0.0014597185654565692, + "step": 766 + }, + { + "ce_ib": 7.273090839385986, + "ce_orig": 0.7035940289497375, + "epoch": 0.22028902149687252, + "kl_loss": 0.1298186182975769, + "loss_ib": 0.0020254950504750013, + "step": 766 + }, + { + "ce_ib": 8.391249656677246, + "ce_orig": 1.0880930423736572, + "epoch": 0.22028902149687252, + "kl_loss": 0.1667131632566452, + "loss_ib": 0.0025062565691769123, + "step": 766 + }, + { + "ce_ib": 8.774653434753418, + "ce_orig": 0.9759833216667175, + "epoch": 0.22028902149687252, + "kl_loss": 0.13655591011047363, + "loss_ib": 0.0022430242970585823, + "step": 766 + }, + { + "ce_ib": 9.209096908569336, + "ce_orig": 0.836510956287384, + "epoch": 0.22057660507585017, + "kl_loss": 0.2317269891500473, + "loss_ib": 0.0032381797209382057, + "step": 767 + }, + { + "ce_ib": 8.248529434204102, + "ce_orig": 0.668925404548645, + "epoch": 0.22057660507585017, + "kl_loss": 0.15469834208488464, + "loss_ib": 0.00237183622084558, + "step": 767 + }, + { + "ce_ib": 9.433579444885254, + "ce_orig": 1.0252512693405151, + "epoch": 0.22057660507585017, + "kl_loss": 0.11495402455329895, + "loss_ib": 0.0020928981248289347, + "step": 767 + }, + { + "ce_ib": 4.54541015625, + "ce_orig": 0.5551728010177612, + "epoch": 0.22057660507585017, + "kl_loss": 0.14444425702095032, + "loss_ib": 0.0018989834934473038, + "step": 767 + }, + { + "ce_ib": 10.690596580505371, + "ce_orig": 0.8334415555000305, + "epoch": 0.22086418865482782, + "kl_loss": 0.14985781908035278, + "loss_ib": 0.002567637711763382, + "step": 768 + }, + { + "ce_ib": 9.497960090637207, + "ce_orig": 1.0465785264968872, + "epoch": 0.22086418865482782, + "kl_loss": 0.10572009533643723, + "loss_ib": 0.002006996888667345, + "step": 768 + }, + { + "ce_ib": 9.094498634338379, + "ce_orig": 0.8460086584091187, + "epoch": 0.22086418865482782, + "kl_loss": 0.12992800772190094, + "loss_ib": 0.002208729973062873, + "step": 768 + }, + { + "ce_ib": 7.6064934730529785, + "ce_orig": 0.7029877305030823, + "epoch": 0.22086418865482782, + "kl_loss": 0.2135726511478424, + "loss_ib": 0.0028963754884898663, + "step": 768 + }, + { + "ce_ib": 10.828954696655273, + "ce_orig": 0.9486677646636963, + "epoch": 0.22115177223380544, + "kl_loss": 0.1448005884885788, + "loss_ib": 0.0025309014599770308, + "step": 769 + }, + { + "ce_ib": 6.5569987297058105, + "ce_orig": 0.5725328326225281, + "epoch": 0.22115177223380544, + "kl_loss": 0.14763084053993225, + "loss_ib": 0.0021320083178579807, + "step": 769 + }, + { + "ce_ib": 8.761213302612305, + "ce_orig": 0.9140744209289551, + "epoch": 0.22115177223380544, + "kl_loss": 0.10454612970352173, + "loss_ib": 0.0019215825013816357, + "step": 769 + }, + { + "ce_ib": 12.391948699951172, + "ce_orig": 1.4241856336593628, + "epoch": 0.22115177223380544, + "kl_loss": 0.16774173080921173, + "loss_ib": 0.0029166119638830423, + "step": 769 + }, + { + "epoch": 0.2214393558127831, + "grad_norm": 0.09514256566762924, + "learning_rate": 4.975646163701831e-05, + "loss": 0.9192, + "step": 770 + }, + { + "ce_ib": 9.242008209228516, + "ce_orig": 1.029587984085083, + "epoch": 0.2214393558127831, + "kl_loss": 0.16982057690620422, + "loss_ib": 0.0026224064640700817, + "step": 770 + }, + { + "ce_ib": 6.082286357879639, + "ce_orig": 0.7157595753669739, + "epoch": 0.2214393558127831, + "kl_loss": 0.12317483127117157, + "loss_ib": 0.0018399768741801381, + "step": 770 + }, + { + "ce_ib": 14.0202054977417, + "ce_orig": 2.0901854038238525, + "epoch": 0.2214393558127831, + "kl_loss": 0.15905095636844635, + "loss_ib": 0.0029925298877060413, + "step": 770 + }, + { + "ce_ib": 8.374448776245117, + "ce_orig": 0.7532703280448914, + "epoch": 0.2214393558127831, + "kl_loss": 0.14771360158920288, + "loss_ib": 0.002314580837264657, + "step": 770 + }, + { + "ce_ib": 7.05565881729126, + "ce_orig": 0.4825326204299927, + "epoch": 0.22172693939176072, + "kl_loss": 0.13400578498840332, + "loss_ib": 0.0020456237252801657, + "step": 771 + }, + { + "ce_ib": 9.674530982971191, + "ce_orig": 0.9703323245048523, + "epoch": 0.22172693939176072, + "kl_loss": 0.21550621092319489, + "loss_ib": 0.003122515045106411, + "step": 771 + }, + { + "ce_ib": 7.9153361320495605, + "ce_orig": 0.7711546421051025, + "epoch": 0.22172693939176072, + "kl_loss": 0.1621248424053192, + "loss_ib": 0.0024127820506691933, + "step": 771 + }, + { + "ce_ib": 5.088393211364746, + "ce_orig": 0.41473719477653503, + "epoch": 0.22172693939176072, + "kl_loss": 0.2381688356399536, + "loss_ib": 0.002890527481213212, + "step": 771 + }, + { + "ce_ib": 5.727818965911865, + "ce_orig": 0.47517552971839905, + "epoch": 0.22201452297073837, + "kl_loss": 0.09936833381652832, + "loss_ib": 0.0015664651291444898, + "step": 772 + }, + { + "ce_ib": 8.125673294067383, + "ce_orig": 0.8591288924217224, + "epoch": 0.22201452297073837, + "kl_loss": 0.16502976417541504, + "loss_ib": 0.0024628648534417152, + "step": 772 + }, + { + "ce_ib": 5.820714473724365, + "ce_orig": 0.6233316659927368, + "epoch": 0.22201452297073837, + "kl_loss": 0.1810447871685028, + "loss_ib": 0.0023925192654132843, + "step": 772 + }, + { + "ce_ib": 4.671726703643799, + "ce_orig": 0.31970176100730896, + "epoch": 0.22201452297073837, + "kl_loss": 0.06134669482707977, + "loss_ib": 0.001080639660358429, + "step": 772 + }, + { + "ce_ib": 7.360152721405029, + "ce_orig": 0.5947248935699463, + "epoch": 0.22230210654971602, + "kl_loss": 0.1539122462272644, + "loss_ib": 0.002275137696415186, + "step": 773 + }, + { + "ce_ib": 6.672601222991943, + "ce_orig": 0.6609217524528503, + "epoch": 0.22230210654971602, + "kl_loss": 0.1591755449771881, + "loss_ib": 0.0022590155713260174, + "step": 773 + }, + { + "ce_ib": 5.962435245513916, + "ce_orig": 0.6456025242805481, + "epoch": 0.22230210654971602, + "kl_loss": 0.08421975374221802, + "loss_ib": 0.0014384409878402948, + "step": 773 + }, + { + "ce_ib": 10.913044929504395, + "ce_orig": 1.3626434803009033, + "epoch": 0.22230210654971602, + "kl_loss": 0.18016685545444489, + "loss_ib": 0.0028929731342941523, + "step": 773 + }, + { + "ce_ib": 5.439235687255859, + "ce_orig": 0.5556134581565857, + "epoch": 0.22258969012869365, + "kl_loss": 0.14146508276462555, + "loss_ib": 0.001958574401214719, + "step": 774 + }, + { + "ce_ib": 11.287528038024902, + "ce_orig": 1.369814395904541, + "epoch": 0.22258969012869365, + "kl_loss": 0.1479172706604004, + "loss_ib": 0.0026079255621880293, + "step": 774 + }, + { + "ce_ib": 10.662454605102539, + "ce_orig": 1.3886735439300537, + "epoch": 0.22258969012869365, + "kl_loss": 0.1496671736240387, + "loss_ib": 0.0025629170704632998, + "step": 774 + }, + { + "ce_ib": 6.370062351226807, + "ce_orig": 0.7246447801589966, + "epoch": 0.22258969012869365, + "kl_loss": 0.10358047485351562, + "loss_ib": 0.0016728108748793602, + "step": 774 + }, + { + "epoch": 0.2228772737076713, + "grad_norm": 0.09098898619413376, + "learning_rate": 4.975102865750725e-05, + "loss": 0.8571, + "step": 775 + }, + { + "ce_ib": 9.296186447143555, + "ce_orig": 0.8976593613624573, + "epoch": 0.2228772737076713, + "kl_loss": 0.11623773723840714, + "loss_ib": 0.0020919961389154196, + "step": 775 + }, + { + "ce_ib": 12.612628936767578, + "ce_orig": 1.3324798345565796, + "epoch": 0.2228772737076713, + "kl_loss": 0.14951446652412415, + "loss_ib": 0.0027564074844121933, + "step": 775 + }, + { + "ce_ib": 7.314347267150879, + "ce_orig": 0.8472996354103088, + "epoch": 0.2228772737076713, + "kl_loss": 0.3302251100540161, + "loss_ib": 0.00403368566185236, + "step": 775 + }, + { + "ce_ib": 7.56318473815918, + "ce_orig": 0.82821124792099, + "epoch": 0.2228772737076713, + "kl_loss": 0.13697001338005066, + "loss_ib": 0.0021260185167193413, + "step": 775 + }, + { + "ce_ib": 8.542595863342285, + "ce_orig": 1.0312063694000244, + "epoch": 0.22316485728664892, + "kl_loss": 0.11569753289222717, + "loss_ib": 0.0020112348720431328, + "step": 776 + }, + { + "ce_ib": 7.049263000488281, + "ce_orig": 0.8572791814804077, + "epoch": 0.22316485728664892, + "kl_loss": 0.12043007463216782, + "loss_ib": 0.0019092269940301776, + "step": 776 + }, + { + "ce_ib": 9.571832656860352, + "ce_orig": 0.8414773941040039, + "epoch": 0.22316485728664892, + "kl_loss": 0.22330361604690552, + "loss_ib": 0.0031902194023132324, + "step": 776 + }, + { + "ce_ib": 6.793343544006348, + "ce_orig": 0.8694676160812378, + "epoch": 0.22316485728664892, + "kl_loss": 0.10456552356481552, + "loss_ib": 0.0017249895026907325, + "step": 776 + }, + { + "ce_ib": 8.001526832580566, + "ce_orig": 0.7133891582489014, + "epoch": 0.22345244086562657, + "kl_loss": 0.12774983048439026, + "loss_ib": 0.0020776509772986174, + "step": 777 + }, + { + "ce_ib": 6.953750133514404, + "ce_orig": 0.7376645803451538, + "epoch": 0.22345244086562657, + "kl_loss": 0.11668390780687332, + "loss_ib": 0.001862214063294232, + "step": 777 + }, + { + "ce_ib": 14.96567440032959, + "ce_orig": 2.0463645458221436, + "epoch": 0.22345244086562657, + "kl_loss": 0.23001086711883545, + "loss_ib": 0.003796675940975547, + "step": 777 + }, + { + "ce_ib": 6.386999130249023, + "ce_orig": 0.5333241820335388, + "epoch": 0.22345244086562657, + "kl_loss": 0.1477910727262497, + "loss_ib": 0.002116610761731863, + "step": 777 + }, + { + "ce_ib": 4.84302282333374, + "ce_orig": 0.6310755014419556, + "epoch": 0.22374002444460422, + "kl_loss": 0.08825128525495529, + "loss_ib": 0.0013668150641024113, + "step": 778 + }, + { + "ce_ib": 11.377039909362793, + "ce_orig": 1.1023659706115723, + "epoch": 0.22374002444460422, + "kl_loss": 0.14939387142658234, + "loss_ib": 0.0026316428557038307, + "step": 778 + }, + { + "ce_ib": 8.388514518737793, + "ce_orig": 0.8287127614021301, + "epoch": 0.22374002444460422, + "kl_loss": 0.15204116702079773, + "loss_ib": 0.002359262900426984, + "step": 778 + }, + { + "ce_ib": 7.143672943115234, + "ce_orig": 1.143507719039917, + "epoch": 0.22374002444460422, + "kl_loss": 0.15379482507705688, + "loss_ib": 0.0022523156367242336, + "step": 778 + }, + { + "ce_ib": 6.243022918701172, + "ce_orig": 0.6409770846366882, + "epoch": 0.22402760802358185, + "kl_loss": 0.1467035710811615, + "loss_ib": 0.00209133792668581, + "step": 779 + }, + { + "ce_ib": 6.419865608215332, + "ce_orig": 0.5069408416748047, + "epoch": 0.22402760802358185, + "kl_loss": 0.11442327499389648, + "loss_ib": 0.0017862193053588271, + "step": 779 + }, + { + "ce_ib": 8.781683921813965, + "ce_orig": 0.7263645529747009, + "epoch": 0.22402760802358185, + "kl_loss": 0.1588398516178131, + "loss_ib": 0.0024665668606758118, + "step": 779 + }, + { + "ce_ib": 5.928696155548096, + "ce_orig": 0.7882704734802246, + "epoch": 0.22402760802358185, + "kl_loss": 0.11991754919290543, + "loss_ib": 0.0017920450773090124, + "step": 779 + }, + { + "epoch": 0.2243151916025595, + "grad_norm": 0.10398890823125839, + "learning_rate": 4.9745536047023324e-05, + "loss": 0.8706, + "step": 780 + }, + { + "ce_ib": 8.916302680969238, + "ce_orig": 0.8575690984725952, + "epoch": 0.2243151916025595, + "kl_loss": 0.15519979596138, + "loss_ib": 0.002443628152832389, + "step": 780 + }, + { + "ce_ib": 9.42071533203125, + "ce_orig": 0.7921391725540161, + "epoch": 0.2243151916025595, + "kl_loss": 0.1433313935995102, + "loss_ib": 0.0023753854911774397, + "step": 780 + }, + { + "ce_ib": 8.125630378723145, + "ce_orig": 1.126109004020691, + "epoch": 0.2243151916025595, + "kl_loss": 0.1137545108795166, + "loss_ib": 0.001950108096934855, + "step": 780 + }, + { + "ce_ib": 9.454373359680176, + "ce_orig": 0.7673488259315491, + "epoch": 0.2243151916025595, + "kl_loss": 0.1790568232536316, + "loss_ib": 0.0027360054664313793, + "step": 780 + }, + { + "ce_ib": 8.905075073242188, + "ce_orig": 1.072008490562439, + "epoch": 0.22460277518153712, + "kl_loss": 0.2111148089170456, + "loss_ib": 0.0030016556847840548, + "step": 781 + }, + { + "ce_ib": 6.170800685882568, + "ce_orig": 0.6450889706611633, + "epoch": 0.22460277518153712, + "kl_loss": 0.14403045177459717, + "loss_ib": 0.00205738446675241, + "step": 781 + }, + { + "ce_ib": 4.165629863739014, + "ce_orig": 0.602213442325592, + "epoch": 0.22460277518153712, + "kl_loss": 0.09626461565494537, + "loss_ib": 0.001379209104925394, + "step": 781 + }, + { + "ce_ib": 11.429503440856934, + "ce_orig": 1.4089714288711548, + "epoch": 0.22460277518153712, + "kl_loss": 0.07960424572229385, + "loss_ib": 0.0019389926455914974, + "step": 781 + }, + { + "ce_ib": 5.01399040222168, + "ce_orig": 0.8204609751701355, + "epoch": 0.22489035876051477, + "kl_loss": 0.13064663112163544, + "loss_ib": 0.0018078653374686837, + "step": 782 + }, + { + "ce_ib": 8.626324653625488, + "ce_orig": 1.1237170696258545, + "epoch": 0.22489035876051477, + "kl_loss": 0.10630378127098083, + "loss_ib": 0.0019256701925769448, + "step": 782 + }, + { + "ce_ib": 8.674455642700195, + "ce_orig": 1.2192915678024292, + "epoch": 0.22489035876051477, + "kl_loss": 0.11441108584403992, + "loss_ib": 0.002011556178331375, + "step": 782 + }, + { + "ce_ib": 4.90167760848999, + "ce_orig": 0.7894431352615356, + "epoch": 0.22489035876051477, + "kl_loss": 0.13594220578670502, + "loss_ib": 0.0018495897529646754, + "step": 782 + }, + { + "ce_ib": 8.894073486328125, + "ce_orig": 0.8344406485557556, + "epoch": 0.22517794233949243, + "kl_loss": 0.2180064618587494, + "loss_ib": 0.0030694720335304737, + "step": 783 + }, + { + "ce_ib": 7.178038597106934, + "ce_orig": 0.6856690645217896, + "epoch": 0.22517794233949243, + "kl_loss": 0.14548270404338837, + "loss_ib": 0.0021726307459175587, + "step": 783 + }, + { + "ce_ib": 5.8297576904296875, + "ce_orig": 0.644056499004364, + "epoch": 0.22517794233949243, + "kl_loss": 0.1136014312505722, + "loss_ib": 0.0017189900390803814, + "step": 783 + }, + { + "ce_ib": 6.987138271331787, + "ce_orig": 0.2856312394142151, + "epoch": 0.22517794233949243, + "kl_loss": 0.25451338291168213, + "loss_ib": 0.003243847517296672, + "step": 783 + }, + { + "ce_ib": 7.792212009429932, + "ce_orig": 1.1498944759368896, + "epoch": 0.22546552591847005, + "kl_loss": 0.09925955533981323, + "loss_ib": 0.0017718167509883642, + "step": 784 + }, + { + "ce_ib": 10.821850776672363, + "ce_orig": 1.169548511505127, + "epoch": 0.22546552591847005, + "kl_loss": 0.1603054404258728, + "loss_ib": 0.0026852393057197332, + "step": 784 + }, + { + "ce_ib": 11.060176849365234, + "ce_orig": 0.6884800791740417, + "epoch": 0.22546552591847005, + "kl_loss": 0.23210455477237701, + "loss_ib": 0.00342706311494112, + "step": 784 + }, + { + "ce_ib": 7.353107929229736, + "ce_orig": 1.0316444635391235, + "epoch": 0.22546552591847005, + "kl_loss": 0.11858627945184708, + "loss_ib": 0.0019211735343560576, + "step": 784 + }, + { + "epoch": 0.2257531094974477, + "grad_norm": 0.09250946342945099, + "learning_rate": 4.973998381879949e-05, + "loss": 0.8962, + "step": 785 + }, + { + "ce_ib": 10.600707054138184, + "ce_orig": 1.2356044054031372, + "epoch": 0.2257531094974477, + "kl_loss": 0.09297977387905121, + "loss_ib": 0.0019898684695363045, + "step": 785 + }, + { + "ce_ib": 9.557263374328613, + "ce_orig": 0.9233797788619995, + "epoch": 0.2257531094974477, + "kl_loss": 0.13532377779483795, + "loss_ib": 0.002308964030817151, + "step": 785 + }, + { + "ce_ib": 11.174012184143066, + "ce_orig": 1.6008797883987427, + "epoch": 0.2257531094974477, + "kl_loss": 0.20560556650161743, + "loss_ib": 0.003173456760123372, + "step": 785 + }, + { + "ce_ib": 7.452459812164307, + "ce_orig": 0.6774175763130188, + "epoch": 0.2257531094974477, + "kl_loss": 0.15653352439403534, + "loss_ib": 0.002310581039637327, + "step": 785 + }, + { + "ce_ib": 4.595323085784912, + "ce_orig": 0.48615762591362, + "epoch": 0.22604069307642533, + "kl_loss": 0.10117632150650024, + "loss_ib": 0.0014712954871356487, + "step": 786 + }, + { + "ce_ib": 9.364824295043945, + "ce_orig": 1.0560548305511475, + "epoch": 0.22604069307642533, + "kl_loss": 0.16391177475452423, + "loss_ib": 0.002575600054115057, + "step": 786 + }, + { + "ce_ib": 5.813327789306641, + "ce_orig": 0.7324556708335876, + "epoch": 0.22604069307642533, + "kl_loss": 0.14763601124286652, + "loss_ib": 0.0020576927345246077, + "step": 786 + }, + { + "ce_ib": 8.11837100982666, + "ce_orig": 0.6141310334205627, + "epoch": 0.22604069307642533, + "kl_loss": 0.19747400283813477, + "loss_ib": 0.0027865769807249308, + "step": 786 + }, + { + "ce_ib": 7.725346565246582, + "ce_orig": 0.5023128390312195, + "epoch": 0.22632827665540298, + "kl_loss": 0.1939527690410614, + "loss_ib": 0.0027120623271912336, + "step": 787 + }, + { + "ce_ib": 6.31313943862915, + "ce_orig": 0.5808637738227844, + "epoch": 0.22632827665540298, + "kl_loss": 0.1817108541727066, + "loss_ib": 0.0024484223686158657, + "step": 787 + }, + { + "ce_ib": 7.112109184265137, + "ce_orig": 0.41836559772491455, + "epoch": 0.22632827665540298, + "kl_loss": 0.1097191721200943, + "loss_ib": 0.0018084024777635932, + "step": 787 + }, + { + "ce_ib": 6.42437744140625, + "ce_orig": 0.45045945048332214, + "epoch": 0.22632827665540298, + "kl_loss": 0.4161728620529175, + "loss_ib": 0.004804166033864021, + "step": 787 + }, + { + "ce_ib": 6.569514274597168, + "ce_orig": 0.7998676300048828, + "epoch": 0.22661586023438063, + "kl_loss": 0.15867647528648376, + "loss_ib": 0.002243716036900878, + "step": 788 + }, + { + "ce_ib": 11.660774230957031, + "ce_orig": 1.3734692335128784, + "epoch": 0.22661586023438063, + "kl_loss": 0.1902739703655243, + "loss_ib": 0.0030688170809298754, + "step": 788 + }, + { + "ce_ib": 11.077461242675781, + "ce_orig": 1.3770290613174438, + "epoch": 0.22661586023438063, + "kl_loss": 0.1206635981798172, + "loss_ib": 0.0023143819998949766, + "step": 788 + }, + { + "ce_ib": 4.626669406890869, + "ce_orig": 0.5129048228263855, + "epoch": 0.22661586023438063, + "kl_loss": 0.13894721865653992, + "loss_ib": 0.0018521390156820416, + "step": 788 + }, + { + "ce_ib": 9.747454643249512, + "ce_orig": 0.8806663155555725, + "epoch": 0.22690344381335825, + "kl_loss": 0.216609925031662, + "loss_ib": 0.003140844637528062, + "step": 789 + }, + { + "ce_ib": 4.014584541320801, + "ce_orig": 0.5087191462516785, + "epoch": 0.22690344381335825, + "kl_loss": 0.10901661217212677, + "loss_ib": 0.0014916245127096772, + "step": 789 + }, + { + "ce_ib": 6.954845905303955, + "ce_orig": 0.5227988362312317, + "epoch": 0.22690344381335825, + "kl_loss": 0.13825106620788574, + "loss_ib": 0.0020779951009899378, + "step": 789 + }, + { + "ce_ib": 9.023216247558594, + "ce_orig": 0.9189206957817078, + "epoch": 0.22690344381335825, + "kl_loss": 0.14568957686424255, + "loss_ib": 0.0023592172656208277, + "step": 789 + }, + { + "epoch": 0.2271910273923359, + "grad_norm": 0.09999674558639526, + "learning_rate": 4.973437198621237e-05, + "loss": 0.8786, + "step": 790 + }, + { + "ce_ib": 9.979926109313965, + "ce_orig": 0.7355432510375977, + "epoch": 0.2271910273923359, + "kl_loss": 0.16418962180614471, + "loss_ib": 0.002639888785779476, + "step": 790 + }, + { + "ce_ib": 10.314345359802246, + "ce_orig": 1.2523021697998047, + "epoch": 0.2271910273923359, + "kl_loss": 0.14147615432739258, + "loss_ib": 0.0024461960420012474, + "step": 790 + }, + { + "ce_ib": 7.049835205078125, + "ce_orig": 0.5150954723358154, + "epoch": 0.2271910273923359, + "kl_loss": 0.21324026584625244, + "loss_ib": 0.002837385982275009, + "step": 790 + }, + { + "ce_ib": 6.2608137130737305, + "ce_orig": 0.8024607300758362, + "epoch": 0.2271910273923359, + "kl_loss": 0.11846175044775009, + "loss_ib": 0.0018106987699866295, + "step": 790 + }, + { + "ce_ib": 10.44774341583252, + "ce_orig": 0.47436025738716125, + "epoch": 0.22747861097131353, + "kl_loss": 0.30650708079338074, + "loss_ib": 0.004109845031052828, + "step": 791 + }, + { + "ce_ib": 6.676301956176758, + "ce_orig": 0.7750266194343567, + "epoch": 0.22747861097131353, + "kl_loss": 0.13108739256858826, + "loss_ib": 0.001978504005819559, + "step": 791 + }, + { + "ce_ib": 9.86817455291748, + "ce_orig": 0.7998197078704834, + "epoch": 0.22747861097131353, + "kl_loss": 0.158120796084404, + "loss_ib": 0.002568025141954422, + "step": 791 + }, + { + "ce_ib": 9.6447172164917, + "ce_orig": 1.2034450769424438, + "epoch": 0.22747861097131353, + "kl_loss": 0.15354730188846588, + "loss_ib": 0.0024999447632580996, + "step": 791 + }, + { + "ce_ib": 10.113997459411621, + "ce_orig": 0.657515823841095, + "epoch": 0.22776619455029118, + "kl_loss": 0.17590703070163727, + "loss_ib": 0.0027704699896275997, + "step": 792 + }, + { + "ce_ib": 9.48293685913086, + "ce_orig": 1.4745361804962158, + "epoch": 0.22776619455029118, + "kl_loss": 0.1749359369277954, + "loss_ib": 0.0026976531371474266, + "step": 792 + }, + { + "ce_ib": 12.24776554107666, + "ce_orig": 1.7418227195739746, + "epoch": 0.22776619455029118, + "kl_loss": 0.17912819981575012, + "loss_ib": 0.0030160583555698395, + "step": 792 + }, + { + "ce_ib": 5.070426940917969, + "ce_orig": 0.35741880536079407, + "epoch": 0.22776619455029118, + "kl_loss": 0.12080200016498566, + "loss_ib": 0.001715062651783228, + "step": 792 + }, + { + "ce_ib": 10.660409927368164, + "ce_orig": 0.9658419489860535, + "epoch": 0.22805377812926883, + "kl_loss": 0.17371653020381927, + "loss_ib": 0.002803206443786621, + "step": 793 + }, + { + "ce_ib": 5.962430477142334, + "ce_orig": 0.6669458150863647, + "epoch": 0.22805377812926883, + "kl_loss": 0.16286441683769226, + "loss_ib": 0.0022248870227485895, + "step": 793 + }, + { + "ce_ib": 7.378296852111816, + "ce_orig": 0.938609778881073, + "epoch": 0.22805377812926883, + "kl_loss": 0.15764188766479492, + "loss_ib": 0.002314248587936163, + "step": 793 + }, + { + "ce_ib": 10.278300285339355, + "ce_orig": 0.8079096078872681, + "epoch": 0.22805377812926883, + "kl_loss": 0.20027652382850647, + "loss_ib": 0.003030595136806369, + "step": 793 + }, + { + "ce_ib": 3.596778154373169, + "ce_orig": 0.483516126871109, + "epoch": 0.22834136170824645, + "kl_loss": 0.4539979100227356, + "loss_ib": 0.004899656865745783, + "step": 794 + }, + { + "ce_ib": 7.560880661010742, + "ce_orig": 0.12549665570259094, + "epoch": 0.22834136170824645, + "kl_loss": 0.4688561260700226, + "loss_ib": 0.005444649141281843, + "step": 794 + }, + { + "ce_ib": 6.17995548248291, + "ce_orig": 0.4187643826007843, + "epoch": 0.22834136170824645, + "kl_loss": 0.12410400807857513, + "loss_ib": 0.001859035575762391, + "step": 794 + }, + { + "ce_ib": 9.39510726928711, + "ce_orig": 0.9322624206542969, + "epoch": 0.22834136170824645, + "kl_loss": 0.22806903719902039, + "loss_ib": 0.003220201004296541, + "step": 794 + }, + { + "epoch": 0.2286289452872241, + "grad_norm": 0.09920623153448105, + "learning_rate": 4.972870056278216e-05, + "loss": 0.8659, + "step": 795 + }, + { + "ce_ib": 8.673847198486328, + "ce_orig": 0.805489718914032, + "epoch": 0.2286289452872241, + "kl_loss": 0.12766921520233154, + "loss_ib": 0.0021440768614411354, + "step": 795 + }, + { + "ce_ib": 7.604935646057129, + "ce_orig": 0.6243016719818115, + "epoch": 0.2286289452872241, + "kl_loss": 0.19819331169128418, + "loss_ib": 0.0027424267027527094, + "step": 795 + }, + { + "ce_ib": 6.0254225730896, + "ce_orig": 0.33990374207496643, + "epoch": 0.2286289452872241, + "kl_loss": 0.10447593778371811, + "loss_ib": 0.0016473016003146768, + "step": 795 + }, + { + "ce_ib": 9.746012687683105, + "ce_orig": 1.2364401817321777, + "epoch": 0.2286289452872241, + "kl_loss": 0.12592259049415588, + "loss_ib": 0.002233827020972967, + "step": 795 + }, + { + "ce_ib": 11.734527587890625, + "ce_orig": 1.7873170375823975, + "epoch": 0.22891652886620173, + "kl_loss": 0.1425134390592575, + "loss_ib": 0.00259858719073236, + "step": 796 + }, + { + "ce_ib": 8.280077934265137, + "ce_orig": 1.111116886138916, + "epoch": 0.22891652886620173, + "kl_loss": 0.15820619463920593, + "loss_ib": 0.0024100695736706257, + "step": 796 + }, + { + "ce_ib": 7.481040000915527, + "ce_orig": 0.8186451196670532, + "epoch": 0.22891652886620173, + "kl_loss": 0.13088949024677277, + "loss_ib": 0.002056998899206519, + "step": 796 + }, + { + "ce_ib": 9.56103801727295, + "ce_orig": 0.745765209197998, + "epoch": 0.22891652886620173, + "kl_loss": 0.18324166536331177, + "loss_ib": 0.0027885204181075096, + "step": 796 + }, + { + "ce_ib": 6.580921173095703, + "ce_orig": 0.7819819450378418, + "epoch": 0.22920411244517938, + "kl_loss": 0.11691413819789886, + "loss_ib": 0.0018272333545610309, + "step": 797 + }, + { + "ce_ib": 6.933665752410889, + "ce_orig": 0.7905831336975098, + "epoch": 0.22920411244517938, + "kl_loss": 0.15403807163238525, + "loss_ib": 0.002233747160062194, + "step": 797 + }, + { + "ce_ib": 10.086153984069824, + "ce_orig": 0.8927478790283203, + "epoch": 0.22920411244517938, + "kl_loss": 0.22950385510921478, + "loss_ib": 0.0033036537934094667, + "step": 797 + }, + { + "ce_ib": 8.20946216583252, + "ce_orig": 0.8975688815116882, + "epoch": 0.22920411244517938, + "kl_loss": 0.22808901965618134, + "loss_ib": 0.003101836424320936, + "step": 797 + }, + { + "ce_ib": 6.896512985229492, + "ce_orig": 0.6491000652313232, + "epoch": 0.22949169602415703, + "kl_loss": 0.1170637458562851, + "loss_ib": 0.0018602886702865362, + "step": 798 + }, + { + "ce_ib": 5.728438377380371, + "ce_orig": 0.8358505368232727, + "epoch": 0.22949169602415703, + "kl_loss": 0.1260027289390564, + "loss_ib": 0.0018328711157664657, + "step": 798 + }, + { + "ce_ib": 5.766465187072754, + "ce_orig": 0.6195739507675171, + "epoch": 0.22949169602415703, + "kl_loss": 0.16632917523384094, + "loss_ib": 0.0022399381268769503, + "step": 798 + }, + { + "ce_ib": 4.202791690826416, + "ce_orig": 0.4293147325515747, + "epoch": 0.22949169602415703, + "kl_loss": 0.10201328992843628, + "loss_ib": 0.0014404120156541467, + "step": 798 + }, + { + "ce_ib": 10.78468132019043, + "ce_orig": 1.314720869064331, + "epoch": 0.22977927960313466, + "kl_loss": 0.08088640868663788, + "loss_ib": 0.0018873321823775768, + "step": 799 + }, + { + "ce_ib": 7.550471305847168, + "ce_orig": 0.5705544352531433, + "epoch": 0.22977927960313466, + "kl_loss": 0.17705941200256348, + "loss_ib": 0.0025256413500756025, + "step": 799 + }, + { + "ce_ib": 7.8654398918151855, + "ce_orig": 0.6908157467842102, + "epoch": 0.22977927960313466, + "kl_loss": 0.15278248488903046, + "loss_ib": 0.0023143687285482883, + "step": 799 + }, + { + "ce_ib": 9.453792572021484, + "ce_orig": 0.9354315996170044, + "epoch": 0.22977927960313466, + "kl_loss": 0.15325641632080078, + "loss_ib": 0.0024779431987553835, + "step": 799 + }, + { + "epoch": 0.2300668631821123, + "grad_norm": 0.08366382122039795, + "learning_rate": 4.972296956217265e-05, + "loss": 0.8533, + "step": 800 + }, + { + "ce_ib": 6.754378795623779, + "ce_orig": 0.6593028903007507, + "epoch": 0.2300668631821123, + "kl_loss": 0.27594077587127686, + "loss_ib": 0.0034348457120358944, + "step": 800 + }, + { + "ce_ib": 9.11864185333252, + "ce_orig": 1.0368092060089111, + "epoch": 0.2300668631821123, + "kl_loss": 0.2670667767524719, + "loss_ib": 0.0035825318191200495, + "step": 800 + }, + { + "ce_ib": 9.781817436218262, + "ce_orig": 1.2738507986068726, + "epoch": 0.2300668631821123, + "kl_loss": 0.14330054819583893, + "loss_ib": 0.0024111871607601643, + "step": 800 + }, + { + "ce_ib": 11.391901969909668, + "ce_orig": 1.4393359422683716, + "epoch": 0.2300668631821123, + "kl_loss": 0.17502886056900024, + "loss_ib": 0.002889478811994195, + "step": 800 + }, + { + "ce_ib": 5.774404525756836, + "ce_orig": 0.4819744825363159, + "epoch": 0.23035444676108993, + "kl_loss": 0.12495163083076477, + "loss_ib": 0.0018269566353410482, + "step": 801 + }, + { + "ce_ib": 8.93179702758789, + "ce_orig": 0.9988149404525757, + "epoch": 0.23035444676108993, + "kl_loss": 0.15840879082679749, + "loss_ib": 0.0024772675242275, + "step": 801 + }, + { + "ce_ib": 6.787378787994385, + "ce_orig": 0.6689311861991882, + "epoch": 0.23035444676108993, + "kl_loss": 0.12284412235021591, + "loss_ib": 0.0019071790156885982, + "step": 801 + }, + { + "ce_ib": 6.773270130157471, + "ce_orig": 0.8712309002876282, + "epoch": 0.23035444676108993, + "kl_loss": 0.1521710455417633, + "loss_ib": 0.0021990372333675623, + "step": 801 + }, + { + "ce_ib": 11.580021858215332, + "ce_orig": 0.9791839122772217, + "epoch": 0.23064203034006758, + "kl_loss": 0.12627840042114258, + "loss_ib": 0.0024207860697060823, + "step": 802 + }, + { + "ce_ib": 9.938983917236328, + "ce_orig": 1.2283821105957031, + "epoch": 0.23064203034006758, + "kl_loss": 0.27586644887924194, + "loss_ib": 0.00375256291590631, + "step": 802 + }, + { + "ce_ib": 7.058392524719238, + "ce_orig": 0.5756127834320068, + "epoch": 0.23064203034006758, + "kl_loss": 0.1477137804031372, + "loss_ib": 0.0021829770412296057, + "step": 802 + }, + { + "ce_ib": 3.1069231033325195, + "ce_orig": 0.1339166760444641, + "epoch": 0.23064203034006758, + "kl_loss": 0.3081814646720886, + "loss_ib": 0.0033925068564713, + "step": 802 + }, + { + "ce_ib": 10.960715293884277, + "ce_orig": 1.3200205564498901, + "epoch": 0.23092961391904523, + "kl_loss": 0.16258013248443604, + "loss_ib": 0.002721872879192233, + "step": 803 + }, + { + "ce_ib": 9.877068519592285, + "ce_orig": 0.7766249179840088, + "epoch": 0.23092961391904523, + "kl_loss": 0.11411984264850616, + "loss_ib": 0.0021289053838700056, + "step": 803 + }, + { + "ce_ib": 8.853331565856934, + "ce_orig": 1.3422200679779053, + "epoch": 0.23092961391904523, + "kl_loss": 0.2864736318588257, + "loss_ib": 0.003750069299712777, + "step": 803 + }, + { + "ce_ib": 6.893815994262695, + "ce_orig": 0.8211920261383057, + "epoch": 0.23092961391904523, + "kl_loss": 0.1476418375968933, + "loss_ib": 0.0021657999604940414, + "step": 803 + }, + { + "ce_ib": 5.529889106750488, + "ce_orig": 0.5488899946212769, + "epoch": 0.23121719749802286, + "kl_loss": 0.09762927889823914, + "loss_ib": 0.0015292817261070013, + "step": 804 + }, + { + "ce_ib": 7.026906967163086, + "ce_orig": 0.7769794464111328, + "epoch": 0.23121719749802286, + "kl_loss": 0.09977978467941284, + "loss_ib": 0.001700488617643714, + "step": 804 + }, + { + "ce_ib": 4.3003740310668945, + "ce_orig": 0.3249621093273163, + "epoch": 0.23121719749802286, + "kl_loss": 0.32431769371032715, + "loss_ib": 0.0036732142325490713, + "step": 804 + }, + { + "ce_ib": 8.477049827575684, + "ce_orig": 0.7804782390594482, + "epoch": 0.23121719749802286, + "kl_loss": 0.10728403180837631, + "loss_ib": 0.0019205452408641577, + "step": 804 + }, + { + "epoch": 0.2315047810770005, + "grad_norm": 0.0931197851896286, + "learning_rate": 4.971717899819113e-05, + "loss": 0.8871, + "step": 805 + }, + { + "ce_ib": 9.26341438293457, + "ce_orig": 1.0961662530899048, + "epoch": 0.2315047810770005, + "kl_loss": 0.17699837684631348, + "loss_ib": 0.002696325071156025, + "step": 805 + }, + { + "ce_ib": 9.548799514770508, + "ce_orig": 1.2269480228424072, + "epoch": 0.2315047810770005, + "kl_loss": 0.18628443777561188, + "loss_ib": 0.0028177243657410145, + "step": 805 + }, + { + "ce_ib": 9.232553482055664, + "ce_orig": 1.10349440574646, + "epoch": 0.2315047810770005, + "kl_loss": 0.17955434322357178, + "loss_ib": 0.002718798816204071, + "step": 805 + }, + { + "ce_ib": 9.328289985656738, + "ce_orig": 1.420828938484192, + "epoch": 0.2315047810770005, + "kl_loss": 0.1031360924243927, + "loss_ib": 0.0019641900435090065, + "step": 805 + }, + { + "ce_ib": 8.62269401550293, + "ce_orig": 0.6604496836662292, + "epoch": 0.23179236465597813, + "kl_loss": 0.14411967992782593, + "loss_ib": 0.0023034662008285522, + "step": 806 + }, + { + "ce_ib": 11.635430335998535, + "ce_orig": 1.4418182373046875, + "epoch": 0.23179236465597813, + "kl_loss": 0.14442530274391174, + "loss_ib": 0.0026077961083501577, + "step": 806 + }, + { + "ce_ib": 10.606904983520508, + "ce_orig": 1.4939662218093872, + "epoch": 0.23179236465597813, + "kl_loss": 0.10982929170131683, + "loss_ib": 0.002158983377739787, + "step": 806 + }, + { + "ce_ib": 9.094931602478027, + "ce_orig": 1.313065528869629, + "epoch": 0.23179236465597813, + "kl_loss": 0.11446712166070938, + "loss_ib": 0.002054164418950677, + "step": 806 + }, + { + "ce_ib": 8.538125038146973, + "ce_orig": 1.2573816776275635, + "epoch": 0.23207994823495579, + "kl_loss": 0.15647096931934357, + "loss_ib": 0.0024185222573578358, + "step": 807 + }, + { + "ce_ib": 6.605203151702881, + "ce_orig": 0.6826567053794861, + "epoch": 0.23207994823495579, + "kl_loss": 0.14014863967895508, + "loss_ib": 0.0020620066206902266, + "step": 807 + }, + { + "ce_ib": 8.780204772949219, + "ce_orig": 1.2285255193710327, + "epoch": 0.23207994823495579, + "kl_loss": 0.10112646222114563, + "loss_ib": 0.0018892850494012237, + "step": 807 + }, + { + "ce_ib": 8.27536392211914, + "ce_orig": 0.528601884841919, + "epoch": 0.23207994823495579, + "kl_loss": 0.2035016268491745, + "loss_ib": 0.002862552646547556, + "step": 807 + }, + { + "ce_ib": 6.892450332641602, + "ce_orig": 0.8363218903541565, + "epoch": 0.23236753181393344, + "kl_loss": 0.14483782649040222, + "loss_ib": 0.0021376232616603374, + "step": 808 + }, + { + "ce_ib": 9.606917381286621, + "ce_orig": 1.0296199321746826, + "epoch": 0.23236753181393344, + "kl_loss": 0.2421770691871643, + "loss_ib": 0.003382462076842785, + "step": 808 + }, + { + "ce_ib": 8.105813026428223, + "ce_orig": 0.7836432456970215, + "epoch": 0.23236753181393344, + "kl_loss": 0.1529703140258789, + "loss_ib": 0.0023402844090014696, + "step": 808 + }, + { + "ce_ib": 9.704617500305176, + "ce_orig": 1.174484372138977, + "epoch": 0.23236753181393344, + "kl_loss": 0.13153155148029327, + "loss_ib": 0.002285777358338237, + "step": 808 + }, + { + "ce_ib": 6.115880012512207, + "ce_orig": 0.7601160407066345, + "epoch": 0.23265511539291106, + "kl_loss": 0.10592241585254669, + "loss_ib": 0.0016708120238035917, + "step": 809 + }, + { + "ce_ib": 6.771764278411865, + "ce_orig": 0.5327339172363281, + "epoch": 0.23265511539291106, + "kl_loss": 0.20717453956604004, + "loss_ib": 0.0027489217463880777, + "step": 809 + }, + { + "ce_ib": 11.804062843322754, + "ce_orig": 0.6770117282867432, + "epoch": 0.23265511539291106, + "kl_loss": 0.11569809913635254, + "loss_ib": 0.002337387064471841, + "step": 809 + }, + { + "ce_ib": 8.106520652770996, + "ce_orig": 1.036596655845642, + "epoch": 0.23265511539291106, + "kl_loss": 0.13593992590904236, + "loss_ib": 0.0021700512152165174, + "step": 809 + }, + { + "epoch": 0.2329426989718887, + "grad_norm": 0.10620737075805664, + "learning_rate": 4.9711328884788434e-05, + "loss": 0.8885, + "step": 810 + }, + { + "ce_ib": 9.17796802520752, + "ce_orig": 1.0972896814346313, + "epoch": 0.2329426989718887, + "kl_loss": 0.13107869029045105, + "loss_ib": 0.002228583674877882, + "step": 810 + }, + { + "ce_ib": 10.562804222106934, + "ce_orig": 0.9702454209327698, + "epoch": 0.2329426989718887, + "kl_loss": 0.14495953917503357, + "loss_ib": 0.0025058756582438946, + "step": 810 + }, + { + "ce_ib": 6.212893962860107, + "ce_orig": 0.783852219581604, + "epoch": 0.2329426989718887, + "kl_loss": 0.1454237699508667, + "loss_ib": 0.002075526863336563, + "step": 810 + }, + { + "ce_ib": 11.819994926452637, + "ce_orig": 1.4107239246368408, + "epoch": 0.2329426989718887, + "kl_loss": 0.1865510791540146, + "loss_ib": 0.0030475102830678225, + "step": 810 + }, + { + "ce_ib": 6.329524517059326, + "ce_orig": 0.6930771470069885, + "epoch": 0.23323028255086634, + "kl_loss": 0.14692606031894684, + "loss_ib": 0.002102212980389595, + "step": 811 + }, + { + "ce_ib": 7.555665969848633, + "ce_orig": 1.0139789581298828, + "epoch": 0.23323028255086634, + "kl_loss": 0.11456457525491714, + "loss_ib": 0.0019012122647836804, + "step": 811 + }, + { + "ce_ib": 6.150085926055908, + "ce_orig": 0.5341890454292297, + "epoch": 0.23323028255086634, + "kl_loss": 0.19206318259239197, + "loss_ib": 0.002535640262067318, + "step": 811 + }, + { + "ce_ib": 9.82231330871582, + "ce_orig": 0.7214909195899963, + "epoch": 0.23323028255086634, + "kl_loss": 0.08180269598960876, + "loss_ib": 0.0018002580618485808, + "step": 811 + }, + { + "ce_ib": 10.72133731842041, + "ce_orig": 1.3293489217758179, + "epoch": 0.233517866129844, + "kl_loss": 0.1370580643415451, + "loss_ib": 0.0024427142925560474, + "step": 812 + }, + { + "ce_ib": 7.326596736907959, + "ce_orig": 1.0470627546310425, + "epoch": 0.233517866129844, + "kl_loss": 0.1406283676624298, + "loss_ib": 0.002138943411409855, + "step": 812 + }, + { + "ce_ib": 12.790474891662598, + "ce_orig": 1.9125604629516602, + "epoch": 0.233517866129844, + "kl_loss": 0.17688024044036865, + "loss_ib": 0.00304784975014627, + "step": 812 + }, + { + "ce_ib": 6.637095928192139, + "ce_orig": 0.4109753668308258, + "epoch": 0.233517866129844, + "kl_loss": 0.1006464958190918, + "loss_ib": 0.0016701745335012674, + "step": 812 + }, + { + "ce_ib": 9.163846015930176, + "ce_orig": 1.499794363975525, + "epoch": 0.23380544970882164, + "kl_loss": 0.13409070670604706, + "loss_ib": 0.002257291693240404, + "step": 813 + }, + { + "ce_ib": 7.725475311279297, + "ce_orig": 0.6007325649261475, + "epoch": 0.23380544970882164, + "kl_loss": 0.11577519029378891, + "loss_ib": 0.001930299331434071, + "step": 813 + }, + { + "ce_ib": 10.204571723937988, + "ce_orig": 1.0934782028198242, + "epoch": 0.23380544970882164, + "kl_loss": 0.11761227995157242, + "loss_ib": 0.0021965799387544394, + "step": 813 + }, + { + "ce_ib": 6.080852031707764, + "ce_orig": 0.566545844078064, + "epoch": 0.23380544970882164, + "kl_loss": 0.07402099668979645, + "loss_ib": 0.0013482951326295733, + "step": 813 + }, + { + "ce_ib": 7.153719425201416, + "ce_orig": 0.8980187773704529, + "epoch": 0.23409303328779926, + "kl_loss": 0.10498209297657013, + "loss_ib": 0.0017651927191764116, + "step": 814 + }, + { + "ce_ib": 6.541371822357178, + "ce_orig": 0.9704118371009827, + "epoch": 0.23409303328779926, + "kl_loss": 0.09279821068048477, + "loss_ib": 0.0015821191482245922, + "step": 814 + }, + { + "ce_ib": 6.066292762756348, + "ce_orig": 0.668864905834198, + "epoch": 0.23409303328779926, + "kl_loss": 0.10827763378620148, + "loss_ib": 0.0016894055297598243, + "step": 814 + }, + { + "ce_ib": 7.872057914733887, + "ce_orig": 0.6739667654037476, + "epoch": 0.23409303328779926, + "kl_loss": 0.11645969748497009, + "loss_ib": 0.0019518026383593678, + "step": 814 + }, + { + "epoch": 0.23438061686677691, + "grad_norm": 0.11317208409309387, + "learning_rate": 4.9705419236058825e-05, + "loss": 0.9053, + "step": 815 + }, + { + "ce_ib": 7.275731086730957, + "ce_orig": 0.5088436007499695, + "epoch": 0.23438061686677691, + "kl_loss": 0.1330629289150238, + "loss_ib": 0.0020582021679729223, + "step": 815 + }, + { + "ce_ib": 11.397271156311035, + "ce_orig": 0.6790756583213806, + "epoch": 0.23438061686677691, + "kl_loss": 0.18960818648338318, + "loss_ib": 0.0030358086805790663, + "step": 815 + }, + { + "ce_ib": 7.752520561218262, + "ce_orig": 0.5414913892745972, + "epoch": 0.23438061686677691, + "kl_loss": 0.16944292187690735, + "loss_ib": 0.002469681203365326, + "step": 815 + }, + { + "ce_ib": 6.504335403442383, + "ce_orig": 0.5197587609291077, + "epoch": 0.23438061686677691, + "kl_loss": 0.10620582848787308, + "loss_ib": 0.001712491735816002, + "step": 815 + }, + { + "ce_ib": 6.495504856109619, + "ce_orig": 1.07676362991333, + "epoch": 0.23466820044575454, + "kl_loss": 0.09302103519439697, + "loss_ib": 0.001579760923050344, + "step": 816 + }, + { + "ce_ib": 6.134542942047119, + "ce_orig": 1.04434335231781, + "epoch": 0.23466820044575454, + "kl_loss": 0.08342786133289337, + "loss_ib": 0.0014477329095825553, + "step": 816 + }, + { + "ce_ib": 6.335164546966553, + "ce_orig": 0.625989556312561, + "epoch": 0.23466820044575454, + "kl_loss": 0.14174768328666687, + "loss_ib": 0.002050993265584111, + "step": 816 + }, + { + "ce_ib": 10.488992691040039, + "ce_orig": 1.2239924669265747, + "epoch": 0.23466820044575454, + "kl_loss": 0.1575402021408081, + "loss_ib": 0.002624301239848137, + "step": 816 + }, + { + "ce_ib": 6.741306781768799, + "ce_orig": 0.8143828511238098, + "epoch": 0.2349557840247322, + "kl_loss": 0.14506280422210693, + "loss_ib": 0.00212475867010653, + "step": 817 + }, + { + "ce_ib": 5.542083740234375, + "ce_orig": 0.7346311211585999, + "epoch": 0.2349557840247322, + "kl_loss": 0.083269402384758, + "loss_ib": 0.0013869022950530052, + "step": 817 + }, + { + "ce_ib": 7.235450267791748, + "ce_orig": 0.6082554459571838, + "epoch": 0.2349557840247322, + "kl_loss": 0.16868101060390472, + "loss_ib": 0.002410355256870389, + "step": 817 + }, + { + "ce_ib": 6.437801837921143, + "ce_orig": 0.3926742672920227, + "epoch": 0.2349557840247322, + "kl_loss": 0.32581275701522827, + "loss_ib": 0.0039019077084958553, + "step": 817 + }, + { + "ce_ib": 7.426485061645508, + "ce_orig": 0.40766289830207825, + "epoch": 0.23524336760370984, + "kl_loss": 0.1242557093501091, + "loss_ib": 0.0019852055702358484, + "step": 818 + }, + { + "ce_ib": 7.13580846786499, + "ce_orig": 0.6141798496246338, + "epoch": 0.23524336760370984, + "kl_loss": 0.14575709402561188, + "loss_ib": 0.0021711518056690693, + "step": 818 + }, + { + "ce_ib": 7.478182315826416, + "ce_orig": 0.9550573825836182, + "epoch": 0.23524336760370984, + "kl_loss": 0.134103924036026, + "loss_ib": 0.0020888573490083218, + "step": 818 + }, + { + "ce_ib": 9.941823959350586, + "ce_orig": 1.12644362449646, + "epoch": 0.23524336760370984, + "kl_loss": 0.19895997643470764, + "loss_ib": 0.002983782207593322, + "step": 818 + }, + { + "ce_ib": 9.203929901123047, + "ce_orig": 1.0872249603271484, + "epoch": 0.23553095118268746, + "kl_loss": 0.19965161383152008, + "loss_ib": 0.0029169090557843447, + "step": 819 + }, + { + "ce_ib": 8.64352035522461, + "ce_orig": 1.0518972873687744, + "epoch": 0.23553095118268746, + "kl_loss": 0.10414116084575653, + "loss_ib": 0.0019057635217905045, + "step": 819 + }, + { + "ce_ib": 2.9574663639068604, + "ce_orig": 0.15662173926830292, + "epoch": 0.23553095118268746, + "kl_loss": 0.2213229537010193, + "loss_ib": 0.0025089760310947895, + "step": 819 + }, + { + "ce_ib": 9.636175155639648, + "ce_orig": 1.2638198137283325, + "epoch": 0.23553095118268746, + "kl_loss": 0.28710055351257324, + "loss_ib": 0.003834622912108898, + "step": 819 + }, + { + "epoch": 0.23581853476166512, + "grad_norm": 0.09467697888612747, + "learning_rate": 4.969945006624003e-05, + "loss": 0.8118, + "step": 820 + }, + { + "ce_ib": 6.223392963409424, + "ce_orig": 0.4875457286834717, + "epoch": 0.23581853476166512, + "kl_loss": 0.09896323084831238, + "loss_ib": 0.001611971529200673, + "step": 820 + }, + { + "ce_ib": 8.920371055603027, + "ce_orig": 1.074205756187439, + "epoch": 0.23581853476166512, + "kl_loss": 0.09543949365615845, + "loss_ib": 0.0018464321037754416, + "step": 820 + }, + { + "ce_ib": 7.298362731933594, + "ce_orig": 0.6542941331863403, + "epoch": 0.23581853476166512, + "kl_loss": 0.1842930018901825, + "loss_ib": 0.0025727662723511457, + "step": 820 + }, + { + "ce_ib": 4.780843734741211, + "ce_orig": 0.4712677597999573, + "epoch": 0.23581853476166512, + "kl_loss": 0.09687883406877518, + "loss_ib": 0.0014468726003542542, + "step": 820 + }, + { + "ce_ib": 13.902252197265625, + "ce_orig": 1.981177806854248, + "epoch": 0.23610611834064274, + "kl_loss": 0.21506190299987793, + "loss_ib": 0.003540844190865755, + "step": 821 + }, + { + "ce_ib": 4.231637001037598, + "ce_orig": 0.4178099036216736, + "epoch": 0.23610611834064274, + "kl_loss": 0.12692990899085999, + "loss_ib": 0.0016924628289416432, + "step": 821 + }, + { + "ce_ib": 9.162365913391113, + "ce_orig": 0.6495018005371094, + "epoch": 0.23610611834064274, + "kl_loss": 0.1748168021440506, + "loss_ib": 0.0026644044555723667, + "step": 821 + }, + { + "ce_ib": 6.503030776977539, + "ce_orig": 0.7990416288375854, + "epoch": 0.23610611834064274, + "kl_loss": 0.143586203455925, + "loss_ib": 0.002086165128275752, + "step": 821 + }, + { + "ce_ib": 7.101401329040527, + "ce_orig": 0.7612082958221436, + "epoch": 0.2363937019196204, + "kl_loss": 0.15823988616466522, + "loss_ib": 0.002292538760229945, + "step": 822 + }, + { + "ce_ib": 5.6985883712768555, + "ce_orig": 0.7191292643547058, + "epoch": 0.2363937019196204, + "kl_loss": 0.10908360779285431, + "loss_ib": 0.0016606948338449001, + "step": 822 + }, + { + "ce_ib": 5.306804656982422, + "ce_orig": 0.5981192588806152, + "epoch": 0.2363937019196204, + "kl_loss": 0.12700998783111572, + "loss_ib": 0.0018007803009822965, + "step": 822 + }, + { + "ce_ib": 6.363889694213867, + "ce_orig": 0.5744786858558655, + "epoch": 0.2363937019196204, + "kl_loss": 0.09524562954902649, + "loss_ib": 0.0015888451598584652, + "step": 822 + }, + { + "ce_ib": 14.43346118927002, + "ce_orig": 0.6333121061325073, + "epoch": 0.23668128549859804, + "kl_loss": 0.18850618600845337, + "loss_ib": 0.003328407881781459, + "step": 823 + }, + { + "ce_ib": 4.770277500152588, + "ce_orig": 0.6905955076217651, + "epoch": 0.23668128549859804, + "kl_loss": 0.10428404062986374, + "loss_ib": 0.0015198680339381099, + "step": 823 + }, + { + "ce_ib": 6.385842323303223, + "ce_orig": 0.5164174437522888, + "epoch": 0.23668128549859804, + "kl_loss": 0.17304900288581848, + "loss_ib": 0.0023690741509199142, + "step": 823 + }, + { + "ce_ib": 7.28901481628418, + "ce_orig": 0.8119218349456787, + "epoch": 0.23668128549859804, + "kl_loss": 0.11210718750953674, + "loss_ib": 0.0018499733414500952, + "step": 823 + }, + { + "ce_ib": 7.684753894805908, + "ce_orig": 0.4133395552635193, + "epoch": 0.23696886907757567, + "kl_loss": 0.24416936933994293, + "loss_ib": 0.003210169030353427, + "step": 824 + }, + { + "ce_ib": 6.748510360717773, + "ce_orig": 0.4567016363143921, + "epoch": 0.23696886907757567, + "kl_loss": 0.19947892427444458, + "loss_ib": 0.0026696401182562113, + "step": 824 + }, + { + "ce_ib": 9.122523307800293, + "ce_orig": 1.1772557497024536, + "epoch": 0.23696886907757567, + "kl_loss": 0.10364347696304321, + "loss_ib": 0.0019486871315166354, + "step": 824 + }, + { + "ce_ib": 8.981727600097656, + "ce_orig": 1.1669374704360962, + "epoch": 0.23696886907757567, + "kl_loss": 0.18251746892929077, + "loss_ib": 0.002723347395658493, + "step": 824 + }, + { + "epoch": 0.23725645265655332, + "grad_norm": 0.1238342672586441, + "learning_rate": 4.9693421389713156e-05, + "loss": 0.8523, + "step": 825 + }, + { + "ce_ib": 8.044266700744629, + "ce_orig": 1.2204943895339966, + "epoch": 0.23725645265655332, + "kl_loss": 0.13153710961341858, + "loss_ib": 0.002119797747582197, + "step": 825 + }, + { + "ce_ib": 5.693978786468506, + "ce_orig": 0.6213144063949585, + "epoch": 0.23725645265655332, + "kl_loss": 0.14717428386211395, + "loss_ib": 0.002041140804067254, + "step": 825 + }, + { + "ce_ib": 8.591774940490723, + "ce_orig": 1.2455124855041504, + "epoch": 0.23725645265655332, + "kl_loss": 0.11206641793251038, + "loss_ib": 0.0019798416178673506, + "step": 825 + }, + { + "ce_ib": 9.281439781188965, + "ce_orig": 0.8546298742294312, + "epoch": 0.23725645265655332, + "kl_loss": 0.2340502291917801, + "loss_ib": 0.0032686463091522455, + "step": 825 + }, + { + "ce_ib": 9.022857666015625, + "ce_orig": 1.0518991947174072, + "epoch": 0.23754403623553094, + "kl_loss": 0.11388400197029114, + "loss_ib": 0.0020411256700754166, + "step": 826 + }, + { + "ce_ib": 9.080761909484863, + "ce_orig": 0.9453988075256348, + "epoch": 0.23754403623553094, + "kl_loss": 0.13348603248596191, + "loss_ib": 0.002242936519905925, + "step": 826 + }, + { + "ce_ib": 9.81284236907959, + "ce_orig": 1.3243674039840698, + "epoch": 0.23754403623553094, + "kl_loss": 0.13746213912963867, + "loss_ib": 0.002355905482545495, + "step": 826 + }, + { + "ce_ib": 11.309003829956055, + "ce_orig": 1.3904995918273926, + "epoch": 0.23754403623553094, + "kl_loss": 0.13674914836883545, + "loss_ib": 0.0024983917828649282, + "step": 826 + }, + { + "ce_ib": 9.344799041748047, + "ce_orig": 1.560760259628296, + "epoch": 0.2378316198145086, + "kl_loss": 0.11014031618833542, + "loss_ib": 0.0020358830224722624, + "step": 827 + }, + { + "ce_ib": 5.963412761688232, + "ce_orig": 0.8929498791694641, + "epoch": 0.2378316198145086, + "kl_loss": 0.10516968369483948, + "loss_ib": 0.001648038043640554, + "step": 827 + }, + { + "ce_ib": 7.594840049743652, + "ce_orig": 0.8408666253089905, + "epoch": 0.2378316198145086, + "kl_loss": 0.09780211001634598, + "loss_ib": 0.0017375051975250244, + "step": 827 + }, + { + "ce_ib": 9.385004997253418, + "ce_orig": 1.3767368793487549, + "epoch": 0.2378316198145086, + "kl_loss": 0.17203593254089355, + "loss_ib": 0.0026588598266243935, + "step": 827 + }, + { + "ce_ib": 12.047724723815918, + "ce_orig": 1.5393359661102295, + "epoch": 0.23811920339348625, + "kl_loss": 0.11752515286207199, + "loss_ib": 0.002380023943260312, + "step": 828 + }, + { + "ce_ib": 7.751171112060547, + "ce_orig": 0.89743971824646, + "epoch": 0.23811920339348625, + "kl_loss": 0.15309442579746246, + "loss_ib": 0.0023060613311827183, + "step": 828 + }, + { + "ce_ib": 7.59684419631958, + "ce_orig": 0.5676822066307068, + "epoch": 0.23811920339348625, + "kl_loss": 0.1733154058456421, + "loss_ib": 0.00249283853918314, + "step": 828 + }, + { + "ce_ib": 10.951656341552734, + "ce_orig": 1.7123820781707764, + "epoch": 0.23811920339348625, + "kl_loss": 0.15858086943626404, + "loss_ib": 0.0026809743139892817, + "step": 828 + }, + { + "ce_ib": 6.502542972564697, + "ce_orig": 0.8747706413269043, + "epoch": 0.23840678697246387, + "kl_loss": 0.09269970655441284, + "loss_ib": 0.001577251241542399, + "step": 829 + }, + { + "ce_ib": 6.223927021026611, + "ce_orig": 0.6755560040473938, + "epoch": 0.23840678697246387, + "kl_loss": 0.12796303629875183, + "loss_ib": 0.001902023097500205, + "step": 829 + }, + { + "ce_ib": 5.137899875640869, + "ce_orig": 0.7314615249633789, + "epoch": 0.23840678697246387, + "kl_loss": 0.08606921136379242, + "loss_ib": 0.0013744820607826114, + "step": 829 + }, + { + "ce_ib": 7.8740620613098145, + "ce_orig": 0.7106395959854126, + "epoch": 0.23840678697246387, + "kl_loss": 0.11989139020442963, + "loss_ib": 0.001986319897696376, + "step": 829 + }, + { + "epoch": 0.23869437055144152, + "grad_norm": 0.11228325217962265, + "learning_rate": 4.96873332210027e-05, + "loss": 0.8861, + "step": 830 + }, + { + "ce_ib": 4.807431697845459, + "ce_orig": 0.8043792247772217, + "epoch": 0.23869437055144152, + "kl_loss": 0.07121051847934723, + "loss_ib": 0.0011928483145311475, + "step": 830 + }, + { + "ce_ib": 12.73621940612793, + "ce_orig": 0.9692448973655701, + "epoch": 0.23869437055144152, + "kl_loss": 0.1215188279747963, + "loss_ib": 0.0024888101033866405, + "step": 830 + }, + { + "ce_ib": 7.984679222106934, + "ce_orig": 1.0848784446716309, + "epoch": 0.23869437055144152, + "kl_loss": 0.15019136667251587, + "loss_ib": 0.0023003816604614258, + "step": 830 + }, + { + "ce_ib": 10.544111251831055, + "ce_orig": 1.18326997756958, + "epoch": 0.23869437055144152, + "kl_loss": 0.18155136704444885, + "loss_ib": 0.0028699247632175684, + "step": 830 + }, + { + "ce_ib": 5.490835189819336, + "ce_orig": 0.4462292790412903, + "epoch": 0.23898195413041914, + "kl_loss": 0.1400134116411209, + "loss_ib": 0.001949217519722879, + "step": 831 + }, + { + "ce_ib": 8.376250267028809, + "ce_orig": 0.8037113547325134, + "epoch": 0.23898195413041914, + "kl_loss": 0.13274028897285461, + "loss_ib": 0.002165027894079685, + "step": 831 + }, + { + "ce_ib": 9.137575149536133, + "ce_orig": 0.9506906867027283, + "epoch": 0.23898195413041914, + "kl_loss": 0.12928739190101624, + "loss_ib": 0.002206631237640977, + "step": 831 + }, + { + "ce_ib": 8.357852935791016, + "ce_orig": 1.6927680969238281, + "epoch": 0.23898195413041914, + "kl_loss": 0.16599999368190765, + "loss_ib": 0.002495785243809223, + "step": 831 + }, + { + "ce_ib": 13.266925811767578, + "ce_orig": 1.9612349271774292, + "epoch": 0.2392695377093968, + "kl_loss": 0.16674180328845978, + "loss_ib": 0.0029941105749458075, + "step": 832 + }, + { + "ce_ib": 5.552142143249512, + "ce_orig": 0.7127636671066284, + "epoch": 0.2392695377093968, + "kl_loss": 0.16101816296577454, + "loss_ib": 0.0021653957664966583, + "step": 832 + }, + { + "ce_ib": 7.777743339538574, + "ce_orig": 0.6769495010375977, + "epoch": 0.2392695377093968, + "kl_loss": 0.15848492085933685, + "loss_ib": 0.00236262334510684, + "step": 832 + }, + { + "ce_ib": 8.940630912780762, + "ce_orig": 1.3365821838378906, + "epoch": 0.2392695377093968, + "kl_loss": 0.20301774144172668, + "loss_ib": 0.0029242404270917177, + "step": 832 + }, + { + "ce_ib": 9.560267448425293, + "ce_orig": 0.9713718891143799, + "epoch": 0.23955712128837445, + "kl_loss": 0.13948719203472137, + "loss_ib": 0.0023508986923843622, + "step": 833 + }, + { + "ce_ib": 8.09939193725586, + "ce_orig": 0.9209976196289062, + "epoch": 0.23955712128837445, + "kl_loss": 0.1723610758781433, + "loss_ib": 0.0025335499085485935, + "step": 833 + }, + { + "ce_ib": 5.985219478607178, + "ce_orig": 0.7876350283622742, + "epoch": 0.23955712128837445, + "kl_loss": 0.06713362783193588, + "loss_ib": 0.0012698580976575613, + "step": 833 + }, + { + "ce_ib": 7.734393119812012, + "ce_orig": 0.9870476126670837, + "epoch": 0.23955712128837445, + "kl_loss": 0.17694149911403656, + "loss_ib": 0.0025428542867302895, + "step": 833 + }, + { + "ce_ib": 7.501764297485352, + "ce_orig": 0.9713405966758728, + "epoch": 0.23984470486735207, + "kl_loss": 0.11347226798534393, + "loss_ib": 0.0018848991021513939, + "step": 834 + }, + { + "ce_ib": 5.259660243988037, + "ce_orig": 0.8085374236106873, + "epoch": 0.23984470486735207, + "kl_loss": 0.07916925847530365, + "loss_ib": 0.0013176585780456662, + "step": 834 + }, + { + "ce_ib": 10.007915496826172, + "ce_orig": 1.2824541330337524, + "epoch": 0.23984470486735207, + "kl_loss": 0.15162745118141174, + "loss_ib": 0.002517065964639187, + "step": 834 + }, + { + "ce_ib": 5.082655906677246, + "ce_orig": 0.43925535678863525, + "epoch": 0.23984470486735207, + "kl_loss": 0.24141454696655273, + "loss_ib": 0.0029224108438938856, + "step": 834 + }, + { + "epoch": 0.24013228844632972, + "grad_norm": 0.0987640768289566, + "learning_rate": 4.9681185574776446e-05, + "loss": 0.8962, + "step": 835 + }, + { + "ce_ib": 7.371253967285156, + "ce_orig": 1.0186665058135986, + "epoch": 0.24013228844632972, + "kl_loss": 0.1406937539577484, + "loss_ib": 0.0021440626587718725, + "step": 835 + }, + { + "ce_ib": 5.862243175506592, + "ce_orig": 0.4793251156806946, + "epoch": 0.24013228844632972, + "kl_loss": 0.11569086462259293, + "loss_ib": 0.0017431328305974603, + "step": 835 + }, + { + "ce_ib": 7.355347633361816, + "ce_orig": 0.7462732791900635, + "epoch": 0.24013228844632972, + "kl_loss": 0.18415525555610657, + "loss_ib": 0.0025770871434360743, + "step": 835 + }, + { + "ce_ib": 5.006382465362549, + "ce_orig": 0.30985262989997864, + "epoch": 0.24013228844632972, + "kl_loss": 0.1356092095375061, + "loss_ib": 0.0018567302031442523, + "step": 835 + }, + { + "ce_ib": 6.978796482086182, + "ce_orig": 0.8654583692550659, + "epoch": 0.24041987202530735, + "kl_loss": 0.10511042922735214, + "loss_ib": 0.001748983864672482, + "step": 836 + }, + { + "ce_ib": 8.150212287902832, + "ce_orig": 0.4995287358760834, + "epoch": 0.24041987202530735, + "kl_loss": 0.2076493203639984, + "loss_ib": 0.002891514217481017, + "step": 836 + }, + { + "ce_ib": 11.759064674377441, + "ce_orig": 1.6323509216308594, + "epoch": 0.24041987202530735, + "kl_loss": 0.19571126997470856, + "loss_ib": 0.0031330191995948553, + "step": 836 + }, + { + "ce_ib": 11.017194747924805, + "ce_orig": 1.2573686838150024, + "epoch": 0.24041987202530735, + "kl_loss": 0.12838904559612274, + "loss_ib": 0.002385609783232212, + "step": 836 + }, + { + "ce_ib": 2.2583088874816895, + "ce_orig": 0.16112041473388672, + "epoch": 0.240707455604285, + "kl_loss": 0.3453638553619385, + "loss_ib": 0.003679469460621476, + "step": 837 + }, + { + "ce_ib": 10.247907638549805, + "ce_orig": 1.4024429321289062, + "epoch": 0.240707455604285, + "kl_loss": 0.1429089903831482, + "loss_ib": 0.0024538806173950434, + "step": 837 + }, + { + "ce_ib": 5.830674648284912, + "ce_orig": 0.6212292909622192, + "epoch": 0.240707455604285, + "kl_loss": 0.15315499901771545, + "loss_ib": 0.0021146174985915422, + "step": 837 + }, + { + "ce_ib": 4.623894691467285, + "ce_orig": 0.4536569118499756, + "epoch": 0.240707455604285, + "kl_loss": 0.1346922367811203, + "loss_ib": 0.0018093117978423834, + "step": 837 + }, + { + "ce_ib": 8.1456937789917, + "ce_orig": 1.164689540863037, + "epoch": 0.24099503918326265, + "kl_loss": 0.10320104658603668, + "loss_ib": 0.00184657983481884, + "step": 838 + }, + { + "ce_ib": 6.376285552978516, + "ce_orig": 0.7863490581512451, + "epoch": 0.24099503918326265, + "kl_loss": 0.13553473353385925, + "loss_ib": 0.001992975827306509, + "step": 838 + }, + { + "ce_ib": 4.726283073425293, + "ce_orig": 0.357181191444397, + "epoch": 0.24099503918326265, + "kl_loss": 0.17762082815170288, + "loss_ib": 0.002248836448416114, + "step": 838 + }, + { + "ce_ib": 5.20059871673584, + "ce_orig": 0.41502946615219116, + "epoch": 0.24099503918326265, + "kl_loss": 0.08028832077980042, + "loss_ib": 0.001322943135164678, + "step": 838 + }, + { + "ce_ib": 5.245758533477783, + "ce_orig": 0.5355814695358276, + "epoch": 0.24128262276224027, + "kl_loss": 0.12484327703714371, + "loss_ib": 0.0017730086110532284, + "step": 839 + }, + { + "ce_ib": 6.892401218414307, + "ce_orig": 1.2295132875442505, + "epoch": 0.24128262276224027, + "kl_loss": 0.12067458033561707, + "loss_ib": 0.001895985915325582, + "step": 839 + }, + { + "ce_ib": 6.100953102111816, + "ce_orig": 0.762692391872406, + "epoch": 0.24128262276224027, + "kl_loss": 0.15519046783447266, + "loss_ib": 0.0021619999315589666, + "step": 839 + }, + { + "ce_ib": 5.918198585510254, + "ce_orig": 0.7303500771522522, + "epoch": 0.24128262276224027, + "kl_loss": 0.08662743866443634, + "loss_ib": 0.001458094222471118, + "step": 839 + }, + { + "epoch": 0.24157020634121792, + "grad_norm": 0.10592812299728394, + "learning_rate": 4.967497846584552e-05, + "loss": 0.8622, + "step": 840 + }, + { + "ce_ib": 8.441739082336426, + "ce_orig": 0.8219562768936157, + "epoch": 0.24157020634121792, + "kl_loss": 0.13953891396522522, + "loss_ib": 0.002239563036710024, + "step": 840 + }, + { + "ce_ib": 5.784753322601318, + "ce_orig": 0.7019002437591553, + "epoch": 0.24157020634121792, + "kl_loss": 0.10375018417835236, + "loss_ib": 0.0016159771475940943, + "step": 840 + }, + { + "ce_ib": 6.836467266082764, + "ce_orig": 0.8054057359695435, + "epoch": 0.24157020634121792, + "kl_loss": 0.1309238076210022, + "loss_ib": 0.0019928847905248404, + "step": 840 + }, + { + "ce_ib": 6.871645927429199, + "ce_orig": 0.4505913257598877, + "epoch": 0.24157020634121792, + "kl_loss": 0.1236223429441452, + "loss_ib": 0.0019233878701925278, + "step": 840 + }, + { + "ce_ib": 7.9782023429870605, + "ce_orig": 0.7888794541358948, + "epoch": 0.24185778992019555, + "kl_loss": 0.13207761943340302, + "loss_ib": 0.0021185963414609432, + "step": 841 + }, + { + "ce_ib": 12.381393432617188, + "ce_orig": 1.5155247449874878, + "epoch": 0.24185778992019555, + "kl_loss": 0.11220519244670868, + "loss_ib": 0.002360191196203232, + "step": 841 + }, + { + "ce_ib": 5.204415798187256, + "ce_orig": 0.7087980508804321, + "epoch": 0.24185778992019555, + "kl_loss": 0.11631835997104645, + "loss_ib": 0.0016836250433698297, + "step": 841 + }, + { + "ce_ib": 7.512041091918945, + "ce_orig": 0.9449790120124817, + "epoch": 0.24185778992019555, + "kl_loss": 0.1673145592212677, + "loss_ib": 0.0024243497755378485, + "step": 841 + }, + { + "ce_ib": 10.613036155700684, + "ce_orig": 1.0805548429489136, + "epoch": 0.2421453734991732, + "kl_loss": 0.14164038002490997, + "loss_ib": 0.002477707341313362, + "step": 842 + }, + { + "ce_ib": 7.113550186157227, + "ce_orig": 0.8366743326187134, + "epoch": 0.2421453734991732, + "kl_loss": 0.10816210508346558, + "loss_ib": 0.0017929759342223406, + "step": 842 + }, + { + "ce_ib": 11.691953659057617, + "ce_orig": 0.9221868515014648, + "epoch": 0.2421453734991732, + "kl_loss": 0.20221099257469177, + "loss_ib": 0.003191305324435234, + "step": 842 + }, + { + "ce_ib": 5.920987129211426, + "ce_orig": 0.6415844559669495, + "epoch": 0.2421453734991732, + "kl_loss": 0.12173058837652206, + "loss_ib": 0.0018094044644385576, + "step": 842 + }, + { + "ce_ib": 8.948143005371094, + "ce_orig": 0.6214312314987183, + "epoch": 0.24243295707815085, + "kl_loss": 0.47353020310401917, + "loss_ib": 0.005630116444081068, + "step": 843 + }, + { + "ce_ib": 9.252907752990723, + "ce_orig": 0.9344848990440369, + "epoch": 0.24243295707815085, + "kl_loss": 0.15891718864440918, + "loss_ib": 0.0025144624523818493, + "step": 843 + }, + { + "ce_ib": 8.631677627563477, + "ce_orig": 0.6140725612640381, + "epoch": 0.24243295707815085, + "kl_loss": 0.1363321989774704, + "loss_ib": 0.0022264898288995028, + "step": 843 + }, + { + "ce_ib": 7.94747257232666, + "ce_orig": 1.072943091392517, + "epoch": 0.24243295707815085, + "kl_loss": 0.22313611209392548, + "loss_ib": 0.0030261084903031588, + "step": 843 + }, + { + "ce_ib": 2.1055665016174316, + "ce_orig": 0.09904298186302185, + "epoch": 0.24272054065712848, + "kl_loss": 0.27453792095184326, + "loss_ib": 0.0029559358954429626, + "step": 844 + }, + { + "ce_ib": 9.203268051147461, + "ce_orig": 0.764379620552063, + "epoch": 0.24272054065712848, + "kl_loss": 0.1489913910627365, + "loss_ib": 0.0024102407041937113, + "step": 844 + }, + { + "ce_ib": 6.683197498321533, + "ce_orig": 0.6841699481010437, + "epoch": 0.24272054065712848, + "kl_loss": 0.12199226766824722, + "loss_ib": 0.0018882423173636198, + "step": 844 + }, + { + "ce_ib": 9.931221961975098, + "ce_orig": 0.8138086795806885, + "epoch": 0.24272054065712848, + "kl_loss": 0.12627676129341125, + "loss_ib": 0.002255889819934964, + "step": 844 + }, + { + "epoch": 0.24300812423610613, + "grad_norm": 0.09755532443523407, + "learning_rate": 4.96687119091643e-05, + "loss": 0.8374, + "step": 845 + }, + { + "ce_ib": 8.698118209838867, + "ce_orig": 0.943545401096344, + "epoch": 0.24300812423610613, + "kl_loss": 0.07969736307859421, + "loss_ib": 0.0016667854506522417, + "step": 845 + }, + { + "ce_ib": 8.357364654541016, + "ce_orig": 1.0275182723999023, + "epoch": 0.24300812423610613, + "kl_loss": 0.15439343452453613, + "loss_ib": 0.002379670739173889, + "step": 845 + }, + { + "ce_ib": 6.27513313293457, + "ce_orig": 0.6612330079078674, + "epoch": 0.24300812423610613, + "kl_loss": 0.0844767689704895, + "loss_ib": 0.0014722809428349137, + "step": 845 + }, + { + "ce_ib": 9.774298667907715, + "ce_orig": 1.1108872890472412, + "epoch": 0.24300812423610613, + "kl_loss": 0.1235627681016922, + "loss_ib": 0.0022130575962364674, + "step": 845 + }, + { + "ce_ib": 7.689428329467773, + "ce_orig": 1.050940990447998, + "epoch": 0.24329570781508375, + "kl_loss": 0.16950029134750366, + "loss_ib": 0.0024639456532895565, + "step": 846 + }, + { + "ce_ib": 11.597556114196777, + "ce_orig": 0.8476027846336365, + "epoch": 0.24329570781508375, + "kl_loss": 0.22601580619812012, + "loss_ib": 0.0034199135843664408, + "step": 846 + }, + { + "ce_ib": 9.092070579528809, + "ce_orig": 0.7816643118858337, + "epoch": 0.24329570781508375, + "kl_loss": 0.16418534517288208, + "loss_ib": 0.0025510601699352264, + "step": 846 + }, + { + "ce_ib": 8.94192123413086, + "ce_orig": 0.9194644093513489, + "epoch": 0.24329570781508375, + "kl_loss": 0.2698814570903778, + "loss_ib": 0.0035930066369473934, + "step": 846 + }, + { + "ce_ib": 6.7314372062683105, + "ce_orig": 0.6977682113647461, + "epoch": 0.2435832913940614, + "kl_loss": 0.12507882714271545, + "loss_ib": 0.0019239319954067469, + "step": 847 + }, + { + "ce_ib": 8.144753456115723, + "ce_orig": 0.8696945309638977, + "epoch": 0.2435832913940614, + "kl_loss": 0.18285347521305084, + "loss_ib": 0.0026430098805576563, + "step": 847 + }, + { + "ce_ib": 6.152936935424805, + "ce_orig": 0.5545529127120972, + "epoch": 0.2435832913940614, + "kl_loss": 0.08746962249279022, + "loss_ib": 0.0014899899251759052, + "step": 847 + }, + { + "ce_ib": 7.335354804992676, + "ce_orig": 0.7962471842765808, + "epoch": 0.2435832913940614, + "kl_loss": 0.08078482747077942, + "loss_ib": 0.0015413836808875203, + "step": 847 + }, + { + "ce_ib": 10.031454086303711, + "ce_orig": 1.227720022201538, + "epoch": 0.24387087497303903, + "kl_loss": 0.15805502235889435, + "loss_ib": 0.002583695575594902, + "step": 848 + }, + { + "ce_ib": 7.987579345703125, + "ce_orig": 0.7593538761138916, + "epoch": 0.24387087497303903, + "kl_loss": 0.0982484295964241, + "loss_ib": 0.0017812422011047602, + "step": 848 + }, + { + "ce_ib": 4.435133457183838, + "ce_orig": 0.5653334856033325, + "epoch": 0.24387087497303903, + "kl_loss": 0.14194026589393616, + "loss_ib": 0.0018629160476848483, + "step": 848 + }, + { + "ce_ib": 6.288003921508789, + "ce_orig": 0.6333604454994202, + "epoch": 0.24387087497303903, + "kl_loss": 0.14213651418685913, + "loss_ib": 0.0020501655526459217, + "step": 848 + }, + { + "ce_ib": 5.361220836639404, + "ce_orig": 0.5679022073745728, + "epoch": 0.24415845855201668, + "kl_loss": 0.12149189412593842, + "loss_ib": 0.001751040923409164, + "step": 849 + }, + { + "ce_ib": 8.351764678955078, + "ce_orig": 1.017719030380249, + "epoch": 0.24415845855201668, + "kl_loss": 0.18313950300216675, + "loss_ib": 0.0026665714103728533, + "step": 849 + }, + { + "ce_ib": 6.253474712371826, + "ce_orig": 0.9022907614707947, + "epoch": 0.24415845855201668, + "kl_loss": 0.1341339647769928, + "loss_ib": 0.0019666871521621943, + "step": 849 + }, + { + "ce_ib": 5.258345127105713, + "ce_orig": 0.5824132561683655, + "epoch": 0.24415845855201668, + "kl_loss": 0.1137775108218193, + "loss_ib": 0.001663609524257481, + "step": 849 + }, + { + "epoch": 0.24444604213099433, + "grad_norm": 0.10801159590482712, + "learning_rate": 4.9662385919830347e-05, + "loss": 0.8029, + "step": 850 + }, + { + "ce_ib": 5.604668617248535, + "ce_orig": 0.5457404255867004, + "epoch": 0.24444604213099433, + "kl_loss": 0.12972185015678406, + "loss_ib": 0.0018576852744445205, + "step": 850 + }, + { + "ce_ib": 9.36043643951416, + "ce_orig": 1.0641096830368042, + "epoch": 0.24444604213099433, + "kl_loss": 0.16872264444828033, + "loss_ib": 0.002623270032927394, + "step": 850 + }, + { + "ce_ib": 10.290757179260254, + "ce_orig": 1.258750319480896, + "epoch": 0.24444604213099433, + "kl_loss": 0.18471182882785797, + "loss_ib": 0.0028761939611285925, + "step": 850 + }, + { + "ce_ib": 8.235913276672363, + "ce_orig": 0.9114575982093811, + "epoch": 0.24444604213099433, + "kl_loss": 0.14525389671325684, + "loss_ib": 0.0022761302534490824, + "step": 850 + }, + { + "ce_ib": 5.972557544708252, + "ce_orig": 0.3176371157169342, + "epoch": 0.24473362570997195, + "kl_loss": 0.1236177608370781, + "loss_ib": 0.001833433285355568, + "step": 851 + }, + { + "ce_ib": 6.819241523742676, + "ce_orig": 0.4694307744503021, + "epoch": 0.24473362570997195, + "kl_loss": 0.13898760080337524, + "loss_ib": 0.0020718001760542393, + "step": 851 + }, + { + "ce_ib": 5.671989917755127, + "ce_orig": 0.7460182309150696, + "epoch": 0.24473362570997195, + "kl_loss": 0.09529311209917068, + "loss_ib": 0.0015201299684122205, + "step": 851 + }, + { + "ce_ib": 6.121541500091553, + "ce_orig": 0.580995500087738, + "epoch": 0.24473362570997195, + "kl_loss": 0.0963808223605156, + "loss_ib": 0.001575962291099131, + "step": 851 + }, + { + "ce_ib": 11.055216789245605, + "ce_orig": 1.4706339836120605, + "epoch": 0.2450212092889496, + "kl_loss": 0.21470844745635986, + "loss_ib": 0.0032526059076189995, + "step": 852 + }, + { + "ce_ib": 9.577301979064941, + "ce_orig": 0.6492410898208618, + "epoch": 0.2450212092889496, + "kl_loss": 0.15032362937927246, + "loss_ib": 0.0024609663523733616, + "step": 852 + }, + { + "ce_ib": 4.050168514251709, + "ce_orig": 0.366349995136261, + "epoch": 0.2450212092889496, + "kl_loss": 0.16982409358024597, + "loss_ib": 0.00210325769148767, + "step": 852 + }, + { + "ce_ib": 7.699157238006592, + "ce_orig": 0.9390696287155151, + "epoch": 0.2450212092889496, + "kl_loss": 0.1424662470817566, + "loss_ib": 0.0021945780608803034, + "step": 852 + }, + { + "ce_ib": 7.2492804527282715, + "ce_orig": 1.026281476020813, + "epoch": 0.24530879286792723, + "kl_loss": 0.12123741209506989, + "loss_ib": 0.0019373020622879267, + "step": 853 + }, + { + "ce_ib": 5.721704483032227, + "ce_orig": 0.6315010190010071, + "epoch": 0.24530879286792723, + "kl_loss": 0.15391187369823456, + "loss_ib": 0.00211128918454051, + "step": 853 + }, + { + "ce_ib": 3.785083293914795, + "ce_orig": 0.3414902687072754, + "epoch": 0.24530879286792723, + "kl_loss": 0.13184034824371338, + "loss_ib": 0.0016969117568805814, + "step": 853 + }, + { + "ce_ib": 4.662577152252197, + "ce_orig": 0.562096118927002, + "epoch": 0.24530879286792723, + "kl_loss": 0.08554943650960922, + "loss_ib": 0.0013217520900070667, + "step": 853 + }, + { + "ce_ib": 6.567530155181885, + "ce_orig": 0.8952581882476807, + "epoch": 0.24559637644690488, + "kl_loss": 0.27641063928604126, + "loss_ib": 0.0034208595752716064, + "step": 854 + }, + { + "ce_ib": 9.29099178314209, + "ce_orig": 0.5317293405532837, + "epoch": 0.24559637644690488, + "kl_loss": 0.11667747795581818, + "loss_ib": 0.0020958739332854748, + "step": 854 + }, + { + "ce_ib": 5.92125129699707, + "ce_orig": 0.8134939074516296, + "epoch": 0.24559637644690488, + "kl_loss": 0.10207337141036987, + "loss_ib": 0.0016128587303683162, + "step": 854 + }, + { + "ce_ib": 5.6100382804870605, + "ce_orig": 0.7054432034492493, + "epoch": 0.24559637644690488, + "kl_loss": 0.08496654033660889, + "loss_ib": 0.001410669181495905, + "step": 854 + }, + { + "epoch": 0.24588396002588253, + "grad_norm": 0.10967330634593964, + "learning_rate": 4.9656000513084455e-05, + "loss": 0.8293, + "step": 855 + }, + { + "ce_ib": 8.405036926269531, + "ce_orig": 0.5193880796432495, + "epoch": 0.24588396002588253, + "kl_loss": 0.16030897200107574, + "loss_ib": 0.0024435934610664845, + "step": 855 + }, + { + "ce_ib": 6.491952896118164, + "ce_orig": 0.8281493186950684, + "epoch": 0.24588396002588253, + "kl_loss": 0.09462558478116989, + "loss_ib": 0.0015954510308802128, + "step": 855 + }, + { + "ce_ib": 6.649583339691162, + "ce_orig": 0.7275351881980896, + "epoch": 0.24588396002588253, + "kl_loss": 0.11419142782688141, + "loss_ib": 0.0018068724311888218, + "step": 855 + }, + { + "ce_ib": 8.009108543395996, + "ce_orig": 1.0409449338912964, + "epoch": 0.24588396002588253, + "kl_loss": 0.12785518169403076, + "loss_ib": 0.002079462632536888, + "step": 855 + }, + { + "ce_ib": 5.276194095611572, + "ce_orig": 0.44244295358657837, + "epoch": 0.24617154360486015, + "kl_loss": 0.1523188054561615, + "loss_ib": 0.0020508074667304754, + "step": 856 + }, + { + "ce_ib": 7.91589879989624, + "ce_orig": 0.9207080006599426, + "epoch": 0.24617154360486015, + "kl_loss": 0.1827230304479599, + "loss_ib": 0.0026188199408352375, + "step": 856 + }, + { + "ce_ib": 8.683913230895996, + "ce_orig": 0.8899644017219543, + "epoch": 0.24617154360486015, + "kl_loss": 0.14581285417079926, + "loss_ib": 0.0023265196941792965, + "step": 856 + }, + { + "ce_ib": 8.30103874206543, + "ce_orig": 0.948857843875885, + "epoch": 0.24617154360486015, + "kl_loss": 0.16537992656230927, + "loss_ib": 0.0024839031975716352, + "step": 856 + }, + { + "ce_ib": 9.835265159606934, + "ce_orig": 1.3811537027359009, + "epoch": 0.2464591271838378, + "kl_loss": 0.13627251982688904, + "loss_ib": 0.0023462516255676746, + "step": 857 + }, + { + "ce_ib": 6.258663177490234, + "ce_orig": 0.6032944917678833, + "epoch": 0.2464591271838378, + "kl_loss": 0.1421205997467041, + "loss_ib": 0.00204707239754498, + "step": 857 + }, + { + "ce_ib": 4.90713357925415, + "ce_orig": 0.6079412698745728, + "epoch": 0.2464591271838378, + "kl_loss": 0.1172361895442009, + "loss_ib": 0.0016630751779302955, + "step": 857 + }, + { + "ce_ib": 12.298185348510742, + "ce_orig": 1.3265632390975952, + "epoch": 0.2464591271838378, + "kl_loss": 0.14415337145328522, + "loss_ib": 0.002671352354809642, + "step": 857 + }, + { + "ce_ib": 2.617100954055786, + "ce_orig": 0.17996713519096375, + "epoch": 0.24674671076281543, + "kl_loss": 0.3445594608783722, + "loss_ib": 0.00370730459690094, + "step": 858 + }, + { + "ce_ib": 7.720390796661377, + "ce_orig": 0.7807556986808777, + "epoch": 0.24674671076281543, + "kl_loss": 0.15011197328567505, + "loss_ib": 0.002273158635944128, + "step": 858 + }, + { + "ce_ib": 8.279568672180176, + "ce_orig": 0.8452048301696777, + "epoch": 0.24674671076281543, + "kl_loss": 0.19392766058444977, + "loss_ib": 0.0027672334108501673, + "step": 858 + }, + { + "ce_ib": 3.605633020401001, + "ce_orig": 0.3069818913936615, + "epoch": 0.24674671076281543, + "kl_loss": 0.2172875851392746, + "loss_ib": 0.0025334390811622143, + "step": 858 + }, + { + "ce_ib": 8.567093849182129, + "ce_orig": 0.745737612247467, + "epoch": 0.24703429434179308, + "kl_loss": 0.12669722735881805, + "loss_ib": 0.0021236815955489874, + "step": 859 + }, + { + "ce_ib": 3.890596389770508, + "ce_orig": 0.3934782147407532, + "epoch": 0.24703429434179308, + "kl_loss": 0.11800628155469894, + "loss_ib": 0.0015691223088651896, + "step": 859 + }, + { + "ce_ib": 5.680053234100342, + "ce_orig": 0.7315199375152588, + "epoch": 0.24703429434179308, + "kl_loss": 0.10949228703975677, + "loss_ib": 0.001662928145378828, + "step": 859 + }, + { + "ce_ib": 5.438368320465088, + "ce_orig": 0.7946762442588806, + "epoch": 0.24703429434179308, + "kl_loss": 0.1166611984372139, + "loss_ib": 0.0017104488797485828, + "step": 859 + }, + { + "epoch": 0.24732187792077073, + "grad_norm": 0.08520792424678802, + "learning_rate": 4.964955570431055e-05, + "loss": 0.8513, + "step": 860 + }, + { + "ce_ib": 8.245199203491211, + "ce_orig": 1.2291977405548096, + "epoch": 0.24732187792077073, + "kl_loss": 0.12287493050098419, + "loss_ib": 0.002053269185125828, + "step": 860 + }, + { + "ce_ib": 7.371245384216309, + "ce_orig": 0.7984662652015686, + "epoch": 0.24732187792077073, + "kl_loss": 0.0932641252875328, + "loss_ib": 0.0016697656828910112, + "step": 860 + }, + { + "ce_ib": 9.231488227844238, + "ce_orig": 1.1531388759613037, + "epoch": 0.24732187792077073, + "kl_loss": 0.17074084281921387, + "loss_ib": 0.0026305571664124727, + "step": 860 + }, + { + "ce_ib": 10.435407638549805, + "ce_orig": 1.37950599193573, + "epoch": 0.24732187792077073, + "kl_loss": 0.1756286323070526, + "loss_ib": 0.002799827139824629, + "step": 860 + }, + { + "ce_ib": 5.245795726776123, + "ce_orig": 0.8678305745124817, + "epoch": 0.24760946149974836, + "kl_loss": 0.10350409895181656, + "loss_ib": 0.0015596204902976751, + "step": 861 + }, + { + "ce_ib": 8.952935218811035, + "ce_orig": 0.7108750939369202, + "epoch": 0.24760946149974836, + "kl_loss": 0.11388901621103287, + "loss_ib": 0.002034183591604233, + "step": 861 + }, + { + "ce_ib": 9.032291412353516, + "ce_orig": 0.5742269158363342, + "epoch": 0.24760946149974836, + "kl_loss": 0.1395682990550995, + "loss_ib": 0.0022989120334386826, + "step": 861 + }, + { + "ce_ib": 4.5656280517578125, + "ce_orig": 0.6355873942375183, + "epoch": 0.24760946149974836, + "kl_loss": 0.15184998512268066, + "loss_ib": 0.0019750625360757113, + "step": 861 + }, + { + "ce_ib": 7.731656551361084, + "ce_orig": 0.8927205204963684, + "epoch": 0.247897045078726, + "kl_loss": 0.14044861495494843, + "loss_ib": 0.0021776517387479544, + "step": 862 + }, + { + "ce_ib": 7.413579940795898, + "ce_orig": 0.5944902300834656, + "epoch": 0.247897045078726, + "kl_loss": 0.19562631845474243, + "loss_ib": 0.0026976210065186024, + "step": 862 + }, + { + "ce_ib": 7.817788600921631, + "ce_orig": 0.5387774705886841, + "epoch": 0.247897045078726, + "kl_loss": 0.1566201001405716, + "loss_ib": 0.0023479796946048737, + "step": 862 + }, + { + "ce_ib": 7.256852149963379, + "ce_orig": 0.6728155612945557, + "epoch": 0.247897045078726, + "kl_loss": 0.18778842687606812, + "loss_ib": 0.002603569533675909, + "step": 862 + }, + { + "ce_ib": 5.361456394195557, + "ce_orig": 0.7745912075042725, + "epoch": 0.24818462865770363, + "kl_loss": 0.08714807778596878, + "loss_ib": 0.0014076264342293143, + "step": 863 + }, + { + "ce_ib": 5.879522800445557, + "ce_orig": 0.4841998219490051, + "epoch": 0.24818462865770363, + "kl_loss": 0.10464229434728622, + "loss_ib": 0.0016343750758096576, + "step": 863 + }, + { + "ce_ib": 11.785419464111328, + "ce_orig": 1.7475069761276245, + "epoch": 0.24818462865770363, + "kl_loss": 0.12912291288375854, + "loss_ib": 0.0024697710759937763, + "step": 863 + }, + { + "ce_ib": 12.430148124694824, + "ce_orig": 1.5259690284729004, + "epoch": 0.24818462865770363, + "kl_loss": 0.49463123083114624, + "loss_ib": 0.006189327221363783, + "step": 863 + }, + { + "ce_ib": 9.013341903686523, + "ce_orig": 0.6152147650718689, + "epoch": 0.24847221223668128, + "kl_loss": 0.2053050994873047, + "loss_ib": 0.002954385243356228, + "step": 864 + }, + { + "ce_ib": 9.306499481201172, + "ce_orig": 1.2726976871490479, + "epoch": 0.24847221223668128, + "kl_loss": 0.14018534123897552, + "loss_ib": 0.0023325032088905573, + "step": 864 + }, + { + "ce_ib": 9.57296371459961, + "ce_orig": 0.7869003415107727, + "epoch": 0.24847221223668128, + "kl_loss": 0.17775607109069824, + "loss_ib": 0.0027348571456968784, + "step": 864 + }, + { + "ce_ib": 7.739504814147949, + "ce_orig": 1.0867712497711182, + "epoch": 0.24847221223668128, + "kl_loss": 0.11266046017408371, + "loss_ib": 0.0019005549838766456, + "step": 864 + }, + { + "epoch": 0.24875979581565894, + "grad_norm": 0.09175752103328705, + "learning_rate": 4.964305150903566e-05, + "loss": 0.901, + "step": 865 + }, + { + "ce_ib": 9.967638969421387, + "ce_orig": 0.9802643656730652, + "epoch": 0.24875979581565894, + "kl_loss": 0.14677110314369202, + "loss_ib": 0.0024644748773425817, + "step": 865 + }, + { + "ce_ib": 10.069565773010254, + "ce_orig": 0.9357901811599731, + "epoch": 0.24875979581565894, + "kl_loss": 0.18884184956550598, + "loss_ib": 0.002895374782383442, + "step": 865 + }, + { + "ce_ib": 10.978303909301758, + "ce_orig": 1.3109445571899414, + "epoch": 0.24875979581565894, + "kl_loss": 0.17037174105644226, + "loss_ib": 0.0028015475254505873, + "step": 865 + }, + { + "ce_ib": 5.982998371124268, + "ce_orig": 0.558401882648468, + "epoch": 0.24875979581565894, + "kl_loss": 0.17013560235500336, + "loss_ib": 0.002299655694514513, + "step": 865 + }, + { + "ce_ib": 6.899177074432373, + "ce_orig": 0.7828370928764343, + "epoch": 0.24904737939463656, + "kl_loss": 0.19635936617851257, + "loss_ib": 0.0026535114739090204, + "step": 866 + }, + { + "ce_ib": 6.190727710723877, + "ce_orig": 0.6056550145149231, + "epoch": 0.24904737939463656, + "kl_loss": 0.16528858244419098, + "loss_ib": 0.002271958626806736, + "step": 866 + }, + { + "ce_ib": 10.652737617492676, + "ce_orig": 1.5506879091262817, + "epoch": 0.24904737939463656, + "kl_loss": 0.16518110036849976, + "loss_ib": 0.0027170847170054913, + "step": 866 + }, + { + "ce_ib": 8.15753173828125, + "ce_orig": 1.1266238689422607, + "epoch": 0.24904737939463656, + "kl_loss": 0.1654905378818512, + "loss_ib": 0.002470658626407385, + "step": 866 + }, + { + "ce_ib": 5.835992336273193, + "ce_orig": 0.5801675915718079, + "epoch": 0.2493349629736142, + "kl_loss": 0.15277384221553802, + "loss_ib": 0.00211133761331439, + "step": 867 + }, + { + "ce_ib": 6.383537769317627, + "ce_orig": 0.8467006087303162, + "epoch": 0.2493349629736142, + "kl_loss": 0.11249042302370071, + "loss_ib": 0.001763257896527648, + "step": 867 + }, + { + "ce_ib": 15.002373695373535, + "ce_orig": 2.252410650253296, + "epoch": 0.2493349629736142, + "kl_loss": 0.15410232543945312, + "loss_ib": 0.0030412604101002216, + "step": 867 + }, + { + "ce_ib": 7.483471393585205, + "ce_orig": 0.5316120982170105, + "epoch": 0.2493349629736142, + "kl_loss": 0.3930402994155884, + "loss_ib": 0.004678749945014715, + "step": 867 + }, + { + "ce_ib": 8.862229347229004, + "ce_orig": 0.9752901792526245, + "epoch": 0.24962254655259183, + "kl_loss": 0.1402927041053772, + "loss_ib": 0.002289149910211563, + "step": 868 + }, + { + "ce_ib": 12.469680786132812, + "ce_orig": 1.5079736709594727, + "epoch": 0.24962254655259183, + "kl_loss": 0.18042317032814026, + "loss_ib": 0.0030511999502778053, + "step": 868 + }, + { + "ce_ib": 5.774080753326416, + "ce_orig": 0.8587755560874939, + "epoch": 0.24962254655259183, + "kl_loss": 0.1097964197397232, + "loss_ib": 0.0016753722447901964, + "step": 868 + }, + { + "ce_ib": 9.14037036895752, + "ce_orig": 0.701475977897644, + "epoch": 0.24962254655259183, + "kl_loss": 0.3841056227684021, + "loss_ib": 0.00475509325042367, + "step": 868 + }, + { + "ce_ib": 8.243424415588379, + "ce_orig": 0.8065715432167053, + "epoch": 0.24991013013156949, + "kl_loss": 0.172191321849823, + "loss_ib": 0.002546255476772785, + "step": 869 + }, + { + "ce_ib": 5.66312313079834, + "ce_orig": 0.771187961101532, + "epoch": 0.24991013013156949, + "kl_loss": 0.06962478160858154, + "loss_ib": 0.0012625601375475526, + "step": 869 + }, + { + "ce_ib": 6.285440921783447, + "ce_orig": 0.7630317211151123, + "epoch": 0.24991013013156949, + "kl_loss": 0.10592949390411377, + "loss_ib": 0.001687839045189321, + "step": 869 + }, + { + "ce_ib": 6.893797397613525, + "ce_orig": 0.7610549926757812, + "epoch": 0.24991013013156949, + "kl_loss": 0.1999196857213974, + "loss_ib": 0.002688576467335224, + "step": 869 + }, + { + "epoch": 0.2501977137105471, + "grad_norm": 0.09903328865766525, + "learning_rate": 4.963648794292992e-05, + "loss": 0.8646, + "step": 870 + }, + { + "ce_ib": 9.630828857421875, + "ce_orig": 1.031785011291504, + "epoch": 0.2501977137105471, + "kl_loss": 0.17289333045482635, + "loss_ib": 0.0026920160744339228, + "step": 870 + }, + { + "ce_ib": 5.168337345123291, + "ce_orig": 0.5879364609718323, + "epoch": 0.2501977137105471, + "kl_loss": 0.13910838961601257, + "loss_ib": 0.0019079175544902682, + "step": 870 + }, + { + "ce_ib": 9.871175765991211, + "ce_orig": 0.9261044859886169, + "epoch": 0.2501977137105471, + "kl_loss": 0.08857513964176178, + "loss_ib": 0.001872868975624442, + "step": 870 + }, + { + "ce_ib": 6.347537994384766, + "ce_orig": 0.9719029068946838, + "epoch": 0.2501977137105471, + "kl_loss": 0.07947400212287903, + "loss_ib": 0.0014294936554506421, + "step": 870 + }, + { + "ce_ib": 9.052336692810059, + "ce_orig": 1.1934534311294556, + "epoch": 0.2504852972895248, + "kl_loss": 0.16040383279323578, + "loss_ib": 0.0025092719588428736, + "step": 871 + }, + { + "ce_ib": 8.992751121520996, + "ce_orig": 0.9628534317016602, + "epoch": 0.2504852972895248, + "kl_loss": 0.10795988142490387, + "loss_ib": 0.001978873973712325, + "step": 871 + }, + { + "ce_ib": 6.835910797119141, + "ce_orig": 0.41721463203430176, + "epoch": 0.2504852972895248, + "kl_loss": 0.22937476634979248, + "loss_ib": 0.002977338619530201, + "step": 871 + }, + { + "ce_ib": 6.061643123626709, + "ce_orig": 0.7965611815452576, + "epoch": 0.2504852972895248, + "kl_loss": 0.10941595584154129, + "loss_ib": 0.001700323773548007, + "step": 871 + }, + { + "ce_ib": 4.520278453826904, + "ce_orig": 0.34330859780311584, + "epoch": 0.2507728808685024, + "kl_loss": 0.3076925277709961, + "loss_ib": 0.0035289530642330647, + "step": 872 + }, + { + "ce_ib": 5.3499250411987305, + "ce_orig": 0.7291832566261292, + "epoch": 0.2507728808685024, + "kl_loss": 0.24808456003665924, + "loss_ib": 0.0030158378649502993, + "step": 872 + }, + { + "ce_ib": 6.289168357849121, + "ce_orig": 0.5440212488174438, + "epoch": 0.2507728808685024, + "kl_loss": 0.1381717026233673, + "loss_ib": 0.002010633936151862, + "step": 872 + }, + { + "ce_ib": 4.648695945739746, + "ce_orig": 0.5465182662010193, + "epoch": 0.2507728808685024, + "kl_loss": 0.11056109517812729, + "loss_ib": 0.0015704804100096226, + "step": 872 + }, + { + "ce_ib": 7.006433963775635, + "ce_orig": 1.0426201820373535, + "epoch": 0.25106046444748004, + "kl_loss": 0.10194164514541626, + "loss_ib": 0.001720059779472649, + "step": 873 + }, + { + "ce_ib": 6.2683305740356445, + "ce_orig": 0.6865673065185547, + "epoch": 0.25106046444748004, + "kl_loss": 0.15586382150650024, + "loss_ib": 0.0021854713559150696, + "step": 873 + }, + { + "ce_ib": 8.336287498474121, + "ce_orig": 0.7352637052536011, + "epoch": 0.25106046444748004, + "kl_loss": 0.14312681555747986, + "loss_ib": 0.002264896873384714, + "step": 873 + }, + { + "ce_ib": 5.228641986846924, + "ce_orig": 0.7911911010742188, + "epoch": 0.25106046444748004, + "kl_loss": 0.13875475525856018, + "loss_ib": 0.0019104116363450885, + "step": 873 + }, + { + "ce_ib": 7.886993885040283, + "ce_orig": 0.8877960443496704, + "epoch": 0.2513480480264577, + "kl_loss": 0.21264252066612244, + "loss_ib": 0.0029151246417313814, + "step": 874 + }, + { + "ce_ib": 8.536798477172852, + "ce_orig": 1.23441481590271, + "epoch": 0.2513480480264577, + "kl_loss": 0.12252427637577057, + "loss_ib": 0.002078922698274255, + "step": 874 + }, + { + "ce_ib": 5.5121893882751465, + "ce_orig": 0.7155554294586182, + "epoch": 0.2513480480264577, + "kl_loss": 0.10985850542783737, + "loss_ib": 0.0016498039476573467, + "step": 874 + }, + { + "ce_ib": 8.014837265014648, + "ce_orig": 0.9826061129570007, + "epoch": 0.2513480480264577, + "kl_loss": 0.12555983662605286, + "loss_ib": 0.0020570820197463036, + "step": 874 + }, + { + "epoch": 0.25163563160543534, + "grad_norm": 0.09154003113508224, + "learning_rate": 4.962986502180648e-05, + "loss": 0.8859, + "step": 875 + }, + { + "ce_ib": 10.280941009521484, + "ce_orig": 1.6837189197540283, + "epoch": 0.25163563160543534, + "kl_loss": 0.1310340017080307, + "loss_ib": 0.0023384341038763523, + "step": 875 + }, + { + "ce_ib": 4.716495513916016, + "ce_orig": 0.6339424848556519, + "epoch": 0.25163563160543534, + "kl_loss": 0.1124955490231514, + "loss_ib": 0.0015966049395501614, + "step": 875 + }, + { + "ce_ib": 3.884503126144409, + "ce_orig": 0.2738972306251526, + "epoch": 0.25163563160543534, + "kl_loss": 0.19628892838954926, + "loss_ib": 0.002351339440792799, + "step": 875 + }, + { + "ce_ib": 9.08601188659668, + "ce_orig": 0.8660534620285034, + "epoch": 0.25163563160543534, + "kl_loss": 0.15325835347175598, + "loss_ib": 0.0024411845952272415, + "step": 875 + }, + { + "ce_ib": 10.415492057800293, + "ce_orig": 0.7765947580337524, + "epoch": 0.25192321518441296, + "kl_loss": 0.1637212485074997, + "loss_ib": 0.0026787614915519953, + "step": 876 + }, + { + "ce_ib": 10.087233543395996, + "ce_orig": 1.2951592206954956, + "epoch": 0.25192321518441296, + "kl_loss": 0.14713844656944275, + "loss_ib": 0.0024801078252494335, + "step": 876 + }, + { + "ce_ib": 9.932022094726562, + "ce_orig": 1.3337452411651611, + "epoch": 0.25192321518441296, + "kl_loss": 0.10245153307914734, + "loss_ib": 0.0020177175756543875, + "step": 876 + }, + { + "ce_ib": 8.801789283752441, + "ce_orig": 1.1899961233139038, + "epoch": 0.25192321518441296, + "kl_loss": 0.13182136416435242, + "loss_ib": 0.002198392292484641, + "step": 876 + }, + { + "ce_ib": 4.309330463409424, + "ce_orig": 0.517691433429718, + "epoch": 0.2522107987633906, + "kl_loss": 0.09331747889518738, + "loss_ib": 0.001364107825793326, + "step": 877 + }, + { + "ce_ib": 13.823863983154297, + "ce_orig": 1.6123818159103394, + "epoch": 0.2522107987633906, + "kl_loss": 0.15045878291130066, + "loss_ib": 0.0028869742527604103, + "step": 877 + }, + { + "ce_ib": 5.531408786773682, + "ce_orig": 0.622874915599823, + "epoch": 0.2522107987633906, + "kl_loss": 0.09007586538791656, + "loss_ib": 0.001453899429179728, + "step": 877 + }, + { + "ce_ib": 7.4732136726379395, + "ce_orig": 0.40653085708618164, + "epoch": 0.2522107987633906, + "kl_loss": 0.1261965036392212, + "loss_ib": 0.0020092863123863935, + "step": 877 + }, + { + "ce_ib": 10.97634220123291, + "ce_orig": 1.196432113647461, + "epoch": 0.25249838234236827, + "kl_loss": 0.14364852011203766, + "loss_ib": 0.002534119412302971, + "step": 878 + }, + { + "ce_ib": 7.987130165100098, + "ce_orig": 0.8016747832298279, + "epoch": 0.25249838234236827, + "kl_loss": 0.14365743100643158, + "loss_ib": 0.0022352873347699642, + "step": 878 + }, + { + "ce_ib": 5.696638107299805, + "ce_orig": 0.7144477367401123, + "epoch": 0.25249838234236827, + "kl_loss": 0.12607070803642273, + "loss_ib": 0.0018303708638995886, + "step": 878 + }, + { + "ce_ib": 6.691961288452148, + "ce_orig": 0.7480747699737549, + "epoch": 0.25249838234236827, + "kl_loss": 0.18506991863250732, + "loss_ib": 0.0025198953226208687, + "step": 878 + }, + { + "ce_ib": 7.895229816436768, + "ce_orig": 0.43932151794433594, + "epoch": 0.2527859659213459, + "kl_loss": 0.13867847621440887, + "loss_ib": 0.0021763076074421406, + "step": 879 + }, + { + "ce_ib": 9.321691513061523, + "ce_orig": 0.8994438052177429, + "epoch": 0.2527859659213459, + "kl_loss": 0.10325250029563904, + "loss_ib": 0.001964694121852517, + "step": 879 + }, + { + "ce_ib": 10.132121086120605, + "ce_orig": 0.9195699095726013, + "epoch": 0.2527859659213459, + "kl_loss": 0.12802192568778992, + "loss_ib": 0.0022934312000870705, + "step": 879 + }, + { + "ce_ib": 4.005758285522461, + "ce_orig": 0.6068211793899536, + "epoch": 0.2527859659213459, + "kl_loss": 0.08329135924577713, + "loss_ib": 0.0012334893690422177, + "step": 879 + }, + { + "epoch": 0.2530735495003235, + "grad_norm": 0.09337525814771652, + "learning_rate": 4.962318276162148e-05, + "loss": 0.8195, + "step": 880 + }, + { + "ce_ib": 6.823204517364502, + "ce_orig": 1.1319113969802856, + "epoch": 0.2530735495003235, + "kl_loss": 0.14816519618034363, + "loss_ib": 0.0021639724727720022, + "step": 880 + }, + { + "ce_ib": 10.786904335021973, + "ce_orig": 1.6341019868850708, + "epoch": 0.2530735495003235, + "kl_loss": 0.28171437978744507, + "loss_ib": 0.0038958340883255005, + "step": 880 + }, + { + "ce_ib": 9.805071830749512, + "ce_orig": 1.169114589691162, + "epoch": 0.2530735495003235, + "kl_loss": 0.15983524918556213, + "loss_ib": 0.002578859683126211, + "step": 880 + }, + { + "ce_ib": 11.32461929321289, + "ce_orig": 1.619084358215332, + "epoch": 0.2530735495003235, + "kl_loss": 0.18278929591178894, + "loss_ib": 0.002960354555398226, + "step": 880 + }, + { + "ce_ib": 5.599311828613281, + "ce_orig": 0.800615668296814, + "epoch": 0.2533611330793012, + "kl_loss": 0.15062254667282104, + "loss_ib": 0.002066156594082713, + "step": 881 + }, + { + "ce_ib": 10.209794998168945, + "ce_orig": 0.7918082475662231, + "epoch": 0.2533611330793012, + "kl_loss": 0.22898060083389282, + "loss_ib": 0.0033107856288552284, + "step": 881 + }, + { + "ce_ib": 10.093332290649414, + "ce_orig": 1.3126697540283203, + "epoch": 0.2533611330793012, + "kl_loss": 0.13278324902057648, + "loss_ib": 0.002337165642529726, + "step": 881 + }, + { + "ce_ib": 7.731043815612793, + "ce_orig": 0.8154935836791992, + "epoch": 0.2533611330793012, + "kl_loss": 0.14035317301750183, + "loss_ib": 0.002176636131480336, + "step": 881 + }, + { + "ce_ib": 6.037160396575928, + "ce_orig": 0.728894829750061, + "epoch": 0.2536487166582788, + "kl_loss": 0.12200887501239777, + "loss_ib": 0.0018238048069179058, + "step": 882 + }, + { + "ce_ib": 5.499096870422363, + "ce_orig": 0.8592274188995361, + "epoch": 0.2536487166582788, + "kl_loss": 0.08512680232524872, + "loss_ib": 0.0014011776074767113, + "step": 882 + }, + { + "ce_ib": 8.627275466918945, + "ce_orig": 0.9206818342208862, + "epoch": 0.2536487166582788, + "kl_loss": 0.11716502904891968, + "loss_ib": 0.00203437777236104, + "step": 882 + }, + { + "ce_ib": 7.004565238952637, + "ce_orig": 0.9188127517700195, + "epoch": 0.2536487166582788, + "kl_loss": 0.17430272698402405, + "loss_ib": 0.0024434837978333235, + "step": 882 + }, + { + "ce_ib": 4.813985347747803, + "ce_orig": 0.5024008750915527, + "epoch": 0.25393630023725644, + "kl_loss": 0.11281563341617584, + "loss_ib": 0.0016095548635348678, + "step": 883 + }, + { + "ce_ib": 7.398796081542969, + "ce_orig": 1.046441674232483, + "epoch": 0.25393630023725644, + "kl_loss": 0.13350337743759155, + "loss_ib": 0.002074913354590535, + "step": 883 + }, + { + "ce_ib": 6.332355976104736, + "ce_orig": 0.6513664126396179, + "epoch": 0.25393630023725644, + "kl_loss": 0.12430296093225479, + "loss_ib": 0.0018762650433927774, + "step": 883 + }, + { + "ce_ib": 3.995943784713745, + "ce_orig": 0.4478011727333069, + "epoch": 0.25393630023725644, + "kl_loss": 0.1076059564948082, + "loss_ib": 0.0014756539603695273, + "step": 883 + }, + { + "ce_ib": 5.7259297370910645, + "ce_orig": 0.7168182730674744, + "epoch": 0.2542238838162341, + "kl_loss": 0.1538955271244049, + "loss_ib": 0.0021115480922162533, + "step": 884 + }, + { + "ce_ib": 9.420291900634766, + "ce_orig": 0.7637949585914612, + "epoch": 0.2542238838162341, + "kl_loss": 0.14228776097297668, + "loss_ib": 0.0023649067152291536, + "step": 884 + }, + { + "ce_ib": 5.211411952972412, + "ce_orig": 0.55478835105896, + "epoch": 0.2542238838162341, + "kl_loss": 0.1502072811126709, + "loss_ib": 0.0020232140086591244, + "step": 884 + }, + { + "ce_ib": 6.278275489807129, + "ce_orig": 0.6644842028617859, + "epoch": 0.2542238838162341, + "kl_loss": 0.10490093380212784, + "loss_ib": 0.001676836865954101, + "step": 884 + }, + { + "epoch": 0.25451146739521174, + "grad_norm": 0.09509612619876862, + "learning_rate": 4.9616441178474044e-05, + "loss": 0.883, + "step": 885 + }, + { + "ce_ib": 10.628482818603516, + "ce_orig": 1.298897385597229, + "epoch": 0.25451146739521174, + "kl_loss": 0.18016132712364197, + "loss_ib": 0.0028644613921642303, + "step": 885 + }, + { + "ce_ib": 8.307840347290039, + "ce_orig": 1.0710110664367676, + "epoch": 0.25451146739521174, + "kl_loss": 0.10537383705377579, + "loss_ib": 0.001884522382169962, + "step": 885 + }, + { + "ce_ib": 7.4360222816467285, + "ce_orig": 0.7180832028388977, + "epoch": 0.25451146739521174, + "kl_loss": 0.0949300155043602, + "loss_ib": 0.0016929024131968617, + "step": 885 + }, + { + "ce_ib": 5.020169258117676, + "ce_orig": 0.6514172554016113, + "epoch": 0.25451146739521174, + "kl_loss": 0.0820763111114502, + "loss_ib": 0.0013227799208834767, + "step": 885 + }, + { + "ce_ib": 9.22244644165039, + "ce_orig": 1.4875003099441528, + "epoch": 0.25479905097418937, + "kl_loss": 0.09314162284135818, + "loss_ib": 0.0018536609131842852, + "step": 886 + }, + { + "ce_ib": 5.483860492706299, + "ce_orig": 0.8450250625610352, + "epoch": 0.25479905097418937, + "kl_loss": 0.10762491822242737, + "loss_ib": 0.0016246350714936852, + "step": 886 + }, + { + "ce_ib": 3.3092868328094482, + "ce_orig": 0.42637813091278076, + "epoch": 0.25479905097418937, + "kl_loss": 0.07745881378650665, + "loss_ib": 0.0011055167997255921, + "step": 886 + }, + { + "ce_ib": 7.81026029586792, + "ce_orig": 0.7796308398246765, + "epoch": 0.25479905097418937, + "kl_loss": 0.14506946504116058, + "loss_ib": 0.0022317206021398306, + "step": 886 + }, + { + "ce_ib": 11.031375885009766, + "ce_orig": 1.4328182935714722, + "epoch": 0.255086634553167, + "kl_loss": 0.18477189540863037, + "loss_ib": 0.0029508566949516535, + "step": 887 + }, + { + "ce_ib": 6.726977825164795, + "ce_orig": 0.8052558302879333, + "epoch": 0.255086634553167, + "kl_loss": 0.14022012054920197, + "loss_ib": 0.0020748989190906286, + "step": 887 + }, + { + "ce_ib": 5.699434280395508, + "ce_orig": 0.6416595578193665, + "epoch": 0.255086634553167, + "kl_loss": 0.17253366112709045, + "loss_ib": 0.002295280108228326, + "step": 887 + }, + { + "ce_ib": 3.5943145751953125, + "ce_orig": 0.3237018585205078, + "epoch": 0.255086634553167, + "kl_loss": 0.13443215191364288, + "loss_ib": 0.001703753019683063, + "step": 887 + }, + { + "ce_ib": 8.55632209777832, + "ce_orig": 1.148319125175476, + "epoch": 0.25537421813214467, + "kl_loss": 0.12872056663036346, + "loss_ib": 0.002142837969586253, + "step": 888 + }, + { + "ce_ib": 8.510859489440918, + "ce_orig": 1.0213509798049927, + "epoch": 0.25537421813214467, + "kl_loss": 0.13635118305683136, + "loss_ib": 0.0022145977709442377, + "step": 888 + }, + { + "ce_ib": 7.644900321960449, + "ce_orig": 1.0376789569854736, + "epoch": 0.25537421813214467, + "kl_loss": 0.16750267148017883, + "loss_ib": 0.002439516829326749, + "step": 888 + }, + { + "ce_ib": 5.5938920974731445, + "ce_orig": 0.7180891036987305, + "epoch": 0.25537421813214467, + "kl_loss": 0.1146186962723732, + "loss_ib": 0.0017055762000381947, + "step": 888 + }, + { + "ce_ib": 7.188848972320557, + "ce_orig": 0.9195379018783569, + "epoch": 0.2556618017111223, + "kl_loss": 0.18785634636878967, + "loss_ib": 0.002597448183223605, + "step": 889 + }, + { + "ce_ib": 6.305577754974365, + "ce_orig": 0.7665948271751404, + "epoch": 0.2556618017111223, + "kl_loss": 0.1667861044406891, + "loss_ib": 0.0022984188981354237, + "step": 889 + }, + { + "ce_ib": 5.9111199378967285, + "ce_orig": 0.6647948026657104, + "epoch": 0.2556618017111223, + "kl_loss": 0.11888445913791656, + "loss_ib": 0.0017799565102905035, + "step": 889 + }, + { + "ce_ib": 7.212058067321777, + "ce_orig": 1.0140317678451538, + "epoch": 0.2556618017111223, + "kl_loss": 0.14530731737613678, + "loss_ib": 0.0021742789540439844, + "step": 889 + }, + { + "epoch": 0.2559493852900999, + "grad_norm": 0.08070097863674164, + "learning_rate": 4.9609640288606205e-05, + "loss": 0.8527, + "step": 890 + }, + { + "ce_ib": 7.837732315063477, + "ce_orig": 0.9493030905723572, + "epoch": 0.2559493852900999, + "kl_loss": 0.13700008392333984, + "loss_ib": 0.0021537740249186754, + "step": 890 + }, + { + "ce_ib": 8.325454711914062, + "ce_orig": 0.43459352850914, + "epoch": 0.2559493852900999, + "kl_loss": 0.18098728358745575, + "loss_ib": 0.002642418025061488, + "step": 890 + }, + { + "ce_ib": 4.257238864898682, + "ce_orig": 0.5357754826545715, + "epoch": 0.2559493852900999, + "kl_loss": 0.11766894906759262, + "loss_ib": 0.0016024133656173944, + "step": 890 + }, + { + "ce_ib": 5.715028285980225, + "ce_orig": 0.5111578702926636, + "epoch": 0.2559493852900999, + "kl_loss": 0.11944234371185303, + "loss_ib": 0.0017659261357039213, + "step": 890 + }, + { + "ce_ib": 7.341698169708252, + "ce_orig": 0.5357319712638855, + "epoch": 0.2562369688690776, + "kl_loss": 0.17682841420173645, + "loss_ib": 0.0025024539791047573, + "step": 891 + }, + { + "ce_ib": 8.842231750488281, + "ce_orig": 0.9881081581115723, + "epoch": 0.2562369688690776, + "kl_loss": 0.13770417869091034, + "loss_ib": 0.0022612649481743574, + "step": 891 + }, + { + "ce_ib": 8.00112247467041, + "ce_orig": 1.0665711164474487, + "epoch": 0.2562369688690776, + "kl_loss": 0.3117631673812866, + "loss_ib": 0.003917743917554617, + "step": 891 + }, + { + "ce_ib": 5.529026508331299, + "ce_orig": 0.5637280344963074, + "epoch": 0.2562369688690776, + "kl_loss": 0.18045048415660858, + "loss_ib": 0.002357407473027706, + "step": 891 + }, + { + "ce_ib": 6.365163803100586, + "ce_orig": 0.7977558970451355, + "epoch": 0.2565245524480552, + "kl_loss": 0.14809830486774445, + "loss_ib": 0.00211749947629869, + "step": 892 + }, + { + "ce_ib": 6.874579429626465, + "ce_orig": 0.4706336259841919, + "epoch": 0.2565245524480552, + "kl_loss": 0.14625059068202972, + "loss_ib": 0.00214996375143528, + "step": 892 + }, + { + "ce_ib": 7.569476127624512, + "ce_orig": 1.0135836601257324, + "epoch": 0.2565245524480552, + "kl_loss": 0.15987078845500946, + "loss_ib": 0.0023556554224342108, + "step": 892 + }, + { + "ce_ib": 10.080713272094727, + "ce_orig": 0.8937276601791382, + "epoch": 0.2565245524480552, + "kl_loss": 0.10532618314027786, + "loss_ib": 0.002061333041638136, + "step": 892 + }, + { + "ce_ib": 3.6760833263397217, + "ce_orig": 0.3286120891571045, + "epoch": 0.25681213602703284, + "kl_loss": 0.1290312111377716, + "loss_ib": 0.0016579204238951206, + "step": 893 + }, + { + "ce_ib": 8.532036781311035, + "ce_orig": 0.9751139283180237, + "epoch": 0.25681213602703284, + "kl_loss": 0.177982360124588, + "loss_ib": 0.0026330272667109966, + "step": 893 + }, + { + "ce_ib": 7.181567668914795, + "ce_orig": 0.8265181183815002, + "epoch": 0.25681213602703284, + "kl_loss": 0.11877339333295822, + "loss_ib": 0.0019058906473219395, + "step": 893 + }, + { + "ce_ib": 6.04387092590332, + "ce_orig": 0.7296001315116882, + "epoch": 0.25681213602703284, + "kl_loss": 0.07870463281869888, + "loss_ib": 0.0013914334122091532, + "step": 893 + }, + { + "ce_ib": 4.4211602210998535, + "ce_orig": 0.5287713408470154, + "epoch": 0.2570997196060105, + "kl_loss": 0.09574338793754578, + "loss_ib": 0.0013995497720316052, + "step": 894 + }, + { + "ce_ib": 6.459323883056641, + "ce_orig": 0.6177991628646851, + "epoch": 0.2570997196060105, + "kl_loss": 0.1003279983997345, + "loss_ib": 0.0016492123249918222, + "step": 894 + }, + { + "ce_ib": 8.493982315063477, + "ce_orig": 1.0795583724975586, + "epoch": 0.2570997196060105, + "kl_loss": 0.2378617227077484, + "loss_ib": 0.0032280152663588524, + "step": 894 + }, + { + "ce_ib": 8.2710599899292, + "ce_orig": 0.8991292715072632, + "epoch": 0.2570997196060105, + "kl_loss": 0.13314013183116913, + "loss_ib": 0.0021585074719041586, + "step": 894 + }, + { + "epoch": 0.25738730318498815, + "grad_norm": 0.08752463012933731, + "learning_rate": 4.96027801084029e-05, + "loss": 0.8476, + "step": 895 + }, + { + "ce_ib": 7.055402755737305, + "ce_orig": 0.7234551906585693, + "epoch": 0.25738730318498815, + "kl_loss": 0.10340514779090881, + "loss_ib": 0.0017395915929228067, + "step": 895 + }, + { + "ce_ib": 7.390566349029541, + "ce_orig": 0.6613413691520691, + "epoch": 0.25738730318498815, + "kl_loss": 0.19420111179351807, + "loss_ib": 0.002681067446246743, + "step": 895 + }, + { + "ce_ib": 8.047496795654297, + "ce_orig": 0.5606889724731445, + "epoch": 0.25738730318498815, + "kl_loss": 0.1564003825187683, + "loss_ib": 0.002368753543123603, + "step": 895 + }, + { + "ce_ib": 7.199287414550781, + "ce_orig": 0.4812185764312744, + "epoch": 0.25738730318498815, + "kl_loss": 0.15393242239952087, + "loss_ib": 0.0022592528257519007, + "step": 895 + }, + { + "ce_ib": 7.276932239532471, + "ce_orig": 1.0173298120498657, + "epoch": 0.25767488676396577, + "kl_loss": 0.11038987338542938, + "loss_ib": 0.0018315919442102313, + "step": 896 + }, + { + "ce_ib": 13.540701866149902, + "ce_orig": 1.9171059131622314, + "epoch": 0.25767488676396577, + "kl_loss": 0.1913508027791977, + "loss_ib": 0.0032675780821591616, + "step": 896 + }, + { + "ce_ib": 11.850728988647461, + "ce_orig": 1.4846457242965698, + "epoch": 0.25767488676396577, + "kl_loss": 0.15523210167884827, + "loss_ib": 0.0027373938355594873, + "step": 896 + }, + { + "ce_ib": 8.497753143310547, + "ce_orig": 1.1700297594070435, + "epoch": 0.25767488676396577, + "kl_loss": 0.1125074177980423, + "loss_ib": 0.0019748492632061243, + "step": 896 + }, + { + "ce_ib": 8.266855239868164, + "ce_orig": 0.8340995907783508, + "epoch": 0.2579624703429434, + "kl_loss": 0.09629837423563004, + "loss_ib": 0.0017896691570058465, + "step": 897 + }, + { + "ce_ib": 7.022110939025879, + "ce_orig": 1.0352916717529297, + "epoch": 0.2579624703429434, + "kl_loss": 0.08450064808130264, + "loss_ib": 0.0015472176019102335, + "step": 897 + }, + { + "ce_ib": 8.604351043701172, + "ce_orig": 0.8481941223144531, + "epoch": 0.2579624703429434, + "kl_loss": 0.14510974287986755, + "loss_ib": 0.002311532385647297, + "step": 897 + }, + { + "ce_ib": 7.79695987701416, + "ce_orig": 0.8436591029167175, + "epoch": 0.2579624703429434, + "kl_loss": 0.16441552340984344, + "loss_ib": 0.002423851052299142, + "step": 897 + }, + { + "ce_ib": 8.946906089782715, + "ce_orig": 1.135650873184204, + "epoch": 0.2582500539219211, + "kl_loss": 0.17562073469161987, + "loss_ib": 0.0026508979499340057, + "step": 898 + }, + { + "ce_ib": 7.635839939117432, + "ce_orig": 0.8321837782859802, + "epoch": 0.2582500539219211, + "kl_loss": 0.24018771946430206, + "loss_ib": 0.0031654611229896545, + "step": 898 + }, + { + "ce_ib": 5.402726173400879, + "ce_orig": 0.5606065392494202, + "epoch": 0.2582500539219211, + "kl_loss": 0.16093802452087402, + "loss_ib": 0.0021496526896953583, + "step": 898 + }, + { + "ce_ib": 12.75641918182373, + "ce_orig": 1.9636436700820923, + "epoch": 0.2582500539219211, + "kl_loss": 0.14146681129932404, + "loss_ib": 0.002690309891477227, + "step": 898 + }, + { + "ce_ib": 6.529972076416016, + "ce_orig": 0.9304379820823669, + "epoch": 0.2585376375008987, + "kl_loss": 0.09048961102962494, + "loss_ib": 0.001557893236167729, + "step": 899 + }, + { + "ce_ib": 9.07840633392334, + "ce_orig": 0.864215612411499, + "epoch": 0.2585376375008987, + "kl_loss": 0.1602395474910736, + "loss_ib": 0.0025102358777076006, + "step": 899 + }, + { + "ce_ib": 12.58299732208252, + "ce_orig": 1.5156524181365967, + "epoch": 0.2585376375008987, + "kl_loss": 0.13811329007148743, + "loss_ib": 0.0026394324377179146, + "step": 899 + }, + { + "ce_ib": 5.367837429046631, + "ce_orig": 0.3980950117111206, + "epoch": 0.2585376375008987, + "kl_loss": 0.15040189027786255, + "loss_ib": 0.0020408027339726686, + "step": 899 + }, + { + "epoch": 0.2588252210798763, + "grad_norm": 0.10310398787260056, + "learning_rate": 4.959586065439189e-05, + "loss": 0.8364, + "step": 900 + }, + { + "ce_ib": 7.380722522735596, + "ce_orig": 0.7507438063621521, + "epoch": 0.2588252210798763, + "kl_loss": 0.1838665008544922, + "loss_ib": 0.002576737431809306, + "step": 900 + }, + { + "ce_ib": 6.7126383781433105, + "ce_orig": 0.9561918377876282, + "epoch": 0.2588252210798763, + "kl_loss": 0.10371539741754532, + "loss_ib": 0.0017084177816286683, + "step": 900 + }, + { + "ce_ib": 7.612314224243164, + "ce_orig": 0.5231863856315613, + "epoch": 0.2588252210798763, + "kl_loss": 0.12142337113618851, + "loss_ib": 0.001975465100258589, + "step": 900 + }, + { + "ce_ib": 8.406620979309082, + "ce_orig": 0.7458207607269287, + "epoch": 0.2588252210798763, + "kl_loss": 0.18224099278450012, + "loss_ib": 0.0026630719657987356, + "step": 900 + }, + { + "ce_ib": 4.506711483001709, + "ce_orig": 0.4876461923122406, + "epoch": 0.259112804658854, + "kl_loss": 0.09765396267175674, + "loss_ib": 0.001427210634574294, + "step": 901 + }, + { + "ce_ib": 6.801796913146973, + "ce_orig": 0.6831690669059753, + "epoch": 0.259112804658854, + "kl_loss": 0.17530401051044464, + "loss_ib": 0.0024332196917384863, + "step": 901 + }, + { + "ce_ib": 4.268216609954834, + "ce_orig": 0.7346028089523315, + "epoch": 0.259112804658854, + "kl_loss": 0.08807877451181412, + "loss_ib": 0.0013076093746349216, + "step": 901 + }, + { + "ce_ib": 6.742536544799805, + "ce_orig": 0.497639924287796, + "epoch": 0.259112804658854, + "kl_loss": 0.13441647589206696, + "loss_ib": 0.002018418163061142, + "step": 901 + }, + { + "ce_ib": 5.220005512237549, + "ce_orig": 0.7627407312393188, + "epoch": 0.2594003882378316, + "kl_loss": 0.10395920276641846, + "loss_ib": 0.0015615924494341016, + "step": 902 + }, + { + "ce_ib": 6.868656158447266, + "ce_orig": 0.9283803701400757, + "epoch": 0.2594003882378316, + "kl_loss": 0.08269625902175903, + "loss_ib": 0.0015138281742110848, + "step": 902 + }, + { + "ce_ib": 6.296173095703125, + "ce_orig": 0.7904551029205322, + "epoch": 0.2594003882378316, + "kl_loss": 0.11948268860578537, + "loss_ib": 0.0018244441598653793, + "step": 902 + }, + { + "ce_ib": 8.819576263427734, + "ce_orig": 0.9299820065498352, + "epoch": 0.2594003882378316, + "kl_loss": 0.16270163655281067, + "loss_ib": 0.0025089739356189966, + "step": 902 + }, + { + "ce_ib": 4.031962871551514, + "ce_orig": 0.4961770176887512, + "epoch": 0.25968797181680925, + "kl_loss": 0.08937801420688629, + "loss_ib": 0.0012969764648005366, + "step": 903 + }, + { + "ce_ib": 6.408451080322266, + "ce_orig": 0.4822241961956024, + "epoch": 0.25968797181680925, + "kl_loss": 0.1613832265138626, + "loss_ib": 0.0022546774707734585, + "step": 903 + }, + { + "ce_ib": 5.839984893798828, + "ce_orig": 0.38266420364379883, + "epoch": 0.25968797181680925, + "kl_loss": 0.0887475237250328, + "loss_ib": 0.0014714737189933658, + "step": 903 + }, + { + "ce_ib": 7.462576866149902, + "ce_orig": 0.7355101108551025, + "epoch": 0.25968797181680925, + "kl_loss": 0.10309060662984848, + "loss_ib": 0.0017771637067198753, + "step": 903 + }, + { + "ce_ib": 5.721911430358887, + "ce_orig": 0.6964151263237, + "epoch": 0.25997555539578693, + "kl_loss": 0.1606331169605255, + "loss_ib": 0.002178522292524576, + "step": 904 + }, + { + "ce_ib": 8.619745254516602, + "ce_orig": 1.2370572090148926, + "epoch": 0.25997555539578693, + "kl_loss": 0.11422259360551834, + "loss_ib": 0.0020042003598064184, + "step": 904 + }, + { + "ce_ib": 9.898813247680664, + "ce_orig": 0.7860179543495178, + "epoch": 0.25997555539578693, + "kl_loss": 0.13293001055717468, + "loss_ib": 0.002319181337952614, + "step": 904 + }, + { + "ce_ib": 6.747185707092285, + "ce_orig": 0.9206150770187378, + "epoch": 0.25997555539578693, + "kl_loss": 0.08839345723390579, + "loss_ib": 0.0015586530789732933, + "step": 904 + }, + { + "epoch": 0.26026313897476455, + "grad_norm": 0.08432283997535706, + "learning_rate": 4.958888194324374e-05, + "loss": 0.7976, + "step": 905 + }, + { + "ce_ib": 9.342927932739258, + "ce_orig": 0.9755980372428894, + "epoch": 0.26026313897476455, + "kl_loss": 0.16350196301937103, + "loss_ib": 0.0025693124625831842, + "step": 905 + }, + { + "ce_ib": 3.104599952697754, + "ce_orig": 0.15370331704616547, + "epoch": 0.26026313897476455, + "kl_loss": 0.28929489850997925, + "loss_ib": 0.0032034090254455805, + "step": 905 + }, + { + "ce_ib": 7.122314453125, + "ce_orig": 0.8155995607376099, + "epoch": 0.26026313897476455, + "kl_loss": 0.11030598729848862, + "loss_ib": 0.001815291354432702, + "step": 905 + }, + { + "ce_ib": 7.569455146789551, + "ce_orig": 0.924788236618042, + "epoch": 0.26026313897476455, + "kl_loss": 0.16827288269996643, + "loss_ib": 0.002439674222841859, + "step": 905 + }, + { + "ce_ib": 8.322404861450195, + "ce_orig": 0.6664920449256897, + "epoch": 0.2605507225537422, + "kl_loss": 0.19735190272331238, + "loss_ib": 0.002805759198963642, + "step": 906 + }, + { + "ce_ib": 11.654561996459961, + "ce_orig": 1.7791378498077393, + "epoch": 0.2605507225537422, + "kl_loss": 0.16949772834777832, + "loss_ib": 0.0028604334220290184, + "step": 906 + }, + { + "ce_ib": 8.227044105529785, + "ce_orig": 1.3938320875167847, + "epoch": 0.2605507225537422, + "kl_loss": 0.12992843985557556, + "loss_ib": 0.00212198868393898, + "step": 906 + }, + { + "ce_ib": 7.226250171661377, + "ce_orig": 0.25086575746536255, + "epoch": 0.2605507225537422, + "kl_loss": 0.11621251702308655, + "loss_ib": 0.0018847500905394554, + "step": 906 + }, + { + "ce_ib": 8.640125274658203, + "ce_orig": 0.8351717591285706, + "epoch": 0.2608383061327198, + "kl_loss": 0.12283913791179657, + "loss_ib": 0.0020924038253724575, + "step": 907 + }, + { + "ce_ib": 5.74461555480957, + "ce_orig": 0.6159311532974243, + "epoch": 0.2608383061327198, + "kl_loss": 0.0854053944349289, + "loss_ib": 0.0014285154175013304, + "step": 907 + }, + { + "ce_ib": 6.865052700042725, + "ce_orig": 0.7871037721633911, + "epoch": 0.2608383061327198, + "kl_loss": 0.10555486381053925, + "loss_ib": 0.0017420538933947682, + "step": 907 + }, + { + "ce_ib": 10.412860870361328, + "ce_orig": 1.4906829595565796, + "epoch": 0.2608383061327198, + "kl_loss": 0.1556229293346405, + "loss_ib": 0.0025975152384489775, + "step": 907 + }, + { + "ce_ib": 6.144519805908203, + "ce_orig": 0.548992395401001, + "epoch": 0.2611258897116975, + "kl_loss": 0.17790073156356812, + "loss_ib": 0.002393459202721715, + "step": 908 + }, + { + "ce_ib": 4.397858142852783, + "ce_orig": 0.41396814584732056, + "epoch": 0.2611258897116975, + "kl_loss": 0.10652049630880356, + "loss_ib": 0.0015049907378852367, + "step": 908 + }, + { + "ce_ib": 9.191903114318848, + "ce_orig": 1.1926069259643555, + "epoch": 0.2611258897116975, + "kl_loss": 0.12529361248016357, + "loss_ib": 0.0021721264347434044, + "step": 908 + }, + { + "ce_ib": 8.310478210449219, + "ce_orig": 1.0322816371917725, + "epoch": 0.2611258897116975, + "kl_loss": 0.12497265636920929, + "loss_ib": 0.0020807741675525904, + "step": 908 + }, + { + "ce_ib": 6.497647762298584, + "ce_orig": 0.48664146661758423, + "epoch": 0.2614134732906751, + "kl_loss": 0.10144509375095367, + "loss_ib": 0.0016642155824229121, + "step": 909 + }, + { + "ce_ib": 8.37979793548584, + "ce_orig": 0.7440108060836792, + "epoch": 0.2614134732906751, + "kl_loss": 0.11546805500984192, + "loss_ib": 0.001992660341784358, + "step": 909 + }, + { + "ce_ib": 4.586841106414795, + "ce_orig": 0.7357204556465149, + "epoch": 0.2614134732906751, + "kl_loss": 0.11390420794487, + "loss_ib": 0.0015977261355146766, + "step": 909 + }, + { + "ce_ib": 5.848711967468262, + "ce_orig": 0.7831753492355347, + "epoch": 0.2614134732906751, + "kl_loss": 0.10904887318611145, + "loss_ib": 0.0016753599047660828, + "step": 909 + }, + { + "epoch": 0.2617010568696527, + "grad_norm": 0.09389620274305344, + "learning_rate": 4.958184399177178e-05, + "loss": 0.8516, + "step": 910 + }, + { + "ce_ib": 4.011692047119141, + "ce_orig": 0.5226119160652161, + "epoch": 0.2617010568696527, + "kl_loss": 0.0997629314661026, + "loss_ib": 0.001398798543959856, + "step": 910 + }, + { + "ce_ib": 5.39235782623291, + "ce_orig": 0.4774095416069031, + "epoch": 0.2617010568696527, + "kl_loss": 0.11160556972026825, + "loss_ib": 0.001655291416682303, + "step": 910 + }, + { + "ce_ib": 6.545243740081787, + "ce_orig": 0.877449631690979, + "epoch": 0.2617010568696527, + "kl_loss": 0.113972969353199, + "loss_ib": 0.0017942539416253567, + "step": 910 + }, + { + "ce_ib": 4.371427059173584, + "ce_orig": 0.677146315574646, + "epoch": 0.2617010568696527, + "kl_loss": 0.08138425648212433, + "loss_ib": 0.0012509851949289441, + "step": 910 + }, + { + "ce_ib": 6.525064945220947, + "ce_orig": 0.7258946299552917, + "epoch": 0.2619886404486304, + "kl_loss": 0.1372908651828766, + "loss_ib": 0.002025415189564228, + "step": 911 + }, + { + "ce_ib": 7.87410306930542, + "ce_orig": 1.1585533618927002, + "epoch": 0.2619886404486304, + "kl_loss": 0.11789745837450027, + "loss_ib": 0.0019663849379867315, + "step": 911 + }, + { + "ce_ib": 9.139239311218262, + "ce_orig": 0.9819349646568298, + "epoch": 0.2619886404486304, + "kl_loss": 0.16793784499168396, + "loss_ib": 0.0025933024007827044, + "step": 911 + }, + { + "ce_ib": 7.358417510986328, + "ce_orig": 0.6855488419532776, + "epoch": 0.2619886404486304, + "kl_loss": 0.14852701127529144, + "loss_ib": 0.0022211119066923857, + "step": 911 + }, + { + "ce_ib": 12.192155838012695, + "ce_orig": 1.4514764547348022, + "epoch": 0.26227622402760803, + "kl_loss": 0.09740576148033142, + "loss_ib": 0.0021932730451226234, + "step": 912 + }, + { + "ce_ib": 6.15526819229126, + "ce_orig": 0.7057509422302246, + "epoch": 0.26227622402760803, + "kl_loss": 0.09535631537437439, + "loss_ib": 0.0015690898289903998, + "step": 912 + }, + { + "ce_ib": 6.982746124267578, + "ce_orig": 0.739734411239624, + "epoch": 0.26227622402760803, + "kl_loss": 0.1276528239250183, + "loss_ib": 0.001974802929908037, + "step": 912 + }, + { + "ce_ib": 9.937287330627441, + "ce_orig": 0.5557059645652771, + "epoch": 0.26227622402760803, + "kl_loss": 0.12562233209609985, + "loss_ib": 0.0022499519400298595, + "step": 912 + }, + { + "ce_ib": 6.873095512390137, + "ce_orig": 0.4055517911911011, + "epoch": 0.26256380760658565, + "kl_loss": 0.15974824130535126, + "loss_ib": 0.00228479178622365, + "step": 913 + }, + { + "ce_ib": 10.225861549377441, + "ce_orig": 1.6037037372589111, + "epoch": 0.26256380760658565, + "kl_loss": 0.10312145203351974, + "loss_ib": 0.0020538007374852896, + "step": 913 + }, + { + "ce_ib": 11.041903495788574, + "ce_orig": 1.3984054327011108, + "epoch": 0.26256380760658565, + "kl_loss": 0.08899977803230286, + "loss_ib": 0.0019941881764680147, + "step": 913 + }, + { + "ce_ib": 7.812140464782715, + "ce_orig": 0.8241643309593201, + "epoch": 0.26256380760658565, + "kl_loss": 0.15096673369407654, + "loss_ib": 0.0022908812388777733, + "step": 913 + }, + { + "ce_ib": 4.639538764953613, + "ce_orig": 0.47728028893470764, + "epoch": 0.2628513911855633, + "kl_loss": 0.101595938205719, + "loss_ib": 0.0014799132477492094, + "step": 914 + }, + { + "ce_ib": 8.127906799316406, + "ce_orig": 0.6452031135559082, + "epoch": 0.2628513911855633, + "kl_loss": 0.19892573356628418, + "loss_ib": 0.0028020478785037994, + "step": 914 + }, + { + "ce_ib": 10.456913948059082, + "ce_orig": 1.3510535955429077, + "epoch": 0.2628513911855633, + "kl_loss": 0.19482071697711945, + "loss_ib": 0.002993898233398795, + "step": 914 + }, + { + "ce_ib": 3.9594027996063232, + "ce_orig": 0.4253597855567932, + "epoch": 0.2628513911855633, + "kl_loss": 0.19654974341392517, + "loss_ib": 0.002361437538638711, + "step": 914 + }, + { + "epoch": 0.26313897476454096, + "grad_norm": 0.0944618359208107, + "learning_rate": 4.9574746816932084e-05, + "loss": 0.902, + "step": 915 + }, + { + "ce_ib": 9.708820343017578, + "ce_orig": 0.7267554402351379, + "epoch": 0.26313897476454096, + "kl_loss": 0.18204952776432037, + "loss_ib": 0.0027913772501051426, + "step": 915 + }, + { + "ce_ib": 4.622426986694336, + "ce_orig": 0.5747974514961243, + "epoch": 0.26313897476454096, + "kl_loss": 0.11130377650260925, + "loss_ib": 0.0015752804465591908, + "step": 915 + }, + { + "ce_ib": 8.35964298248291, + "ce_orig": 1.0757566690444946, + "epoch": 0.26313897476454096, + "kl_loss": 0.11115144193172455, + "loss_ib": 0.00194747862406075, + "step": 915 + }, + { + "ce_ib": 7.628917217254639, + "ce_orig": 0.6007151007652283, + "epoch": 0.26313897476454096, + "kl_loss": 0.16418364644050598, + "loss_ib": 0.0024047282058745623, + "step": 915 + }, + { + "ce_ib": 4.411064624786377, + "ce_orig": 0.3923698663711548, + "epoch": 0.2634265583435186, + "kl_loss": 0.07581924647092819, + "loss_ib": 0.001199298887513578, + "step": 916 + }, + { + "ce_ib": 8.376907348632812, + "ce_orig": 0.9289705753326416, + "epoch": 0.2634265583435186, + "kl_loss": 0.11779868602752686, + "loss_ib": 0.0020156775135546923, + "step": 916 + }, + { + "ce_ib": 4.883301734924316, + "ce_orig": 0.49596890807151794, + "epoch": 0.2634265583435186, + "kl_loss": 0.15270809829235077, + "loss_ib": 0.0020154111552983522, + "step": 916 + }, + { + "ce_ib": 3.5627071857452393, + "ce_orig": 0.35683321952819824, + "epoch": 0.2634265583435186, + "kl_loss": 0.08917704224586487, + "loss_ib": 0.001248041051439941, + "step": 916 + }, + { + "ce_ib": 5.704797744750977, + "ce_orig": 0.5445812940597534, + "epoch": 0.2637141419224962, + "kl_loss": 0.126393124461174, + "loss_ib": 0.0018344109412282705, + "step": 917 + }, + { + "ce_ib": 8.156935691833496, + "ce_orig": 1.2448252439498901, + "epoch": 0.2637141419224962, + "kl_loss": 0.11635102331638336, + "loss_ib": 0.001979203661903739, + "step": 917 + }, + { + "ce_ib": 7.226184368133545, + "ce_orig": 0.8044269680976868, + "epoch": 0.2637141419224962, + "kl_loss": 0.11676135659217834, + "loss_ib": 0.001890231971628964, + "step": 917 + }, + { + "ce_ib": 6.512085914611816, + "ce_orig": 1.0719903707504272, + "epoch": 0.2637141419224962, + "kl_loss": 0.14638350903987885, + "loss_ib": 0.002115043578669429, + "step": 917 + }, + { + "ce_ib": 4.944369316101074, + "ce_orig": 0.7357708215713501, + "epoch": 0.2640017255014739, + "kl_loss": 0.10602225363254547, + "loss_ib": 0.0015546594513580203, + "step": 918 + }, + { + "ce_ib": 7.204721927642822, + "ce_orig": 0.7557398080825806, + "epoch": 0.2640017255014739, + "kl_loss": 0.12639883160591125, + "loss_ib": 0.001984460512176156, + "step": 918 + }, + { + "ce_ib": 8.273849487304688, + "ce_orig": 1.0668854713439941, + "epoch": 0.2640017255014739, + "kl_loss": 0.18592819571495056, + "loss_ib": 0.002686666790395975, + "step": 918 + }, + { + "ce_ib": 11.763835906982422, + "ce_orig": 0.6219754219055176, + "epoch": 0.2640017255014739, + "kl_loss": 0.1133304089307785, + "loss_ib": 0.0023096876684576273, + "step": 918 + }, + { + "ce_ib": 7.171568870544434, + "ce_orig": 0.7699258923530579, + "epoch": 0.2642893090804515, + "kl_loss": 0.10670562088489532, + "loss_ib": 0.0017842131201177835, + "step": 919 + }, + { + "ce_ib": 9.258209228515625, + "ce_orig": 0.8205808401107788, + "epoch": 0.2642893090804515, + "kl_loss": 0.16718566417694092, + "loss_ib": 0.002597677754238248, + "step": 919 + }, + { + "ce_ib": 7.402035236358643, + "ce_orig": 1.1611003875732422, + "epoch": 0.2642893090804515, + "kl_loss": 0.10410024970769882, + "loss_ib": 0.001781205995939672, + "step": 919 + }, + { + "ce_ib": 9.35278606414795, + "ce_orig": 1.3219811916351318, + "epoch": 0.2642893090804515, + "kl_loss": 0.11624269187450409, + "loss_ib": 0.0020977056119590998, + "step": 919 + }, + { + "epoch": 0.26457689265942913, + "grad_norm": 0.09958989173173904, + "learning_rate": 4.9567590435823383e-05, + "loss": 0.8282, + "step": 920 + }, + { + "ce_ib": 6.871311187744141, + "ce_orig": 0.5260292291641235, + "epoch": 0.26457689265942913, + "kl_loss": 0.20388224720954895, + "loss_ib": 0.002725953469052911, + "step": 920 + }, + { + "ce_ib": 8.602705955505371, + "ce_orig": 1.0546218156814575, + "epoch": 0.26457689265942913, + "kl_loss": 0.13618257641792297, + "loss_ib": 0.0022220963146537542, + "step": 920 + }, + { + "ce_ib": 8.520659446716309, + "ce_orig": 0.9017817378044128, + "epoch": 0.26457689265942913, + "kl_loss": 0.1527824103832245, + "loss_ib": 0.002379890065640211, + "step": 920 + }, + { + "ce_ib": 9.107587814331055, + "ce_orig": 0.9084448218345642, + "epoch": 0.26457689265942913, + "kl_loss": 0.17240267992019653, + "loss_ib": 0.002634785370901227, + "step": 920 + }, + { + "ce_ib": 9.081536293029785, + "ce_orig": 1.3624303340911865, + "epoch": 0.2648644762384068, + "kl_loss": 0.19510559737682343, + "loss_ib": 0.00285920943133533, + "step": 921 + }, + { + "ce_ib": 2.0994462966918945, + "ce_orig": 0.09127107262611389, + "epoch": 0.2648644762384068, + "kl_loss": 0.23770758509635925, + "loss_ib": 0.0025870203971862793, + "step": 921 + }, + { + "ce_ib": 9.57007122039795, + "ce_orig": 0.840829074382782, + "epoch": 0.2648644762384068, + "kl_loss": 0.13091081380844116, + "loss_ib": 0.002266115276142955, + "step": 921 + }, + { + "ce_ib": 6.87959623336792, + "ce_orig": 0.47246253490448, + "epoch": 0.2648644762384068, + "kl_loss": 0.1876753866672516, + "loss_ib": 0.0025647133588790894, + "step": 921 + }, + { + "ce_ib": 10.32319450378418, + "ce_orig": 1.2931030988693237, + "epoch": 0.26515205981738443, + "kl_loss": 0.1872691810131073, + "loss_ib": 0.0029050111770629883, + "step": 922 + }, + { + "ce_ib": 6.233536720275879, + "ce_orig": 0.5803642272949219, + "epoch": 0.26515205981738443, + "kl_loss": 0.11040376126766205, + "loss_ib": 0.0017273911507800221, + "step": 922 + }, + { + "ce_ib": 5.026586532592773, + "ce_orig": 0.42871662974357605, + "epoch": 0.26515205981738443, + "kl_loss": 0.10527107119560242, + "loss_ib": 0.001555369351990521, + "step": 922 + }, + { + "ce_ib": 8.718320846557617, + "ce_orig": 1.142681360244751, + "epoch": 0.26515205981738443, + "kl_loss": 0.11979550123214722, + "loss_ib": 0.002069787122309208, + "step": 922 + }, + { + "ce_ib": 5.136258125305176, + "ce_orig": 0.751221239566803, + "epoch": 0.26543964339636206, + "kl_loss": 0.09198145568370819, + "loss_ib": 0.0014334403676912189, + "step": 923 + }, + { + "ce_ib": 4.875478744506836, + "ce_orig": 0.6654835343360901, + "epoch": 0.26543964339636206, + "kl_loss": 0.11321474611759186, + "loss_ib": 0.001619695220142603, + "step": 923 + }, + { + "ce_ib": 4.8394036293029785, + "ce_orig": 0.2200663983821869, + "epoch": 0.26543964339636206, + "kl_loss": 0.20495754480361938, + "loss_ib": 0.0025335156824439764, + "step": 923 + }, + { + "ce_ib": 7.12498140335083, + "ce_orig": 0.9060279726982117, + "epoch": 0.26543964339636206, + "kl_loss": 0.10331471264362335, + "loss_ib": 0.0017456451896578074, + "step": 923 + }, + { + "ce_ib": 6.738324165344238, + "ce_orig": 0.7269691824913025, + "epoch": 0.2657272269753397, + "kl_loss": 0.12429259717464447, + "loss_ib": 0.0019167583668604493, + "step": 924 + }, + { + "ce_ib": 12.713713645935059, + "ce_orig": 1.8025732040405273, + "epoch": 0.2657272269753397, + "kl_loss": 0.18177086114883423, + "loss_ib": 0.0030890798661857843, + "step": 924 + }, + { + "ce_ib": 9.708475112915039, + "ce_orig": 1.155458927154541, + "epoch": 0.2657272269753397, + "kl_loss": 0.14343459904193878, + "loss_ib": 0.0024051934015005827, + "step": 924 + }, + { + "ce_ib": 7.697851181030273, + "ce_orig": 0.7456066608428955, + "epoch": 0.2657272269753397, + "kl_loss": 0.15187275409698486, + "loss_ib": 0.0022885126527398825, + "step": 924 + }, + { + "epoch": 0.26601481055431736, + "grad_norm": 0.08948251605033875, + "learning_rate": 4.956037486568706e-05, + "loss": 0.8789, + "step": 925 + }, + { + "ce_ib": 5.39132022857666, + "ce_orig": 0.6468148231506348, + "epoch": 0.26601481055431736, + "kl_loss": 0.11626164615154266, + "loss_ib": 0.001701748464256525, + "step": 925 + }, + { + "ce_ib": 7.801209926605225, + "ce_orig": 0.6465848684310913, + "epoch": 0.26601481055431736, + "kl_loss": 0.15845727920532227, + "loss_ib": 0.0023646936751902103, + "step": 925 + }, + { + "ce_ib": 8.216442108154297, + "ce_orig": 1.0202922821044922, + "epoch": 0.26601481055431736, + "kl_loss": 0.151662677526474, + "loss_ib": 0.002338270889595151, + "step": 925 + }, + { + "ce_ib": 7.495940685272217, + "ce_orig": 0.8159665465354919, + "epoch": 0.26601481055431736, + "kl_loss": 0.13693495094776154, + "loss_ib": 0.002118943491950631, + "step": 925 + }, + { + "ce_ib": 6.499263763427734, + "ce_orig": 0.7159230709075928, + "epoch": 0.266302394133295, + "kl_loss": 0.09872101247310638, + "loss_ib": 0.0016371364472433925, + "step": 926 + }, + { + "ce_ib": 6.620962619781494, + "ce_orig": 0.8149300813674927, + "epoch": 0.266302394133295, + "kl_loss": 0.13282713294029236, + "loss_ib": 0.0019903674256056547, + "step": 926 + }, + { + "ce_ib": 7.567070007324219, + "ce_orig": 0.9069898724555969, + "epoch": 0.266302394133295, + "kl_loss": 0.08865264058113098, + "loss_ib": 0.0016432332340627909, + "step": 926 + }, + { + "ce_ib": 4.736062526702881, + "ce_orig": 0.6141554117202759, + "epoch": 0.266302394133295, + "kl_loss": 0.10103703290224075, + "loss_ib": 0.001483976491726935, + "step": 926 + }, + { + "ce_ib": 4.775996208190918, + "ce_orig": 0.5475775003433228, + "epoch": 0.2665899777122726, + "kl_loss": 0.08474655449390411, + "loss_ib": 0.0013250651536509395, + "step": 927 + }, + { + "ce_ib": 5.6749396324157715, + "ce_orig": 0.5858866572380066, + "epoch": 0.2665899777122726, + "kl_loss": 0.12519749999046326, + "loss_ib": 0.0018194690346717834, + "step": 927 + }, + { + "ce_ib": 10.990907669067383, + "ce_orig": 1.7932952642440796, + "epoch": 0.2665899777122726, + "kl_loss": 0.09833800792694092, + "loss_ib": 0.002082470804452896, + "step": 927 + }, + { + "ce_ib": 9.858685493469238, + "ce_orig": 1.2990039587020874, + "epoch": 0.2665899777122726, + "kl_loss": 0.13593445718288422, + "loss_ib": 0.0023452129680663347, + "step": 927 + }, + { + "ce_ib": 7.085406303405762, + "ce_orig": 1.029772400856018, + "epoch": 0.2668775612912503, + "kl_loss": 0.08736493438482285, + "loss_ib": 0.001582189928740263, + "step": 928 + }, + { + "ce_ib": 5.06296968460083, + "ce_orig": 0.7175295948982239, + "epoch": 0.2668775612912503, + "kl_loss": 0.09772807359695435, + "loss_ib": 0.0014835776528343558, + "step": 928 + }, + { + "ce_ib": 8.964816093444824, + "ce_orig": 1.3526090383529663, + "epoch": 0.2668775612912503, + "kl_loss": 0.08428291231393814, + "loss_ib": 0.0017393105663359165, + "step": 928 + }, + { + "ce_ib": 5.652362823486328, + "ce_orig": 0.6536108255386353, + "epoch": 0.2668775612912503, + "kl_loss": 0.11744387447834015, + "loss_ib": 0.0017396750627085567, + "step": 928 + }, + { + "ce_ib": 5.928165435791016, + "ce_orig": 0.5609773993492126, + "epoch": 0.2671651448702279, + "kl_loss": 0.10269203037023544, + "loss_ib": 0.0016197367804124951, + "step": 929 + }, + { + "ce_ib": 7.495223522186279, + "ce_orig": 0.6944816708564758, + "epoch": 0.2671651448702279, + "kl_loss": 0.13109838962554932, + "loss_ib": 0.002060506260022521, + "step": 929 + }, + { + "ce_ib": 4.497866153717041, + "ce_orig": 0.3619462251663208, + "epoch": 0.2671651448702279, + "kl_loss": 0.2023397535085678, + "loss_ib": 0.0024731841403990984, + "step": 929 + }, + { + "ce_ib": 10.906074523925781, + "ce_orig": 1.6625550985336304, + "epoch": 0.2671651448702279, + "kl_loss": 0.20823043584823608, + "loss_ib": 0.0031729117035865784, + "step": 929 + }, + { + "epoch": 0.26745272844920553, + "grad_norm": 0.09865286946296692, + "learning_rate": 4.955310012390711e-05, + "loss": 0.9273, + "step": 930 + }, + { + "ce_ib": 10.8095121383667, + "ce_orig": 1.3800263404846191, + "epoch": 0.26745272844920553, + "kl_loss": 0.12191278487443924, + "loss_ib": 0.0023000789806246758, + "step": 930 + }, + { + "ce_ib": 7.083028316497803, + "ce_orig": 1.2210267782211304, + "epoch": 0.26745272844920553, + "kl_loss": 0.05867953598499298, + "loss_ib": 0.0012950979871675372, + "step": 930 + }, + { + "ce_ib": 7.6254425048828125, + "ce_orig": 0.8195652961730957, + "epoch": 0.26745272844920553, + "kl_loss": 0.1415867656469345, + "loss_ib": 0.0021784116979688406, + "step": 930 + }, + { + "ce_ib": 2.1736412048339844, + "ce_orig": 0.1812783181667328, + "epoch": 0.26745272844920553, + "kl_loss": 0.26977869868278503, + "loss_ib": 0.0029151509515941143, + "step": 930 + }, + { + "ce_ib": 6.303011894226074, + "ce_orig": 0.6517394185066223, + "epoch": 0.2677403120281832, + "kl_loss": 0.09915041923522949, + "loss_ib": 0.001621805364266038, + "step": 931 + }, + { + "ce_ib": 2.727393865585327, + "ce_orig": 0.37597399950027466, + "epoch": 0.2677403120281832, + "kl_loss": 0.21541434526443481, + "loss_ib": 0.002426882740110159, + "step": 931 + }, + { + "ce_ib": 10.125308990478516, + "ce_orig": 1.2238138914108276, + "epoch": 0.2677403120281832, + "kl_loss": 0.13982701301574707, + "loss_ib": 0.0024108008947223425, + "step": 931 + }, + { + "ce_ib": 5.368266582489014, + "ce_orig": 0.8618974089622498, + "epoch": 0.2677403120281832, + "kl_loss": 0.12464284151792526, + "loss_ib": 0.0017832550220191479, + "step": 931 + }, + { + "ce_ib": 5.595724105834961, + "ce_orig": 0.733282208442688, + "epoch": 0.26802789560716084, + "kl_loss": 0.14293760061264038, + "loss_ib": 0.0019889483228325844, + "step": 932 + }, + { + "ce_ib": 14.770402908325195, + "ce_orig": 2.0770435333251953, + "epoch": 0.26802789560716084, + "kl_loss": 0.15044143795967102, + "loss_ib": 0.002981454599648714, + "step": 932 + }, + { + "ce_ib": 6.786084175109863, + "ce_orig": 0.7236509323120117, + "epoch": 0.26802789560716084, + "kl_loss": 0.11320096254348755, + "loss_ib": 0.0018106179777532816, + "step": 932 + }, + { + "ce_ib": 6.113559246063232, + "ce_orig": 0.547592043876648, + "epoch": 0.26802789560716084, + "kl_loss": 0.10517580807209015, + "loss_ib": 0.0016631139442324638, + "step": 932 + }, + { + "ce_ib": 4.208815097808838, + "ce_orig": 0.6119555234909058, + "epoch": 0.26831547918613846, + "kl_loss": 0.08684396743774414, + "loss_ib": 0.0012893211096525192, + "step": 933 + }, + { + "ce_ib": 9.651471138000488, + "ce_orig": 1.3539904356002808, + "epoch": 0.26831547918613846, + "kl_loss": 0.15913406014442444, + "loss_ib": 0.002556487452238798, + "step": 933 + }, + { + "ce_ib": 8.182539939880371, + "ce_orig": 0.7589184641838074, + "epoch": 0.26831547918613846, + "kl_loss": 0.11191841959953308, + "loss_ib": 0.0019374381517991424, + "step": 933 + }, + { + "ce_ib": 9.856441497802734, + "ce_orig": 0.6627126336097717, + "epoch": 0.26831547918613846, + "kl_loss": 0.19334280490875244, + "loss_ib": 0.002919072052463889, + "step": 933 + }, + { + "ce_ib": 5.727910041809082, + "ce_orig": 0.5337786674499512, + "epoch": 0.2686030627651161, + "kl_loss": 0.08856379240751266, + "loss_ib": 0.0014584289165213704, + "step": 934 + }, + { + "ce_ib": 6.411639213562012, + "ce_orig": 0.7354600429534912, + "epoch": 0.2686030627651161, + "kl_loss": 0.11536514759063721, + "loss_ib": 0.0017948152963072062, + "step": 934 + }, + { + "ce_ib": 6.148459434509277, + "ce_orig": 0.7758929133415222, + "epoch": 0.2686030627651161, + "kl_loss": 0.12726286053657532, + "loss_ib": 0.0018874744419008493, + "step": 934 + }, + { + "ce_ib": 6.0549845695495605, + "ce_orig": 0.6095043420791626, + "epoch": 0.2686030627651161, + "kl_loss": 0.10193461179733276, + "loss_ib": 0.001624844502657652, + "step": 934 + }, + { + "epoch": 0.26889064634409376, + "grad_norm": 0.10580892115831375, + "learning_rate": 4.954576622801006e-05, + "loss": 0.8217, + "step": 935 + }, + { + "ce_ib": 8.449764251708984, + "ce_orig": 1.0514365434646606, + "epoch": 0.26889064634409376, + "kl_loss": 0.13770011067390442, + "loss_ib": 0.002221977338194847, + "step": 935 + }, + { + "ce_ib": 6.928380489349365, + "ce_orig": 0.6566464900970459, + "epoch": 0.26889064634409376, + "kl_loss": 0.19501274824142456, + "loss_ib": 0.002642965642735362, + "step": 935 + }, + { + "ce_ib": 8.186603546142578, + "ce_orig": 0.7874239683151245, + "epoch": 0.26889064634409376, + "kl_loss": 0.09358172118663788, + "loss_ib": 0.001754477620124817, + "step": 935 + }, + { + "ce_ib": 6.060873985290527, + "ce_orig": 0.624786913394928, + "epoch": 0.26889064634409376, + "kl_loss": 0.1422898769378662, + "loss_ib": 0.0020289861131459475, + "step": 935 + }, + { + "ce_ib": 5.951212406158447, + "ce_orig": 0.5704233050346375, + "epoch": 0.2691782299230714, + "kl_loss": 0.12384402006864548, + "loss_ib": 0.0018335613422095776, + "step": 936 + }, + { + "ce_ib": 4.107890605926514, + "ce_orig": 0.4098372459411621, + "epoch": 0.2691782299230714, + "kl_loss": 0.1344190239906311, + "loss_ib": 0.0017549792537465692, + "step": 936 + }, + { + "ce_ib": 7.884426116943359, + "ce_orig": 0.6171572804450989, + "epoch": 0.2691782299230714, + "kl_loss": 0.1363716423511505, + "loss_ib": 0.0021521588787436485, + "step": 936 + }, + { + "ce_ib": 8.209308624267578, + "ce_orig": 1.1194345951080322, + "epoch": 0.2691782299230714, + "kl_loss": 0.1114901453256607, + "loss_ib": 0.0019358322024345398, + "step": 936 + }, + { + "ce_ib": 5.281935214996338, + "ce_orig": 0.6868589520454407, + "epoch": 0.269465813502049, + "kl_loss": 0.11274972558021545, + "loss_ib": 0.0016556908376514912, + "step": 937 + }, + { + "ce_ib": 8.918413162231445, + "ce_orig": 0.9377288818359375, + "epoch": 0.269465813502049, + "kl_loss": 0.09959867596626282, + "loss_ib": 0.0018878281116485596, + "step": 937 + }, + { + "ce_ib": 5.661655902862549, + "ce_orig": 0.9097051024436951, + "epoch": 0.269465813502049, + "kl_loss": 0.08012732863426208, + "loss_ib": 0.00136743881739676, + "step": 937 + }, + { + "ce_ib": 6.918816089630127, + "ce_orig": 0.47837385535240173, + "epoch": 0.269465813502049, + "kl_loss": 0.1599336564540863, + "loss_ib": 0.0022912181448191404, + "step": 937 + }, + { + "ce_ib": 7.659276485443115, + "ce_orig": 0.4183506965637207, + "epoch": 0.2697533970810267, + "kl_loss": 0.13869816064834595, + "loss_ib": 0.002152909291908145, + "step": 938 + }, + { + "ce_ib": 7.376327991485596, + "ce_orig": 0.9017499685287476, + "epoch": 0.2697533970810267, + "kl_loss": 0.18247684836387634, + "loss_ib": 0.0025624013505876064, + "step": 938 + }, + { + "ce_ib": 6.123374938964844, + "ce_orig": 0.6197956800460815, + "epoch": 0.2697533970810267, + "kl_loss": 0.15362223982810974, + "loss_ib": 0.002148559782654047, + "step": 938 + }, + { + "ce_ib": 6.555751800537109, + "ce_orig": 0.4299125373363495, + "epoch": 0.2697533970810267, + "kl_loss": 0.1346968412399292, + "loss_ib": 0.0020025435369461775, + "step": 938 + }, + { + "ce_ib": 6.542535781860352, + "ce_orig": 0.549587070941925, + "epoch": 0.2700409806600043, + "kl_loss": 0.20072101056575775, + "loss_ib": 0.0026614635717123747, + "step": 939 + }, + { + "ce_ib": 7.215294361114502, + "ce_orig": 0.9843421578407288, + "epoch": 0.2700409806600043, + "kl_loss": 0.10044269263744354, + "loss_ib": 0.0017259563319385052, + "step": 939 + }, + { + "ce_ib": 5.538031578063965, + "ce_orig": 0.8232380747795105, + "epoch": 0.2700409806600043, + "kl_loss": 0.06865495443344116, + "loss_ib": 0.0012403526343405247, + "step": 939 + }, + { + "ce_ib": 10.024641990661621, + "ce_orig": 1.4815994501113892, + "epoch": 0.2700409806600043, + "kl_loss": 0.12842750549316406, + "loss_ib": 0.002286739181727171, + "step": 939 + }, + { + "epoch": 0.27032856423898194, + "grad_norm": 0.0914548933506012, + "learning_rate": 4.953837319566497e-05, + "loss": 0.8191, + "step": 940 + }, + { + "ce_ib": 3.5875911712646484, + "ce_orig": 0.4435073435306549, + "epoch": 0.27032856423898194, + "kl_loss": 0.07417052984237671, + "loss_ib": 0.001100464491173625, + "step": 940 + }, + { + "ce_ib": 5.263745307922363, + "ce_orig": 0.660015344619751, + "epoch": 0.27032856423898194, + "kl_loss": 0.07660418748855591, + "loss_ib": 0.0012924164766445756, + "step": 940 + }, + { + "ce_ib": 7.743003845214844, + "ce_orig": 1.1857998371124268, + "epoch": 0.27032856423898194, + "kl_loss": 0.14771535992622375, + "loss_ib": 0.0022514539305120707, + "step": 940 + }, + { + "ce_ib": 10.746769905090332, + "ce_orig": 1.7959994077682495, + "epoch": 0.27032856423898194, + "kl_loss": 0.11718818545341492, + "loss_ib": 0.002246558666229248, + "step": 940 + }, + { + "ce_ib": 6.919939041137695, + "ce_orig": 0.9838729500770569, + "epoch": 0.2706161478179596, + "kl_loss": 0.0781673863530159, + "loss_ib": 0.0014736676821485162, + "step": 941 + }, + { + "ce_ib": 5.7579755783081055, + "ce_orig": 0.6983100175857544, + "epoch": 0.2706161478179596, + "kl_loss": 0.09704221040010452, + "loss_ib": 0.0015462195733562112, + "step": 941 + }, + { + "ce_ib": 9.96190357208252, + "ce_orig": 1.2530534267425537, + "epoch": 0.2706161478179596, + "kl_loss": 0.21759331226348877, + "loss_ib": 0.0031721233390271664, + "step": 941 + }, + { + "ce_ib": 7.985954761505127, + "ce_orig": 0.3687174320220947, + "epoch": 0.2706161478179596, + "kl_loss": 0.12543964385986328, + "loss_ib": 0.002052991883829236, + "step": 941 + }, + { + "ce_ib": 7.240424633026123, + "ce_orig": 0.6854646801948547, + "epoch": 0.27090373139693724, + "kl_loss": 0.1816634237766266, + "loss_ib": 0.002540676621720195, + "step": 942 + }, + { + "ce_ib": 7.516676902770996, + "ce_orig": 0.7132022976875305, + "epoch": 0.27090373139693724, + "kl_loss": 0.11213131248950958, + "loss_ib": 0.0018729808507487178, + "step": 942 + }, + { + "ce_ib": 7.73270845413208, + "ce_orig": 0.9187069535255432, + "epoch": 0.27090373139693724, + "kl_loss": 0.14587682485580444, + "loss_ib": 0.0022320388816297054, + "step": 942 + }, + { + "ce_ib": 7.356476783752441, + "ce_orig": 0.8093485236167908, + "epoch": 0.27090373139693724, + "kl_loss": 0.1405678391456604, + "loss_ib": 0.002141325967386365, + "step": 942 + }, + { + "ce_ib": 7.718157768249512, + "ce_orig": 0.5669108033180237, + "epoch": 0.27119131497591487, + "kl_loss": 0.23648160696029663, + "loss_ib": 0.0031366317998617887, + "step": 943 + }, + { + "ce_ib": 9.620587348937988, + "ce_orig": 1.4407234191894531, + "epoch": 0.27119131497591487, + "kl_loss": 0.18117858469486237, + "loss_ib": 0.002773844636976719, + "step": 943 + }, + { + "ce_ib": 6.493579387664795, + "ce_orig": 0.7694388031959534, + "epoch": 0.27119131497591487, + "kl_loss": 0.11386668682098389, + "loss_ib": 0.0017880249070003629, + "step": 943 + }, + { + "ce_ib": 8.296916961669922, + "ce_orig": 1.0959537029266357, + "epoch": 0.27119131497591487, + "kl_loss": 0.14310365915298462, + "loss_ib": 0.0022607280407100916, + "step": 943 + }, + { + "ce_ib": 8.241883277893066, + "ce_orig": 1.090752124786377, + "epoch": 0.2714788985548925, + "kl_loss": 0.2852647304534912, + "loss_ib": 0.00367683544754982, + "step": 944 + }, + { + "ce_ib": 6.730658531188965, + "ce_orig": 0.7248155474662781, + "epoch": 0.2714788985548925, + "kl_loss": 0.14084209501743317, + "loss_ib": 0.002081486862152815, + "step": 944 + }, + { + "ce_ib": 4.981124401092529, + "ce_orig": 0.676002562046051, + "epoch": 0.2714788985548925, + "kl_loss": 0.08100677281618118, + "loss_ib": 0.0013081800425425172, + "step": 944 + }, + { + "ce_ib": 6.86767578125, + "ce_orig": 0.7279362678527832, + "epoch": 0.2714788985548925, + "kl_loss": 0.1707763522863388, + "loss_ib": 0.0023945309221744537, + "step": 944 + }, + { + "epoch": 0.27176648213387017, + "grad_norm": 0.0977693498134613, + "learning_rate": 4.9530921044683374e-05, + "loss": 0.8319, + "step": 945 + }, + { + "ce_ib": 6.423201560974121, + "ce_orig": 0.9425351619720459, + "epoch": 0.27176648213387017, + "kl_loss": 0.13374529778957367, + "loss_ib": 0.0019797729328274727, + "step": 945 + }, + { + "ce_ib": 7.035811901092529, + "ce_orig": 0.728024959564209, + "epoch": 0.27176648213387017, + "kl_loss": 0.17420685291290283, + "loss_ib": 0.0024456498213112354, + "step": 945 + }, + { + "ce_ib": 5.947598934173584, + "ce_orig": 0.43916672468185425, + "epoch": 0.27176648213387017, + "kl_loss": 0.1581525057554245, + "loss_ib": 0.0021762847900390625, + "step": 945 + }, + { + "ce_ib": 11.003464698791504, + "ce_orig": 1.16835355758667, + "epoch": 0.27176648213387017, + "kl_loss": 0.1289106160402298, + "loss_ib": 0.002389452653005719, + "step": 945 + }, + { + "ce_ib": 11.310383796691895, + "ce_orig": 1.4770395755767822, + "epoch": 0.2720540657128478, + "kl_loss": 0.1667083203792572, + "loss_ib": 0.0027981214225292206, + "step": 946 + }, + { + "ce_ib": 6.251741409301758, + "ce_orig": 0.6906797289848328, + "epoch": 0.2720540657128478, + "kl_loss": 0.1108119785785675, + "loss_ib": 0.0017332937568426132, + "step": 946 + }, + { + "ce_ib": 4.572713851928711, + "ce_orig": 0.5401942133903503, + "epoch": 0.2720540657128478, + "kl_loss": 0.11654126644134521, + "loss_ib": 0.0016226839506998658, + "step": 946 + }, + { + "ce_ib": 11.478056907653809, + "ce_orig": 1.915285587310791, + "epoch": 0.2720540657128478, + "kl_loss": 0.16052968800067902, + "loss_ib": 0.0027531024534255266, + "step": 946 + }, + { + "ce_ib": 8.262956619262695, + "ce_orig": 1.16330885887146, + "epoch": 0.2723416492918254, + "kl_loss": 0.10167140513658524, + "loss_ib": 0.0018430094933137298, + "step": 947 + }, + { + "ce_ib": 3.876277446746826, + "ce_orig": 0.4559311866760254, + "epoch": 0.2723416492918254, + "kl_loss": 0.1875893473625183, + "loss_ib": 0.002263521309942007, + "step": 947 + }, + { + "ce_ib": 6.256802082061768, + "ce_orig": 0.8145736455917358, + "epoch": 0.2723416492918254, + "kl_loss": 0.24971362948417664, + "loss_ib": 0.003122816327959299, + "step": 947 + }, + { + "ce_ib": 6.3700761795043945, + "ce_orig": 0.3787907361984253, + "epoch": 0.2723416492918254, + "kl_loss": 0.4455419182777405, + "loss_ib": 0.00509242620319128, + "step": 947 + }, + { + "ce_ib": 7.6757049560546875, + "ce_orig": 1.1953049898147583, + "epoch": 0.2726292328708031, + "kl_loss": 0.16150033473968506, + "loss_ib": 0.0023825736716389656, + "step": 948 + }, + { + "ce_ib": 6.165060520172119, + "ce_orig": 0.8224126696586609, + "epoch": 0.2726292328708031, + "kl_loss": 0.12725231051445007, + "loss_ib": 0.0018890290521085262, + "step": 948 + }, + { + "ce_ib": 8.268503189086914, + "ce_orig": 0.600581705570221, + "epoch": 0.2726292328708031, + "kl_loss": 0.15321186184883118, + "loss_ib": 0.002358968835324049, + "step": 948 + }, + { + "ce_ib": 7.921170234680176, + "ce_orig": 0.6710708737373352, + "epoch": 0.2726292328708031, + "kl_loss": 0.17998698353767395, + "loss_ib": 0.0025919866748154163, + "step": 948 + }, + { + "ce_ib": 8.306171417236328, + "ce_orig": 0.8242880702018738, + "epoch": 0.2729168164497807, + "kl_loss": 0.14044451713562012, + "loss_ib": 0.002235062187537551, + "step": 949 + }, + { + "ce_ib": 9.588052749633789, + "ce_orig": 1.178617000579834, + "epoch": 0.2729168164497807, + "kl_loss": 0.13992881774902344, + "loss_ib": 0.0023580933921039104, + "step": 949 + }, + { + "ce_ib": 3.2139103412628174, + "ce_orig": 0.4203823208808899, + "epoch": 0.2729168164497807, + "kl_loss": 0.08177628368139267, + "loss_ib": 0.0011391538428142667, + "step": 949 + }, + { + "ce_ib": 9.011565208435059, + "ce_orig": 1.113925814628601, + "epoch": 0.2729168164497807, + "kl_loss": 0.13560637831687927, + "loss_ib": 0.0022572202142328024, + "step": 949 + }, + { + "epoch": 0.27320440002875834, + "grad_norm": 0.10619111359119415, + "learning_rate": 4.952340979301924e-05, + "loss": 0.8482, + "step": 950 + }, + { + "ce_ib": 4.546792030334473, + "ce_orig": 0.5716915726661682, + "epoch": 0.27320440002875834, + "kl_loss": 0.12070260941982269, + "loss_ib": 0.001661705318838358, + "step": 950 + }, + { + "ce_ib": 5.78235387802124, + "ce_orig": 0.7140153050422668, + "epoch": 0.27320440002875834, + "kl_loss": 0.14742611348628998, + "loss_ib": 0.0020524964202195406, + "step": 950 + }, + { + "ce_ib": 6.821101665496826, + "ce_orig": 0.9626045227050781, + "epoch": 0.27320440002875834, + "kl_loss": 0.18677476048469543, + "loss_ib": 0.002549857832491398, + "step": 950 + }, + { + "ce_ib": 7.699038028717041, + "ce_orig": 0.9650766849517822, + "epoch": 0.27320440002875834, + "kl_loss": 0.1534407138824463, + "loss_ib": 0.0023043109104037285, + "step": 950 + }, + { + "ce_ib": 8.740656852722168, + "ce_orig": 1.00751531124115, + "epoch": 0.273491983607736, + "kl_loss": 0.14653702080249786, + "loss_ib": 0.0023394357413053513, + "step": 951 + }, + { + "ce_ib": 5.055346488952637, + "ce_orig": 0.7659692764282227, + "epoch": 0.273491983607736, + "kl_loss": 0.11174146831035614, + "loss_ib": 0.0016229492612183094, + "step": 951 + }, + { + "ce_ib": 9.451150894165039, + "ce_orig": 1.0679696798324585, + "epoch": 0.273491983607736, + "kl_loss": 0.07588327676057816, + "loss_ib": 0.0017039477825164795, + "step": 951 + }, + { + "ce_ib": 9.73048210144043, + "ce_orig": 1.5051500797271729, + "epoch": 0.273491983607736, + "kl_loss": 0.15752311050891876, + "loss_ib": 0.002548279007896781, + "step": 951 + }, + { + "ce_ib": 10.435773849487305, + "ce_orig": 1.525643229484558, + "epoch": 0.27377956718671365, + "kl_loss": 0.11705584824085236, + "loss_ib": 0.0022141358349472284, + "step": 952 + }, + { + "ce_ib": 5.055952548980713, + "ce_orig": 0.34516263008117676, + "epoch": 0.27377956718671365, + "kl_loss": 0.06728208065032959, + "loss_ib": 0.0011784159578382969, + "step": 952 + }, + { + "ce_ib": 7.771252632141113, + "ce_orig": 1.0997884273529053, + "epoch": 0.27377956718671365, + "kl_loss": 0.13861092925071716, + "loss_ib": 0.0021632343996316195, + "step": 952 + }, + { + "ce_ib": 8.088645935058594, + "ce_orig": 1.0756627321243286, + "epoch": 0.27377956718671365, + "kl_loss": 0.08951813727617264, + "loss_ib": 0.0017040459206327796, + "step": 952 + }, + { + "ce_ib": 6.806406021118164, + "ce_orig": 0.6910561919212341, + "epoch": 0.27406715076569127, + "kl_loss": 0.10122386366128922, + "loss_ib": 0.0016928791301324964, + "step": 953 + }, + { + "ce_ib": 8.417391777038574, + "ce_orig": 0.7060823440551758, + "epoch": 0.27406715076569127, + "kl_loss": 0.1868474781513214, + "loss_ib": 0.0027102138847112656, + "step": 953 + }, + { + "ce_ib": 5.739500522613525, + "ce_orig": 0.664655327796936, + "epoch": 0.27406715076569127, + "kl_loss": 0.12121591717004776, + "loss_ib": 0.0017861091764643788, + "step": 953 + }, + { + "ce_ib": 3.8396944999694824, + "ce_orig": 0.2927386164665222, + "epoch": 0.27406715076569127, + "kl_loss": 0.12834453582763672, + "loss_ib": 0.0016674146754667163, + "step": 953 + }, + { + "ce_ib": 10.94080924987793, + "ce_orig": 1.681357979774475, + "epoch": 0.2743547343446689, + "kl_loss": 0.14820876717567444, + "loss_ib": 0.002576168393716216, + "step": 954 + }, + { + "ce_ib": 9.138489723205566, + "ce_orig": 0.8752321600914001, + "epoch": 0.2743547343446689, + "kl_loss": 0.15767325460910797, + "loss_ib": 0.002490581478923559, + "step": 954 + }, + { + "ce_ib": 10.636330604553223, + "ce_orig": 1.0436869859695435, + "epoch": 0.2743547343446689, + "kl_loss": 0.10358019173145294, + "loss_ib": 0.0020994350779801607, + "step": 954 + }, + { + "ce_ib": 5.532519340515137, + "ce_orig": 0.5938249826431274, + "epoch": 0.2743547343446689, + "kl_loss": 0.16369004547595978, + "loss_ib": 0.0021901524160057306, + "step": 954 + }, + { + "epoch": 0.2746423179236466, + "grad_norm": 0.08476871252059937, + "learning_rate": 4.9515839458768905e-05, + "loss": 0.8402, + "step": 955 + }, + { + "ce_ib": 7.063848495483398, + "ce_orig": 0.47076311707496643, + "epoch": 0.2746423179236466, + "kl_loss": 0.19533999264240265, + "loss_ib": 0.0026597848627716303, + "step": 955 + }, + { + "ce_ib": 8.551830291748047, + "ce_orig": 0.8301795125007629, + "epoch": 0.2746423179236466, + "kl_loss": 0.14242975413799286, + "loss_ib": 0.002279480453580618, + "step": 955 + }, + { + "ce_ib": 8.352051734924316, + "ce_orig": 0.5644355416297913, + "epoch": 0.2746423179236466, + "kl_loss": 0.17968347668647766, + "loss_ib": 0.0026320398319512606, + "step": 955 + }, + { + "ce_ib": 6.1855645179748535, + "ce_orig": 0.8240520358085632, + "epoch": 0.2746423179236466, + "kl_loss": 0.20574909448623657, + "loss_ib": 0.002676047384738922, + "step": 955 + }, + { + "ce_ib": 4.310649871826172, + "ce_orig": 0.43459251523017883, + "epoch": 0.2749299015026242, + "kl_loss": 0.14753244817256927, + "loss_ib": 0.001906389370560646, + "step": 956 + }, + { + "ce_ib": 7.657717227935791, + "ce_orig": 0.8175066709518433, + "epoch": 0.2749299015026242, + "kl_loss": 0.08417732268571854, + "loss_ib": 0.0016075449530035257, + "step": 956 + }, + { + "ce_ib": 6.655043601989746, + "ce_orig": 0.46536022424697876, + "epoch": 0.2749299015026242, + "kl_loss": 0.4305586814880371, + "loss_ib": 0.004971091169863939, + "step": 956 + }, + { + "ce_ib": 4.4826507568359375, + "ce_orig": 0.7566420435905457, + "epoch": 0.2749299015026242, + "kl_loss": 0.10255283117294312, + "loss_ib": 0.0014737934106960893, + "step": 956 + }, + { + "ce_ib": 6.804374694824219, + "ce_orig": 0.8272842168807983, + "epoch": 0.2752174850816018, + "kl_loss": 0.17810380458831787, + "loss_ib": 0.002461475320160389, + "step": 957 + }, + { + "ce_ib": 7.224662780761719, + "ce_orig": 1.0401030778884888, + "epoch": 0.2752174850816018, + "kl_loss": 0.11128075420856476, + "loss_ib": 0.0018352738115936518, + "step": 957 + }, + { + "ce_ib": 6.040480613708496, + "ce_orig": 0.6835022568702698, + "epoch": 0.2752174850816018, + "kl_loss": 0.14640529453754425, + "loss_ib": 0.0020681009627878666, + "step": 957 + }, + { + "ce_ib": 9.293033599853516, + "ce_orig": 1.5635472536087036, + "epoch": 0.2752174850816018, + "kl_loss": 0.11437170207500458, + "loss_ib": 0.0020730202086269855, + "step": 957 + }, + { + "ce_ib": 7.540154933929443, + "ce_orig": 0.5447720289230347, + "epoch": 0.2755050686605795, + "kl_loss": 0.15002988278865814, + "loss_ib": 0.0022543142549693584, + "step": 958 + }, + { + "ce_ib": 6.4098615646362305, + "ce_orig": 0.6705033183097839, + "epoch": 0.2755050686605795, + "kl_loss": 0.12306762486696243, + "loss_ib": 0.0018716624472290277, + "step": 958 + }, + { + "ce_ib": 4.094173431396484, + "ce_orig": 0.6588820219039917, + "epoch": 0.2755050686605795, + "kl_loss": 0.13385936617851257, + "loss_ib": 0.0017480109818279743, + "step": 958 + }, + { + "ce_ib": 10.632689476013184, + "ce_orig": 1.3828617334365845, + "epoch": 0.2755050686605795, + "kl_loss": 0.11779659241437912, + "loss_ib": 0.0022412347607314587, + "step": 958 + }, + { + "ce_ib": 4.860330104827881, + "ce_orig": 0.7951819896697998, + "epoch": 0.2757926522395571, + "kl_loss": 0.06981615722179413, + "loss_ib": 0.0011841944651678205, + "step": 959 + }, + { + "ce_ib": 7.603151321411133, + "ce_orig": 0.838845431804657, + "epoch": 0.2757926522395571, + "kl_loss": 0.12948527932167053, + "loss_ib": 0.002055167919024825, + "step": 959 + }, + { + "ce_ib": 4.83817195892334, + "ce_orig": 0.35420238971710205, + "epoch": 0.2757926522395571, + "kl_loss": 0.12115734070539474, + "loss_ib": 0.0016953905578702688, + "step": 959 + }, + { + "ce_ib": 10.160306930541992, + "ce_orig": 1.2111716270446777, + "epoch": 0.2757926522395571, + "kl_loss": 0.2245725691318512, + "loss_ib": 0.0032617561519145966, + "step": 959 + }, + { + "epoch": 0.27608023581853475, + "grad_norm": 0.11242754757404327, + "learning_rate": 4.950821006017107e-05, + "loss": 0.7701, + "step": 960 + }, + { + "ce_ib": 12.709158897399902, + "ce_orig": 1.7171677350997925, + "epoch": 0.27608023581853475, + "kl_loss": 0.15762443840503693, + "loss_ib": 0.002847159979864955, + "step": 960 + }, + { + "ce_ib": 5.944572925567627, + "ce_orig": 0.7122161388397217, + "epoch": 0.27608023581853475, + "kl_loss": 0.11333119124174118, + "loss_ib": 0.0017277691513299942, + "step": 960 + }, + { + "ce_ib": 11.57944393157959, + "ce_orig": 1.2153781652450562, + "epoch": 0.27608023581853475, + "kl_loss": 0.11728625744581223, + "loss_ib": 0.002330806804820895, + "step": 960 + }, + { + "ce_ib": 8.180597305297852, + "ce_orig": 1.0113308429718018, + "epoch": 0.27608023581853475, + "kl_loss": 0.06936834752559662, + "loss_ib": 0.0015117431757971644, + "step": 960 + }, + { + "ce_ib": 7.877357006072998, + "ce_orig": 1.2754735946655273, + "epoch": 0.2763678193975124, + "kl_loss": 0.23580428957939148, + "loss_ib": 0.0031457783188670874, + "step": 961 + }, + { + "ce_ib": 9.339914321899414, + "ce_orig": 1.0975970029830933, + "epoch": 0.2763678193975124, + "kl_loss": 0.12502053380012512, + "loss_ib": 0.0021841966081410646, + "step": 961 + }, + { + "ce_ib": 9.925426483154297, + "ce_orig": 1.3003571033477783, + "epoch": 0.2763678193975124, + "kl_loss": 0.12838998436927795, + "loss_ib": 0.002276442479342222, + "step": 961 + }, + { + "ce_ib": 4.420860767364502, + "ce_orig": 0.4933626055717468, + "epoch": 0.2763678193975124, + "kl_loss": 0.07248329371213913, + "loss_ib": 0.0011669190134853125, + "step": 961 + }, + { + "ce_ib": 4.496903419494629, + "ce_orig": 0.4532707631587982, + "epoch": 0.27665540297649005, + "kl_loss": 0.21559402346611023, + "loss_ib": 0.0026056303177028894, + "step": 962 + }, + { + "ce_ib": 8.14924144744873, + "ce_orig": 0.8354823589324951, + "epoch": 0.27665540297649005, + "kl_loss": 0.1402096003293991, + "loss_ib": 0.0022170201409608126, + "step": 962 + }, + { + "ce_ib": 9.650452613830566, + "ce_orig": 1.246025562286377, + "epoch": 0.27665540297649005, + "kl_loss": 0.20398131012916565, + "loss_ib": 0.0030048585031181574, + "step": 962 + }, + { + "ce_ib": 5.663705825805664, + "ce_orig": 0.6232472658157349, + "epoch": 0.27665540297649005, + "kl_loss": 0.10515444725751877, + "loss_ib": 0.0016179149970412254, + "step": 962 + }, + { + "ce_ib": 6.809243202209473, + "ce_orig": 0.8654524087905884, + "epoch": 0.2769429865554677, + "kl_loss": 0.106082022190094, + "loss_ib": 0.001741744577884674, + "step": 963 + }, + { + "ce_ib": 4.1796956062316895, + "ce_orig": 0.4485793113708496, + "epoch": 0.2769429865554677, + "kl_loss": 0.11551377177238464, + "loss_ib": 0.0015731072053313255, + "step": 963 + }, + { + "ce_ib": 9.93834114074707, + "ce_orig": 1.3555216789245605, + "epoch": 0.2769429865554677, + "kl_loss": 0.14208224415779114, + "loss_ib": 0.0024146565701812506, + "step": 963 + }, + { + "ce_ib": 4.870518684387207, + "ce_orig": 0.9535307884216309, + "epoch": 0.2769429865554677, + "kl_loss": 0.08944790810346603, + "loss_ib": 0.0013815308921039104, + "step": 963 + }, + { + "ce_ib": 5.912570953369141, + "ce_orig": 0.6123507022857666, + "epoch": 0.2772305701344453, + "kl_loss": 0.10765205323696136, + "loss_ib": 0.0016677775420248508, + "step": 964 + }, + { + "ce_ib": 5.70536470413208, + "ce_orig": 0.5494059920310974, + "epoch": 0.2772305701344453, + "kl_loss": 0.14170430600643158, + "loss_ib": 0.0019875795114785433, + "step": 964 + }, + { + "ce_ib": 10.486671447753906, + "ce_orig": 1.401133418083191, + "epoch": 0.2772305701344453, + "kl_loss": 0.08173699676990509, + "loss_ib": 0.0018660370260477066, + "step": 964 + }, + { + "ce_ib": 4.862819671630859, + "ce_orig": 0.48107895255088806, + "epoch": 0.2772305701344453, + "kl_loss": 0.08526574820280075, + "loss_ib": 0.00133893929887563, + "step": 964 + }, + { + "epoch": 0.277518153713423, + "grad_norm": 0.09270508587360382, + "learning_rate": 4.9500521615606716e-05, + "loss": 0.8706, + "step": 965 + }, + { + "ce_ib": 3.7485735416412354, + "ce_orig": 0.6303088068962097, + "epoch": 0.277518153713423, + "kl_loss": 0.07887739688158035, + "loss_ib": 0.001163631328381598, + "step": 965 + }, + { + "ce_ib": 5.018900394439697, + "ce_orig": 0.42151200771331787, + "epoch": 0.277518153713423, + "kl_loss": 0.16141340136528015, + "loss_ib": 0.0021160240285098553, + "step": 965 + }, + { + "ce_ib": 6.465913772583008, + "ce_orig": 0.5550833344459534, + "epoch": 0.277518153713423, + "kl_loss": 0.0822734534740448, + "loss_ib": 0.0014693258563056588, + "step": 965 + }, + { + "ce_ib": 9.078096389770508, + "ce_orig": 1.2660472393035889, + "epoch": 0.277518153713423, + "kl_loss": 0.11909198760986328, + "loss_ib": 0.002098729368299246, + "step": 965 + }, + { + "ce_ib": 6.4851603507995605, + "ce_orig": 0.8773921132087708, + "epoch": 0.2778057372924006, + "kl_loss": 0.11482943594455719, + "loss_ib": 0.0017968103056773543, + "step": 966 + }, + { + "ce_ib": 8.88327407836914, + "ce_orig": 1.1675580739974976, + "epoch": 0.2778057372924006, + "kl_loss": 0.14929817616939545, + "loss_ib": 0.0023813091684132814, + "step": 966 + }, + { + "ce_ib": 9.463626861572266, + "ce_orig": 1.2751141786575317, + "epoch": 0.2778057372924006, + "kl_loss": 0.14539405703544617, + "loss_ib": 0.002400303026661277, + "step": 966 + }, + { + "ce_ib": 5.47840690612793, + "ce_orig": 0.7138165235519409, + "epoch": 0.2778057372924006, + "kl_loss": 0.10650286078453064, + "loss_ib": 0.0016128692077472806, + "step": 966 + }, + { + "ce_ib": 9.330538749694824, + "ce_orig": 1.5326582193374634, + "epoch": 0.2780933208713782, + "kl_loss": 0.12342990189790726, + "loss_ib": 0.0021673529408872128, + "step": 967 + }, + { + "ce_ib": 3.4421770572662354, + "ce_orig": 0.248417928814888, + "epoch": 0.2780933208713782, + "kl_loss": 0.25681114196777344, + "loss_ib": 0.0029123290441930294, + "step": 967 + }, + { + "ce_ib": 7.802358150482178, + "ce_orig": 0.8956292271614075, + "epoch": 0.2780933208713782, + "kl_loss": 0.2054874747991562, + "loss_ib": 0.0028351102955639362, + "step": 967 + }, + { + "ce_ib": 4.530078887939453, + "ce_orig": 0.43318378925323486, + "epoch": 0.2780933208713782, + "kl_loss": 0.06482702493667603, + "loss_ib": 0.0011012781178578734, + "step": 967 + }, + { + "ce_ib": 6.4143900871276855, + "ce_orig": 0.9001240134239197, + "epoch": 0.2783809044503559, + "kl_loss": 0.1251344531774521, + "loss_ib": 0.0018927834462374449, + "step": 968 + }, + { + "ce_ib": 9.123422622680664, + "ce_orig": 1.1014381647109985, + "epoch": 0.2783809044503559, + "kl_loss": 0.15173783898353577, + "loss_ib": 0.0024297204799950123, + "step": 968 + }, + { + "ce_ib": 2.160301685333252, + "ce_orig": 0.17562763392925262, + "epoch": 0.2783809044503559, + "kl_loss": 0.3059711456298828, + "loss_ib": 0.0032757415901869535, + "step": 968 + }, + { + "ce_ib": 5.158329963684082, + "ce_orig": 0.48985013365745544, + "epoch": 0.2783809044503559, + "kl_loss": 0.14261916279792786, + "loss_ib": 0.0019420244498178363, + "step": 968 + }, + { + "ce_ib": 5.991279125213623, + "ce_orig": 0.8108060359954834, + "epoch": 0.2786684880293335, + "kl_loss": 0.16150274872779846, + "loss_ib": 0.0022141553927212954, + "step": 969 + }, + { + "ce_ib": 6.686485767364502, + "ce_orig": 1.2489796876907349, + "epoch": 0.2786684880293335, + "kl_loss": 0.10450765490531921, + "loss_ib": 0.0017137250397354364, + "step": 969 + }, + { + "ce_ib": 4.417532920837402, + "ce_orig": 0.6061100363731384, + "epoch": 0.2786684880293335, + "kl_loss": 0.11191940307617188, + "loss_ib": 0.0015609472757205367, + "step": 969 + }, + { + "ce_ib": 6.771849632263184, + "ce_orig": 0.6890683770179749, + "epoch": 0.2786684880293335, + "kl_loss": 0.16166692972183228, + "loss_ib": 0.0022938542533665895, + "step": 969 + }, + { + "epoch": 0.27895607160831115, + "grad_norm": 0.10704389959573746, + "learning_rate": 4.94927741435991e-05, + "loss": 0.8406, + "step": 970 + }, + { + "ce_ib": 7.555027008056641, + "ce_orig": 1.2379882335662842, + "epoch": 0.27895607160831115, + "kl_loss": 0.13296280801296234, + "loss_ib": 0.002085130661725998, + "step": 970 + }, + { + "ce_ib": 6.707873821258545, + "ce_orig": 1.0386744737625122, + "epoch": 0.27895607160831115, + "kl_loss": 0.11403495073318481, + "loss_ib": 0.0018111368408426642, + "step": 970 + }, + { + "ce_ib": 7.210244178771973, + "ce_orig": 0.7288638949394226, + "epoch": 0.27895607160831115, + "kl_loss": 0.18328508734703064, + "loss_ib": 0.0025538753252476454, + "step": 970 + }, + { + "ce_ib": 8.974588394165039, + "ce_orig": 0.6134091019630432, + "epoch": 0.27895607160831115, + "kl_loss": 0.11008819192647934, + "loss_ib": 0.0019983407109975815, + "step": 970 + }, + { + "ce_ib": 6.788546562194824, + "ce_orig": 0.7252051830291748, + "epoch": 0.27924365518728883, + "kl_loss": 0.11143806576728821, + "loss_ib": 0.0017932351911440492, + "step": 971 + }, + { + "ce_ib": 7.9452314376831055, + "ce_orig": 0.6665673851966858, + "epoch": 0.27924365518728883, + "kl_loss": 0.08723453432321548, + "loss_ib": 0.0016668684547767043, + "step": 971 + }, + { + "ce_ib": 6.406195163726807, + "ce_orig": 0.4924929141998291, + "epoch": 0.27924365518728883, + "kl_loss": 0.10466066002845764, + "loss_ib": 0.0016872260021045804, + "step": 971 + }, + { + "ce_ib": 5.720137596130371, + "ce_orig": 0.6668531894683838, + "epoch": 0.27924365518728883, + "kl_loss": 0.10390918701887131, + "loss_ib": 0.0016111056320369244, + "step": 971 + }, + { + "ce_ib": 9.839632987976074, + "ce_orig": 1.026991844177246, + "epoch": 0.27953123876626645, + "kl_loss": 0.14091286063194275, + "loss_ib": 0.002393091795966029, + "step": 972 + }, + { + "ce_ib": 7.829953193664551, + "ce_orig": 0.9950670003890991, + "epoch": 0.27953123876626645, + "kl_loss": 0.1180608719587326, + "loss_ib": 0.0019636040087789297, + "step": 972 + }, + { + "ce_ib": 8.307772636413574, + "ce_orig": 1.106313705444336, + "epoch": 0.27953123876626645, + "kl_loss": 0.23758354783058167, + "loss_ib": 0.0032066127751022577, + "step": 972 + }, + { + "ce_ib": 7.722874164581299, + "ce_orig": 0.6765212416648865, + "epoch": 0.27953123876626645, + "kl_loss": 0.093751460313797, + "loss_ib": 0.0017098019598051906, + "step": 972 + }, + { + "ce_ib": 6.720781326293945, + "ce_orig": 0.8855082392692566, + "epoch": 0.2798188223452441, + "kl_loss": 0.16517385840415955, + "loss_ib": 0.0023238167632371187, + "step": 973 + }, + { + "ce_ib": 7.164329528808594, + "ce_orig": 0.5470868945121765, + "epoch": 0.2798188223452441, + "kl_loss": 0.06691183894872665, + "loss_ib": 0.0013855514116585255, + "step": 973 + }, + { + "ce_ib": 5.7693562507629395, + "ce_orig": 0.5427741408348083, + "epoch": 0.2798188223452441, + "kl_loss": 0.15940767526626587, + "loss_ib": 0.0021710123401135206, + "step": 973 + }, + { + "ce_ib": 7.7585530281066895, + "ce_orig": 1.0339730978012085, + "epoch": 0.2798188223452441, + "kl_loss": 0.17508529126644135, + "loss_ib": 0.0025267081800848246, + "step": 973 + }, + { + "ce_ib": 6.318320274353027, + "ce_orig": 0.6542577147483826, + "epoch": 0.2801064059242217, + "kl_loss": 0.13582909107208252, + "loss_ib": 0.001990122953429818, + "step": 974 + }, + { + "ce_ib": 8.61296558380127, + "ce_orig": 0.584928035736084, + "epoch": 0.2801064059242217, + "kl_loss": 0.12153268605470657, + "loss_ib": 0.002076623495668173, + "step": 974 + }, + { + "ce_ib": 9.057924270629883, + "ce_orig": 0.7471427917480469, + "epoch": 0.2801064059242217, + "kl_loss": 0.20692235231399536, + "loss_ib": 0.00297501590102911, + "step": 974 + }, + { + "ce_ib": 7.101727485656738, + "ce_orig": 0.6872411966323853, + "epoch": 0.2801064059242217, + "kl_loss": 0.12273427844047546, + "loss_ib": 0.0019375154515728354, + "step": 974 + }, + { + "epoch": 0.2803939895031994, + "grad_norm": 0.106829434633255, + "learning_rate": 4.948496766281368e-05, + "loss": 0.8025, + "step": 975 + }, + { + "ce_ib": 7.509278774261475, + "ce_orig": 0.8762276768684387, + "epoch": 0.2803939895031994, + "kl_loss": 0.1653478443622589, + "loss_ib": 0.002404406201094389, + "step": 975 + }, + { + "ce_ib": 8.116082191467285, + "ce_orig": 0.8653174638748169, + "epoch": 0.2803939895031994, + "kl_loss": 0.15266814827919006, + "loss_ib": 0.0023382895160466433, + "step": 975 + }, + { + "ce_ib": 4.437204360961914, + "ce_orig": 0.6615025997161865, + "epoch": 0.2803939895031994, + "kl_loss": 0.08827726542949677, + "loss_ib": 0.0013264929875731468, + "step": 975 + }, + { + "ce_ib": 7.19654655456543, + "ce_orig": 0.7415102124214172, + "epoch": 0.2803939895031994, + "kl_loss": 0.14185458421707153, + "loss_ib": 0.0021382004488259554, + "step": 975 + }, + { + "ce_ib": 5.182796955108643, + "ce_orig": 0.43733975291252136, + "epoch": 0.280681573082177, + "kl_loss": 0.0836355909705162, + "loss_ib": 0.0013546355767175555, + "step": 976 + }, + { + "ce_ib": 4.123630046844482, + "ce_orig": 0.45059525966644287, + "epoch": 0.280681573082177, + "kl_loss": 0.08219993859529495, + "loss_ib": 0.001234362367540598, + "step": 976 + }, + { + "ce_ib": 7.806309700012207, + "ce_orig": 0.784027636051178, + "epoch": 0.280681573082177, + "kl_loss": 0.18839582800865173, + "loss_ib": 0.002664589323103428, + "step": 976 + }, + { + "ce_ib": 9.582615852355957, + "ce_orig": 1.1754748821258545, + "epoch": 0.280681573082177, + "kl_loss": 0.08723856508731842, + "loss_ib": 0.0018306472338736057, + "step": 976 + }, + { + "ce_ib": 6.1553449630737305, + "ce_orig": 0.3517453074455261, + "epoch": 0.28096915666115463, + "kl_loss": 0.226718932390213, + "loss_ib": 0.0028827236965298653, + "step": 977 + }, + { + "ce_ib": 4.898242950439453, + "ce_orig": 0.7067365050315857, + "epoch": 0.28096915666115463, + "kl_loss": 0.09806376695632935, + "loss_ib": 0.0014704619534313679, + "step": 977 + }, + { + "ce_ib": 4.567273139953613, + "ce_orig": 0.6066350936889648, + "epoch": 0.28096915666115463, + "kl_loss": 0.12975236773490906, + "loss_ib": 0.0017542509594932199, + "step": 977 + }, + { + "ce_ib": 8.436737060546875, + "ce_orig": 1.028468132019043, + "epoch": 0.28096915666115463, + "kl_loss": 0.11548972129821777, + "loss_ib": 0.0019985707476735115, + "step": 977 + }, + { + "ce_ib": 5.896955966949463, + "ce_orig": 0.6225305199623108, + "epoch": 0.2812567402401323, + "kl_loss": 0.10841213911771774, + "loss_ib": 0.0016738170525059104, + "step": 978 + }, + { + "ce_ib": 8.217554092407227, + "ce_orig": 1.3426951169967651, + "epoch": 0.2812567402401323, + "kl_loss": 0.11771957576274872, + "loss_ib": 0.001998951192945242, + "step": 978 + }, + { + "ce_ib": 8.582084655761719, + "ce_orig": 0.990624189376831, + "epoch": 0.2812567402401323, + "kl_loss": 0.24700742959976196, + "loss_ib": 0.0033282828517258167, + "step": 978 + }, + { + "ce_ib": 11.665776252746582, + "ce_orig": 1.7566227912902832, + "epoch": 0.2812567402401323, + "kl_loss": 0.1942056119441986, + "loss_ib": 0.0031086336821317673, + "step": 978 + }, + { + "ce_ib": 6.679926872253418, + "ce_orig": 0.6784400343894958, + "epoch": 0.28154432381910993, + "kl_loss": 0.17957545816898346, + "loss_ib": 0.0024637472815811634, + "step": 979 + }, + { + "ce_ib": 5.655110836029053, + "ce_orig": 0.7223968505859375, + "epoch": 0.28154432381910993, + "kl_loss": 0.09845352172851562, + "loss_ib": 0.0015500461449846625, + "step": 979 + }, + { + "ce_ib": 9.194509506225586, + "ce_orig": 1.1544585227966309, + "epoch": 0.28154432381910993, + "kl_loss": 0.17654821276664734, + "loss_ib": 0.0026849329005926847, + "step": 979 + }, + { + "ce_ib": 5.0447773933410645, + "ce_orig": 0.8073973059654236, + "epoch": 0.28154432381910993, + "kl_loss": 0.1345929354429245, + "loss_ib": 0.0018504071049392223, + "step": 979 + }, + { + "epoch": 0.28183190739808756, + "grad_norm": 0.09493906795978546, + "learning_rate": 4.947710219205808e-05, + "loss": 0.8179, + "step": 980 + }, + { + "ce_ib": 4.477856159210205, + "ce_orig": 0.55965256690979, + "epoch": 0.28183190739808756, + "kl_loss": 0.09496183693408966, + "loss_ib": 0.0013974038884043694, + "step": 980 + }, + { + "ce_ib": 7.635720252990723, + "ce_orig": 0.7726288437843323, + "epoch": 0.28183190739808756, + "kl_loss": 0.12528853118419647, + "loss_ib": 0.002016457263380289, + "step": 980 + }, + { + "ce_ib": 5.129683494567871, + "ce_orig": 0.7097966074943542, + "epoch": 0.28183190739808756, + "kl_loss": 0.1135503426194191, + "loss_ib": 0.0016484718071296811, + "step": 980 + }, + { + "ce_ib": 6.113365173339844, + "ce_orig": 0.8547828197479248, + "epoch": 0.28183190739808756, + "kl_loss": 0.07798996567726135, + "loss_ib": 0.0013912362046539783, + "step": 980 + }, + { + "ce_ib": 6.860866069793701, + "ce_orig": 0.6225427985191345, + "epoch": 0.28211949097706523, + "kl_loss": 0.09530524909496307, + "loss_ib": 0.0016391390236094594, + "step": 981 + }, + { + "ce_ib": 5.513166427612305, + "ce_orig": 0.7645548582077026, + "epoch": 0.28211949097706523, + "kl_loss": 0.10374398529529572, + "loss_ib": 0.001588756451383233, + "step": 981 + }, + { + "ce_ib": 5.0253400802612305, + "ce_orig": 0.45561596751213074, + "epoch": 0.28211949097706523, + "kl_loss": 0.12157364189624786, + "loss_ib": 0.0017182704759761691, + "step": 981 + }, + { + "ce_ib": 8.435596466064453, + "ce_orig": 0.8526706695556641, + "epoch": 0.28211949097706523, + "kl_loss": 0.15986429154872894, + "loss_ib": 0.0024422025308012962, + "step": 981 + }, + { + "ce_ib": 6.921075344085693, + "ce_orig": 0.9211604595184326, + "epoch": 0.28240707455604286, + "kl_loss": 0.1525169461965561, + "loss_ib": 0.002217276953160763, + "step": 982 + }, + { + "ce_ib": 8.066728591918945, + "ce_orig": 1.1519079208374023, + "epoch": 0.28240707455604286, + "kl_loss": 0.18316397070884705, + "loss_ib": 0.0026383125223219395, + "step": 982 + }, + { + "ce_ib": 3.8123619556427, + "ce_orig": 0.5717668533325195, + "epoch": 0.28240707455604286, + "kl_loss": 0.14196446537971497, + "loss_ib": 0.001800880883820355, + "step": 982 + }, + { + "ce_ib": 9.549180030822754, + "ce_orig": 1.2062931060791016, + "epoch": 0.28240707455604286, + "kl_loss": 0.1640200912952423, + "loss_ib": 0.002595118712633848, + "step": 982 + }, + { + "ce_ib": 12.103609085083008, + "ce_orig": 1.862855076789856, + "epoch": 0.2826946581350205, + "kl_loss": 0.1695042848587036, + "loss_ib": 0.002905403496697545, + "step": 983 + }, + { + "ce_ib": 5.744802951812744, + "ce_orig": 0.559029757976532, + "epoch": 0.2826946581350205, + "kl_loss": 0.15063290297985077, + "loss_ib": 0.002080809324979782, + "step": 983 + }, + { + "ce_ib": 4.9721784591674805, + "ce_orig": 0.5969537496566772, + "epoch": 0.2826946581350205, + "kl_loss": 0.08739107847213745, + "loss_ib": 0.0013711284846067429, + "step": 983 + }, + { + "ce_ib": 7.158525466918945, + "ce_orig": 1.099306583404541, + "epoch": 0.2826946581350205, + "kl_loss": 0.11099478602409363, + "loss_ib": 0.0018258003983646631, + "step": 983 + }, + { + "ce_ib": 6.190611839294434, + "ce_orig": 0.5297380685806274, + "epoch": 0.2829822417139981, + "kl_loss": 0.1250627338886261, + "loss_ib": 0.0018696883926168084, + "step": 984 + }, + { + "ce_ib": 8.836004257202148, + "ce_orig": 1.0750619173049927, + "epoch": 0.2829822417139981, + "kl_loss": 0.11145786195993423, + "loss_ib": 0.001998178893700242, + "step": 984 + }, + { + "ce_ib": 4.149031639099121, + "ce_orig": 0.654617428779602, + "epoch": 0.2829822417139981, + "kl_loss": 0.10280697047710419, + "loss_ib": 0.0014429728034883738, + "step": 984 + }, + { + "ce_ib": 7.487407684326172, + "ce_orig": 0.9551166296005249, + "epoch": 0.2829822417139981, + "kl_loss": 0.10395655781030655, + "loss_ib": 0.0017883061664178967, + "step": 984 + }, + { + "epoch": 0.2832698252929758, + "grad_norm": 0.0821569636464119, + "learning_rate": 4.946917775028204e-05, + "loss": 0.8411, + "step": 985 + }, + { + "ce_ib": 6.546420574188232, + "ce_orig": 0.8366989493370056, + "epoch": 0.2832698252929758, + "kl_loss": 0.09713012725114822, + "loss_ib": 0.0016259433468803763, + "step": 985 + }, + { + "ce_ib": 5.233980178833008, + "ce_orig": 0.6575462818145752, + "epoch": 0.2832698252929758, + "kl_loss": 0.1284598708152771, + "loss_ib": 0.0018079965375363827, + "step": 985 + }, + { + "ce_ib": 7.609673976898193, + "ce_orig": 0.8251411318778992, + "epoch": 0.2832698252929758, + "kl_loss": 0.16431501507759094, + "loss_ib": 0.002404117491096258, + "step": 985 + }, + { + "ce_ib": 8.2174711227417, + "ce_orig": 1.1952818632125854, + "epoch": 0.2832698252929758, + "kl_loss": 0.1562347710132599, + "loss_ib": 0.0023840947542339563, + "step": 985 + }, + { + "ce_ib": 8.832913398742676, + "ce_orig": 0.8630291223526001, + "epoch": 0.2835574088719534, + "kl_loss": 0.14526310563087463, + "loss_ib": 0.0023359223268926144, + "step": 986 + }, + { + "ce_ib": 9.954623222351074, + "ce_orig": 1.3729417324066162, + "epoch": 0.2835574088719534, + "kl_loss": 0.18901260197162628, + "loss_ib": 0.0028855884447693825, + "step": 986 + }, + { + "ce_ib": 7.703253269195557, + "ce_orig": 1.0099605321884155, + "epoch": 0.2835574088719534, + "kl_loss": 0.1130913719534874, + "loss_ib": 0.0019012389238923788, + "step": 986 + }, + { + "ce_ib": 8.98315715789795, + "ce_orig": 0.9903598427772522, + "epoch": 0.2835574088719534, + "kl_loss": 0.1280607134103775, + "loss_ib": 0.002178922761231661, + "step": 986 + }, + { + "ce_ib": 7.346461296081543, + "ce_orig": 0.526503324508667, + "epoch": 0.28384499245093103, + "kl_loss": 0.20876693725585938, + "loss_ib": 0.0028223153203725815, + "step": 987 + }, + { + "ce_ib": 6.163048267364502, + "ce_orig": 0.6525804996490479, + "epoch": 0.28384499245093103, + "kl_loss": 0.10793370008468628, + "loss_ib": 0.0016956417821347713, + "step": 987 + }, + { + "ce_ib": 6.027551651000977, + "ce_orig": 0.6111074686050415, + "epoch": 0.28384499245093103, + "kl_loss": 0.08734263479709625, + "loss_ib": 0.0014761814381927252, + "step": 987 + }, + { + "ce_ib": 6.522339344024658, + "ce_orig": 0.47297385334968567, + "epoch": 0.28384499245093103, + "kl_loss": 0.19408410787582397, + "loss_ib": 0.0025930749252438545, + "step": 987 + }, + { + "ce_ib": 5.587515830993652, + "ce_orig": 0.7599571347236633, + "epoch": 0.2841325760299087, + "kl_loss": 0.15328392386436462, + "loss_ib": 0.002091590780764818, + "step": 988 + }, + { + "ce_ib": 5.406798362731934, + "ce_orig": 0.7001034021377563, + "epoch": 0.2841325760299087, + "kl_loss": 0.12634865939617157, + "loss_ib": 0.0018041663570329547, + "step": 988 + }, + { + "ce_ib": 5.6719746589660645, + "ce_orig": 0.6059353947639465, + "epoch": 0.2841325760299087, + "kl_loss": 0.17453685402870178, + "loss_ib": 0.0023125659208744764, + "step": 988 + }, + { + "ce_ib": 6.926519870758057, + "ce_orig": 0.9303135871887207, + "epoch": 0.2841325760299087, + "kl_loss": 0.15477648377418518, + "loss_ib": 0.002240416593849659, + "step": 988 + }, + { + "ce_ib": 8.383512496948242, + "ce_orig": 1.1582266092300415, + "epoch": 0.28442015960888634, + "kl_loss": 0.16042135655879974, + "loss_ib": 0.0024425648152828217, + "step": 989 + }, + { + "ce_ib": 9.473687171936035, + "ce_orig": 1.514312982559204, + "epoch": 0.28442015960888634, + "kl_loss": 0.09347137808799744, + "loss_ib": 0.0018820824334397912, + "step": 989 + }, + { + "ce_ib": 8.357083320617676, + "ce_orig": 1.2286492586135864, + "epoch": 0.28442015960888634, + "kl_loss": 0.14219217002391815, + "loss_ib": 0.002257629996165633, + "step": 989 + }, + { + "ce_ib": 8.783169746398926, + "ce_orig": 1.3666160106658936, + "epoch": 0.28442015960888634, + "kl_loss": 0.1416359841823578, + "loss_ib": 0.0022946768440306187, + "step": 989 + }, + { + "epoch": 0.28470774318786396, + "grad_norm": 0.1091771125793457, + "learning_rate": 4.946119435657738e-05, + "loss": 0.8971, + "step": 990 + }, + { + "ce_ib": 6.181784152984619, + "ce_orig": 0.6989761590957642, + "epoch": 0.28470774318786396, + "kl_loss": 0.11948366463184357, + "loss_ib": 0.0018130149692296982, + "step": 990 + }, + { + "ce_ib": 7.10377836227417, + "ce_orig": 0.8298677802085876, + "epoch": 0.28470774318786396, + "kl_loss": 0.12324854731559753, + "loss_ib": 0.0019428632222115993, + "step": 990 + }, + { + "ce_ib": 7.611861705780029, + "ce_orig": 1.172329306602478, + "epoch": 0.28470774318786396, + "kl_loss": 0.08517073839902878, + "loss_ib": 0.0016128936549648643, + "step": 990 + }, + { + "ce_ib": 7.055534362792969, + "ce_orig": 0.9290443062782288, + "epoch": 0.28470774318786396, + "kl_loss": 0.15436816215515137, + "loss_ib": 0.0022492350544780493, + "step": 990 + }, + { + "ce_ib": 8.848563194274902, + "ce_orig": 0.7931856513023376, + "epoch": 0.28499532676684164, + "kl_loss": 0.16488288342952728, + "loss_ib": 0.0025336849503219128, + "step": 991 + }, + { + "ce_ib": 10.097640991210938, + "ce_orig": 1.1702800989151, + "epoch": 0.28499532676684164, + "kl_loss": 0.13779164850711823, + "loss_ib": 0.0023876805789768696, + "step": 991 + }, + { + "ce_ib": 7.169426918029785, + "ce_orig": 0.8789024949073792, + "epoch": 0.28499532676684164, + "kl_loss": 0.1385534256696701, + "loss_ib": 0.0021024770103394985, + "step": 991 + }, + { + "ce_ib": 7.275373458862305, + "ce_orig": 0.854453444480896, + "epoch": 0.28499532676684164, + "kl_loss": 0.11206218600273132, + "loss_ib": 0.0018481591250747442, + "step": 991 + }, + { + "ce_ib": 7.726741790771484, + "ce_orig": 1.2789119482040405, + "epoch": 0.28528291034581926, + "kl_loss": 0.1516597718000412, + "loss_ib": 0.0022892719134688377, + "step": 992 + }, + { + "ce_ib": 8.328351974487305, + "ce_orig": 1.348406195640564, + "epoch": 0.28528291034581926, + "kl_loss": 0.12467220425605774, + "loss_ib": 0.0020795571617782116, + "step": 992 + }, + { + "ce_ib": 5.893527507781982, + "ce_orig": 0.49985817074775696, + "epoch": 0.28528291034581926, + "kl_loss": 0.1101238876581192, + "loss_ib": 0.001690591569058597, + "step": 992 + }, + { + "ce_ib": 6.144350528717041, + "ce_orig": 0.6708614230155945, + "epoch": 0.28528291034581926, + "kl_loss": 0.1393936723470688, + "loss_ib": 0.002008371753618121, + "step": 992 + }, + { + "ce_ib": 6.9593353271484375, + "ce_orig": 0.9741218090057373, + "epoch": 0.2855704939247969, + "kl_loss": 0.10788355767726898, + "loss_ib": 0.0017747690435498953, + "step": 993 + }, + { + "ce_ib": 3.7139055728912354, + "ce_orig": 0.7304720282554626, + "epoch": 0.2855704939247969, + "kl_loss": 0.08104556798934937, + "loss_ib": 0.0011818462517112494, + "step": 993 + }, + { + "ce_ib": 10.939255714416504, + "ce_orig": 1.5362738370895386, + "epoch": 0.2855704939247969, + "kl_loss": 0.15437957644462585, + "loss_ib": 0.0026377211324870586, + "step": 993 + }, + { + "ce_ib": 5.173558712005615, + "ce_orig": 0.7878844141960144, + "epoch": 0.2855704939247969, + "kl_loss": 0.11559354513883591, + "loss_ib": 0.00167329132091254, + "step": 993 + }, + { + "ce_ib": 4.338344573974609, + "ce_orig": 0.6058388352394104, + "epoch": 0.2858580775037745, + "kl_loss": 0.1061791256070137, + "loss_ib": 0.0014956255909055471, + "step": 994 + }, + { + "ce_ib": 6.763393878936768, + "ce_orig": 0.9681786298751831, + "epoch": 0.2858580775037745, + "kl_loss": 0.10215041786432266, + "loss_ib": 0.0016978434287011623, + "step": 994 + }, + { + "ce_ib": 6.972935199737549, + "ce_orig": 0.7812452912330627, + "epoch": 0.2858580775037745, + "kl_loss": 0.07625571638345718, + "loss_ib": 0.001459850580431521, + "step": 994 + }, + { + "ce_ib": 8.671821594238281, + "ce_orig": 1.0143651962280273, + "epoch": 0.2858580775037745, + "kl_loss": 0.1577187478542328, + "loss_ib": 0.002444369485601783, + "step": 994 + }, + { + "epoch": 0.2861456610827522, + "grad_norm": 0.10612454265356064, + "learning_rate": 4.945315203017795e-05, + "loss": 0.8991, + "step": 995 + }, + { + "ce_ib": 8.718454360961914, + "ce_orig": 1.094069480895996, + "epoch": 0.2861456610827522, + "kl_loss": 0.1519099771976471, + "loss_ib": 0.0023909450974315405, + "step": 995 + }, + { + "ce_ib": 9.989187240600586, + "ce_orig": 0.839239776134491, + "epoch": 0.2861456610827522, + "kl_loss": 0.1391148716211319, + "loss_ib": 0.0023900673259049654, + "step": 995 + }, + { + "ce_ib": 5.35410737991333, + "ce_orig": 0.7497389316558838, + "epoch": 0.2861456610827522, + "kl_loss": 0.10185503959655762, + "loss_ib": 0.0015539609594270587, + "step": 995 + }, + { + "ce_ib": 3.817272901535034, + "ce_orig": 0.5471851825714111, + "epoch": 0.2861456610827522, + "kl_loss": 0.14413145184516907, + "loss_ib": 0.0018230417044833302, + "step": 995 + }, + { + "ce_ib": 4.422999382019043, + "ce_orig": 0.5248720645904541, + "epoch": 0.2864332446617298, + "kl_loss": 0.14078199863433838, + "loss_ib": 0.0018501197919249535, + "step": 996 + }, + { + "ce_ib": 6.990998268127441, + "ce_orig": 0.8086313605308533, + "epoch": 0.2864332446617298, + "kl_loss": 0.1101246327161789, + "loss_ib": 0.001800346071831882, + "step": 996 + }, + { + "ce_ib": 7.523761749267578, + "ce_orig": 0.8706881403923035, + "epoch": 0.2864332446617298, + "kl_loss": 0.11096677929162979, + "loss_ib": 0.0018620439805090427, + "step": 996 + }, + { + "ce_ib": 7.341949462890625, + "ce_orig": 0.8602787256240845, + "epoch": 0.2864332446617298, + "kl_loss": 0.10801561176776886, + "loss_ib": 0.001814351067878306, + "step": 996 + }, + { + "ce_ib": 5.043796062469482, + "ce_orig": 0.5980595350265503, + "epoch": 0.28672082824070744, + "kl_loss": 0.1614397168159485, + "loss_ib": 0.002118776785209775, + "step": 997 + }, + { + "ce_ib": 2.875126600265503, + "ce_orig": 0.45491382479667664, + "epoch": 0.28672082824070744, + "kl_loss": 0.10723177343606949, + "loss_ib": 0.001359830261208117, + "step": 997 + }, + { + "ce_ib": 6.1942009925842285, + "ce_orig": 0.8003867864608765, + "epoch": 0.28672082824070744, + "kl_loss": 0.08695151656866074, + "loss_ib": 0.0014889352023601532, + "step": 997 + }, + { + "ce_ib": 7.5594801902771, + "ce_orig": 1.2212069034576416, + "epoch": 0.28672082824070744, + "kl_loss": 0.09208080172538757, + "loss_ib": 0.0016767559573054314, + "step": 997 + }, + { + "ce_ib": 5.673044204711914, + "ce_orig": 0.5247984528541565, + "epoch": 0.2870084118196851, + "kl_loss": 0.15386104583740234, + "loss_ib": 0.0021059149876236916, + "step": 998 + }, + { + "ce_ib": 7.351710319519043, + "ce_orig": 1.040869116783142, + "epoch": 0.2870084118196851, + "kl_loss": 0.09939703345298767, + "loss_ib": 0.0017291413387283683, + "step": 998 + }, + { + "ce_ib": 7.567747116088867, + "ce_orig": 0.5997657179832458, + "epoch": 0.2870084118196851, + "kl_loss": 0.13425284624099731, + "loss_ib": 0.002099303063005209, + "step": 998 + }, + { + "ce_ib": 8.138786315917969, + "ce_orig": 1.1768231391906738, + "epoch": 0.2870084118196851, + "kl_loss": 0.1242784708738327, + "loss_ib": 0.002056663390249014, + "step": 998 + }, + { + "ce_ib": 4.522161483764648, + "ce_orig": 0.48833024501800537, + "epoch": 0.28729599539866274, + "kl_loss": 0.22905391454696655, + "loss_ib": 0.0027427554596215487, + "step": 999 + }, + { + "ce_ib": 5.886623859405518, + "ce_orig": 0.8945769667625427, + "epoch": 0.28729599539866274, + "kl_loss": 0.12364333122968674, + "loss_ib": 0.0018250956200063229, + "step": 999 + }, + { + "ce_ib": 8.664619445800781, + "ce_orig": 0.699190616607666, + "epoch": 0.28729599539866274, + "kl_loss": 0.18423990905284882, + "loss_ib": 0.002708860905840993, + "step": 999 + }, + { + "ce_ib": 8.612241744995117, + "ce_orig": 1.0460861921310425, + "epoch": 0.28729599539866274, + "kl_loss": 0.13352595269680023, + "loss_ib": 0.0021964835468679667, + "step": 999 + }, + { + "epoch": 0.28758357897764036, + "grad_norm": 0.0895831435918808, + "learning_rate": 4.944505079045958e-05, + "loss": 0.8976, + "step": 1000 + }, + { + "ce_ib": 8.301177024841309, + "ce_orig": 0.6439716815948486, + "epoch": 0.28758357897764036, + "kl_loss": 0.12516939640045166, + "loss_ib": 0.002081811660900712, + "step": 1000 + }, + { + "ce_ib": 7.792233467102051, + "ce_orig": 0.8878663778305054, + "epoch": 0.28758357897764036, + "kl_loss": 0.11768750846385956, + "loss_ib": 0.00195609824731946, + "step": 1000 + }, + { + "ce_ib": 6.912393569946289, + "ce_orig": 0.5264413952827454, + "epoch": 0.28758357897764036, + "kl_loss": 0.16177913546562195, + "loss_ib": 0.002309030620381236, + "step": 1000 + }, + { + "ce_ib": 6.791064739227295, + "ce_orig": 0.8416721224784851, + "epoch": 0.28758357897764036, + "kl_loss": 0.14784878492355347, + "loss_ib": 0.002157594310119748, + "step": 1000 + }, + { + "ce_ib": 7.734585762023926, + "ce_orig": 0.657818615436554, + "epoch": 0.28787116255661804, + "kl_loss": 0.13893797993659973, + "loss_ib": 0.0021628381218761206, + "step": 1001 + }, + { + "ce_ib": 7.315647602081299, + "ce_orig": 0.6256568431854248, + "epoch": 0.28787116255661804, + "kl_loss": 0.13449688255786896, + "loss_ib": 0.0020765336230397224, + "step": 1001 + }, + { + "ce_ib": 4.7383222579956055, + "ce_orig": 0.757487952709198, + "epoch": 0.28787116255661804, + "kl_loss": 0.11639707535505295, + "loss_ib": 0.0016378029249608517, + "step": 1001 + }, + { + "ce_ib": 6.276235580444336, + "ce_orig": 0.45473286509513855, + "epoch": 0.28787116255661804, + "kl_loss": 0.14961040019989014, + "loss_ib": 0.002123727463185787, + "step": 1001 + }, + { + "ce_ib": 10.06789493560791, + "ce_orig": 1.3343020677566528, + "epoch": 0.28815874613559567, + "kl_loss": 0.22612667083740234, + "loss_ib": 0.003268056083470583, + "step": 1002 + }, + { + "ce_ib": 6.92459774017334, + "ce_orig": 0.8351874351501465, + "epoch": 0.28815874613559567, + "kl_loss": 0.07932960987091064, + "loss_ib": 0.0014857558999210596, + "step": 1002 + }, + { + "ce_ib": 8.357982635498047, + "ce_orig": 0.896270751953125, + "epoch": 0.28815874613559567, + "kl_loss": 0.08860597014427185, + "loss_ib": 0.001721857930533588, + "step": 1002 + }, + { + "ce_ib": 7.726855278015137, + "ce_orig": 0.7626195549964905, + "epoch": 0.28815874613559567, + "kl_loss": 0.17634786665439606, + "loss_ib": 0.002536164131015539, + "step": 1002 + }, + { + "ce_ib": 3.290245532989502, + "ce_orig": 0.5970833897590637, + "epoch": 0.2884463297145733, + "kl_loss": 0.07192050665616989, + "loss_ib": 0.0010482296347618103, + "step": 1003 + }, + { + "ce_ib": 11.031519889831543, + "ce_orig": 1.4460707902908325, + "epoch": 0.2884463297145733, + "kl_loss": 0.1282051056623459, + "loss_ib": 0.002385202795267105, + "step": 1003 + }, + { + "ce_ib": 8.045802116394043, + "ce_orig": 1.0216474533081055, + "epoch": 0.2884463297145733, + "kl_loss": 0.1990397721529007, + "loss_ib": 0.0027949779760092497, + "step": 1003 + }, + { + "ce_ib": 4.998257160186768, + "ce_orig": 0.4931280016899109, + "epoch": 0.2884463297145733, + "kl_loss": 0.1314055621623993, + "loss_ib": 0.0018138813320547342, + "step": 1003 + }, + { + "ce_ib": 8.403632164001465, + "ce_orig": 0.9838512539863586, + "epoch": 0.2887339132935509, + "kl_loss": 0.14859150350093842, + "loss_ib": 0.0023262782488018274, + "step": 1004 + }, + { + "ce_ib": 4.052238941192627, + "ce_orig": 0.42797771096229553, + "epoch": 0.2887339132935509, + "kl_loss": 0.10543163865804672, + "loss_ib": 0.0014595402171835303, + "step": 1004 + }, + { + "ce_ib": 7.557340621948242, + "ce_orig": 0.8052495121955872, + "epoch": 0.2887339132935509, + "kl_loss": 0.1491703987121582, + "loss_ib": 0.0022474380675703287, + "step": 1004 + }, + { + "ce_ib": 3.635923147201538, + "ce_orig": 0.3550164997577667, + "epoch": 0.2887339132935509, + "kl_loss": 0.14298149943351746, + "loss_ib": 0.0017934072529897094, + "step": 1004 + }, + { + "epoch": 0.2890214968725286, + "grad_norm": 0.08703169226646423, + "learning_rate": 4.9436890656940045e-05, + "loss": 0.8759, + "step": 1005 + }, + { + "ce_ib": 6.973578453063965, + "ce_orig": 0.7662994265556335, + "epoch": 0.2890214968725286, + "kl_loss": 0.13552214205265045, + "loss_ib": 0.0020525790750980377, + "step": 1005 + }, + { + "ce_ib": 10.7005615234375, + "ce_orig": 1.4963085651397705, + "epoch": 0.2890214968725286, + "kl_loss": 0.17894543707370758, + "loss_ib": 0.0028595104813575745, + "step": 1005 + }, + { + "ce_ib": 7.626862049102783, + "ce_orig": 0.6037044525146484, + "epoch": 0.2890214968725286, + "kl_loss": 0.10745424032211304, + "loss_ib": 0.0018372285412624478, + "step": 1005 + }, + { + "ce_ib": 4.8975138664245605, + "ce_orig": 0.36308395862579346, + "epoch": 0.2890214968725286, + "kl_loss": 0.16487862169742584, + "loss_ib": 0.002138537587597966, + "step": 1005 + }, + { + "ce_ib": 7.104249477386475, + "ce_orig": 0.9271300435066223, + "epoch": 0.2893090804515062, + "kl_loss": 0.11113549023866653, + "loss_ib": 0.001821779878810048, + "step": 1006 + }, + { + "ce_ib": 6.194459915161133, + "ce_orig": 0.5316910743713379, + "epoch": 0.2893090804515062, + "kl_loss": 0.25875431299209595, + "loss_ib": 0.0032069890294224024, + "step": 1006 + }, + { + "ce_ib": 9.120644569396973, + "ce_orig": 0.7684857249259949, + "epoch": 0.2893090804515062, + "kl_loss": 0.05433555692434311, + "loss_ib": 0.0014554199296981096, + "step": 1006 + }, + { + "ce_ib": 7.741818904876709, + "ce_orig": 0.7994129657745361, + "epoch": 0.2893090804515062, + "kl_loss": 0.16110675036907196, + "loss_ib": 0.002385249361395836, + "step": 1006 + }, + { + "ce_ib": 8.448857307434082, + "ce_orig": 1.0468580722808838, + "epoch": 0.28959666403048384, + "kl_loss": 0.13067549467086792, + "loss_ib": 0.002151640597730875, + "step": 1007 + }, + { + "ce_ib": 4.776382923126221, + "ce_orig": 0.857218325138092, + "epoch": 0.28959666403048384, + "kl_loss": 0.10937680304050446, + "loss_ib": 0.0015714062610641122, + "step": 1007 + }, + { + "ce_ib": 4.323080539703369, + "ce_orig": 0.7916050553321838, + "epoch": 0.28959666403048384, + "kl_loss": 0.09697412699460983, + "loss_ib": 0.0014020493254065514, + "step": 1007 + }, + { + "ce_ib": 8.446868896484375, + "ce_orig": 1.1747890710830688, + "epoch": 0.28959666403048384, + "kl_loss": 0.13406559824943542, + "loss_ib": 0.0021853428333997726, + "step": 1007 + }, + { + "ce_ib": 7.49606990814209, + "ce_orig": 0.980958878993988, + "epoch": 0.2898842476094615, + "kl_loss": 0.12899692356586456, + "loss_ib": 0.0020395761821419, + "step": 1008 + }, + { + "ce_ib": 6.99376106262207, + "ce_orig": 0.8433418869972229, + "epoch": 0.2898842476094615, + "kl_loss": 0.12246361374855042, + "loss_ib": 0.0019240122055634856, + "step": 1008 + }, + { + "ce_ib": 9.26354694366455, + "ce_orig": 0.5582899451255798, + "epoch": 0.2898842476094615, + "kl_loss": 0.1489538997411728, + "loss_ib": 0.0024158935993909836, + "step": 1008 + }, + { + "ce_ib": 9.026296615600586, + "ce_orig": 1.4614930152893066, + "epoch": 0.2898842476094615, + "kl_loss": 0.09286147356033325, + "loss_ib": 0.001831244328059256, + "step": 1008 + }, + { + "ce_ib": 7.721127033233643, + "ce_orig": 0.7993571162223816, + "epoch": 0.29017183118843914, + "kl_loss": 0.13815072178840637, + "loss_ib": 0.0021536198910325766, + "step": 1009 + }, + { + "ce_ib": 4.602123260498047, + "ce_orig": 0.7283000946044922, + "epoch": 0.29017183118843914, + "kl_loss": 0.09357312321662903, + "loss_ib": 0.0013959434581920505, + "step": 1009 + }, + { + "ce_ib": 7.143542766571045, + "ce_orig": 1.1499704122543335, + "epoch": 0.29017183118843914, + "kl_loss": 0.13207654654979706, + "loss_ib": 0.002035119803622365, + "step": 1009 + }, + { + "ce_ib": 6.815535068511963, + "ce_orig": 1.1950119733810425, + "epoch": 0.29017183118843914, + "kl_loss": 0.10215964913368225, + "loss_ib": 0.0017031499883159995, + "step": 1009 + }, + { + "epoch": 0.29045941476741677, + "grad_norm": 0.08291789889335632, + "learning_rate": 4.942867164927899e-05, + "loss": 0.8737, + "step": 1010 + }, + { + "ce_ib": 7.340415000915527, + "ce_orig": 0.9612502455711365, + "epoch": 0.29045941476741677, + "kl_loss": 0.11465869843959808, + "loss_ib": 0.0018806284060701728, + "step": 1010 + }, + { + "ce_ib": 6.426059722900391, + "ce_orig": 0.9481909275054932, + "epoch": 0.29045941476741677, + "kl_loss": 0.11971482634544373, + "loss_ib": 0.001839754288084805, + "step": 1010 + }, + { + "ce_ib": 6.488442420959473, + "ce_orig": 0.889805793762207, + "epoch": 0.29045941476741677, + "kl_loss": 0.11869333684444427, + "loss_ib": 0.001835777540691197, + "step": 1010 + }, + { + "ce_ib": 2.905393362045288, + "ce_orig": 0.5455615520477295, + "epoch": 0.29045941476741677, + "kl_loss": 0.0905960351228714, + "loss_ib": 0.0011964996811002493, + "step": 1010 + }, + { + "ce_ib": 8.19021224975586, + "ce_orig": 1.2127585411071777, + "epoch": 0.29074699834639445, + "kl_loss": 0.25930172204971313, + "loss_ib": 0.0034120383206754923, + "step": 1011 + }, + { + "ce_ib": 5.116666793823242, + "ce_orig": 0.70909184217453, + "epoch": 0.29074699834639445, + "kl_loss": 0.11982348561286926, + "loss_ib": 0.0017099014949053526, + "step": 1011 + }, + { + "ce_ib": 6.482151031494141, + "ce_orig": 0.6337217688560486, + "epoch": 0.29074699834639445, + "kl_loss": 0.13345953822135925, + "loss_ib": 0.001982810441404581, + "step": 1011 + }, + { + "ce_ib": 7.245599746704102, + "ce_orig": 1.1574453115463257, + "epoch": 0.29074699834639445, + "kl_loss": 0.1491246074438095, + "loss_ib": 0.0022158059291541576, + "step": 1011 + }, + { + "ce_ib": 10.480112075805664, + "ce_orig": 1.089797854423523, + "epoch": 0.29103458192537207, + "kl_loss": 0.11675558984279633, + "loss_ib": 0.0022155670449137688, + "step": 1012 + }, + { + "ce_ib": 6.354058742523193, + "ce_orig": 0.7734960317611694, + "epoch": 0.29103458192537207, + "kl_loss": 0.1279613971710205, + "loss_ib": 0.001915019704028964, + "step": 1012 + }, + { + "ce_ib": 5.588810920715332, + "ce_orig": 0.7622073888778687, + "epoch": 0.29103458192537207, + "kl_loss": 0.10941126942634583, + "loss_ib": 0.0016529938438907266, + "step": 1012 + }, + { + "ce_ib": 7.674766540527344, + "ce_orig": 1.4139724969863892, + "epoch": 0.29103458192537207, + "kl_loss": 0.15296992659568787, + "loss_ib": 0.0022971758153289557, + "step": 1012 + }, + { + "ce_ib": 6.944582462310791, + "ce_orig": 0.6798045039176941, + "epoch": 0.2913221655043497, + "kl_loss": 0.13148388266563416, + "loss_ib": 0.0020092970225960016, + "step": 1013 + }, + { + "ce_ib": 9.336368560791016, + "ce_orig": 0.8694828748703003, + "epoch": 0.2913221655043497, + "kl_loss": 0.11502645909786224, + "loss_ib": 0.0020839013159275055, + "step": 1013 + }, + { + "ce_ib": 6.466903209686279, + "ce_orig": 1.046998143196106, + "epoch": 0.2913221655043497, + "kl_loss": 0.09708376228809357, + "loss_ib": 0.0016175279160961509, + "step": 1013 + }, + { + "ce_ib": 8.016132354736328, + "ce_orig": 1.270522952079773, + "epoch": 0.2913221655043497, + "kl_loss": 0.13104431331157684, + "loss_ib": 0.0021120563615113497, + "step": 1013 + }, + { + "ce_ib": 7.8399553298950195, + "ce_orig": 1.0986905097961426, + "epoch": 0.2916097490833273, + "kl_loss": 0.15167173743247986, + "loss_ib": 0.0023007127456367016, + "step": 1014 + }, + { + "ce_ib": 5.281484603881836, + "ce_orig": 0.8865677714347839, + "epoch": 0.2916097490833273, + "kl_loss": 0.09800760447978973, + "loss_ib": 0.0015082244062796235, + "step": 1014 + }, + { + "ce_ib": 5.640346527099609, + "ce_orig": 0.8331298232078552, + "epoch": 0.2916097490833273, + "kl_loss": 0.10224798321723938, + "loss_ib": 0.0015865144087001681, + "step": 1014 + }, + { + "ce_ib": 10.421279907226562, + "ce_orig": 1.338667631149292, + "epoch": 0.2916097490833273, + "kl_loss": 0.13115710020065308, + "loss_ib": 0.0023536989465355873, + "step": 1014 + }, + { + "epoch": 0.291897332662305, + "grad_norm": 0.0798521637916565, + "learning_rate": 4.9420393787277917e-05, + "loss": 0.8894, + "step": 1015 + }, + { + "ce_ib": 8.863430976867676, + "ce_orig": 1.5828720331192017, + "epoch": 0.291897332662305, + "kl_loss": 0.12114414572715759, + "loss_ib": 0.0020977845415472984, + "step": 1015 + }, + { + "ce_ib": 6.6428985595703125, + "ce_orig": 0.6384519338607788, + "epoch": 0.291897332662305, + "kl_loss": 0.14254824817180634, + "loss_ib": 0.002089772140607238, + "step": 1015 + }, + { + "ce_ib": 2.3048622608184814, + "ce_orig": 0.16694949567317963, + "epoch": 0.291897332662305, + "kl_loss": 0.17876015603542328, + "loss_ib": 0.002018087776377797, + "step": 1015 + }, + { + "ce_ib": 5.116494655609131, + "ce_orig": 0.802765965461731, + "epoch": 0.291897332662305, + "kl_loss": 0.11342580616474152, + "loss_ib": 0.0016459074104204774, + "step": 1015 + }, + { + "ce_ib": 7.742083549499512, + "ce_orig": 0.7786831855773926, + "epoch": 0.2921849162412826, + "kl_loss": 0.168631911277771, + "loss_ib": 0.002460527466610074, + "step": 1016 + }, + { + "ce_ib": 4.972054958343506, + "ce_orig": 0.7613850831985474, + "epoch": 0.2921849162412826, + "kl_loss": 0.10056689381599426, + "loss_ib": 0.001502874423749745, + "step": 1016 + }, + { + "ce_ib": 7.624302387237549, + "ce_orig": 1.0380606651306152, + "epoch": 0.2921849162412826, + "kl_loss": 0.12155883759260178, + "loss_ib": 0.0019780185539275408, + "step": 1016 + }, + { + "ce_ib": 7.306596755981445, + "ce_orig": 0.5652252435684204, + "epoch": 0.2921849162412826, + "kl_loss": 0.14521154761314392, + "loss_ib": 0.0021827751770615578, + "step": 1016 + }, + { + "ce_ib": 6.975562572479248, + "ce_orig": 0.8997268080711365, + "epoch": 0.29247249982026025, + "kl_loss": 0.11250243335962296, + "loss_ib": 0.001822580466978252, + "step": 1017 + }, + { + "ce_ib": 6.613406181335449, + "ce_orig": 0.8821706175804138, + "epoch": 0.29247249982026025, + "kl_loss": 0.0819438248872757, + "loss_ib": 0.0014807786792516708, + "step": 1017 + }, + { + "ce_ib": 4.1242146492004395, + "ce_orig": 0.7115670442581177, + "epoch": 0.29247249982026025, + "kl_loss": 0.42394354939460754, + "loss_ib": 0.004651857074350119, + "step": 1017 + }, + { + "ce_ib": 4.448148727416992, + "ce_orig": 0.862206757068634, + "epoch": 0.29247249982026025, + "kl_loss": 0.08045890927314758, + "loss_ib": 0.0012494039256125689, + "step": 1017 + }, + { + "ce_ib": 8.916611671447754, + "ce_orig": 0.8049607276916504, + "epoch": 0.2927600833992379, + "kl_loss": 0.1913137435913086, + "loss_ib": 0.0028047983068972826, + "step": 1018 + }, + { + "ce_ib": 6.2450175285339355, + "ce_orig": 0.7353116869926453, + "epoch": 0.2927600833992379, + "kl_loss": 0.09168928116559982, + "loss_ib": 0.0015413945075124502, + "step": 1018 + }, + { + "ce_ib": 6.868838310241699, + "ce_orig": 0.8467898964881897, + "epoch": 0.2927600833992379, + "kl_loss": 0.1104859858751297, + "loss_ib": 0.0017917435616254807, + "step": 1018 + }, + { + "ce_ib": 3.588919162750244, + "ce_orig": 0.4546128213405609, + "epoch": 0.2927600833992379, + "kl_loss": 0.08714288473129272, + "loss_ib": 0.0012303207768127322, + "step": 1018 + }, + { + "ce_ib": 2.2777414321899414, + "ce_orig": 0.2392132729291916, + "epoch": 0.29304766697821555, + "kl_loss": 0.3012656271457672, + "loss_ib": 0.003240430261939764, + "step": 1019 + }, + { + "ce_ib": 8.048588752746582, + "ce_orig": 0.9696701765060425, + "epoch": 0.29304766697821555, + "kl_loss": 0.16815456748008728, + "loss_ib": 0.0024864044971764088, + "step": 1019 + }, + { + "ce_ib": 9.795534133911133, + "ce_orig": 1.3076472282409668, + "epoch": 0.29304766697821555, + "kl_loss": 0.11489086598157883, + "loss_ib": 0.0021284620743244886, + "step": 1019 + }, + { + "ce_ib": 8.209424018859863, + "ce_orig": 0.8340076208114624, + "epoch": 0.29304766697821555, + "kl_loss": 0.12861773371696472, + "loss_ib": 0.0021071196533739567, + "step": 1019 + }, + { + "epoch": 0.2933352505571932, + "grad_norm": 0.08202947676181793, + "learning_rate": 4.941205709088011e-05, + "loss": 0.8508, + "step": 1020 + }, + { + "ce_ib": 3.581411361694336, + "ce_orig": 0.7349913716316223, + "epoch": 0.2933352505571932, + "kl_loss": 0.07525119930505753, + "loss_ib": 0.0011106531601399183, + "step": 1020 + }, + { + "ce_ib": 9.23768138885498, + "ce_orig": 1.4128665924072266, + "epoch": 0.2933352505571932, + "kl_loss": 0.06985117495059967, + "loss_ib": 0.0016222798731178045, + "step": 1020 + }, + { + "ce_ib": 5.13540506362915, + "ce_orig": 0.4822990894317627, + "epoch": 0.2933352505571932, + "kl_loss": 0.17090243101119995, + "loss_ib": 0.002222564769908786, + "step": 1020 + }, + { + "ce_ib": 7.272921085357666, + "ce_orig": 0.6506208777427673, + "epoch": 0.2933352505571932, + "kl_loss": 0.1548406183719635, + "loss_ib": 0.002275698119774461, + "step": 1020 + }, + { + "ce_ib": 9.71750259399414, + "ce_orig": 1.235391616821289, + "epoch": 0.29362283413617085, + "kl_loss": 0.09263736009597778, + "loss_ib": 0.0018981238827109337, + "step": 1021 + }, + { + "ce_ib": 7.791978359222412, + "ce_orig": 0.8824278116226196, + "epoch": 0.29362283413617085, + "kl_loss": 0.1360311508178711, + "loss_ib": 0.002139509189873934, + "step": 1021 + }, + { + "ce_ib": 6.253500938415527, + "ce_orig": 0.5768558382987976, + "epoch": 0.29362283413617085, + "kl_loss": 0.15343819558620453, + "loss_ib": 0.0021597319282591343, + "step": 1021 + }, + { + "ce_ib": 4.855053901672363, + "ce_orig": 0.3929537832736969, + "epoch": 0.29362283413617085, + "kl_loss": 0.07283283770084381, + "loss_ib": 0.0012138336896896362, + "step": 1021 + }, + { + "ce_ib": 6.272524833679199, + "ce_orig": 0.6167612671852112, + "epoch": 0.2939104177151485, + "kl_loss": 0.14953365921974182, + "loss_ib": 0.0021225889213383198, + "step": 1022 + }, + { + "ce_ib": 8.505683898925781, + "ce_orig": 1.1090301275253296, + "epoch": 0.2939104177151485, + "kl_loss": 0.1480690836906433, + "loss_ib": 0.0023312591947615147, + "step": 1022 + }, + { + "ce_ib": 7.212671279907227, + "ce_orig": 0.8089624047279358, + "epoch": 0.2939104177151485, + "kl_loss": 0.13812242448329926, + "loss_ib": 0.0021024912130087614, + "step": 1022 + }, + { + "ce_ib": 6.410940647125244, + "ce_orig": 0.7297493815422058, + "epoch": 0.2939104177151485, + "kl_loss": 0.15360087156295776, + "loss_ib": 0.0021771027240902185, + "step": 1022 + }, + { + "ce_ib": 6.700841426849365, + "ce_orig": 1.0308021306991577, + "epoch": 0.2941980012941261, + "kl_loss": 0.13294780254364014, + "loss_ib": 0.0019995621405541897, + "step": 1023 + }, + { + "ce_ib": 6.241668701171875, + "ce_orig": 0.6736411452293396, + "epoch": 0.2941980012941261, + "kl_loss": 0.11182098090648651, + "loss_ib": 0.0017423765966668725, + "step": 1023 + }, + { + "ce_ib": 3.8634262084960938, + "ce_orig": 0.48370254039764404, + "epoch": 0.2941980012941261, + "kl_loss": 0.13036774098873138, + "loss_ib": 0.0016900199698284268, + "step": 1023 + }, + { + "ce_ib": 8.329598426818848, + "ce_orig": 0.9687957167625427, + "epoch": 0.2941980012941261, + "kl_loss": 0.13080094754695892, + "loss_ib": 0.002140969270840287, + "step": 1023 + }, + { + "ce_ib": 7.702944755554199, + "ce_orig": 0.9619100689888, + "epoch": 0.2944855848731037, + "kl_loss": 0.09282632917165756, + "loss_ib": 0.0016985577531158924, + "step": 1024 + }, + { + "ce_ib": 7.137479305267334, + "ce_orig": 0.7066961526870728, + "epoch": 0.2944855848731037, + "kl_loss": 0.12376445531845093, + "loss_ib": 0.0019513923907652497, + "step": 1024 + }, + { + "ce_ib": 12.413986206054688, + "ce_orig": 1.8791048526763916, + "epoch": 0.2944855848731037, + "kl_loss": 0.11838547140359879, + "loss_ib": 0.002425253391265869, + "step": 1024 + }, + { + "ce_ib": 9.755016326904297, + "ce_orig": 1.334018349647522, + "epoch": 0.2944855848731037, + "kl_loss": 0.11656536161899567, + "loss_ib": 0.0021411553025245667, + "step": 1024 + }, + { + "epoch": 0.2947731684520814, + "grad_norm": 0.10105545818805695, + "learning_rate": 4.9403661580170626e-05, + "loss": 0.8413, + "step": 1025 + }, + { + "ce_ib": 5.8657989501953125, + "ce_orig": 0.4482717514038086, + "epoch": 0.2947731684520814, + "kl_loss": 0.16809025406837463, + "loss_ib": 0.0022674824576824903, + "step": 1025 + }, + { + "ce_ib": 4.58872127532959, + "ce_orig": 0.5492852926254272, + "epoch": 0.2947731684520814, + "kl_loss": 0.06326945126056671, + "loss_ib": 0.0010915666352957487, + "step": 1025 + }, + { + "ce_ib": 5.113560676574707, + "ce_orig": 0.8479498028755188, + "epoch": 0.2947731684520814, + "kl_loss": 0.10429667681455612, + "loss_ib": 0.0015543227782472968, + "step": 1025 + }, + { + "ce_ib": 8.390458106994629, + "ce_orig": 1.3818213939666748, + "epoch": 0.2947731684520814, + "kl_loss": 0.17246775329113007, + "loss_ib": 0.002563723362982273, + "step": 1025 + }, + { + "ce_ib": 6.4424309730529785, + "ce_orig": 0.8801824450492859, + "epoch": 0.295060752031059, + "kl_loss": 0.14136923849582672, + "loss_ib": 0.002057935344055295, + "step": 1026 + }, + { + "ce_ib": 6.539217472076416, + "ce_orig": 0.47349444031715393, + "epoch": 0.295060752031059, + "kl_loss": 0.09952758997678757, + "loss_ib": 0.001649197656661272, + "step": 1026 + }, + { + "ce_ib": 4.090629577636719, + "ce_orig": 0.6171263456344604, + "epoch": 0.295060752031059, + "kl_loss": 0.08213280886411667, + "loss_ib": 0.0012303909752517939, + "step": 1026 + }, + { + "ce_ib": 7.95316219329834, + "ce_orig": 1.1671141386032104, + "epoch": 0.295060752031059, + "kl_loss": 0.10801438242197037, + "loss_ib": 0.0018754599150270224, + "step": 1026 + }, + { + "ce_ib": 9.029088973999023, + "ce_orig": 0.8997545838356018, + "epoch": 0.29534833561003665, + "kl_loss": 0.09331157803535461, + "loss_ib": 0.0018360245740041137, + "step": 1027 + }, + { + "ce_ib": 6.017154693603516, + "ce_orig": 0.7910234928131104, + "epoch": 0.29534833561003665, + "kl_loss": 0.06913614273071289, + "loss_ib": 0.0012930769007652998, + "step": 1027 + }, + { + "ce_ib": 6.308470249176025, + "ce_orig": 0.4661364257335663, + "epoch": 0.29534833561003665, + "kl_loss": 0.11379620432853699, + "loss_ib": 0.0017688089283183217, + "step": 1027 + }, + { + "ce_ib": 5.081976413726807, + "ce_orig": 0.8399765491485596, + "epoch": 0.29534833561003665, + "kl_loss": 0.0796096995472908, + "loss_ib": 0.0013042945647612214, + "step": 1027 + }, + { + "ce_ib": 8.540277481079102, + "ce_orig": 1.1448512077331543, + "epoch": 0.29563591918901433, + "kl_loss": 0.2748773694038391, + "loss_ib": 0.003602801589295268, + "step": 1028 + }, + { + "ce_ib": 4.430081367492676, + "ce_orig": 0.38741615414619446, + "epoch": 0.29563591918901433, + "kl_loss": 0.11507681012153625, + "loss_ib": 0.0015937761636450887, + "step": 1028 + }, + { + "ce_ib": 9.555229187011719, + "ce_orig": 0.7497819066047668, + "epoch": 0.29563591918901433, + "kl_loss": 0.1984127163887024, + "loss_ib": 0.0029396500904113054, + "step": 1028 + }, + { + "ce_ib": 7.1798930168151855, + "ce_orig": 0.4140661656856537, + "epoch": 0.29563591918901433, + "kl_loss": 0.11173292994499207, + "loss_ib": 0.0018353185150772333, + "step": 1028 + }, + { + "ce_ib": 5.1883063316345215, + "ce_orig": 0.3764129877090454, + "epoch": 0.29592350276799195, + "kl_loss": 0.11982943117618561, + "loss_ib": 0.0017171248327940702, + "step": 1029 + }, + { + "ce_ib": 5.456558704376221, + "ce_orig": 0.44917434453964233, + "epoch": 0.29592350276799195, + "kl_loss": 0.06864330172538757, + "loss_ib": 0.0012320888927206397, + "step": 1029 + }, + { + "ce_ib": 5.454277992248535, + "ce_orig": 0.7155625224113464, + "epoch": 0.29592350276799195, + "kl_loss": 0.10746078193187714, + "loss_ib": 0.0016200356185436249, + "step": 1029 + }, + { + "ce_ib": 6.581263065338135, + "ce_orig": 1.0528942346572876, + "epoch": 0.29592350276799195, + "kl_loss": 0.11141886562108994, + "loss_ib": 0.0017723148921504617, + "step": 1029 + }, + { + "epoch": 0.2962110863469696, + "grad_norm": 0.08278331160545349, + "learning_rate": 4.9395207275376175e-05, + "loss": 0.8518, + "step": 1030 + }, + { + "ce_ib": 5.568306922912598, + "ce_orig": 0.6978300213813782, + "epoch": 0.2962110863469696, + "kl_loss": 0.1296089142560959, + "loss_ib": 0.001852919696830213, + "step": 1030 + }, + { + "ce_ib": 8.422260284423828, + "ce_orig": 0.5663745403289795, + "epoch": 0.2962110863469696, + "kl_loss": 0.13868647813796997, + "loss_ib": 0.00222909078001976, + "step": 1030 + }, + { + "ce_ib": 4.727840423583984, + "ce_orig": 0.5925155878067017, + "epoch": 0.2962110863469696, + "kl_loss": 0.10484916716814041, + "loss_ib": 0.0015212756115943193, + "step": 1030 + }, + { + "ce_ib": 11.636137008666992, + "ce_orig": 1.7095333337783813, + "epoch": 0.2962110863469696, + "kl_loss": 0.14001572132110596, + "loss_ib": 0.0025637708604335785, + "step": 1030 + }, + { + "ce_ib": 6.531741142272949, + "ce_orig": 0.6975887417793274, + "epoch": 0.29649866992594726, + "kl_loss": 0.14314265549182892, + "loss_ib": 0.002084600506350398, + "step": 1031 + }, + { + "ce_ib": 6.6402268409729, + "ce_orig": 0.5389083027839661, + "epoch": 0.29649866992594726, + "kl_loss": 0.1222705990076065, + "loss_ib": 0.001886728685349226, + "step": 1031 + }, + { + "ce_ib": 5.543909072875977, + "ce_orig": 0.39250531792640686, + "epoch": 0.29649866992594726, + "kl_loss": 0.10852260887622833, + "loss_ib": 0.001639616908505559, + "step": 1031 + }, + { + "ce_ib": 3.4556961059570312, + "ce_orig": 0.577335000038147, + "epoch": 0.29649866992594726, + "kl_loss": 0.07768696546554565, + "loss_ib": 0.001122439163736999, + "step": 1031 + }, + { + "ce_ib": 5.805666446685791, + "ce_orig": 0.9784355163574219, + "epoch": 0.2967862535049249, + "kl_loss": 0.08703579753637314, + "loss_ib": 0.0014509245520457625, + "step": 1032 + }, + { + "ce_ib": 5.100844383239746, + "ce_orig": 0.6642316579818726, + "epoch": 0.2967862535049249, + "kl_loss": 0.05223621428012848, + "loss_ib": 0.0010324466275051236, + "step": 1032 + }, + { + "ce_ib": 7.460930824279785, + "ce_orig": 1.1851062774658203, + "epoch": 0.2967862535049249, + "kl_loss": 0.08702461421489716, + "loss_ib": 0.001616339199244976, + "step": 1032 + }, + { + "ce_ib": 6.544296741485596, + "ce_orig": 0.8161260485649109, + "epoch": 0.2967862535049249, + "kl_loss": 0.06634721159934998, + "loss_ib": 0.0013179017696529627, + "step": 1032 + }, + { + "ce_ib": 5.057808876037598, + "ce_orig": 0.7515257596969604, + "epoch": 0.2970738370839025, + "kl_loss": 0.07609856128692627, + "loss_ib": 0.0012667664559558034, + "step": 1033 + }, + { + "ce_ib": 5.757803916931152, + "ce_orig": 0.6364601850509644, + "epoch": 0.2970738370839025, + "kl_loss": 0.2905520796775818, + "loss_ib": 0.003481301013380289, + "step": 1033 + }, + { + "ce_ib": 8.416952133178711, + "ce_orig": 1.2678064107894897, + "epoch": 0.2970738370839025, + "kl_loss": 0.10128816962242126, + "loss_ib": 0.0018545768689364195, + "step": 1033 + }, + { + "ce_ib": 8.29395866394043, + "ce_orig": 0.6720482707023621, + "epoch": 0.2970738370839025, + "kl_loss": 0.18802112340927124, + "loss_ib": 0.0027096071280539036, + "step": 1033 + }, + { + "ce_ib": 11.922521591186523, + "ce_orig": 2.0747408866882324, + "epoch": 0.2973614206628801, + "kl_loss": 0.22134807705879211, + "loss_ib": 0.003405732801184058, + "step": 1034 + }, + { + "ce_ib": 6.077800750732422, + "ce_orig": 0.9439600706100464, + "epoch": 0.2973614206628801, + "kl_loss": 0.1280437707901001, + "loss_ib": 0.0018882177537307143, + "step": 1034 + }, + { + "ce_ib": 10.979880332946777, + "ce_orig": 1.6073428392410278, + "epoch": 0.2973614206628801, + "kl_loss": 0.13061973452568054, + "loss_ib": 0.0024041852448135614, + "step": 1034 + }, + { + "ce_ib": 10.396629333496094, + "ce_orig": 1.6829266548156738, + "epoch": 0.2973614206628801, + "kl_loss": 0.15234152972698212, + "loss_ib": 0.0025630779564380646, + "step": 1034 + }, + { + "epoch": 0.2976490042418578, + "grad_norm": 0.09937281906604767, + "learning_rate": 4.938669419686516e-05, + "loss": 0.8606, + "step": 1035 + }, + { + "ce_ib": 7.982855319976807, + "ce_orig": 0.5196459293365479, + "epoch": 0.2976490042418578, + "kl_loss": 0.14941297471523285, + "loss_ib": 0.002292415127158165, + "step": 1035 + }, + { + "ce_ib": 5.798346519470215, + "ce_orig": 0.758726179599762, + "epoch": 0.2976490042418578, + "kl_loss": 0.11238957196474075, + "loss_ib": 0.0017037303186953068, + "step": 1035 + }, + { + "ce_ib": 6.345818042755127, + "ce_orig": 0.6751218438148499, + "epoch": 0.2976490042418578, + "kl_loss": 0.08302076905965805, + "loss_ib": 0.0014647895004600286, + "step": 1035 + }, + { + "ce_ib": 6.886514663696289, + "ce_orig": 0.911274790763855, + "epoch": 0.2976490042418578, + "kl_loss": 0.12200108170509338, + "loss_ib": 0.0019086622633039951, + "step": 1035 + }, + { + "ce_ib": 6.984527587890625, + "ce_orig": 0.8811057209968567, + "epoch": 0.29793658782083543, + "kl_loss": 0.15952670574188232, + "loss_ib": 0.0022937196772545576, + "step": 1036 + }, + { + "ce_ib": 7.364218235015869, + "ce_orig": 0.7897801399230957, + "epoch": 0.29793658782083543, + "kl_loss": 0.11431416869163513, + "loss_ib": 0.0018795634387061, + "step": 1036 + }, + { + "ce_ib": 5.554378509521484, + "ce_orig": 0.8424960970878601, + "epoch": 0.29793658782083543, + "kl_loss": 0.08800135552883148, + "loss_ib": 0.0014354513259604573, + "step": 1036 + }, + { + "ce_ib": 4.187707901000977, + "ce_orig": 0.5914511680603027, + "epoch": 0.29793658782083543, + "kl_loss": 0.08407006412744522, + "loss_ib": 0.0012594714062288404, + "step": 1036 + }, + { + "ce_ib": 7.833523750305176, + "ce_orig": 1.050671935081482, + "epoch": 0.29822417139981305, + "kl_loss": 0.12092088162899017, + "loss_ib": 0.0019925611559301615, + "step": 1037 + }, + { + "ce_ib": 7.959522724151611, + "ce_orig": 0.8137964010238647, + "epoch": 0.29822417139981305, + "kl_loss": 0.2204059660434723, + "loss_ib": 0.0030000119004398584, + "step": 1037 + }, + { + "ce_ib": 7.40364933013916, + "ce_orig": 0.7971987128257751, + "epoch": 0.29822417139981305, + "kl_loss": 0.1723841428756714, + "loss_ib": 0.002464206423610449, + "step": 1037 + }, + { + "ce_ib": 9.738763809204102, + "ce_orig": 1.3775016069412231, + "epoch": 0.29822417139981305, + "kl_loss": 0.20213446021080017, + "loss_ib": 0.0029952209442853928, + "step": 1037 + }, + { + "ce_ib": 11.012545585632324, + "ce_orig": 0.8159978985786438, + "epoch": 0.29851175497879073, + "kl_loss": 0.1513182371854782, + "loss_ib": 0.0026144366711378098, + "step": 1038 + }, + { + "ce_ib": 5.5462565422058105, + "ce_orig": 0.489400178194046, + "epoch": 0.29851175497879073, + "kl_loss": 0.11296382546424866, + "loss_ib": 0.0016842639306560159, + "step": 1038 + }, + { + "ce_ib": 8.526248931884766, + "ce_orig": 1.3904296159744263, + "epoch": 0.29851175497879073, + "kl_loss": 0.08981022238731384, + "loss_ib": 0.0017507269512861967, + "step": 1038 + }, + { + "ce_ib": 6.063345432281494, + "ce_orig": 0.8342424631118774, + "epoch": 0.29851175497879073, + "kl_loss": 0.10905717313289642, + "loss_ib": 0.0016969061689451337, + "step": 1038 + }, + { + "ce_ib": 6.95911979675293, + "ce_orig": 1.0990016460418701, + "epoch": 0.29879933855776836, + "kl_loss": 0.10464999079704285, + "loss_ib": 0.001742411870509386, + "step": 1039 + }, + { + "ce_ib": 6.887325286865234, + "ce_orig": 0.7486907243728638, + "epoch": 0.29879933855776836, + "kl_loss": 0.1422598958015442, + "loss_ib": 0.002111331559717655, + "step": 1039 + }, + { + "ce_ib": 4.417207717895508, + "ce_orig": 0.8200974464416504, + "epoch": 0.29879933855776836, + "kl_loss": 0.08904124051332474, + "loss_ib": 0.0013321330770850182, + "step": 1039 + }, + { + "ce_ib": 8.487707138061523, + "ce_orig": 1.3702133893966675, + "epoch": 0.29879933855776836, + "kl_loss": 0.13536900281906128, + "loss_ib": 0.002202460775151849, + "step": 1039 + }, + { + "epoch": 0.299086922136746, + "grad_norm": 0.10019273310899734, + "learning_rate": 4.9378122365147536e-05, + "loss": 0.8481, + "step": 1040 + }, + { + "ce_ib": 5.079341888427734, + "ce_orig": 0.6118531227111816, + "epoch": 0.299086922136746, + "kl_loss": 0.08346180617809296, + "loss_ib": 0.0013425522483885288, + "step": 1040 + }, + { + "ce_ib": 5.607305526733398, + "ce_orig": 0.753025472164154, + "epoch": 0.299086922136746, + "kl_loss": 0.05994057655334473, + "loss_ib": 0.0011601363075897098, + "step": 1040 + }, + { + "ce_ib": 5.748326301574707, + "ce_orig": 0.856826901435852, + "epoch": 0.299086922136746, + "kl_loss": 0.09018149971961975, + "loss_ib": 0.0014766475651413202, + "step": 1040 + }, + { + "ce_ib": 7.7500901222229, + "ce_orig": 0.7174243330955505, + "epoch": 0.299086922136746, + "kl_loss": 0.1808132529258728, + "loss_ib": 0.0025831416714936495, + "step": 1040 + }, + { + "ce_ib": 5.7866387367248535, + "ce_orig": 0.5503837466239929, + "epoch": 0.2993745057157236, + "kl_loss": 0.10648153722286224, + "loss_ib": 0.0016434791032224894, + "step": 1041 + }, + { + "ce_ib": 5.319000244140625, + "ce_orig": 0.6512373089790344, + "epoch": 0.2993745057157236, + "kl_loss": 0.09968775510787964, + "loss_ib": 0.001528777414932847, + "step": 1041 + }, + { + "ce_ib": 5.886722564697266, + "ce_orig": 0.8125132322311401, + "epoch": 0.2993745057157236, + "kl_loss": 0.09538036584854126, + "loss_ib": 0.0015424757730215788, + "step": 1041 + }, + { + "ce_ib": 6.476065158843994, + "ce_orig": 0.7084008455276489, + "epoch": 0.2993745057157236, + "kl_loss": 0.14250393211841583, + "loss_ib": 0.0020726455841213465, + "step": 1041 + }, + { + "ce_ib": 6.568038463592529, + "ce_orig": 0.975024402141571, + "epoch": 0.2996620892947013, + "kl_loss": 0.09894341975450516, + "loss_ib": 0.001646237913519144, + "step": 1042 + }, + { + "ce_ib": 3.8546910285949707, + "ce_orig": 0.7323302626609802, + "epoch": 0.2996620892947013, + "kl_loss": 0.051041483879089355, + "loss_ib": 0.0008958838880062103, + "step": 1042 + }, + { + "ce_ib": 9.201594352722168, + "ce_orig": 1.5363073348999023, + "epoch": 0.2996620892947013, + "kl_loss": 0.14216488599777222, + "loss_ib": 0.002341808285564184, + "step": 1042 + }, + { + "ce_ib": 7.402287483215332, + "ce_orig": 1.1694655418395996, + "epoch": 0.2996620892947013, + "kl_loss": 0.13332687318325043, + "loss_ib": 0.002073497511446476, + "step": 1042 + }, + { + "ce_ib": 5.4790544509887695, + "ce_orig": 0.8705706596374512, + "epoch": 0.2999496728736789, + "kl_loss": 0.10294032841920853, + "loss_ib": 0.0015773087507113814, + "step": 1043 + }, + { + "ce_ib": 5.983057022094727, + "ce_orig": 0.9364078640937805, + "epoch": 0.2999496728736789, + "kl_loss": 0.11039305478334427, + "loss_ib": 0.00170223624445498, + "step": 1043 + }, + { + "ce_ib": 6.705295562744141, + "ce_orig": 0.7721993923187256, + "epoch": 0.2999496728736789, + "kl_loss": 0.1317780315876007, + "loss_ib": 0.0019883099012076855, + "step": 1043 + }, + { + "ce_ib": 5.997276782989502, + "ce_orig": 0.9583907127380371, + "epoch": 0.2999496728736789, + "kl_loss": 0.14914849400520325, + "loss_ib": 0.0020912124309688807, + "step": 1043 + }, + { + "ce_ib": 8.191937446594238, + "ce_orig": 1.0112303495407104, + "epoch": 0.30023725645265653, + "kl_loss": 0.12284594774246216, + "loss_ib": 0.002047653077170253, + "step": 1044 + }, + { + "ce_ib": 10.865947723388672, + "ce_orig": 0.9174994826316833, + "epoch": 0.30023725645265653, + "kl_loss": 0.11603246629238129, + "loss_ib": 0.002246919320896268, + "step": 1044 + }, + { + "ce_ib": 10.262970924377441, + "ce_orig": 1.3994874954223633, + "epoch": 0.30023725645265653, + "kl_loss": 0.11475729942321777, + "loss_ib": 0.0021738701034337282, + "step": 1044 + }, + { + "ce_ib": 8.54948902130127, + "ce_orig": 0.46436163783073425, + "epoch": 0.30023725645265653, + "kl_loss": 0.12399379909038544, + "loss_ib": 0.002094886964187026, + "step": 1044 + }, + { + "epoch": 0.3005248400316342, + "grad_norm": 0.08161374926567078, + "learning_rate": 4.936949180087486e-05, + "loss": 0.8708, + "step": 1045 + }, + { + "ce_ib": 3.662956476211548, + "ce_orig": 0.6253257989883423, + "epoch": 0.3005248400316342, + "kl_loss": 0.05958956480026245, + "loss_ib": 0.0009621912613511086, + "step": 1045 + }, + { + "ce_ib": 4.044605731964111, + "ce_orig": 0.745154619216919, + "epoch": 0.3005248400316342, + "kl_loss": 0.10994522273540497, + "loss_ib": 0.0015039127320051193, + "step": 1045 + }, + { + "ce_ib": 7.987272262573242, + "ce_orig": 1.0346519947052002, + "epoch": 0.3005248400316342, + "kl_loss": 0.17194198071956635, + "loss_ib": 0.002518146764487028, + "step": 1045 + }, + { + "ce_ib": 5.648697376251221, + "ce_orig": 1.072109580039978, + "epoch": 0.3005248400316342, + "kl_loss": 0.11444295197725296, + "loss_ib": 0.00170929916203022, + "step": 1045 + }, + { + "ce_ib": 5.086002826690674, + "ce_orig": 0.6663815379142761, + "epoch": 0.30081242361061183, + "kl_loss": 0.09517970681190491, + "loss_ib": 0.001460397383198142, + "step": 1046 + }, + { + "ce_ib": 7.656687259674072, + "ce_orig": 1.0890663862228394, + "epoch": 0.30081242361061183, + "kl_loss": 0.10373960435390472, + "loss_ib": 0.0018030646024271846, + "step": 1046 + }, + { + "ce_ib": 7.713747024536133, + "ce_orig": 1.1821391582489014, + "epoch": 0.30081242361061183, + "kl_loss": 0.16192738711833954, + "loss_ib": 0.0023906484711915255, + "step": 1046 + }, + { + "ce_ib": 6.953602313995361, + "ce_orig": 1.0051076412200928, + "epoch": 0.30081242361061183, + "kl_loss": 0.10963944345712662, + "loss_ib": 0.0017917546210810542, + "step": 1046 + }, + { + "ce_ib": 7.628968238830566, + "ce_orig": 0.8743991255760193, + "epoch": 0.30110000718958946, + "kl_loss": 0.1502256691455841, + "loss_ib": 0.0022651534527540207, + "step": 1047 + }, + { + "ce_ib": 9.052668571472168, + "ce_orig": 1.0976598262786865, + "epoch": 0.30110000718958946, + "kl_loss": 0.22826503217220306, + "loss_ib": 0.003187917172908783, + "step": 1047 + }, + { + "ce_ib": 4.832150936126709, + "ce_orig": 0.648388683795929, + "epoch": 0.30110000718958946, + "kl_loss": 0.13129526376724243, + "loss_ib": 0.0017961676931008697, + "step": 1047 + }, + { + "ce_ib": 6.754344940185547, + "ce_orig": 0.9024156332015991, + "epoch": 0.30110000718958946, + "kl_loss": 0.22326111793518066, + "loss_ib": 0.002908045658841729, + "step": 1047 + }, + { + "ce_ib": 5.170908451080322, + "ce_orig": 0.4192695915699005, + "epoch": 0.30138759076856714, + "kl_loss": 0.10928487777709961, + "loss_ib": 0.0016099396161735058, + "step": 1048 + }, + { + "ce_ib": 6.1424407958984375, + "ce_orig": 0.7895532846450806, + "epoch": 0.30138759076856714, + "kl_loss": 0.08899113535881042, + "loss_ib": 0.0015041553415358067, + "step": 1048 + }, + { + "ce_ib": 5.934038162231445, + "ce_orig": 0.6904341578483582, + "epoch": 0.30138759076856714, + "kl_loss": 0.09739300608634949, + "loss_ib": 0.0015673339366912842, + "step": 1048 + }, + { + "ce_ib": 8.76187801361084, + "ce_orig": 1.6208300590515137, + "epoch": 0.30138759076856714, + "kl_loss": 0.13631777465343475, + "loss_ib": 0.0022393655963242054, + "step": 1048 + }, + { + "ce_ib": 6.55295467376709, + "ce_orig": 0.5757778882980347, + "epoch": 0.30167517434754476, + "kl_loss": 0.0959104374051094, + "loss_ib": 0.0016143998363986611, + "step": 1049 + }, + { + "ce_ib": 8.004493713378906, + "ce_orig": 1.1587669849395752, + "epoch": 0.30167517434754476, + "kl_loss": 0.09329302608966827, + "loss_ib": 0.0017333796713501215, + "step": 1049 + }, + { + "ce_ib": 11.593831062316895, + "ce_orig": 1.764388918876648, + "epoch": 0.30167517434754476, + "kl_loss": 0.20481374859809875, + "loss_ib": 0.003207520581781864, + "step": 1049 + }, + { + "ce_ib": 8.537985801696777, + "ce_orig": 1.107876181602478, + "epoch": 0.30167517434754476, + "kl_loss": 0.21293607354164124, + "loss_ib": 0.0029831593856215477, + "step": 1049 + }, + { + "epoch": 0.3019627579265224, + "grad_norm": 0.13020655512809753, + "learning_rate": 4.9360802524840156e-05, + "loss": 0.9349, + "step": 1050 + }, + { + "ce_ib": 7.10082483291626, + "ce_orig": 0.9058458805084229, + "epoch": 0.3019627579265224, + "kl_loss": 0.10159776359796524, + "loss_ib": 0.0017260601744055748, + "step": 1050 + }, + { + "ce_ib": 5.80906343460083, + "ce_orig": 0.597679078578949, + "epoch": 0.3019627579265224, + "kl_loss": 0.10538171231746674, + "loss_ib": 0.0016347235068678856, + "step": 1050 + }, + { + "ce_ib": 6.977040767669678, + "ce_orig": 0.5290160179138184, + "epoch": 0.3019627579265224, + "kl_loss": 0.13264355063438416, + "loss_ib": 0.0020241395104676485, + "step": 1050 + }, + { + "ce_ib": 7.416954517364502, + "ce_orig": 1.0477352142333984, + "epoch": 0.3019627579265224, + "kl_loss": 0.14112350344657898, + "loss_ib": 0.0021529304794967175, + "step": 1050 + }, + { + "ce_ib": 4.349487781524658, + "ce_orig": 0.6894300580024719, + "epoch": 0.3022503415055, + "kl_loss": 0.06405559182167053, + "loss_ib": 0.0010755046969279647, + "step": 1051 + }, + { + "ce_ib": 6.142280101776123, + "ce_orig": 0.8738764524459839, + "epoch": 0.3022503415055, + "kl_loss": 0.10166037082672119, + "loss_ib": 0.0016308316262438893, + "step": 1051 + }, + { + "ce_ib": 5.816431522369385, + "ce_orig": 0.8697793483734131, + "epoch": 0.3022503415055, + "kl_loss": 0.09778453409671783, + "loss_ib": 0.0015594884753227234, + "step": 1051 + }, + { + "ce_ib": 5.512357234954834, + "ce_orig": 0.618314802646637, + "epoch": 0.3022503415055, + "kl_loss": 0.12777763605117798, + "loss_ib": 0.0018290119478479028, + "step": 1051 + }, + { + "ce_ib": 8.661779403686523, + "ce_orig": 1.062333106994629, + "epoch": 0.3025379250844777, + "kl_loss": 0.14228984713554382, + "loss_ib": 0.0022890763357281685, + "step": 1052 + }, + { + "ce_ib": 6.235224723815918, + "ce_orig": 0.600572943687439, + "epoch": 0.3025379250844777, + "kl_loss": 0.110453762114048, + "loss_ib": 0.0017280600732192397, + "step": 1052 + }, + { + "ce_ib": 3.774848699569702, + "ce_orig": 0.46214503049850464, + "epoch": 0.3025379250844777, + "kl_loss": 0.23360615968704224, + "loss_ib": 0.0027135463897138834, + "step": 1052 + }, + { + "ce_ib": 6.417864799499512, + "ce_orig": 1.1410952806472778, + "epoch": 0.3025379250844777, + "kl_loss": 0.10604594647884369, + "loss_ib": 0.0017022459069266915, + "step": 1052 + }, + { + "ce_ib": 10.220464706420898, + "ce_orig": 1.548168659210205, + "epoch": 0.3028255086634553, + "kl_loss": 0.11154457926750183, + "loss_ib": 0.0021374921780079603, + "step": 1053 + }, + { + "ce_ib": 4.950402736663818, + "ce_orig": 0.6234180331230164, + "epoch": 0.3028255086634553, + "kl_loss": 0.11562936753034592, + "loss_ib": 0.0016513338778167963, + "step": 1053 + }, + { + "ce_ib": 6.0322699546813965, + "ce_orig": 1.0492932796478271, + "epoch": 0.3028255086634553, + "kl_loss": 0.12673497200012207, + "loss_ib": 0.001870576641522348, + "step": 1053 + }, + { + "ce_ib": 6.646451473236084, + "ce_orig": 1.0099207162857056, + "epoch": 0.3028255086634553, + "kl_loss": 0.115445077419281, + "loss_ib": 0.0018190959235653281, + "step": 1053 + }, + { + "ce_ib": 7.095217704772949, + "ce_orig": 0.9569370746612549, + "epoch": 0.30311309224243294, + "kl_loss": 0.15879088640213013, + "loss_ib": 0.002297430532053113, + "step": 1054 + }, + { + "ce_ib": 7.884916305541992, + "ce_orig": 1.0214117765426636, + "epoch": 0.30311309224243294, + "kl_loss": 0.14951792359352112, + "loss_ib": 0.0022836709395051003, + "step": 1054 + }, + { + "ce_ib": 4.689992904663086, + "ce_orig": 0.7026373147964478, + "epoch": 0.30311309224243294, + "kl_loss": 0.0819244235754013, + "loss_ib": 0.0012882434530183673, + "step": 1054 + }, + { + "ce_ib": 6.779373645782471, + "ce_orig": 0.4927053451538086, + "epoch": 0.30311309224243294, + "kl_loss": 0.15542671084403992, + "loss_ib": 0.0022322044242173433, + "step": 1054 + }, + { + "epoch": 0.3034006758214106, + "grad_norm": 0.09209935367107391, + "learning_rate": 4.9352054557977905e-05, + "loss": 0.85, + "step": 1055 + }, + { + "ce_ib": 7.405393123626709, + "ce_orig": 0.7001031637191772, + "epoch": 0.3034006758214106, + "kl_loss": 0.19542153179645538, + "loss_ib": 0.0026947546284645796, + "step": 1055 + }, + { + "ce_ib": 6.689201354980469, + "ce_orig": 0.8217753767967224, + "epoch": 0.3034006758214106, + "kl_loss": 0.0826321691274643, + "loss_ib": 0.0014952417695894837, + "step": 1055 + }, + { + "ce_ib": 4.1700825691223145, + "ce_orig": 0.6262937188148499, + "epoch": 0.3034006758214106, + "kl_loss": 0.0850646048784256, + "loss_ib": 0.0012676542392000556, + "step": 1055 + }, + { + "ce_ib": 3.701383590698242, + "ce_orig": 0.7168072462081909, + "epoch": 0.3034006758214106, + "kl_loss": 0.07148706912994385, + "loss_ib": 0.0010850090766325593, + "step": 1055 + }, + { + "ce_ib": 11.786508560180664, + "ce_orig": 1.2961478233337402, + "epoch": 0.30368825940038824, + "kl_loss": 0.12000411748886108, + "loss_ib": 0.0023786919191479683, + "step": 1056 + }, + { + "ce_ib": 5.3256402015686035, + "ce_orig": 0.5889015793800354, + "epoch": 0.30368825940038824, + "kl_loss": 0.10559151321649551, + "loss_ib": 0.0015884791500866413, + "step": 1056 + }, + { + "ce_ib": 6.689587593078613, + "ce_orig": 0.7410926222801208, + "epoch": 0.30368825940038824, + "kl_loss": 0.13415184617042542, + "loss_ib": 0.002010477241128683, + "step": 1056 + }, + { + "ce_ib": 4.489993095397949, + "ce_orig": 0.4996775984764099, + "epoch": 0.30368825940038824, + "kl_loss": 0.09554598480463028, + "loss_ib": 0.001404459122568369, + "step": 1056 + }, + { + "ce_ib": 4.924744129180908, + "ce_orig": 0.47715672850608826, + "epoch": 0.30397584297936586, + "kl_loss": 0.13373792171478271, + "loss_ib": 0.0018298536306247115, + "step": 1057 + }, + { + "ce_ib": 8.683158874511719, + "ce_orig": 0.9591328501701355, + "epoch": 0.30397584297936586, + "kl_loss": 0.08485978841781616, + "loss_ib": 0.0017169136554002762, + "step": 1057 + }, + { + "ce_ib": 8.079586029052734, + "ce_orig": 1.0162447690963745, + "epoch": 0.30397584297936586, + "kl_loss": 0.20607218146324158, + "loss_ib": 0.0028686802834272385, + "step": 1057 + }, + { + "ce_ib": 3.9310555458068848, + "ce_orig": 0.3155190646648407, + "epoch": 0.30397584297936586, + "kl_loss": 0.3025915026664734, + "loss_ib": 0.0034190204460173845, + "step": 1057 + }, + { + "ce_ib": 5.745120048522949, + "ce_orig": 0.41896164417266846, + "epoch": 0.30426342655834354, + "kl_loss": 0.12001323699951172, + "loss_ib": 0.0017746443627402186, + "step": 1058 + }, + { + "ce_ib": 4.246820449829102, + "ce_orig": 0.39408591389656067, + "epoch": 0.30426342655834354, + "kl_loss": 0.10306812822818756, + "loss_ib": 0.0014553633518517017, + "step": 1058 + }, + { + "ce_ib": 4.1398844718933105, + "ce_orig": 0.6241393685340881, + "epoch": 0.30426342655834354, + "kl_loss": 0.11600920557975769, + "loss_ib": 0.0015740805538371205, + "step": 1058 + }, + { + "ce_ib": 5.1157755851745605, + "ce_orig": 0.5613738298416138, + "epoch": 0.30426342655834354, + "kl_loss": 0.15554389357566833, + "loss_ib": 0.002067016437649727, + "step": 1058 + }, + { + "ce_ib": 8.72223949432373, + "ce_orig": 0.7960661053657532, + "epoch": 0.30455101013732117, + "kl_loss": 0.15092414617538452, + "loss_ib": 0.002381465397775173, + "step": 1059 + }, + { + "ce_ib": 7.334806442260742, + "ce_orig": 0.8918651342391968, + "epoch": 0.30455101013732117, + "kl_loss": 0.11382514238357544, + "loss_ib": 0.0018717319471761584, + "step": 1059 + }, + { + "ce_ib": 3.012343406677246, + "ce_orig": 0.585025429725647, + "epoch": 0.30455101013732117, + "kl_loss": 0.06840424239635468, + "loss_ib": 0.0009852767689153552, + "step": 1059 + }, + { + "ce_ib": 10.939859390258789, + "ce_orig": 1.4614068269729614, + "epoch": 0.30455101013732117, + "kl_loss": 0.16254915297031403, + "loss_ib": 0.0027194772846996784, + "step": 1059 + }, + { + "epoch": 0.3048385937162988, + "grad_norm": 0.08189968019723892, + "learning_rate": 4.934324792136399e-05, + "loss": 0.8205, + "step": 1060 + }, + { + "ce_ib": 11.700628280639648, + "ce_orig": 1.5280909538269043, + "epoch": 0.3048385937162988, + "kl_loss": 0.10970384627580643, + "loss_ib": 0.002267101313918829, + "step": 1060 + }, + { + "ce_ib": 5.980067253112793, + "ce_orig": 0.7644383311271667, + "epoch": 0.3048385937162988, + "kl_loss": 0.07235151529312134, + "loss_ib": 0.001321521820500493, + "step": 1060 + }, + { + "ce_ib": 7.974216938018799, + "ce_orig": 0.8650918006896973, + "epoch": 0.3048385937162988, + "kl_loss": 0.13781039416790009, + "loss_ib": 0.002175525762140751, + "step": 1060 + }, + { + "ce_ib": 2.881650686264038, + "ce_orig": 0.31516513228416443, + "epoch": 0.3048385937162988, + "kl_loss": 0.3138682246208191, + "loss_ib": 0.003426847280934453, + "step": 1060 + }, + { + "ce_ib": 10.01279354095459, + "ce_orig": 1.4876623153686523, + "epoch": 0.3051261772952764, + "kl_loss": 0.1276686042547226, + "loss_ib": 0.002277965424582362, + "step": 1061 + }, + { + "ce_ib": 7.159035682678223, + "ce_orig": 0.40885889530181885, + "epoch": 0.3051261772952764, + "kl_loss": 0.1310914009809494, + "loss_ib": 0.0020268175285309553, + "step": 1061 + }, + { + "ce_ib": 10.178457260131836, + "ce_orig": 1.53799569606781, + "epoch": 0.3051261772952764, + "kl_loss": 0.15176355838775635, + "loss_ib": 0.002535481471568346, + "step": 1061 + }, + { + "ce_ib": 8.092935562133789, + "ce_orig": 0.9259153604507446, + "epoch": 0.3051261772952764, + "kl_loss": 0.22177819907665253, + "loss_ib": 0.0030270754359662533, + "step": 1061 + }, + { + "ce_ib": 4.42587947845459, + "ce_orig": 0.5612567663192749, + "epoch": 0.3054137608742541, + "kl_loss": 0.11645185202360153, + "loss_ib": 0.0016071064164862037, + "step": 1062 + }, + { + "ce_ib": 5.609553337097168, + "ce_orig": 0.908251941204071, + "epoch": 0.3054137608742541, + "kl_loss": 0.11812228709459305, + "loss_ib": 0.0017421781085431576, + "step": 1062 + }, + { + "ce_ib": 6.579468250274658, + "ce_orig": 0.693250298500061, + "epoch": 0.3054137608742541, + "kl_loss": 0.08764868974685669, + "loss_ib": 0.0015344336861744523, + "step": 1062 + }, + { + "ce_ib": 10.069143295288086, + "ce_orig": 1.441835641860962, + "epoch": 0.3054137608742541, + "kl_loss": 0.2499997913837433, + "loss_ib": 0.003506912151351571, + "step": 1062 + }, + { + "ce_ib": 5.1232991218566895, + "ce_orig": 0.5163192749023438, + "epoch": 0.3057013444532317, + "kl_loss": 0.10996317863464355, + "loss_ib": 0.0016119616338983178, + "step": 1063 + }, + { + "ce_ib": 4.89235782623291, + "ce_orig": 0.40680912137031555, + "epoch": 0.3057013444532317, + "kl_loss": 0.09841237962245941, + "loss_ib": 0.0014733595307916403, + "step": 1063 + }, + { + "ce_ib": 6.575563430786133, + "ce_orig": 0.8432719111442566, + "epoch": 0.3057013444532317, + "kl_loss": 0.08154290169477463, + "loss_ib": 0.0014729853719472885, + "step": 1063 + }, + { + "ce_ib": 5.160298824310303, + "ce_orig": 0.6140801906585693, + "epoch": 0.3057013444532317, + "kl_loss": 0.06841346621513367, + "loss_ib": 0.001200164551846683, + "step": 1063 + }, + { + "ce_ib": 8.059441566467285, + "ce_orig": 0.7148778438568115, + "epoch": 0.30598892803220934, + "kl_loss": 0.11310233175754547, + "loss_ib": 0.0019369673682376742, + "step": 1064 + }, + { + "ce_ib": 5.755097389221191, + "ce_orig": 0.5257096290588379, + "epoch": 0.30598892803220934, + "kl_loss": 0.10900678485631943, + "loss_ib": 0.0016655775252729654, + "step": 1064 + }, + { + "ce_ib": 9.884034156799316, + "ce_orig": 1.1813336610794067, + "epoch": 0.30598892803220934, + "kl_loss": 0.12008243799209595, + "loss_ib": 0.002189227845519781, + "step": 1064 + }, + { + "ce_ib": 5.514344215393066, + "ce_orig": 0.6407434940338135, + "epoch": 0.30598892803220934, + "kl_loss": 0.07819973677396774, + "loss_ib": 0.0013334316899999976, + "step": 1064 + }, + { + "epoch": 0.306276511611187, + "grad_norm": 0.08639845997095108, + "learning_rate": 4.9334382636215646e-05, + "loss": 0.8505, + "step": 1065 + }, + { + "ce_ib": 7.71027135848999, + "ce_orig": 0.8334816694259644, + "epoch": 0.306276511611187, + "kl_loss": 0.1781705617904663, + "loss_ib": 0.002552732825279236, + "step": 1065 + }, + { + "ce_ib": 7.65843391418457, + "ce_orig": 1.1109449863433838, + "epoch": 0.306276511611187, + "kl_loss": 0.09281620383262634, + "loss_ib": 0.001694005448371172, + "step": 1065 + }, + { + "ce_ib": 5.949745178222656, + "ce_orig": 0.5543390512466431, + "epoch": 0.306276511611187, + "kl_loss": 0.10283538699150085, + "loss_ib": 0.001623328309506178, + "step": 1065 + }, + { + "ce_ib": 8.59256649017334, + "ce_orig": 0.7991865873336792, + "epoch": 0.306276511611187, + "kl_loss": 0.1408337950706482, + "loss_ib": 0.0022675946820527315, + "step": 1065 + }, + { + "ce_ib": 5.791524887084961, + "ce_orig": 0.5286558866500854, + "epoch": 0.30656409519016464, + "kl_loss": 0.09039951115846634, + "loss_ib": 0.0014831476146355271, + "step": 1066 + }, + { + "ce_ib": 11.240863800048828, + "ce_orig": 1.9939417839050293, + "epoch": 0.30656409519016464, + "kl_loss": 0.15476444363594055, + "loss_ib": 0.0026717307046055794, + "step": 1066 + }, + { + "ce_ib": 5.748254776000977, + "ce_orig": 0.6310633420944214, + "epoch": 0.30656409519016464, + "kl_loss": 0.1464613676071167, + "loss_ib": 0.002039439044892788, + "step": 1066 + }, + { + "ce_ib": 5.478874206542969, + "ce_orig": 0.9024366736412048, + "epoch": 0.30656409519016464, + "kl_loss": 0.0984884575009346, + "loss_ib": 0.0015327719738706946, + "step": 1066 + }, + { + "ce_ib": 8.004805564880371, + "ce_orig": 0.8263446688652039, + "epoch": 0.30685167876914227, + "kl_loss": 0.1449379026889801, + "loss_ib": 0.002249859506264329, + "step": 1067 + }, + { + "ce_ib": 6.605048179626465, + "ce_orig": 0.7789648175239563, + "epoch": 0.30685167876914227, + "kl_loss": 0.08828707039356232, + "loss_ib": 0.0015433755470439792, + "step": 1067 + }, + { + "ce_ib": 10.462506294250488, + "ce_orig": 1.3526828289031982, + "epoch": 0.30685167876914227, + "kl_loss": 0.1412639617919922, + "loss_ib": 0.0024588902015239, + "step": 1067 + }, + { + "ce_ib": 8.052318572998047, + "ce_orig": 1.0146571397781372, + "epoch": 0.30685167876914227, + "kl_loss": 0.14023709297180176, + "loss_ib": 0.0022076028399169445, + "step": 1067 + }, + { + "ce_ib": 7.057815074920654, + "ce_orig": 0.7951789498329163, + "epoch": 0.30713926234811995, + "kl_loss": 0.19950871169567108, + "loss_ib": 0.0027008685283362865, + "step": 1068 + }, + { + "ce_ib": 7.53904914855957, + "ce_orig": 0.5687925219535828, + "epoch": 0.30713926234811995, + "kl_loss": 0.17302866280078888, + "loss_ib": 0.0024841914419084787, + "step": 1068 + }, + { + "ce_ib": 4.6915178298950195, + "ce_orig": 0.6768220067024231, + "epoch": 0.30713926234811995, + "kl_loss": 0.0843624621629715, + "loss_ib": 0.0013127763522788882, + "step": 1068 + }, + { + "ce_ib": 6.781597137451172, + "ce_orig": 1.1427780389785767, + "epoch": 0.30713926234811995, + "kl_loss": 0.1272558867931366, + "loss_ib": 0.0019507184624671936, + "step": 1068 + }, + { + "ce_ib": 8.581425666809082, + "ce_orig": 1.328827977180481, + "epoch": 0.30742684592709757, + "kl_loss": 0.18549340963363647, + "loss_ib": 0.0027130763046443462, + "step": 1069 + }, + { + "ce_ib": 6.743143558502197, + "ce_orig": 1.0867711305618286, + "epoch": 0.30742684592709757, + "kl_loss": 0.13806043565273285, + "loss_ib": 0.002054918557405472, + "step": 1069 + }, + { + "ce_ib": 7.102622985839844, + "ce_orig": 1.0041338205337524, + "epoch": 0.30742684592709757, + "kl_loss": 0.1152433380484581, + "loss_ib": 0.00186269567348063, + "step": 1069 + }, + { + "ce_ib": 7.737115383148193, + "ce_orig": 0.7306109666824341, + "epoch": 0.30742684592709757, + "kl_loss": 0.10602246224880219, + "loss_ib": 0.0018339360831305385, + "step": 1069 + }, + { + "epoch": 0.3077144295060752, + "grad_norm": 0.10589238256216049, + "learning_rate": 4.9325458723891405e-05, + "loss": 0.8881, + "step": 1070 + }, + { + "ce_ib": 8.593700408935547, + "ce_orig": 1.2556962966918945, + "epoch": 0.3077144295060752, + "kl_loss": 0.12282206118106842, + "loss_ib": 0.0020875907503068447, + "step": 1070 + }, + { + "ce_ib": 7.65969181060791, + "ce_orig": 0.8783318996429443, + "epoch": 0.3077144295060752, + "kl_loss": 0.1287730187177658, + "loss_ib": 0.0020536992233246565, + "step": 1070 + }, + { + "ce_ib": 6.38693904876709, + "ce_orig": 0.8617218732833862, + "epoch": 0.3077144295060752, + "kl_loss": 0.10807206481695175, + "loss_ib": 0.0017194146057590842, + "step": 1070 + }, + { + "ce_ib": 6.419977188110352, + "ce_orig": 0.9678337574005127, + "epoch": 0.3077144295060752, + "kl_loss": 0.09231145679950714, + "loss_ib": 0.0015651121502742171, + "step": 1070 + }, + { + "ce_ib": 5.127742290496826, + "ce_orig": 0.3816016614437103, + "epoch": 0.3080020130850528, + "kl_loss": 0.16862201690673828, + "loss_ib": 0.00219899439252913, + "step": 1071 + }, + { + "ce_ib": 6.4471116065979, + "ce_orig": 1.1590927839279175, + "epoch": 0.3080020130850528, + "kl_loss": 0.11116702854633331, + "loss_ib": 0.0017563813598826528, + "step": 1071 + }, + { + "ce_ib": 8.026755332946777, + "ce_orig": 1.3219317197799683, + "epoch": 0.3080020130850528, + "kl_loss": 0.10397316515445709, + "loss_ib": 0.0018424070440232754, + "step": 1071 + }, + { + "ce_ib": 4.885175704956055, + "ce_orig": 0.6101803183555603, + "epoch": 0.3080020130850528, + "kl_loss": 0.14653921127319336, + "loss_ib": 0.0019539096392691135, + "step": 1071 + }, + { + "ce_ib": 8.081513404846191, + "ce_orig": 1.0486963987350464, + "epoch": 0.3082895966640305, + "kl_loss": 0.1061769351363182, + "loss_ib": 0.0018699206411838531, + "step": 1072 + }, + { + "ce_ib": 4.16774845123291, + "ce_orig": 0.545195996761322, + "epoch": 0.3082895966640305, + "kl_loss": 0.10797026008367538, + "loss_ib": 0.0014964774018153548, + "step": 1072 + }, + { + "ce_ib": 5.15360164642334, + "ce_orig": 0.7959751486778259, + "epoch": 0.3082895966640305, + "kl_loss": 0.1221914291381836, + "loss_ib": 0.0017372744623571634, + "step": 1072 + }, + { + "ce_ib": 8.818731307983398, + "ce_orig": 1.3822993040084839, + "epoch": 0.3082895966640305, + "kl_loss": 0.21638715267181396, + "loss_ib": 0.0030457444954663515, + "step": 1072 + }, + { + "ce_ib": 4.697595119476318, + "ce_orig": 0.648749589920044, + "epoch": 0.3085771802430081, + "kl_loss": 0.09851400554180145, + "loss_ib": 0.0014548995532095432, + "step": 1073 + }, + { + "ce_ib": 8.36463737487793, + "ce_orig": 1.2512192726135254, + "epoch": 0.3085771802430081, + "kl_loss": 0.1037246435880661, + "loss_ib": 0.0018737100763246417, + "step": 1073 + }, + { + "ce_ib": 6.351157188415527, + "ce_orig": 0.5725387334823608, + "epoch": 0.3085771802430081, + "kl_loss": 0.1913139820098877, + "loss_ib": 0.002548255492001772, + "step": 1073 + }, + { + "ce_ib": 8.301779747009277, + "ce_orig": 1.3257883787155151, + "epoch": 0.3085771802430081, + "kl_loss": 0.0830536037683487, + "loss_ib": 0.0016607139259576797, + "step": 1073 + }, + { + "ce_ib": 9.013845443725586, + "ce_orig": 1.4739587306976318, + "epoch": 0.30886476382198574, + "kl_loss": 0.08682604134082794, + "loss_ib": 0.001769644906744361, + "step": 1074 + }, + { + "ce_ib": 5.944709300994873, + "ce_orig": 0.6181069016456604, + "epoch": 0.30886476382198574, + "kl_loss": 0.08755936473608017, + "loss_ib": 0.0014700645115226507, + "step": 1074 + }, + { + "ce_ib": 4.296208381652832, + "ce_orig": 0.5747141242027283, + "epoch": 0.30886476382198574, + "kl_loss": 0.08138515800237656, + "loss_ib": 0.0012434723321348429, + "step": 1074 + }, + { + "ce_ib": 8.861016273498535, + "ce_orig": 0.9520947933197021, + "epoch": 0.30886476382198574, + "kl_loss": 0.11723033338785172, + "loss_ib": 0.002058404963463545, + "step": 1074 + }, + { + "epoch": 0.3091523474009634, + "grad_norm": 0.09396370500326157, + "learning_rate": 4.931647620589104e-05, + "loss": 0.9133, + "step": 1075 + }, + { + "ce_ib": 9.394133567810059, + "ce_orig": 1.0916211605072021, + "epoch": 0.3091523474009634, + "kl_loss": 0.11132264137268066, + "loss_ib": 0.0020526396110653877, + "step": 1075 + }, + { + "ce_ib": 5.052248001098633, + "ce_orig": 0.8652608394622803, + "epoch": 0.3091523474009634, + "kl_loss": 0.0959598571062088, + "loss_ib": 0.0014648232609033585, + "step": 1075 + }, + { + "ce_ib": 9.793839454650879, + "ce_orig": 1.2052462100982666, + "epoch": 0.3091523474009634, + "kl_loss": 0.1138681098818779, + "loss_ib": 0.002118065021932125, + "step": 1075 + }, + { + "ce_ib": 3.8647079467773438, + "ce_orig": 0.571103036403656, + "epoch": 0.3091523474009634, + "kl_loss": 0.12573961913585663, + "loss_ib": 0.0016438668826594949, + "step": 1075 + }, + { + "ce_ib": 4.259771823883057, + "ce_orig": 0.6271834373474121, + "epoch": 0.30943993097994105, + "kl_loss": 0.07756983488798141, + "loss_ib": 0.0012016755063086748, + "step": 1076 + }, + { + "ce_ib": 8.445744514465332, + "ce_orig": 0.760082483291626, + "epoch": 0.30943993097994105, + "kl_loss": 0.10391563922166824, + "loss_ib": 0.0018837308743968606, + "step": 1076 + }, + { + "ce_ib": 3.1767988204956055, + "ce_orig": 0.40816348791122437, + "epoch": 0.30943993097994105, + "kl_loss": 0.06804482638835907, + "loss_ib": 0.0009981280891224742, + "step": 1076 + }, + { + "ce_ib": 7.188921928405762, + "ce_orig": 1.1699033975601196, + "epoch": 0.30943993097994105, + "kl_loss": 0.18664929270744324, + "loss_ib": 0.0025853849947452545, + "step": 1076 + }, + { + "ce_ib": 9.287099838256836, + "ce_orig": 1.3222893476486206, + "epoch": 0.30972751455891867, + "kl_loss": 0.26636114716529846, + "loss_ib": 0.0035923211835324764, + "step": 1077 + }, + { + "ce_ib": 5.982432842254639, + "ce_orig": 0.6701129674911499, + "epoch": 0.30972751455891867, + "kl_loss": 0.08747244626283646, + "loss_ib": 0.0014729676768183708, + "step": 1077 + }, + { + "ce_ib": 6.506511211395264, + "ce_orig": 1.0417073965072632, + "epoch": 0.30972751455891867, + "kl_loss": 0.15278568863868713, + "loss_ib": 0.0021785080898553133, + "step": 1077 + }, + { + "ce_ib": 3.9755756855010986, + "ce_orig": 0.711229681968689, + "epoch": 0.30972751455891867, + "kl_loss": 0.055553995072841644, + "loss_ib": 0.0009530974784865975, + "step": 1077 + }, + { + "ce_ib": 6.50349760055542, + "ce_orig": 0.8074575066566467, + "epoch": 0.31001509813789635, + "kl_loss": 0.13486388325691223, + "loss_ib": 0.00199898867867887, + "step": 1078 + }, + { + "ce_ib": 4.8907952308654785, + "ce_orig": 0.7938336133956909, + "epoch": 0.31001509813789635, + "kl_loss": 0.09148908406496048, + "loss_ib": 0.0014039704110473394, + "step": 1078 + }, + { + "ce_ib": 4.82853889465332, + "ce_orig": 0.6393706202507019, + "epoch": 0.31001509813789635, + "kl_loss": 0.16172416508197784, + "loss_ib": 0.002100095385685563, + "step": 1078 + }, + { + "ce_ib": 8.266220092773438, + "ce_orig": 0.8295358419418335, + "epoch": 0.31001509813789635, + "kl_loss": 0.08213948458433151, + "loss_ib": 0.0016480168560519814, + "step": 1078 + }, + { + "ce_ib": 6.136702537536621, + "ce_orig": 0.36163759231567383, + "epoch": 0.310302681716874, + "kl_loss": 0.1180807575583458, + "loss_ib": 0.0017944778082892299, + "step": 1079 + }, + { + "ce_ib": 7.075868606567383, + "ce_orig": 0.8117911219596863, + "epoch": 0.310302681716874, + "kl_loss": 0.18309111893177032, + "loss_ib": 0.0025384980253875256, + "step": 1079 + }, + { + "ce_ib": 4.986417293548584, + "ce_orig": 0.5661327242851257, + "epoch": 0.310302681716874, + "kl_loss": 0.14899908006191254, + "loss_ib": 0.001988632371649146, + "step": 1079 + }, + { + "ce_ib": 7.7030439376831055, + "ce_orig": 0.769463062286377, + "epoch": 0.310302681716874, + "kl_loss": 0.12090402096509933, + "loss_ib": 0.0019793445244431496, + "step": 1079 + }, + { + "epoch": 0.3105902652958516, + "grad_norm": 0.08785349130630493, + "learning_rate": 4.9307435103855507e-05, + "loss": 0.8484, + "step": 1080 + }, + { + "ce_ib": 7.207575798034668, + "ce_orig": 0.9277626872062683, + "epoch": 0.3105902652958516, + "kl_loss": 0.09557029604911804, + "loss_ib": 0.0016764604952186346, + "step": 1080 + }, + { + "ce_ib": 5.483087062835693, + "ce_orig": 1.1695438623428345, + "epoch": 0.3105902652958516, + "kl_loss": 0.1018943339586258, + "loss_ib": 0.001567251980304718, + "step": 1080 + }, + { + "ce_ib": 9.196081161499023, + "ce_orig": 0.6741942763328552, + "epoch": 0.3105902652958516, + "kl_loss": 0.1952982246875763, + "loss_ib": 0.0028725904412567616, + "step": 1080 + }, + { + "ce_ib": 7.061942100524902, + "ce_orig": 0.766249418258667, + "epoch": 0.3105902652958516, + "kl_loss": 0.07882822304964066, + "loss_ib": 0.0014944764552637935, + "step": 1080 + }, + { + "ce_ib": 3.37656569480896, + "ce_orig": 0.1894591599702835, + "epoch": 0.3108778488748292, + "kl_loss": 0.3519730567932129, + "loss_ib": 0.0038573869969695807, + "step": 1081 + }, + { + "ce_ib": 9.07180404663086, + "ce_orig": 0.8924823999404907, + "epoch": 0.3108778488748292, + "kl_loss": 0.12007517367601395, + "loss_ib": 0.002107931999489665, + "step": 1081 + }, + { + "ce_ib": 7.9613938331604, + "ce_orig": 0.8214685320854187, + "epoch": 0.3108778488748292, + "kl_loss": 0.13357852399349213, + "loss_ib": 0.0021319244988262653, + "step": 1081 + }, + { + "ce_ib": 3.412221670150757, + "ce_orig": 0.38754329085350037, + "epoch": 0.3108778488748292, + "kl_loss": 0.1467348337173462, + "loss_ib": 0.0018085704650729895, + "step": 1081 + }, + { + "ce_ib": 10.103753089904785, + "ce_orig": 0.83680260181427, + "epoch": 0.3111654324538069, + "kl_loss": 0.1199386864900589, + "loss_ib": 0.0022097621113061905, + "step": 1082 + }, + { + "ce_ib": 7.5057172775268555, + "ce_orig": 1.240213394165039, + "epoch": 0.3111654324538069, + "kl_loss": 0.10108008980751038, + "loss_ib": 0.0017613726668059826, + "step": 1082 + }, + { + "ce_ib": 6.571592330932617, + "ce_orig": 0.8511654734611511, + "epoch": 0.3111654324538069, + "kl_loss": 0.10485070198774338, + "loss_ib": 0.0017056661890819669, + "step": 1082 + }, + { + "ce_ib": 8.051729202270508, + "ce_orig": 1.262660264968872, + "epoch": 0.3111654324538069, + "kl_loss": 0.16858017444610596, + "loss_ib": 0.002490974497050047, + "step": 1082 + }, + { + "ce_ib": 8.413061141967773, + "ce_orig": 1.2371526956558228, + "epoch": 0.3114530160327845, + "kl_loss": 0.10371717810630798, + "loss_ib": 0.0018784778658300638, + "step": 1083 + }, + { + "ce_ib": 4.668495178222656, + "ce_orig": 0.5106315016746521, + "epoch": 0.3114530160327845, + "kl_loss": 0.11991654336452484, + "loss_ib": 0.0016660148976370692, + "step": 1083 + }, + { + "ce_ib": 6.911253929138184, + "ce_orig": 0.8380542993545532, + "epoch": 0.3114530160327845, + "kl_loss": 0.13106882572174072, + "loss_ib": 0.0020018136128783226, + "step": 1083 + }, + { + "ce_ib": 5.011559009552002, + "ce_orig": 0.5626703500747681, + "epoch": 0.3114530160327845, + "kl_loss": 0.14843079447746277, + "loss_ib": 0.0019854637794196606, + "step": 1083 + }, + { + "ce_ib": 8.654923439025879, + "ce_orig": 1.2997244596481323, + "epoch": 0.31174059961176215, + "kl_loss": 0.15330947935581207, + "loss_ib": 0.002398587064817548, + "step": 1084 + }, + { + "ce_ib": 6.45515251159668, + "ce_orig": 1.1039117574691772, + "epoch": 0.31174059961176215, + "kl_loss": 0.15635761618614197, + "loss_ib": 0.0022090913262218237, + "step": 1084 + }, + { + "ce_ib": 6.613543510437012, + "ce_orig": 0.7879582643508911, + "epoch": 0.31174059961176215, + "kl_loss": 0.1213221549987793, + "loss_ib": 0.0018745758570730686, + "step": 1084 + }, + { + "ce_ib": 11.970968246459961, + "ce_orig": 1.7786223888397217, + "epoch": 0.31174059961176215, + "kl_loss": 0.20531702041625977, + "loss_ib": 0.0032502668909728527, + "step": 1084 + }, + { + "epoch": 0.3120281831907398, + "grad_norm": 0.10645577311515808, + "learning_rate": 4.9298335439566946e-05, + "loss": 0.9048, + "step": 1085 + }, + { + "ce_ib": 5.543363094329834, + "ce_orig": 0.7198330760002136, + "epoch": 0.3120281831907398, + "kl_loss": 0.09693525731563568, + "loss_ib": 0.0015236889012157917, + "step": 1085 + }, + { + "ce_ib": 4.480995178222656, + "ce_orig": 0.5485464930534363, + "epoch": 0.3120281831907398, + "kl_loss": 0.08271978050470352, + "loss_ib": 0.0012752973707392812, + "step": 1085 + }, + { + "ce_ib": 6.836972713470459, + "ce_orig": 0.661550760269165, + "epoch": 0.3120281831907398, + "kl_loss": 0.15786096453666687, + "loss_ib": 0.0022623068653047085, + "step": 1085 + }, + { + "ce_ib": 5.3108320236206055, + "ce_orig": 0.6704514622688293, + "epoch": 0.3120281831907398, + "kl_loss": 0.08122578263282776, + "loss_ib": 0.0013433409621939063, + "step": 1085 + }, + { + "ce_ib": 5.4588189125061035, + "ce_orig": 0.7251988649368286, + "epoch": 0.31231576676971745, + "kl_loss": 0.09487950801849365, + "loss_ib": 0.0014946769224479795, + "step": 1086 + }, + { + "ce_ib": 8.106192588806152, + "ce_orig": 1.0608155727386475, + "epoch": 0.31231576676971745, + "kl_loss": 0.13376294076442719, + "loss_ib": 0.002148248488083482, + "step": 1086 + }, + { + "ce_ib": 4.68678092956543, + "ce_orig": 0.4922115206718445, + "epoch": 0.31231576676971745, + "kl_loss": 0.13289615511894226, + "loss_ib": 0.0017976395320147276, + "step": 1086 + }, + { + "ce_ib": 7.68958854675293, + "ce_orig": 1.3556071519851685, + "epoch": 0.31231576676971745, + "kl_loss": 0.11010056734085083, + "loss_ib": 0.001869964529760182, + "step": 1086 + }, + { + "ce_ib": 4.1490583419799805, + "ce_orig": 0.5750277638435364, + "epoch": 0.3126033503486951, + "kl_loss": 0.07980804145336151, + "loss_ib": 0.0012129861861467361, + "step": 1087 + }, + { + "ce_ib": 4.900514125823975, + "ce_orig": 0.6933945417404175, + "epoch": 0.3126033503486951, + "kl_loss": 0.11946623772382736, + "loss_ib": 0.0016847137594595551, + "step": 1087 + }, + { + "ce_ib": 8.583094596862793, + "ce_orig": 1.2328472137451172, + "epoch": 0.3126033503486951, + "kl_loss": 0.23440392315387726, + "loss_ib": 0.003202348481863737, + "step": 1087 + }, + { + "ce_ib": 5.387114524841309, + "ce_orig": 0.8373020887374878, + "epoch": 0.3126033503486951, + "kl_loss": 0.05764302611351013, + "loss_ib": 0.0011151416692882776, + "step": 1087 + }, + { + "ce_ib": 5.245874881744385, + "ce_orig": 0.7845153212547302, + "epoch": 0.31289093392767275, + "kl_loss": 0.13156041502952576, + "loss_ib": 0.001840191544033587, + "step": 1088 + }, + { + "ce_ib": 6.439451217651367, + "ce_orig": 1.0369670391082764, + "epoch": 0.31289093392767275, + "kl_loss": 0.148685485124588, + "loss_ib": 0.002130799926817417, + "step": 1088 + }, + { + "ce_ib": 9.124055862426758, + "ce_orig": 1.409205675125122, + "epoch": 0.31289093392767275, + "kl_loss": 0.20957261323928833, + "loss_ib": 0.0030081316363066435, + "step": 1088 + }, + { + "ce_ib": 7.134158611297607, + "ce_orig": 0.9640491008758545, + "epoch": 0.31289093392767275, + "kl_loss": 0.11817365884780884, + "loss_ib": 0.0018951522652059793, + "step": 1088 + }, + { + "ce_ib": 8.807918548583984, + "ce_orig": 1.233889102935791, + "epoch": 0.3131785175066504, + "kl_loss": 0.12623171508312225, + "loss_ib": 0.0021431089844554663, + "step": 1089 + }, + { + "ce_ib": 4.509085178375244, + "ce_orig": 0.8164548277854919, + "epoch": 0.3131785175066504, + "kl_loss": 0.08705399185419083, + "loss_ib": 0.0013214483624324203, + "step": 1089 + }, + { + "ce_ib": 6.4740095138549805, + "ce_orig": 0.8254353404045105, + "epoch": 0.3131785175066504, + "kl_loss": 0.11919526755809784, + "loss_ib": 0.0018393535865470767, + "step": 1089 + }, + { + "ce_ib": 8.610673904418945, + "ce_orig": 0.8705013990402222, + "epoch": 0.3131785175066504, + "kl_loss": 0.18129542469978333, + "loss_ib": 0.0026740217581391335, + "step": 1089 + }, + { + "epoch": 0.313466101085628, + "grad_norm": 0.09719450771808624, + "learning_rate": 4.9289177234948535e-05, + "loss": 0.9306, + "step": 1090 + }, + { + "ce_ib": 6.462231159210205, + "ce_orig": 0.8928760290145874, + "epoch": 0.313466101085628, + "kl_loss": 0.14842715859413147, + "loss_ib": 0.002130494685843587, + "step": 1090 + }, + { + "ce_ib": 4.3265485763549805, + "ce_orig": 0.44724979996681213, + "epoch": 0.313466101085628, + "kl_loss": 0.22264862060546875, + "loss_ib": 0.0026591410860419273, + "step": 1090 + }, + { + "ce_ib": 7.629527568817139, + "ce_orig": 0.6570071578025818, + "epoch": 0.313466101085628, + "kl_loss": 0.09228412061929703, + "loss_ib": 0.0016857939772307873, + "step": 1090 + }, + { + "ce_ib": 4.116368770599365, + "ce_orig": 0.7895284295082092, + "epoch": 0.313466101085628, + "kl_loss": 0.0641932412981987, + "loss_ib": 0.0010535692563280463, + "step": 1090 + }, + { + "ce_ib": 5.082763671875, + "ce_orig": 0.6230810284614563, + "epoch": 0.3137536846646056, + "kl_loss": 0.09477473795413971, + "loss_ib": 0.001456023775972426, + "step": 1091 + }, + { + "ce_ib": 4.967955589294434, + "ce_orig": 0.4889741539955139, + "epoch": 0.3137536846646056, + "kl_loss": 0.1820719838142395, + "loss_ib": 0.0023175152018666267, + "step": 1091 + }, + { + "ce_ib": 7.032950401306152, + "ce_orig": 0.8132173418998718, + "epoch": 0.3137536846646056, + "kl_loss": 0.10906738042831421, + "loss_ib": 0.0017939688405022025, + "step": 1091 + }, + { + "ce_ib": 2.5505075454711914, + "ce_orig": 0.4912968873977661, + "epoch": 0.3137536846646056, + "kl_loss": 0.05822046473622322, + "loss_ib": 0.0008372553857043386, + "step": 1091 + }, + { + "ce_ib": 6.664619445800781, + "ce_orig": 0.940836489200592, + "epoch": 0.3140412682435833, + "kl_loss": 0.09742730855941772, + "loss_ib": 0.0016407349612563848, + "step": 1092 + }, + { + "ce_ib": 8.534614562988281, + "ce_orig": 1.0564587116241455, + "epoch": 0.3140412682435833, + "kl_loss": 0.19022688269615173, + "loss_ib": 0.002755730180069804, + "step": 1092 + }, + { + "ce_ib": 9.131776809692383, + "ce_orig": 1.1223782300949097, + "epoch": 0.3140412682435833, + "kl_loss": 0.12838055193424225, + "loss_ib": 0.002196982968598604, + "step": 1092 + }, + { + "ce_ib": 7.103231430053711, + "ce_orig": 1.214754581451416, + "epoch": 0.3140412682435833, + "kl_loss": 0.2429865449666977, + "loss_ib": 0.0031401882879436016, + "step": 1092 + }, + { + "ce_ib": 4.4372429847717285, + "ce_orig": 0.611566424369812, + "epoch": 0.31432885182256093, + "kl_loss": 0.10111133754253387, + "loss_ib": 0.0014548376202583313, + "step": 1093 + }, + { + "ce_ib": 6.007613182067871, + "ce_orig": 0.5547209978103638, + "epoch": 0.31432885182256093, + "kl_loss": 0.16770076751708984, + "loss_ib": 0.0022777689155191183, + "step": 1093 + }, + { + "ce_ib": 6.475616455078125, + "ce_orig": 0.9302380681037903, + "epoch": 0.31432885182256093, + "kl_loss": 0.10777818411588669, + "loss_ib": 0.0017253434052690864, + "step": 1093 + }, + { + "ce_ib": 6.58966064453125, + "ce_orig": 0.8381888270378113, + "epoch": 0.31432885182256093, + "kl_loss": 0.4541102647781372, + "loss_ib": 0.005200068932026625, + "step": 1093 + }, + { + "ce_ib": 4.925845146179199, + "ce_orig": 0.7416554689407349, + "epoch": 0.31461643540153855, + "kl_loss": 0.1399184912443161, + "loss_ib": 0.0018917694687843323, + "step": 1094 + }, + { + "ce_ib": 7.991376876831055, + "ce_orig": 0.6140057444572449, + "epoch": 0.31461643540153855, + "kl_loss": 0.11923287063837051, + "loss_ib": 0.0019914661534130573, + "step": 1094 + }, + { + "ce_ib": 7.087644100189209, + "ce_orig": 0.7012661695480347, + "epoch": 0.31461643540153855, + "kl_loss": 0.10533401370048523, + "loss_ib": 0.0017621045699343085, + "step": 1094 + }, + { + "ce_ib": 5.176535129547119, + "ce_orig": 0.5360772609710693, + "epoch": 0.31461643540153855, + "kl_loss": 0.10969047248363495, + "loss_ib": 0.0016145581612363458, + "step": 1094 + }, + { + "epoch": 0.31490401898051623, + "grad_norm": 0.08524588495492935, + "learning_rate": 4.927996051206454e-05, + "loss": 0.7879, + "step": 1095 + }, + { + "ce_ib": 7.138000965118408, + "ce_orig": 0.6079102158546448, + "epoch": 0.31490401898051623, + "kl_loss": 0.1486617475748062, + "loss_ib": 0.0022004174534231424, + "step": 1095 + }, + { + "ce_ib": 4.854630947113037, + "ce_orig": 0.11275182664394379, + "epoch": 0.31490401898051623, + "kl_loss": 0.1564667969942093, + "loss_ib": 0.0020501308608800173, + "step": 1095 + }, + { + "ce_ib": 5.65756368637085, + "ce_orig": 0.7183927893638611, + "epoch": 0.31490401898051623, + "kl_loss": 0.09430578351020813, + "loss_ib": 0.0015088141662999988, + "step": 1095 + }, + { + "ce_ib": 8.676382064819336, + "ce_orig": 0.9703883528709412, + "epoch": 0.31490401898051623, + "kl_loss": 0.11354076862335205, + "loss_ib": 0.0020030459854751825, + "step": 1095 + }, + { + "ce_ib": 9.102498054504395, + "ce_orig": 1.273013710975647, + "epoch": 0.31519160255949386, + "kl_loss": 0.1675441563129425, + "loss_ib": 0.0025856911670416594, + "step": 1096 + }, + { + "ce_ib": 5.018817901611328, + "ce_orig": 0.8406528234481812, + "epoch": 0.31519160255949386, + "kl_loss": 0.09821783006191254, + "loss_ib": 0.0014840599615126848, + "step": 1096 + }, + { + "ce_ib": 6.319728374481201, + "ce_orig": 0.7162959575653076, + "epoch": 0.31519160255949386, + "kl_loss": 0.08511405438184738, + "loss_ib": 0.00148311338853091, + "step": 1096 + }, + { + "ce_ib": 6.150030612945557, + "ce_orig": 0.9151242971420288, + "epoch": 0.31519160255949386, + "kl_loss": 0.15213200449943542, + "loss_ib": 0.0021363231353461742, + "step": 1096 + }, + { + "ce_ib": 2.826596975326538, + "ce_orig": 0.5897778868675232, + "epoch": 0.3154791861384715, + "kl_loss": 0.05788389965891838, + "loss_ib": 0.000861498701851815, + "step": 1097 + }, + { + "ce_ib": 6.561267852783203, + "ce_orig": 0.6017415523529053, + "epoch": 0.3154791861384715, + "kl_loss": 0.19087156653404236, + "loss_ib": 0.0025648423470556736, + "step": 1097 + }, + { + "ce_ib": 9.689705848693848, + "ce_orig": 0.8445053100585938, + "epoch": 0.3154791861384715, + "kl_loss": 0.12390824407339096, + "loss_ib": 0.0022080529015511274, + "step": 1097 + }, + { + "ce_ib": 3.773801803588867, + "ce_orig": 0.5975720286369324, + "epoch": 0.3154791861384715, + "kl_loss": 0.07443420588970184, + "loss_ib": 0.001121722161769867, + "step": 1097 + }, + { + "ce_ib": 7.749790668487549, + "ce_orig": 1.0078861713409424, + "epoch": 0.31576676971744916, + "kl_loss": 0.11658213287591934, + "loss_ib": 0.001940800342708826, + "step": 1098 + }, + { + "ce_ib": 7.260580539703369, + "ce_orig": 0.6796541213989258, + "epoch": 0.31576676971744916, + "kl_loss": 0.0810522586107254, + "loss_ib": 0.0015365806175395846, + "step": 1098 + }, + { + "ce_ib": 5.737454414367676, + "ce_orig": 0.9053107500076294, + "epoch": 0.31576676971744916, + "kl_loss": 0.12374285608530045, + "loss_ib": 0.001811173977330327, + "step": 1098 + }, + { + "ce_ib": 7.831982612609863, + "ce_orig": 1.2352817058563232, + "epoch": 0.31576676971744916, + "kl_loss": 0.1046704649925232, + "loss_ib": 0.0018299027578905225, + "step": 1098 + }, + { + "ce_ib": 4.12555456161499, + "ce_orig": 0.6692237854003906, + "epoch": 0.3160543532964268, + "kl_loss": 0.07782041281461716, + "loss_ib": 0.0011907594744116068, + "step": 1099 + }, + { + "ce_ib": 3.2291836738586426, + "ce_orig": 0.5272323489189148, + "epoch": 0.3160543532964268, + "kl_loss": 0.08906276524066925, + "loss_ib": 0.0012135460274294019, + "step": 1099 + }, + { + "ce_ib": 4.781414031982422, + "ce_orig": 0.6674253344535828, + "epoch": 0.3160543532964268, + "kl_loss": 0.11731104552745819, + "loss_ib": 0.0016512519214302301, + "step": 1099 + }, + { + "ce_ib": 4.504537582397461, + "ce_orig": 0.6338003277778625, + "epoch": 0.3160543532964268, + "kl_loss": 0.08345992863178253, + "loss_ib": 0.0012850529747083783, + "step": 1099 + }, + { + "epoch": 0.3163419368754044, + "grad_norm": 0.10443267971277237, + "learning_rate": 4.9270685293120164e-05, + "loss": 0.823, + "step": 1100 + }, + { + "ce_ib": 2.7047183513641357, + "ce_orig": 0.5154035091400146, + "epoch": 0.3163419368754044, + "kl_loss": 0.07203371077775955, + "loss_ib": 0.0009908088250085711, + "step": 1100 + }, + { + "ce_ib": 8.287751197814941, + "ce_orig": 1.131018877029419, + "epoch": 0.3163419368754044, + "kl_loss": 0.10799284279346466, + "loss_ib": 0.0019087033579126, + "step": 1100 + }, + { + "ce_ib": 9.141654968261719, + "ce_orig": 1.1767055988311768, + "epoch": 0.3163419368754044, + "kl_loss": 0.14867722988128662, + "loss_ib": 0.0024009377229958773, + "step": 1100 + }, + { + "ce_ib": 4.616094589233398, + "ce_orig": 0.7783187627792358, + "epoch": 0.3163419368754044, + "kl_loss": 0.07596094906330109, + "loss_ib": 0.0012212188448756933, + "step": 1100 + }, + { + "ce_ib": 2.9336156845092773, + "ce_orig": 0.1434953659772873, + "epoch": 0.31662952045438203, + "kl_loss": 0.3486359715461731, + "loss_ib": 0.003779721213504672, + "step": 1101 + }, + { + "ce_ib": 7.112979888916016, + "ce_orig": 0.8758820295333862, + "epoch": 0.31662952045438203, + "kl_loss": 0.06863940507173538, + "loss_ib": 0.0013976918999105692, + "step": 1101 + }, + { + "ce_ib": 9.88776969909668, + "ce_orig": 1.4645624160766602, + "epoch": 0.31662952045438203, + "kl_loss": 0.328406423330307, + "loss_ib": 0.004272840917110443, + "step": 1101 + }, + { + "ce_ib": 4.820501804351807, + "ce_orig": 0.47723662853240967, + "epoch": 0.31662952045438203, + "kl_loss": 0.09538324177265167, + "loss_ib": 0.0014358825283125043, + "step": 1101 + }, + { + "ce_ib": 5.636781215667725, + "ce_orig": 0.602377712726593, + "epoch": 0.3169171040333597, + "kl_loss": 0.11122557520866394, + "loss_ib": 0.0016759338323026896, + "step": 1102 + }, + { + "ce_ib": 6.5077972412109375, + "ce_orig": 0.5252118110656738, + "epoch": 0.3169171040333597, + "kl_loss": 0.19280904531478882, + "loss_ib": 0.002578869927674532, + "step": 1102 + }, + { + "ce_ib": 4.5214409828186035, + "ce_orig": 0.8415386080741882, + "epoch": 0.3169171040333597, + "kl_loss": 0.06283672153949738, + "loss_ib": 0.0010805112542584538, + "step": 1102 + }, + { + "ce_ib": 4.760388374328613, + "ce_orig": 0.5159482955932617, + "epoch": 0.3169171040333597, + "kl_loss": 0.13038045167922974, + "loss_ib": 0.001779843238182366, + "step": 1102 + }, + { + "ce_ib": 8.62554931640625, + "ce_orig": 1.2505888938903809, + "epoch": 0.31720468761233733, + "kl_loss": 0.12411917746067047, + "loss_ib": 0.002103746635839343, + "step": 1103 + }, + { + "ce_ib": 6.545633792877197, + "ce_orig": 1.0744396448135376, + "epoch": 0.31720468761233733, + "kl_loss": 0.1949809491634369, + "loss_ib": 0.002604372799396515, + "step": 1103 + }, + { + "ce_ib": 5.3097310066223145, + "ce_orig": 0.7355318069458008, + "epoch": 0.31720468761233733, + "kl_loss": 0.13482140004634857, + "loss_ib": 0.0018791870679706335, + "step": 1103 + }, + { + "ce_ib": 7.22735595703125, + "ce_orig": 0.7363559007644653, + "epoch": 0.31720468761233733, + "kl_loss": 0.13793884217739105, + "loss_ib": 0.0021021240390837193, + "step": 1103 + }, + { + "ce_ib": 3.681156873703003, + "ce_orig": 0.7018173336982727, + "epoch": 0.31749227119131496, + "kl_loss": 0.06493549793958664, + "loss_ib": 0.0010174706112593412, + "step": 1104 + }, + { + "ce_ib": 6.435656547546387, + "ce_orig": 1.0511516332626343, + "epoch": 0.31749227119131496, + "kl_loss": 0.12981092929840088, + "loss_ib": 0.00194167485460639, + "step": 1104 + }, + { + "ce_ib": 6.3591718673706055, + "ce_orig": 0.8714869022369385, + "epoch": 0.31749227119131496, + "kl_loss": 0.07830870151519775, + "loss_ib": 0.001419004169292748, + "step": 1104 + }, + { + "ce_ib": 4.863466262817383, + "ce_orig": 0.524034857749939, + "epoch": 0.31749227119131496, + "kl_loss": 0.09495489299297333, + "loss_ib": 0.001435895566828549, + "step": 1104 + }, + { + "epoch": 0.31777985477029264, + "grad_norm": 0.11291716247797012, + "learning_rate": 4.926135160046157e-05, + "loss": 0.8209, + "step": 1105 + }, + { + "ce_ib": 5.778404235839844, + "ce_orig": 0.6216086745262146, + "epoch": 0.31777985477029264, + "kl_loss": 0.13615110516548157, + "loss_ib": 0.0019393513211980462, + "step": 1105 + }, + { + "ce_ib": 8.00284481048584, + "ce_orig": 1.2720705270767212, + "epoch": 0.31777985477029264, + "kl_loss": 0.1311652958393097, + "loss_ib": 0.0021119373850524426, + "step": 1105 + }, + { + "ce_ib": 5.706634998321533, + "ce_orig": 0.5876952409744263, + "epoch": 0.31777985477029264, + "kl_loss": 0.12042839080095291, + "loss_ib": 0.0017749472754076123, + "step": 1105 + }, + { + "ce_ib": 4.406344890594482, + "ce_orig": 0.6132227778434753, + "epoch": 0.31777985477029264, + "kl_loss": 0.0869908332824707, + "loss_ib": 0.0013105428079143167, + "step": 1105 + }, + { + "ce_ib": 5.951411247253418, + "ce_orig": 0.6965615153312683, + "epoch": 0.31806743834927026, + "kl_loss": 0.0936947911977768, + "loss_ib": 0.0015320890815928578, + "step": 1106 + }, + { + "ce_ib": 4.328317165374756, + "ce_orig": 0.48852014541625977, + "epoch": 0.31806743834927026, + "kl_loss": 0.11511750519275665, + "loss_ib": 0.001584006822668016, + "step": 1106 + }, + { + "ce_ib": 5.561557769775391, + "ce_orig": 0.8280245661735535, + "epoch": 0.31806743834927026, + "kl_loss": 0.13165396451950073, + "loss_ib": 0.0018726954003795981, + "step": 1106 + }, + { + "ce_ib": 9.187907218933105, + "ce_orig": 1.1692800521850586, + "epoch": 0.31806743834927026, + "kl_loss": 0.1220262423157692, + "loss_ib": 0.002139053074643016, + "step": 1106 + }, + { + "ce_ib": 6.820673942565918, + "ce_orig": 0.9661232829093933, + "epoch": 0.3183550219282479, + "kl_loss": 0.08730831742286682, + "loss_ib": 0.0015551503747701645, + "step": 1107 + }, + { + "ce_ib": 6.703719615936279, + "ce_orig": 0.6939508318901062, + "epoch": 0.3183550219282479, + "kl_loss": 0.21232838928699493, + "loss_ib": 0.002793655963614583, + "step": 1107 + }, + { + "ce_ib": 9.952964782714844, + "ce_orig": 1.418191909790039, + "epoch": 0.3183550219282479, + "kl_loss": 0.10500174760818481, + "loss_ib": 0.002045314060524106, + "step": 1107 + }, + { + "ce_ib": 4.250537395477295, + "ce_orig": 0.5294516086578369, + "epoch": 0.3183550219282479, + "kl_loss": 0.09490614384412766, + "loss_ib": 0.0013741151196882129, + "step": 1107 + }, + { + "ce_ib": 2.295755624771118, + "ce_orig": 0.15018223226070404, + "epoch": 0.31864260550722556, + "kl_loss": 0.15408454835414886, + "loss_ib": 0.0017704209312796593, + "step": 1108 + }, + { + "ce_ib": 6.062755584716797, + "ce_orig": 0.8133069276809692, + "epoch": 0.31864260550722556, + "kl_loss": 0.13426713645458221, + "loss_ib": 0.0019489468540996313, + "step": 1108 + }, + { + "ce_ib": 5.790729999542236, + "ce_orig": 0.8639890551567078, + "epoch": 0.31864260550722556, + "kl_loss": 0.1315470188856125, + "loss_ib": 0.0018945431802421808, + "step": 1108 + }, + { + "ce_ib": 5.948178291320801, + "ce_orig": 1.021532654762268, + "epoch": 0.31864260550722556, + "kl_loss": 0.09913386404514313, + "loss_ib": 0.0015861564315855503, + "step": 1108 + }, + { + "ce_ib": 4.356767177581787, + "ce_orig": 0.6611163020133972, + "epoch": 0.3189301890862032, + "kl_loss": 0.12729594111442566, + "loss_ib": 0.0017086360603570938, + "step": 1109 + }, + { + "ce_ib": 5.728999137878418, + "ce_orig": 0.9258163571357727, + "epoch": 0.3189301890862032, + "kl_loss": 0.10851135849952698, + "loss_ib": 0.0016580134397372603, + "step": 1109 + }, + { + "ce_ib": 6.743879318237305, + "ce_orig": 0.4497535824775696, + "epoch": 0.3189301890862032, + "kl_loss": 0.1591167449951172, + "loss_ib": 0.0022655553184449673, + "step": 1109 + }, + { + "ce_ib": 9.192666053771973, + "ce_orig": 1.3244154453277588, + "epoch": 0.3189301890862032, + "kl_loss": 0.07153521478176117, + "loss_ib": 0.0016346186166629195, + "step": 1109 + }, + { + "epoch": 0.3192177726651808, + "grad_norm": 0.09862250834703445, + "learning_rate": 4.92519594565758e-05, + "loss": 0.7799, + "step": 1110 + }, + { + "ce_ib": 6.488005638122559, + "ce_orig": 0.6318284869194031, + "epoch": 0.3192177726651808, + "kl_loss": 0.10362868010997772, + "loss_ib": 0.0016850873362272978, + "step": 1110 + }, + { + "ce_ib": 7.06200647354126, + "ce_orig": 1.099223256111145, + "epoch": 0.3192177726651808, + "kl_loss": 0.15589573979377747, + "loss_ib": 0.00226515787653625, + "step": 1110 + }, + { + "ce_ib": 6.631660461425781, + "ce_orig": 1.0255488157272339, + "epoch": 0.3192177726651808, + "kl_loss": 0.08422824740409851, + "loss_ib": 0.0015054484829306602, + "step": 1110 + }, + { + "ce_ib": 4.097757816314697, + "ce_orig": 0.6223424077033997, + "epoch": 0.3192177726651808, + "kl_loss": 0.11507381498813629, + "loss_ib": 0.0015605139778926969, + "step": 1110 + }, + { + "ce_ib": 7.600182056427002, + "ce_orig": 1.1621298789978027, + "epoch": 0.31950535624415843, + "kl_loss": 0.1489579677581787, + "loss_ib": 0.002249597804620862, + "step": 1111 + }, + { + "ce_ib": 4.432876110076904, + "ce_orig": 0.43630632758140564, + "epoch": 0.31950535624415843, + "kl_loss": 0.22790104150772095, + "loss_ib": 0.0027222977951169014, + "step": 1111 + }, + { + "ce_ib": 4.863165855407715, + "ce_orig": 0.8000689148902893, + "epoch": 0.31950535624415843, + "kl_loss": 0.043151505291461945, + "loss_ib": 0.0009178316104225814, + "step": 1111 + }, + { + "ce_ib": 4.82898473739624, + "ce_orig": 0.652249813079834, + "epoch": 0.31950535624415843, + "kl_loss": 0.10808897018432617, + "loss_ib": 0.0015637881588190794, + "step": 1111 + }, + { + "ce_ib": 5.253769874572754, + "ce_orig": 0.9016509652137756, + "epoch": 0.3197929398231361, + "kl_loss": 0.09644834697246552, + "loss_ib": 0.0014898603549227118, + "step": 1112 + }, + { + "ce_ib": 7.7812886238098145, + "ce_orig": 0.9018441438674927, + "epoch": 0.3197929398231361, + "kl_loss": 0.14024998247623444, + "loss_ib": 0.0021806287113577127, + "step": 1112 + }, + { + "ce_ib": 4.902985095977783, + "ce_orig": 0.6490185856819153, + "epoch": 0.3197929398231361, + "kl_loss": 0.08993034064769745, + "loss_ib": 0.00138960184995085, + "step": 1112 + }, + { + "ce_ib": 4.417405128479004, + "ce_orig": 0.5911821126937866, + "epoch": 0.3197929398231361, + "kl_loss": 0.09331687539815903, + "loss_ib": 0.0013749093050137162, + "step": 1112 + }, + { + "ce_ib": 7.669703483581543, + "ce_orig": 1.1216349601745605, + "epoch": 0.32008052340211374, + "kl_loss": 0.08051162958145142, + "loss_ib": 0.0015720865922048688, + "step": 1113 + }, + { + "ce_ib": 4.903026103973389, + "ce_orig": 0.6339471936225891, + "epoch": 0.32008052340211374, + "kl_loss": 0.05427828058600426, + "loss_ib": 0.001033085398375988, + "step": 1113 + }, + { + "ce_ib": 7.180636405944824, + "ce_orig": 0.9941835403442383, + "epoch": 0.32008052340211374, + "kl_loss": 0.12049823254346848, + "loss_ib": 0.001923045958392322, + "step": 1113 + }, + { + "ce_ib": 8.455522537231445, + "ce_orig": 1.3635344505310059, + "epoch": 0.32008052340211374, + "kl_loss": 0.14797498285770416, + "loss_ib": 0.0023253019899129868, + "step": 1113 + }, + { + "ce_ib": 5.931006908416748, + "ce_orig": 0.658078134059906, + "epoch": 0.32036810698109136, + "kl_loss": 0.08447106182575226, + "loss_ib": 0.0014378111809492111, + "step": 1114 + }, + { + "ce_ib": 7.390595436096191, + "ce_orig": 0.950509250164032, + "epoch": 0.32036810698109136, + "kl_loss": 0.11157698929309845, + "loss_ib": 0.001854829490184784, + "step": 1114 + }, + { + "ce_ib": 5.800291061401367, + "ce_orig": 0.9057431817054749, + "epoch": 0.32036810698109136, + "kl_loss": 0.13156373798847198, + "loss_ib": 0.001895666355267167, + "step": 1114 + }, + { + "ce_ib": 4.867544174194336, + "ce_orig": 0.4064299762248993, + "epoch": 0.32036810698109136, + "kl_loss": 0.10979843884706497, + "loss_ib": 0.001584738725796342, + "step": 1114 + }, + { + "epoch": 0.32065569056006904, + "grad_norm": 0.09395995736122131, + "learning_rate": 4.924250888409069e-05, + "loss": 0.8017, + "step": 1115 + }, + { + "ce_ib": 8.88463020324707, + "ce_orig": 1.196564793586731, + "epoch": 0.32065569056006904, + "kl_loss": 0.14164546132087708, + "loss_ib": 0.002304917434230447, + "step": 1115 + }, + { + "ce_ib": 6.908357620239258, + "ce_orig": 1.1473089456558228, + "epoch": 0.32065569056006904, + "kl_loss": 0.12058570235967636, + "loss_ib": 0.0018966927891597152, + "step": 1115 + }, + { + "ce_ib": 6.580382347106934, + "ce_orig": 0.7484446167945862, + "epoch": 0.32065569056006904, + "kl_loss": 0.07694339752197266, + "loss_ib": 0.0014274722198024392, + "step": 1115 + }, + { + "ce_ib": 6.1991047859191895, + "ce_orig": 0.7058902382850647, + "epoch": 0.32065569056006904, + "kl_loss": 0.13657167553901672, + "loss_ib": 0.0019856272265315056, + "step": 1115 + }, + { + "ce_ib": 3.3146166801452637, + "ce_orig": 0.3218761086463928, + "epoch": 0.32094327413904666, + "kl_loss": 0.1967342048883438, + "loss_ib": 0.00229880353435874, + "step": 1116 + }, + { + "ce_ib": 7.196146011352539, + "ce_orig": 0.8870516419410706, + "epoch": 0.32094327413904666, + "kl_loss": 0.2019130289554596, + "loss_ib": 0.0027387449517846107, + "step": 1116 + }, + { + "ce_ib": 2.6767497062683105, + "ce_orig": 0.29367595911026, + "epoch": 0.32094327413904666, + "kl_loss": 0.19664621353149414, + "loss_ib": 0.0022341369185596704, + "step": 1116 + }, + { + "ce_ib": 7.518855571746826, + "ce_orig": 1.0005066394805908, + "epoch": 0.32094327413904666, + "kl_loss": 0.12175123393535614, + "loss_ib": 0.0019693979993462563, + "step": 1116 + }, + { + "ce_ib": 9.178970336914062, + "ce_orig": 1.2377865314483643, + "epoch": 0.3212308577180243, + "kl_loss": 0.1200728565454483, + "loss_ib": 0.0021186256781220436, + "step": 1117 + }, + { + "ce_ib": 8.34536361694336, + "ce_orig": 0.9707791209220886, + "epoch": 0.3212308577180243, + "kl_loss": 0.10708945989608765, + "loss_ib": 0.001905430806800723, + "step": 1117 + }, + { + "ce_ib": 10.378780364990234, + "ce_orig": 1.690807819366455, + "epoch": 0.3212308577180243, + "kl_loss": 0.13962461054325104, + "loss_ib": 0.002434124005958438, + "step": 1117 + }, + { + "ce_ib": 6.600798606872559, + "ce_orig": 0.7438252568244934, + "epoch": 0.3212308577180243, + "kl_loss": 0.10462431609630585, + "loss_ib": 0.0017063230043277144, + "step": 1117 + }, + { + "ce_ib": 4.960424423217773, + "ce_orig": 0.6897417306900024, + "epoch": 0.32151844129700197, + "kl_loss": 0.1169540211558342, + "loss_ib": 0.0016655826475471258, + "step": 1118 + }, + { + "ce_ib": 6.60028600692749, + "ce_orig": 0.9910760521888733, + "epoch": 0.32151844129700197, + "kl_loss": 0.1475672870874405, + "loss_ib": 0.0021357014775276184, + "step": 1118 + }, + { + "ce_ib": 3.456740617752075, + "ce_orig": 0.5141904950141907, + "epoch": 0.32151844129700197, + "kl_loss": 0.09396034479141235, + "loss_ib": 0.0012852774234488606, + "step": 1118 + }, + { + "ce_ib": 6.369697570800781, + "ce_orig": 0.9031816124916077, + "epoch": 0.32151844129700197, + "kl_loss": 0.1125561073422432, + "loss_ib": 0.001762530766427517, + "step": 1118 + }, + { + "ce_ib": 3.408698320388794, + "ce_orig": 0.41078513860702515, + "epoch": 0.3218060248759796, + "kl_loss": 0.10214546322822571, + "loss_ib": 0.001362324459478259, + "step": 1119 + }, + { + "ce_ib": 5.6393585205078125, + "ce_orig": 0.5337005853652954, + "epoch": 0.3218060248759796, + "kl_loss": 0.07566121220588684, + "loss_ib": 0.001320547889918089, + "step": 1119 + }, + { + "ce_ib": 6.25368595123291, + "ce_orig": 0.8251882791519165, + "epoch": 0.3218060248759796, + "kl_loss": 0.14441141486167908, + "loss_ib": 0.0020694828126579523, + "step": 1119 + }, + { + "ce_ib": 4.823451042175293, + "ce_orig": 0.5779725313186646, + "epoch": 0.3218060248759796, + "kl_loss": 0.09135837107896805, + "loss_ib": 0.0013959287898615003, + "step": 1119 + }, + { + "epoch": 0.3220936084549572, + "grad_norm": 0.08428891748189926, + "learning_rate": 4.923299990577488e-05, + "loss": 0.8434, + "step": 1120 + }, + { + "ce_ib": 5.67487907409668, + "ce_orig": 0.8123293519020081, + "epoch": 0.3220936084549572, + "kl_loss": 0.08672993630170822, + "loss_ib": 0.0014347871765494347, + "step": 1120 + }, + { + "ce_ib": 7.47396183013916, + "ce_orig": 0.6569360494613647, + "epoch": 0.3220936084549572, + "kl_loss": 0.15070360898971558, + "loss_ib": 0.002254432300105691, + "step": 1120 + }, + { + "ce_ib": 6.670865058898926, + "ce_orig": 0.4752155840396881, + "epoch": 0.3220936084549572, + "kl_loss": 0.18852224946022034, + "loss_ib": 0.002552309073507786, + "step": 1120 + }, + { + "ce_ib": 4.767125129699707, + "ce_orig": 0.691856861114502, + "epoch": 0.3220936084549572, + "kl_loss": 0.08532582223415375, + "loss_ib": 0.0013299706624820828, + "step": 1120 + }, + { + "ce_ib": 2.6434950828552246, + "ce_orig": 0.3332846462726593, + "epoch": 0.32238119203393484, + "kl_loss": 0.08305683732032776, + "loss_ib": 0.0010949178831651807, + "step": 1121 + }, + { + "ce_ib": 10.54102897644043, + "ce_orig": 1.667731523513794, + "epoch": 0.32238119203393484, + "kl_loss": 0.13803747296333313, + "loss_ib": 0.0024344774428755045, + "step": 1121 + }, + { + "ce_ib": 6.683977127075195, + "ce_orig": 0.6325282454490662, + "epoch": 0.32238119203393484, + "kl_loss": 0.12083125114440918, + "loss_ib": 0.0018767102155834436, + "step": 1121 + }, + { + "ce_ib": 9.683277130126953, + "ce_orig": 1.1499894857406616, + "epoch": 0.32238119203393484, + "kl_loss": 0.15769684314727783, + "loss_ib": 0.0025452959816902876, + "step": 1121 + }, + { + "ce_ib": 9.049882888793945, + "ce_orig": 1.3263394832611084, + "epoch": 0.3226687756129125, + "kl_loss": 0.10114380717277527, + "loss_ib": 0.001916426350362599, + "step": 1122 + }, + { + "ce_ib": 5.353783130645752, + "ce_orig": 0.6200342178344727, + "epoch": 0.3226687756129125, + "kl_loss": 0.07643938064575195, + "loss_ib": 0.0012997720623388886, + "step": 1122 + }, + { + "ce_ib": 6.108999729156494, + "ce_orig": 0.7126256227493286, + "epoch": 0.3226687756129125, + "kl_loss": 0.12426118552684784, + "loss_ib": 0.0018535117851570249, + "step": 1122 + }, + { + "ce_ib": 8.723121643066406, + "ce_orig": 1.3226583003997803, + "epoch": 0.3226687756129125, + "kl_loss": 0.14628058671951294, + "loss_ib": 0.0023351178970187902, + "step": 1122 + }, + { + "ce_ib": 9.964115142822266, + "ce_orig": 1.3997361660003662, + "epoch": 0.32295635919189014, + "kl_loss": 0.14625918865203857, + "loss_ib": 0.002459003357216716, + "step": 1123 + }, + { + "ce_ib": 4.798125267028809, + "ce_orig": 0.5897971391677856, + "epoch": 0.32295635919189014, + "kl_loss": 0.12656289339065552, + "loss_ib": 0.0017454413464292884, + "step": 1123 + }, + { + "ce_ib": 5.80087947845459, + "ce_orig": 1.046662449836731, + "epoch": 0.32295635919189014, + "kl_loss": 0.11369995772838593, + "loss_ib": 0.0017170874634757638, + "step": 1123 + }, + { + "ce_ib": 10.707456588745117, + "ce_orig": 1.7433552742004395, + "epoch": 0.32295635919189014, + "kl_loss": 0.19035384058952332, + "loss_ib": 0.002974283881485462, + "step": 1123 + }, + { + "ce_ib": 9.244044303894043, + "ce_orig": 1.1507402658462524, + "epoch": 0.32324394277086776, + "kl_loss": 0.13072650134563446, + "loss_ib": 0.0022316693793982267, + "step": 1124 + }, + { + "ce_ib": 5.5080389976501465, + "ce_orig": 0.8370607495307922, + "epoch": 0.32324394277086776, + "kl_loss": 0.06126859784126282, + "loss_ib": 0.0011634897673502564, + "step": 1124 + }, + { + "ce_ib": 7.186861038208008, + "ce_orig": 0.6832764744758606, + "epoch": 0.32324394277086776, + "kl_loss": 0.1627086102962494, + "loss_ib": 0.0023457719944417477, + "step": 1124 + }, + { + "ce_ib": 8.068244934082031, + "ce_orig": 0.9853929877281189, + "epoch": 0.32324394277086776, + "kl_loss": 0.1222214549779892, + "loss_ib": 0.002029038965702057, + "step": 1124 + }, + { + "epoch": 0.32353152634984544, + "grad_norm": 0.09608148038387299, + "learning_rate": 4.922343254453768e-05, + "loss": 0.846, + "step": 1125 + }, + { + "ce_ib": 10.975310325622559, + "ce_orig": 1.636178731918335, + "epoch": 0.32353152634984544, + "kl_loss": 0.0799994021654129, + "loss_ib": 0.001897525042295456, + "step": 1125 + }, + { + "ce_ib": 7.978636264801025, + "ce_orig": 0.6996608972549438, + "epoch": 0.32353152634984544, + "kl_loss": 0.15940502285957336, + "loss_ib": 0.0023919136729091406, + "step": 1125 + }, + { + "ce_ib": 6.658046722412109, + "ce_orig": 0.9537755250930786, + "epoch": 0.32353152634984544, + "kl_loss": 0.10534384846687317, + "loss_ib": 0.001719243242405355, + "step": 1125 + }, + { + "ce_ib": 6.551901340484619, + "ce_orig": 0.853861391544342, + "epoch": 0.32353152634984544, + "kl_loss": 0.10188733041286469, + "loss_ib": 0.001674063503742218, + "step": 1125 + }, + { + "ce_ib": 9.186063766479492, + "ce_orig": 0.34158048033714294, + "epoch": 0.32381910992882307, + "kl_loss": 0.15255634486675262, + "loss_ib": 0.0024441697169095278, + "step": 1126 + }, + { + "ce_ib": 7.030545234680176, + "ce_orig": 1.3549855947494507, + "epoch": 0.32381910992882307, + "kl_loss": 0.07460334897041321, + "loss_ib": 0.0014490879839286208, + "step": 1126 + }, + { + "ce_ib": 4.667653560638428, + "ce_orig": 0.8390925526618958, + "epoch": 0.32381910992882307, + "kl_loss": 0.10421881079673767, + "loss_ib": 0.0015089533990249038, + "step": 1126 + }, + { + "ce_ib": 9.643956184387207, + "ce_orig": 0.7430564761161804, + "epoch": 0.32381910992882307, + "kl_loss": 0.13251788914203644, + "loss_ib": 0.002289574360474944, + "step": 1126 + }, + { + "ce_ib": 7.423088073730469, + "ce_orig": 1.0442339181900024, + "epoch": 0.3241066935078007, + "kl_loss": 0.10334056615829468, + "loss_ib": 0.0017757144523784518, + "step": 1127 + }, + { + "ce_ib": 5.813991546630859, + "ce_orig": 0.9771583676338196, + "epoch": 0.3241066935078007, + "kl_loss": 0.1183927059173584, + "loss_ib": 0.0017653262475505471, + "step": 1127 + }, + { + "ce_ib": 4.857069969177246, + "ce_orig": 0.7142473459243774, + "epoch": 0.3241066935078007, + "kl_loss": 0.20338529348373413, + "loss_ib": 0.0025195598136633635, + "step": 1127 + }, + { + "ce_ib": 3.728487730026245, + "ce_orig": 0.4864121973514557, + "epoch": 0.3241066935078007, + "kl_loss": 0.2202530950307846, + "loss_ib": 0.0025753795634955168, + "step": 1127 + }, + { + "ce_ib": 5.813749313354492, + "ce_orig": 0.6549258232116699, + "epoch": 0.32439427708677837, + "kl_loss": 0.17920701205730438, + "loss_ib": 0.0023734450805932283, + "step": 1128 + }, + { + "ce_ib": 4.604310512542725, + "ce_orig": 1.2016593217849731, + "epoch": 0.32439427708677837, + "kl_loss": 0.08224816620349884, + "loss_ib": 0.001282912795431912, + "step": 1128 + }, + { + "ce_ib": 5.9080634117126465, + "ce_orig": 0.9900830388069153, + "epoch": 0.32439427708677837, + "kl_loss": 0.09860847145318985, + "loss_ib": 0.0015768910525366664, + "step": 1128 + }, + { + "ce_ib": 7.773683071136475, + "ce_orig": 1.154776692390442, + "epoch": 0.32439427708677837, + "kl_loss": 0.093455970287323, + "loss_ib": 0.001711927936412394, + "step": 1128 + }, + { + "ce_ib": 7.422971248626709, + "ce_orig": 1.3916915655136108, + "epoch": 0.324681860665756, + "kl_loss": 0.09650696814060211, + "loss_ib": 0.001707366667687893, + "step": 1129 + }, + { + "ce_ib": 5.6086297035217285, + "ce_orig": 0.8838496804237366, + "epoch": 0.324681860665756, + "kl_loss": 0.13013264536857605, + "loss_ib": 0.0018621893832460046, + "step": 1129 + }, + { + "ce_ib": 9.813456535339355, + "ce_orig": 1.1540263891220093, + "epoch": 0.324681860665756, + "kl_loss": 0.11320735514163971, + "loss_ib": 0.0021134191192686558, + "step": 1129 + }, + { + "ce_ib": 7.494252681732178, + "ce_orig": 1.1366146802902222, + "epoch": 0.324681860665756, + "kl_loss": 0.10920100659132004, + "loss_ib": 0.0018414352089166641, + "step": 1129 + }, + { + "epoch": 0.3249694442447336, + "grad_norm": 0.1281149685382843, + "learning_rate": 4.921380682342912e-05, + "loss": 0.8778, + "step": 1130 + }, + { + "ce_ib": 7.405741214752197, + "ce_orig": 1.4356822967529297, + "epoch": 0.3249694442447336, + "kl_loss": 0.08208800852298737, + "loss_ib": 0.0015614541480317712, + "step": 1130 + }, + { + "ce_ib": 5.840486526489258, + "ce_orig": 0.8860843777656555, + "epoch": 0.3249694442447336, + "kl_loss": 0.10055000334978104, + "loss_ib": 0.0015895485412329435, + "step": 1130 + }, + { + "ce_ib": 5.952149391174316, + "ce_orig": 0.6938628554344177, + "epoch": 0.3249694442447336, + "kl_loss": 0.13012732565402985, + "loss_ib": 0.0018964881310239434, + "step": 1130 + }, + { + "ce_ib": 5.876269340515137, + "ce_orig": 0.7113330364227295, + "epoch": 0.3249694442447336, + "kl_loss": 0.13111966848373413, + "loss_ib": 0.0018988236552104354, + "step": 1130 + }, + { + "ce_ib": 7.067512512207031, + "ce_orig": 0.7368212342262268, + "epoch": 0.32525702782371124, + "kl_loss": 0.13936303555965424, + "loss_ib": 0.0021003815345466137, + "step": 1131 + }, + { + "ce_ib": 4.883144378662109, + "ce_orig": 0.7645682096481323, + "epoch": 0.32525702782371124, + "kl_loss": 0.11572670936584473, + "loss_ib": 0.0016455815639346838, + "step": 1131 + }, + { + "ce_ib": 12.358269691467285, + "ce_orig": 2.1583681106567383, + "epoch": 0.32525702782371124, + "kl_loss": 0.10350771248340607, + "loss_ib": 0.002270903903990984, + "step": 1131 + }, + { + "ce_ib": 5.750890254974365, + "ce_orig": 0.5138911604881287, + "epoch": 0.32525702782371124, + "kl_loss": 0.18141251802444458, + "loss_ib": 0.0023892142344266176, + "step": 1131 + }, + { + "ce_ib": 5.482202529907227, + "ce_orig": 0.4194678068161011, + "epoch": 0.3255446114026889, + "kl_loss": 0.16889557242393494, + "loss_ib": 0.0022371760569512844, + "step": 1132 + }, + { + "ce_ib": 7.464616298675537, + "ce_orig": 0.6531148552894592, + "epoch": 0.3255446114026889, + "kl_loss": 0.1452089548110962, + "loss_ib": 0.002198551082983613, + "step": 1132 + }, + { + "ce_ib": 7.846071720123291, + "ce_orig": 1.1021744012832642, + "epoch": 0.3255446114026889, + "kl_loss": 0.2762680947780609, + "loss_ib": 0.0035472880117595196, + "step": 1132 + }, + { + "ce_ib": 5.043225288391113, + "ce_orig": 0.9153444766998291, + "epoch": 0.3255446114026889, + "kl_loss": 0.05944227799773216, + "loss_ib": 0.0010987452697008848, + "step": 1132 + }, + { + "ce_ib": 4.470819473266602, + "ce_orig": 0.5241734981536865, + "epoch": 0.32583219498166655, + "kl_loss": 0.10007923096418381, + "loss_ib": 0.0014478742377832532, + "step": 1133 + }, + { + "ce_ib": 7.373484134674072, + "ce_orig": 0.9568267464637756, + "epoch": 0.32583219498166655, + "kl_loss": 0.11206928640604019, + "loss_ib": 0.0018580412724986672, + "step": 1133 + }, + { + "ce_ib": 4.963825702667236, + "ce_orig": 0.6650660037994385, + "epoch": 0.32583219498166655, + "kl_loss": 0.17485421895980835, + "loss_ib": 0.002244924660772085, + "step": 1133 + }, + { + "ce_ib": 4.26594352722168, + "ce_orig": 0.5342496037483215, + "epoch": 0.32583219498166655, + "kl_loss": 0.12280713766813278, + "loss_ib": 0.0016546656843274832, + "step": 1133 + }, + { + "ce_ib": 4.813910961151123, + "ce_orig": 0.6347959041595459, + "epoch": 0.32611977856064417, + "kl_loss": 0.10050021857023239, + "loss_ib": 0.0014863931573927402, + "step": 1134 + }, + { + "ce_ib": 5.719820976257324, + "ce_orig": 0.7599513530731201, + "epoch": 0.32611977856064417, + "kl_loss": 0.14795079827308655, + "loss_ib": 0.0020514901261776686, + "step": 1134 + }, + { + "ce_ib": 7.878007888793945, + "ce_orig": 1.600778579711914, + "epoch": 0.32611977856064417, + "kl_loss": 0.08611056953668594, + "loss_ib": 0.001648906385526061, + "step": 1134 + }, + { + "ce_ib": 6.737755298614502, + "ce_orig": 0.7938176393508911, + "epoch": 0.32611977856064417, + "kl_loss": 0.10635479539632797, + "loss_ib": 0.0017373233567923307, + "step": 1134 + }, + { + "epoch": 0.32640736213962185, + "grad_norm": 0.08962231129407883, + "learning_rate": 4.920412276563977e-05, + "loss": 0.7977, + "step": 1135 + }, + { + "ce_ib": 6.567104816436768, + "ce_orig": 0.9490758180618286, + "epoch": 0.32640736213962185, + "kl_loss": 0.10037635266780853, + "loss_ib": 0.0016604738775640726, + "step": 1135 + }, + { + "ce_ib": 3.867321729660034, + "ce_orig": 0.6551004648208618, + "epoch": 0.32640736213962185, + "kl_loss": 0.12545770406723022, + "loss_ib": 0.0016413092380389571, + "step": 1135 + }, + { + "ce_ib": 5.023824691772461, + "ce_orig": 0.549644947052002, + "epoch": 0.32640736213962185, + "kl_loss": 0.10960295051336288, + "loss_ib": 0.001598411938175559, + "step": 1135 + }, + { + "ce_ib": 9.607556343078613, + "ce_orig": 1.194618821144104, + "epoch": 0.32640736213962185, + "kl_loss": 0.09962357580661774, + "loss_ib": 0.001956991385668516, + "step": 1135 + }, + { + "ce_ib": 5.548495292663574, + "ce_orig": 0.6250153183937073, + "epoch": 0.32669494571859947, + "kl_loss": 0.12165582925081253, + "loss_ib": 0.0017714076675474644, + "step": 1136 + }, + { + "ce_ib": 8.4020357131958, + "ce_orig": 0.9137877225875854, + "epoch": 0.32669494571859947, + "kl_loss": 0.12433409690856934, + "loss_ib": 0.002083544386550784, + "step": 1136 + }, + { + "ce_ib": 4.013749599456787, + "ce_orig": 0.5571913123130798, + "epoch": 0.32669494571859947, + "kl_loss": 0.157148078083992, + "loss_ib": 0.00197285576723516, + "step": 1136 + }, + { + "ce_ib": 5.231479644775391, + "ce_orig": 0.7425175905227661, + "epoch": 0.32669494571859947, + "kl_loss": 0.13806116580963135, + "loss_ib": 0.0019037595484405756, + "step": 1136 + }, + { + "ce_ib": 4.444094657897949, + "ce_orig": 0.5933840274810791, + "epoch": 0.3269825292975771, + "kl_loss": 0.08529434353113174, + "loss_ib": 0.0012973528355360031, + "step": 1137 + }, + { + "ce_ib": 6.240205764770508, + "ce_orig": 0.9035214781761169, + "epoch": 0.3269825292975771, + "kl_loss": 0.08053313940763474, + "loss_ib": 0.001429351861588657, + "step": 1137 + }, + { + "ce_ib": 10.129720687866211, + "ce_orig": 1.1901086568832397, + "epoch": 0.3269825292975771, + "kl_loss": 0.15460729598999023, + "loss_ib": 0.0025590448640286922, + "step": 1137 + }, + { + "ce_ib": 5.573910236358643, + "ce_orig": 0.6092091202735901, + "epoch": 0.3269825292975771, + "kl_loss": 0.2540702223777771, + "loss_ib": 0.003098093206062913, + "step": 1137 + }, + { + "ce_ib": 5.491816520690918, + "ce_orig": 0.702403724193573, + "epoch": 0.3272701128765548, + "kl_loss": 0.13322144746780396, + "loss_ib": 0.0018813961651176214, + "step": 1138 + }, + { + "ce_ib": 8.6897554397583, + "ce_orig": 1.1769925355911255, + "epoch": 0.3272701128765548, + "kl_loss": 0.1068679541349411, + "loss_ib": 0.001937655033543706, + "step": 1138 + }, + { + "ce_ib": 6.18388032913208, + "ce_orig": 0.5875335335731506, + "epoch": 0.3272701128765548, + "kl_loss": 0.13150034844875336, + "loss_ib": 0.001933391555212438, + "step": 1138 + }, + { + "ce_ib": 4.367621421813965, + "ce_orig": 0.771990954875946, + "epoch": 0.3272701128765548, + "kl_loss": 0.10100337862968445, + "loss_ib": 0.0014467958826571703, + "step": 1138 + }, + { + "ce_ib": 3.696974277496338, + "ce_orig": 0.5121734738349915, + "epoch": 0.3275576964555324, + "kl_loss": 0.09040553122758865, + "loss_ib": 0.0012737527722492814, + "step": 1139 + }, + { + "ce_ib": 8.0768404006958, + "ce_orig": 1.3299756050109863, + "epoch": 0.3275576964555324, + "kl_loss": 0.10132066160440445, + "loss_ib": 0.0018208905821666121, + "step": 1139 + }, + { + "ce_ib": 5.821397304534912, + "ce_orig": 0.8105670809745789, + "epoch": 0.3275576964555324, + "kl_loss": 0.10337543487548828, + "loss_ib": 0.0016158941434696317, + "step": 1139 + }, + { + "ce_ib": 7.536628723144531, + "ce_orig": 0.7028417587280273, + "epoch": 0.3275576964555324, + "kl_loss": 0.15450000762939453, + "loss_ib": 0.002298662904649973, + "step": 1139 + }, + { + "epoch": 0.32784528003451, + "grad_norm": 0.08544071763753891, + "learning_rate": 4.919438039450078e-05, + "loss": 0.862, + "step": 1140 + }, + { + "ce_ib": 5.5745720863342285, + "ce_orig": 1.0211468935012817, + "epoch": 0.32784528003451, + "kl_loss": 0.09475058317184448, + "loss_ib": 0.001504963031038642, + "step": 1140 + }, + { + "ce_ib": 8.765974044799805, + "ce_orig": 0.9023078680038452, + "epoch": 0.32784528003451, + "kl_loss": 0.1581837236881256, + "loss_ib": 0.0024584345519542694, + "step": 1140 + }, + { + "ce_ib": 6.658539772033691, + "ce_orig": 0.40172821283340454, + "epoch": 0.32784528003451, + "kl_loss": 0.16421042382717133, + "loss_ib": 0.0023079582024365664, + "step": 1140 + }, + { + "ce_ib": 5.496406078338623, + "ce_orig": 0.5838255286216736, + "epoch": 0.32784528003451, + "kl_loss": 0.11605791002511978, + "loss_ib": 0.0017102196579799056, + "step": 1140 + }, + { + "ce_ib": 9.511661529541016, + "ce_orig": 1.2888432741165161, + "epoch": 0.32813286361348765, + "kl_loss": 0.14782628417015076, + "loss_ib": 0.0024294289760291576, + "step": 1141 + }, + { + "ce_ib": 6.050619602203369, + "ce_orig": 0.732530951499939, + "epoch": 0.32813286361348765, + "kl_loss": 0.10595589131116867, + "loss_ib": 0.0016646209405735135, + "step": 1141 + }, + { + "ce_ib": 7.932619571685791, + "ce_orig": 0.702617347240448, + "epoch": 0.32813286361348765, + "kl_loss": 0.16292458772659302, + "loss_ib": 0.0024225078523159027, + "step": 1141 + }, + { + "ce_ib": 7.2254743576049805, + "ce_orig": 0.72353595495224, + "epoch": 0.32813286361348765, + "kl_loss": 0.13075268268585205, + "loss_ib": 0.0020300743635743856, + "step": 1141 + }, + { + "ce_ib": 5.295469284057617, + "ce_orig": 0.5043757557868958, + "epoch": 0.3284204471924653, + "kl_loss": 0.11717567592859268, + "loss_ib": 0.0017013036413118243, + "step": 1142 + }, + { + "ce_ib": 5.344768047332764, + "ce_orig": 0.623521625995636, + "epoch": 0.3284204471924653, + "kl_loss": 0.071539506316185, + "loss_ib": 0.0012498717987909913, + "step": 1142 + }, + { + "ce_ib": 7.219688415527344, + "ce_orig": 0.8054973483085632, + "epoch": 0.3284204471924653, + "kl_loss": 0.18407700955867767, + "loss_ib": 0.0025627389550209045, + "step": 1142 + }, + { + "ce_ib": 7.002214431762695, + "ce_orig": 0.7284294962882996, + "epoch": 0.3284204471924653, + "kl_loss": 0.07964880764484406, + "loss_ib": 0.0014967095339670777, + "step": 1142 + }, + { + "ce_ib": 8.63122272491455, + "ce_orig": 0.7607702016830444, + "epoch": 0.32870803077144295, + "kl_loss": 0.15003398060798645, + "loss_ib": 0.0023634620010852814, + "step": 1143 + }, + { + "ce_ib": 5.379940032958984, + "ce_orig": 0.7627663612365723, + "epoch": 0.32870803077144295, + "kl_loss": 0.1444094330072403, + "loss_ib": 0.0019820884335786104, + "step": 1143 + }, + { + "ce_ib": 4.847815036773682, + "ce_orig": 0.7149484157562256, + "epoch": 0.32870803077144295, + "kl_loss": 0.09257819503545761, + "loss_ib": 0.0014105633599683642, + "step": 1143 + }, + { + "ce_ib": 9.955081939697266, + "ce_orig": 0.9020329713821411, + "epoch": 0.32870803077144295, + "kl_loss": 0.1339617371559143, + "loss_ib": 0.002335125347599387, + "step": 1143 + }, + { + "ce_ib": 8.813331604003906, + "ce_orig": 0.7900945544242859, + "epoch": 0.3289956143504206, + "kl_loss": 0.16139021515846252, + "loss_ib": 0.002495235064998269, + "step": 1144 + }, + { + "ce_ib": 8.96036434173584, + "ce_orig": 1.1033498048782349, + "epoch": 0.3289956143504206, + "kl_loss": 0.1222626268863678, + "loss_ib": 0.002118662465363741, + "step": 1144 + }, + { + "ce_ib": 4.615363597869873, + "ce_orig": 0.3875173032283783, + "epoch": 0.3289956143504206, + "kl_loss": 0.10488709807395935, + "loss_ib": 0.0015104073099792004, + "step": 1144 + }, + { + "ce_ib": 9.573678970336914, + "ce_orig": 1.2343288660049438, + "epoch": 0.3289956143504206, + "kl_loss": 0.13394644856452942, + "loss_ib": 0.002296832390129566, + "step": 1144 + }, + { + "epoch": 0.32928319792939825, + "grad_norm": 0.0860443264245987, + "learning_rate": 4.9184579733483796e-05, + "loss": 0.854, + "step": 1145 + }, + { + "ce_ib": 5.269725322723389, + "ce_orig": 0.6295099854469299, + "epoch": 0.32928319792939825, + "kl_loss": 0.09791259467601776, + "loss_ib": 0.00150609842967242, + "step": 1145 + }, + { + "ce_ib": 6.847803592681885, + "ce_orig": 0.955610990524292, + "epoch": 0.32928319792939825, + "kl_loss": 0.1154763400554657, + "loss_ib": 0.00183954369276762, + "step": 1145 + }, + { + "ce_ib": 6.657177448272705, + "ce_orig": 0.5914230942726135, + "epoch": 0.32928319792939825, + "kl_loss": 0.09953590482473373, + "loss_ib": 0.0016610767925158143, + "step": 1145 + }, + { + "ce_ib": 7.972986221313477, + "ce_orig": 0.8305418491363525, + "epoch": 0.32928319792939825, + "kl_loss": 0.19533243775367737, + "loss_ib": 0.002750622807070613, + "step": 1145 + }, + { + "ce_ib": 8.388483047485352, + "ce_orig": 1.120553731918335, + "epoch": 0.3295707815083759, + "kl_loss": 0.10701696574687958, + "loss_ib": 0.0019090177956968546, + "step": 1146 + }, + { + "ce_ib": 4.477085590362549, + "ce_orig": 0.739358127117157, + "epoch": 0.3295707815083759, + "kl_loss": 0.11214235424995422, + "loss_ib": 0.0015691319713369012, + "step": 1146 + }, + { + "ce_ib": 6.1088972091674805, + "ce_orig": 0.8950079083442688, + "epoch": 0.3295707815083759, + "kl_loss": 0.14025771617889404, + "loss_ib": 0.0020134670194238424, + "step": 1146 + }, + { + "ce_ib": 7.437435626983643, + "ce_orig": 0.8652719855308533, + "epoch": 0.3295707815083759, + "kl_loss": 0.10425149649381638, + "loss_ib": 0.001786258420906961, + "step": 1146 + }, + { + "ce_ib": 6.148717880249023, + "ce_orig": 0.5317816138267517, + "epoch": 0.3298583650873535, + "kl_loss": 0.0778564065694809, + "loss_ib": 0.0013934358721598983, + "step": 1147 + }, + { + "ce_ib": 8.765717506408691, + "ce_orig": 0.6234492659568787, + "epoch": 0.3298583650873535, + "kl_loss": 0.10909809172153473, + "loss_ib": 0.0019675525836646557, + "step": 1147 + }, + { + "ce_ib": 6.933672904968262, + "ce_orig": 0.6965823769569397, + "epoch": 0.3298583650873535, + "kl_loss": 0.11135978251695633, + "loss_ib": 0.001806965097784996, + "step": 1147 + }, + { + "ce_ib": 4.914548873901367, + "ce_orig": 0.6677202582359314, + "epoch": 0.3298583650873535, + "kl_loss": 0.09731614589691162, + "loss_ib": 0.0014646162744611502, + "step": 1147 + }, + { + "ce_ib": 5.5134711265563965, + "ce_orig": 0.826160192489624, + "epoch": 0.3301459486663312, + "kl_loss": 0.07226017117500305, + "loss_ib": 0.001273948815651238, + "step": 1148 + }, + { + "ce_ib": 8.220447540283203, + "ce_orig": 1.2149168252944946, + "epoch": 0.3301459486663312, + "kl_loss": 0.1457306444644928, + "loss_ib": 0.00227935123257339, + "step": 1148 + }, + { + "ce_ib": 5.302207946777344, + "ce_orig": 0.7826932072639465, + "epoch": 0.3301459486663312, + "kl_loss": 0.10453931242227554, + "loss_ib": 0.001575613860040903, + "step": 1148 + }, + { + "ce_ib": 3.9958691596984863, + "ce_orig": 0.6564205288887024, + "epoch": 0.3301459486663312, + "kl_loss": 0.06783327460289001, + "loss_ib": 0.0010779196163639426, + "step": 1148 + }, + { + "ce_ib": 4.644093990325928, + "ce_orig": 0.4070664942264557, + "epoch": 0.3304335322453088, + "kl_loss": 0.14819657802581787, + "loss_ib": 0.0019463751232251525, + "step": 1149 + }, + { + "ce_ib": 4.468021392822266, + "ce_orig": 0.6336247324943542, + "epoch": 0.3304335322453088, + "kl_loss": 0.09321756660938263, + "loss_ib": 0.0013789776712656021, + "step": 1149 + }, + { + "ce_ib": 4.387839317321777, + "ce_orig": 0.30651336908340454, + "epoch": 0.3304335322453088, + "kl_loss": 0.1521071493625641, + "loss_ib": 0.0019598554354161024, + "step": 1149 + }, + { + "ce_ib": 7.417322635650635, + "ce_orig": 0.9508139491081238, + "epoch": 0.3304335322453088, + "kl_loss": 0.08571872115135193, + "loss_ib": 0.0015989193925634027, + "step": 1149 + }, + { + "epoch": 0.3307211158242864, + "grad_norm": 0.09715892374515533, + "learning_rate": 4.917472080620086e-05, + "loss": 0.8048, + "step": 1150 + }, + { + "ce_ib": 9.67892074584961, + "ce_orig": 1.3727235794067383, + "epoch": 0.3307211158242864, + "kl_loss": 0.1465449333190918, + "loss_ib": 0.0024333414621651173, + "step": 1150 + }, + { + "ce_ib": 4.275513648986816, + "ce_orig": 0.8803659677505493, + "epoch": 0.3307211158242864, + "kl_loss": 0.08852915465831757, + "loss_ib": 0.0013128429418429732, + "step": 1150 + }, + { + "ce_ib": 8.305662155151367, + "ce_orig": 0.8471624851226807, + "epoch": 0.3307211158242864, + "kl_loss": 0.19541916251182556, + "loss_ib": 0.002784757874906063, + "step": 1150 + }, + { + "ce_ib": 7.027608871459961, + "ce_orig": 0.49434998631477356, + "epoch": 0.3307211158242864, + "kl_loss": 0.13142399489879608, + "loss_ib": 0.002017000922933221, + "step": 1150 + }, + { + "ce_ib": 4.126051902770996, + "ce_orig": 0.47386792302131653, + "epoch": 0.33100869940326405, + "kl_loss": 0.08531898260116577, + "loss_ib": 0.0012657948536798358, + "step": 1151 + }, + { + "ce_ib": 4.111065864562988, + "ce_orig": 0.8229820132255554, + "epoch": 0.33100869940326405, + "kl_loss": 0.09980519860982895, + "loss_ib": 0.0014091585762798786, + "step": 1151 + }, + { + "ce_ib": 10.319543838500977, + "ce_orig": 1.7134897708892822, + "epoch": 0.33100869940326405, + "kl_loss": 0.14278292655944824, + "loss_ib": 0.0024597835727036, + "step": 1151 + }, + { + "ce_ib": 6.541288375854492, + "ce_orig": 1.1026983261108398, + "epoch": 0.33100869940326405, + "kl_loss": 0.12487839162349701, + "loss_ib": 0.0019029126269742846, + "step": 1151 + }, + { + "ce_ib": 8.641268730163574, + "ce_orig": 1.2764012813568115, + "epoch": 0.33129628298224173, + "kl_loss": 0.1464216709136963, + "loss_ib": 0.0023283434566110373, + "step": 1152 + }, + { + "ce_ib": 8.653627395629883, + "ce_orig": 0.9396832585334778, + "epoch": 0.33129628298224173, + "kl_loss": 0.12554971873760223, + "loss_ib": 0.002120859920978546, + "step": 1152 + }, + { + "ce_ib": 6.012033939361572, + "ce_orig": 0.34361323714256287, + "epoch": 0.33129628298224173, + "kl_loss": 0.13796600699424744, + "loss_ib": 0.0019808635115623474, + "step": 1152 + }, + { + "ce_ib": 5.108508586883545, + "ce_orig": 0.748778223991394, + "epoch": 0.33129628298224173, + "kl_loss": 0.10739289224147797, + "loss_ib": 0.001584779703989625, + "step": 1152 + }, + { + "ce_ib": 5.372162342071533, + "ce_orig": 0.6447109580039978, + "epoch": 0.33158386656121935, + "kl_loss": 0.11223471164703369, + "loss_ib": 0.0016595632769167423, + "step": 1153 + }, + { + "ce_ib": 6.9633612632751465, + "ce_orig": 0.930819571018219, + "epoch": 0.33158386656121935, + "kl_loss": 0.126115083694458, + "loss_ib": 0.0019574868492782116, + "step": 1153 + }, + { + "ce_ib": 8.455766677856445, + "ce_orig": 1.0738056898117065, + "epoch": 0.33158386656121935, + "kl_loss": 0.16140224039554596, + "loss_ib": 0.0024595989380031824, + "step": 1153 + }, + { + "ce_ib": 7.3503265380859375, + "ce_orig": 1.1345939636230469, + "epoch": 0.33158386656121935, + "kl_loss": 0.14327988028526306, + "loss_ib": 0.0021678314078599215, + "step": 1153 + }, + { + "ce_ib": 4.839994430541992, + "ce_orig": 0.5257171392440796, + "epoch": 0.331871450140197, + "kl_loss": 0.09663469344377518, + "loss_ib": 0.00145034643355757, + "step": 1154 + }, + { + "ce_ib": 6.527209281921387, + "ce_orig": 0.6023959517478943, + "epoch": 0.331871450140197, + "kl_loss": 0.10009995102882385, + "loss_ib": 0.0016537203919142485, + "step": 1154 + }, + { + "ce_ib": 7.900789260864258, + "ce_orig": 1.0611294507980347, + "epoch": 0.331871450140197, + "kl_loss": 0.14826743304729462, + "loss_ib": 0.0022727532777935266, + "step": 1154 + }, + { + "ce_ib": 4.652851104736328, + "ce_orig": 0.8940951824188232, + "epoch": 0.331871450140197, + "kl_loss": 0.09196630120277405, + "loss_ib": 0.0013849481474608183, + "step": 1154 + }, + { + "epoch": 0.33215903371917466, + "grad_norm": 0.08988802134990692, + "learning_rate": 4.916480363640443e-05, + "loss": 0.8431, + "step": 1155 + }, + { + "ce_ib": 5.224212646484375, + "ce_orig": 0.8306000232696533, + "epoch": 0.33215903371917466, + "kl_loss": 0.09435658156871796, + "loss_ib": 0.0014659870648756623, + "step": 1155 + }, + { + "ce_ib": 4.522388458251953, + "ce_orig": 0.5686253309249878, + "epoch": 0.33215903371917466, + "kl_loss": 0.08125274628400803, + "loss_ib": 0.0012647663243114948, + "step": 1155 + }, + { + "ce_ib": 7.296513080596924, + "ce_orig": 0.76470547914505, + "epoch": 0.33215903371917466, + "kl_loss": 0.1121765673160553, + "loss_ib": 0.0018514168914407492, + "step": 1155 + }, + { + "ce_ib": 5.358783721923828, + "ce_orig": 0.8416937589645386, + "epoch": 0.33215903371917466, + "kl_loss": 0.0656447559595108, + "loss_ib": 0.00119232595898211, + "step": 1155 + }, + { + "ce_ib": 7.572870254516602, + "ce_orig": 1.0910744667053223, + "epoch": 0.3324466172981523, + "kl_loss": 0.09344696253538132, + "loss_ib": 0.0016917565371841192, + "step": 1156 + }, + { + "ce_ib": 6.930093765258789, + "ce_orig": 1.1059174537658691, + "epoch": 0.3324466172981523, + "kl_loss": 0.10829582810401917, + "loss_ib": 0.0017759675392881036, + "step": 1156 + }, + { + "ce_ib": 6.122500896453857, + "ce_orig": 0.8133541941642761, + "epoch": 0.3324466172981523, + "kl_loss": 0.10187964141368866, + "loss_ib": 0.0016310465289279819, + "step": 1156 + }, + { + "ce_ib": 4.905231952667236, + "ce_orig": 0.46612951159477234, + "epoch": 0.3324466172981523, + "kl_loss": 0.09792307019233704, + "loss_ib": 0.0014697537990286946, + "step": 1156 + }, + { + "ce_ib": 4.672698974609375, + "ce_orig": 0.5849744081497192, + "epoch": 0.3327342008771299, + "kl_loss": 0.351125568151474, + "loss_ib": 0.0039785257540643215, + "step": 1157 + }, + { + "ce_ib": 9.564355850219727, + "ce_orig": 1.7009320259094238, + "epoch": 0.3327342008771299, + "kl_loss": 0.09928872436285019, + "loss_ib": 0.0019493227591738105, + "step": 1157 + }, + { + "ce_ib": 4.546131610870361, + "ce_orig": 0.8941453695297241, + "epoch": 0.3327342008771299, + "kl_loss": 0.06103827804327011, + "loss_ib": 0.0010649960022419691, + "step": 1157 + }, + { + "ce_ib": 7.061286926269531, + "ce_orig": 0.9295308589935303, + "epoch": 0.3327342008771299, + "kl_loss": 0.11667195707559586, + "loss_ib": 0.001872848253697157, + "step": 1157 + }, + { + "ce_ib": 9.296477317810059, + "ce_orig": 1.4580363035202026, + "epoch": 0.3330217844561076, + "kl_loss": 0.11636004596948624, + "loss_ib": 0.0020932480692863464, + "step": 1158 + }, + { + "ce_ib": 8.64181900024414, + "ce_orig": 1.3367538452148438, + "epoch": 0.3330217844561076, + "kl_loss": 0.1105409562587738, + "loss_ib": 0.001969591248780489, + "step": 1158 + }, + { + "ce_ib": 3.508538007736206, + "ce_orig": 0.4009856879711151, + "epoch": 0.3330217844561076, + "kl_loss": 0.13424761593341827, + "loss_ib": 0.0016933298902586102, + "step": 1158 + }, + { + "ce_ib": 7.230615139007568, + "ce_orig": 0.8530421257019043, + "epoch": 0.3330217844561076, + "kl_loss": 0.12663200497627258, + "loss_ib": 0.0019893816206604242, + "step": 1158 + }, + { + "ce_ib": 6.42070198059082, + "ce_orig": 0.6392161250114441, + "epoch": 0.3333093680350852, + "kl_loss": 0.12846484780311584, + "loss_ib": 0.0019267186289653182, + "step": 1159 + }, + { + "ce_ib": 9.253157615661621, + "ce_orig": 1.423343300819397, + "epoch": 0.3333093680350852, + "kl_loss": 0.15739576518535614, + "loss_ib": 0.002499273279681802, + "step": 1159 + }, + { + "ce_ib": 8.63932991027832, + "ce_orig": 1.5198041200637817, + "epoch": 0.3333093680350852, + "kl_loss": 0.13295453786849976, + "loss_ib": 0.002193478401750326, + "step": 1159 + }, + { + "ce_ib": 7.621316432952881, + "ce_orig": 0.928875744342804, + "epoch": 0.3333093680350852, + "kl_loss": 0.13527259230613708, + "loss_ib": 0.0021148575469851494, + "step": 1159 + }, + { + "epoch": 0.33359695161406283, + "grad_norm": 0.08410744369029999, + "learning_rate": 4.9154828247987275e-05, + "loss": 0.9149, + "step": 1160 + }, + { + "ce_ib": 5.347615718841553, + "ce_orig": 0.6876154541969299, + "epoch": 0.33359695161406283, + "kl_loss": 0.14489194750785828, + "loss_ib": 0.001983680995181203, + "step": 1160 + }, + { + "ce_ib": 5.55272102355957, + "ce_orig": 0.39458954334259033, + "epoch": 0.33359695161406283, + "kl_loss": 0.1338169127702713, + "loss_ib": 0.001893441192805767, + "step": 1160 + }, + { + "ce_ib": 5.986666679382324, + "ce_orig": 0.9985933899879456, + "epoch": 0.33359695161406283, + "kl_loss": 0.1406264454126358, + "loss_ib": 0.0020049309823662043, + "step": 1160 + }, + { + "ce_ib": 5.156513214111328, + "ce_orig": 0.8448862433433533, + "epoch": 0.33359695161406283, + "kl_loss": 0.12365314364433289, + "loss_ib": 0.0017521826084703207, + "step": 1160 + }, + { + "ce_ib": 2.9134364128112793, + "ce_orig": 0.5574214458465576, + "epoch": 0.33388453519304045, + "kl_loss": 0.05487529933452606, + "loss_ib": 0.0008400966180488467, + "step": 1161 + }, + { + "ce_ib": 5.857978343963623, + "ce_orig": 0.8570786118507385, + "epoch": 0.33388453519304045, + "kl_loss": 0.10978295654058456, + "loss_ib": 0.0016836273716762662, + "step": 1161 + }, + { + "ce_ib": 4.626968860626221, + "ce_orig": 0.6697400212287903, + "epoch": 0.33388453519304045, + "kl_loss": 0.142439603805542, + "loss_ib": 0.0018870928324759007, + "step": 1161 + }, + { + "ce_ib": 6.765016078948975, + "ce_orig": 1.0183452367782593, + "epoch": 0.33388453519304045, + "kl_loss": 0.21222910284996033, + "loss_ib": 0.0027987926732748747, + "step": 1161 + }, + { + "ce_ib": 3.8295748233795166, + "ce_orig": 0.6635098457336426, + "epoch": 0.33417211877201813, + "kl_loss": 0.10773129761219025, + "loss_ib": 0.0014602703740820289, + "step": 1162 + }, + { + "ce_ib": 7.3349928855896, + "ce_orig": 0.8590999841690063, + "epoch": 0.33417211877201813, + "kl_loss": 0.15064631402492523, + "loss_ib": 0.002239962574094534, + "step": 1162 + }, + { + "ce_ib": 7.64417839050293, + "ce_orig": 1.1145079135894775, + "epoch": 0.33417211877201813, + "kl_loss": 0.0957171767950058, + "loss_ib": 0.001721589476801455, + "step": 1162 + }, + { + "ce_ib": 7.106287479400635, + "ce_orig": 0.46655088663101196, + "epoch": 0.33417211877201813, + "kl_loss": 0.11425713449716568, + "loss_ib": 0.0018532000249251723, + "step": 1162 + }, + { + "ce_ib": 5.427411079406738, + "ce_orig": 0.7361214756965637, + "epoch": 0.33445970235099576, + "kl_loss": 0.10274486243724823, + "loss_ib": 0.0015701897209510207, + "step": 1163 + }, + { + "ce_ib": 6.271327018737793, + "ce_orig": 1.0419801473617554, + "epoch": 0.33445970235099576, + "kl_loss": 0.05931294336915016, + "loss_ib": 0.0012202620273455977, + "step": 1163 + }, + { + "ce_ib": 8.937814712524414, + "ce_orig": 1.2333555221557617, + "epoch": 0.33445970235099576, + "kl_loss": 0.07176488637924194, + "loss_ib": 0.0016114303143694997, + "step": 1163 + }, + { + "ce_ib": 5.644059658050537, + "ce_orig": 0.42556285858154297, + "epoch": 0.33445970235099576, + "kl_loss": 0.09541893005371094, + "loss_ib": 0.0015185951488092542, + "step": 1163 + }, + { + "ce_ib": 6.685428619384766, + "ce_orig": 0.4348882734775543, + "epoch": 0.3347472859299734, + "kl_loss": 0.09693093597888947, + "loss_ib": 0.0016378521686419845, + "step": 1164 + }, + { + "ce_ib": 4.863894939422607, + "ce_orig": 0.6920062899589539, + "epoch": 0.3347472859299734, + "kl_loss": 0.10376952588558197, + "loss_ib": 0.001524084829725325, + "step": 1164 + }, + { + "ce_ib": 8.66261100769043, + "ce_orig": 1.1178714036941528, + "epoch": 0.3347472859299734, + "kl_loss": 0.14406812191009521, + "loss_ib": 0.0023069423623383045, + "step": 1164 + }, + { + "ce_ib": 7.869550704956055, + "ce_orig": 1.0147631168365479, + "epoch": 0.3347472859299734, + "kl_loss": 0.11258833855390549, + "loss_ib": 0.0019128384301438928, + "step": 1164 + }, + { + "epoch": 0.33503486950895106, + "grad_norm": 0.09307786077260971, + "learning_rate": 4.9144794664982413e-05, + "loss": 0.8537, + "step": 1165 + }, + { + "ce_ib": 7.7619948387146, + "ce_orig": 1.240858793258667, + "epoch": 0.33503486950895106, + "kl_loss": 0.09256239235401154, + "loss_ib": 0.0017018234357237816, + "step": 1165 + }, + { + "ce_ib": 8.073474884033203, + "ce_orig": 1.4170767068862915, + "epoch": 0.33503486950895106, + "kl_loss": 0.09512190520763397, + "loss_ib": 0.0017585664754733443, + "step": 1165 + }, + { + "ce_ib": 6.9882378578186035, + "ce_orig": 1.0470027923583984, + "epoch": 0.33503486950895106, + "kl_loss": 0.12313065677881241, + "loss_ib": 0.0019301304128021002, + "step": 1165 + }, + { + "ce_ib": 5.528054237365723, + "ce_orig": 0.1656164973974228, + "epoch": 0.33503486950895106, + "kl_loss": 0.19854716956615448, + "loss_ib": 0.002538277069106698, + "step": 1165 + }, + { + "ce_ib": 6.298125743865967, + "ce_orig": 0.44228455424308777, + "epoch": 0.3353224530879287, + "kl_loss": 0.09168311953544617, + "loss_ib": 0.0015466436743736267, + "step": 1166 + }, + { + "ce_ib": 6.714663982391357, + "ce_orig": 0.5558184385299683, + "epoch": 0.3353224530879287, + "kl_loss": 0.1673613041639328, + "loss_ib": 0.0023450793232768774, + "step": 1166 + }, + { + "ce_ib": 4.478449821472168, + "ce_orig": 0.7729062438011169, + "epoch": 0.3353224530879287, + "kl_loss": 0.10390207916498184, + "loss_ib": 0.0014868656871840358, + "step": 1166 + }, + { + "ce_ib": 8.8925142288208, + "ce_orig": 1.3629997968673706, + "epoch": 0.3353224530879287, + "kl_loss": 0.12358202040195465, + "loss_ib": 0.002125071594491601, + "step": 1166 + }, + { + "ce_ib": 6.042492389678955, + "ce_orig": 0.8548057079315186, + "epoch": 0.3356100366669063, + "kl_loss": 0.10940991342067719, + "loss_ib": 0.0016983483219519258, + "step": 1167 + }, + { + "ce_ib": 3.1412997245788574, + "ce_orig": 0.34335675835609436, + "epoch": 0.3356100366669063, + "kl_loss": 0.08861685544252396, + "loss_ib": 0.0012002985458821058, + "step": 1167 + }, + { + "ce_ib": 7.260244846343994, + "ce_orig": 1.2476314306259155, + "epoch": 0.3356100366669063, + "kl_loss": 0.10213663429021835, + "loss_ib": 0.0017473907209932804, + "step": 1167 + }, + { + "ce_ib": 7.48936653137207, + "ce_orig": 1.0032376050949097, + "epoch": 0.3356100366669063, + "kl_loss": 0.1021503359079361, + "loss_ib": 0.0017704400233924389, + "step": 1167 + }, + { + "ce_ib": 6.35796594619751, + "ce_orig": 1.062179446220398, + "epoch": 0.335897620245884, + "kl_loss": 0.2645478844642639, + "loss_ib": 0.003281275276094675, + "step": 1168 + }, + { + "ce_ib": 7.562673568725586, + "ce_orig": 1.0366406440734863, + "epoch": 0.335897620245884, + "kl_loss": 0.13738609850406647, + "loss_ib": 0.0021301282104104757, + "step": 1168 + }, + { + "ce_ib": 7.556421279907227, + "ce_orig": 1.2071533203125, + "epoch": 0.335897620245884, + "kl_loss": 0.10314060747623444, + "loss_ib": 0.0017870481824502349, + "step": 1168 + }, + { + "ce_ib": 4.021859645843506, + "ce_orig": 0.38638952374458313, + "epoch": 0.335897620245884, + "kl_loss": 0.09852783381938934, + "loss_ib": 0.0013874642318114638, + "step": 1168 + }, + { + "ce_ib": 8.36083698272705, + "ce_orig": 0.5689254403114319, + "epoch": 0.3361852038248616, + "kl_loss": 0.19644485414028168, + "loss_ib": 0.0028005321510136127, + "step": 1169 + }, + { + "ce_ib": 4.525207996368408, + "ce_orig": 0.4924004077911377, + "epoch": 0.3361852038248616, + "kl_loss": 0.09402735531330109, + "loss_ib": 0.0013927941909059882, + "step": 1169 + }, + { + "ce_ib": 5.216853618621826, + "ce_orig": 0.7216584086418152, + "epoch": 0.3361852038248616, + "kl_loss": 0.11405150592327118, + "loss_ib": 0.001662200316786766, + "step": 1169 + }, + { + "ce_ib": 5.908849716186523, + "ce_orig": 0.6451670527458191, + "epoch": 0.3361852038248616, + "kl_loss": 0.09382858872413635, + "loss_ib": 0.0015291707823053002, + "step": 1169 + }, + { + "epoch": 0.33647278740383924, + "grad_norm": 0.07818438857793808, + "learning_rate": 4.913470291156308e-05, + "loss": 0.8495, + "step": 1170 + }, + { + "ce_ib": 7.972014904022217, + "ce_orig": 1.0323179960250854, + "epoch": 0.33647278740383924, + "kl_loss": 0.12066707015037537, + "loss_ib": 0.0020038720685988665, + "step": 1170 + }, + { + "ce_ib": 4.814333915710449, + "ce_orig": 1.0412518978118896, + "epoch": 0.33647278740383924, + "kl_loss": 0.09671176224946976, + "loss_ib": 0.0014485509600490332, + "step": 1170 + }, + { + "ce_ib": 7.407196044921875, + "ce_orig": 0.6773426532745361, + "epoch": 0.33647278740383924, + "kl_loss": 0.16202744841575623, + "loss_ib": 0.0023609939962625504, + "step": 1170 + }, + { + "ce_ib": 5.377325534820557, + "ce_orig": 0.7081344723701477, + "epoch": 0.33647278740383924, + "kl_loss": 0.12697473168373108, + "loss_ib": 0.001807479769922793, + "step": 1170 + }, + { + "ce_ib": 4.1743035316467285, + "ce_orig": 0.3675990104675293, + "epoch": 0.33676037098281686, + "kl_loss": 0.07218644767999649, + "loss_ib": 0.001139294821768999, + "step": 1171 + }, + { + "ce_ib": 5.744592666625977, + "ce_orig": 0.6870226263999939, + "epoch": 0.33676037098281686, + "kl_loss": 0.07420578598976135, + "loss_ib": 0.0013165171258151531, + "step": 1171 + }, + { + "ce_ib": 7.887357711791992, + "ce_orig": 1.7738786935806274, + "epoch": 0.33676037098281686, + "kl_loss": 0.11280599236488342, + "loss_ib": 0.001916795619763434, + "step": 1171 + }, + { + "ce_ib": 5.786555290222168, + "ce_orig": 0.8866435289382935, + "epoch": 0.33676037098281686, + "kl_loss": 0.0884263664484024, + "loss_ib": 0.0014629190554842353, + "step": 1171 + }, + { + "ce_ib": 7.102581024169922, + "ce_orig": 1.0043176412582397, + "epoch": 0.33704795456179454, + "kl_loss": 0.16740411520004272, + "loss_ib": 0.0023842991795390844, + "step": 1172 + }, + { + "ce_ib": 4.370709419250488, + "ce_orig": 0.5108112692832947, + "epoch": 0.33704795456179454, + "kl_loss": 0.1324308067560196, + "loss_ib": 0.0017613789532333612, + "step": 1172 + }, + { + "ce_ib": 6.938513278961182, + "ce_orig": 1.0623221397399902, + "epoch": 0.33704795456179454, + "kl_loss": 0.10967092216014862, + "loss_ib": 0.0017905604327097535, + "step": 1172 + }, + { + "ce_ib": 9.058341979980469, + "ce_orig": 0.9803903102874756, + "epoch": 0.33704795456179454, + "kl_loss": 0.12941080331802368, + "loss_ib": 0.0021999420132488012, + "step": 1172 + }, + { + "ce_ib": 6.757338523864746, + "ce_orig": 0.7687369585037231, + "epoch": 0.33733553814077216, + "kl_loss": 0.12288660556077957, + "loss_ib": 0.0019045999506488442, + "step": 1173 + }, + { + "ce_ib": 7.767806529998779, + "ce_orig": 0.9069968461990356, + "epoch": 0.33733553814077216, + "kl_loss": 0.12564904987812042, + "loss_ib": 0.0020332711283117533, + "step": 1173 + }, + { + "ce_ib": 10.300495147705078, + "ce_orig": 1.603036880493164, + "epoch": 0.33733553814077216, + "kl_loss": 0.12140758335590363, + "loss_ib": 0.0022441253531724215, + "step": 1173 + }, + { + "ce_ib": 5.464423656463623, + "ce_orig": 0.5434802770614624, + "epoch": 0.33733553814077216, + "kl_loss": 0.10957443714141846, + "loss_ib": 0.0016421866603195667, + "step": 1173 + }, + { + "ce_ib": 6.049656391143799, + "ce_orig": 0.7471191883087158, + "epoch": 0.3376231217197498, + "kl_loss": 0.14804700016975403, + "loss_ib": 0.0020854356698691845, + "step": 1174 + }, + { + "ce_ib": 7.153292655944824, + "ce_orig": 0.8166505098342896, + "epoch": 0.3376231217197498, + "kl_loss": 0.09506618976593018, + "loss_ib": 0.0016659912653267384, + "step": 1174 + }, + { + "ce_ib": 4.490015506744385, + "ce_orig": 0.6768890619277954, + "epoch": 0.3376231217197498, + "kl_loss": 0.15018723905086517, + "loss_ib": 0.0019508738769218326, + "step": 1174 + }, + { + "ce_ib": 7.776271343231201, + "ce_orig": 1.1005843877792358, + "epoch": 0.3376231217197498, + "kl_loss": 0.14981666207313538, + "loss_ib": 0.002275793580338359, + "step": 1174 + }, + { + "epoch": 0.33791070529872747, + "grad_norm": 0.08677754551172256, + "learning_rate": 4.912455301204264e-05, + "loss": 0.9106, + "step": 1175 + }, + { + "ce_ib": 6.626636505126953, + "ce_orig": 0.7961812615394592, + "epoch": 0.33791070529872747, + "kl_loss": 0.14855235815048218, + "loss_ib": 0.002148187020793557, + "step": 1175 + }, + { + "ce_ib": 5.3702616691589355, + "ce_orig": 0.35888469219207764, + "epoch": 0.33791070529872747, + "kl_loss": 0.056084148585796356, + "loss_ib": 0.0010978676145896316, + "step": 1175 + }, + { + "ce_ib": 9.21849536895752, + "ce_orig": 1.443953275680542, + "epoch": 0.33791070529872747, + "kl_loss": 0.11867404729127884, + "loss_ib": 0.002108589978888631, + "step": 1175 + }, + { + "ce_ib": 7.601804256439209, + "ce_orig": 1.0470943450927734, + "epoch": 0.33791070529872747, + "kl_loss": 0.18766771256923676, + "loss_ib": 0.0026368575636297464, + "step": 1175 + }, + { + "ce_ib": 4.885053634643555, + "ce_orig": 0.8753991723060608, + "epoch": 0.3381982888777051, + "kl_loss": 0.09170867502689362, + "loss_ib": 0.0014055920764803886, + "step": 1176 + }, + { + "ce_ib": 6.546106815338135, + "ce_orig": 0.5040268898010254, + "epoch": 0.3381982888777051, + "kl_loss": 0.15207909047603607, + "loss_ib": 0.0021754016634076834, + "step": 1176 + }, + { + "ce_ib": 7.3167853355407715, + "ce_orig": 1.3194838762283325, + "epoch": 0.3381982888777051, + "kl_loss": 0.07922293990850449, + "loss_ib": 0.0015239078784361482, + "step": 1176 + }, + { + "ce_ib": 9.338371276855469, + "ce_orig": 1.1496158838272095, + "epoch": 0.3381982888777051, + "kl_loss": 0.10646973550319672, + "loss_ib": 0.0019985344260931015, + "step": 1176 + }, + { + "ce_ib": 4.440505504608154, + "ce_orig": 0.5622341632843018, + "epoch": 0.3384858724566827, + "kl_loss": 0.0706862211227417, + "loss_ib": 0.0011509127216413617, + "step": 1177 + }, + { + "ce_ib": 4.418371200561523, + "ce_orig": 0.6522874236106873, + "epoch": 0.3384858724566827, + "kl_loss": 0.09112890809774399, + "loss_ib": 0.0013531261356547475, + "step": 1177 + }, + { + "ce_ib": 6.717861652374268, + "ce_orig": 0.743884265422821, + "epoch": 0.3384858724566827, + "kl_loss": 0.19413869082927704, + "loss_ib": 0.0026131730992347, + "step": 1177 + }, + { + "ce_ib": 4.397217273712158, + "ce_orig": 0.32822972536087036, + "epoch": 0.3384858724566827, + "kl_loss": 0.09809207916259766, + "loss_ib": 0.0014206423657014966, + "step": 1177 + }, + { + "ce_ib": 4.6463751792907715, + "ce_orig": 0.5614964962005615, + "epoch": 0.33877345603566034, + "kl_loss": 0.10629543662071228, + "loss_ib": 0.0015275919577106833, + "step": 1178 + }, + { + "ce_ib": 10.708198547363281, + "ce_orig": 2.025780439376831, + "epoch": 0.33877345603566034, + "kl_loss": 0.148756206035614, + "loss_ib": 0.002558381762355566, + "step": 1178 + }, + { + "ce_ib": 7.28439998626709, + "ce_orig": 0.9603209495544434, + "epoch": 0.33877345603566034, + "kl_loss": 0.07741572707891464, + "loss_ib": 0.0015025973552837968, + "step": 1178 + }, + { + "ce_ib": 5.248563289642334, + "ce_orig": 0.4768196642398834, + "epoch": 0.33877345603566034, + "kl_loss": 0.12366919219493866, + "loss_ib": 0.0017615482211112976, + "step": 1178 + }, + { + "ce_ib": 7.977797031402588, + "ce_orig": 1.0344756841659546, + "epoch": 0.339061039614638, + "kl_loss": 0.11780491471290588, + "loss_ib": 0.001975828781723976, + "step": 1179 + }, + { + "ce_ib": 7.562273979187012, + "ce_orig": 0.921010434627533, + "epoch": 0.339061039614638, + "kl_loss": 0.13255661725997925, + "loss_ib": 0.002081793500110507, + "step": 1179 + }, + { + "ce_ib": 4.792715072631836, + "ce_orig": 0.789234459400177, + "epoch": 0.339061039614638, + "kl_loss": 0.08384158462285995, + "loss_ib": 0.0013176873326301575, + "step": 1179 + }, + { + "ce_ib": 6.483942985534668, + "ce_orig": 1.0698708295822144, + "epoch": 0.339061039614638, + "kl_loss": 0.10677614063024521, + "loss_ib": 0.001716155675239861, + "step": 1179 + }, + { + "epoch": 0.33934862319361564, + "grad_norm": 0.09814277291297913, + "learning_rate": 4.911434499087457e-05, + "loss": 0.8816, + "step": 1180 + }, + { + "ce_ib": 6.803238868713379, + "ce_orig": 0.81837397813797, + "epoch": 0.33934862319361564, + "kl_loss": 0.15313437581062317, + "loss_ib": 0.0022116675972938538, + "step": 1180 + }, + { + "ce_ib": 4.936272144317627, + "ce_orig": 0.6542596817016602, + "epoch": 0.33934862319361564, + "kl_loss": 0.11818765103816986, + "loss_ib": 0.001675503677688539, + "step": 1180 + }, + { + "ce_ib": 6.393680572509766, + "ce_orig": 0.8188475966453552, + "epoch": 0.33934862319361564, + "kl_loss": 0.12543442845344543, + "loss_ib": 0.0018937122076749802, + "step": 1180 + }, + { + "ce_ib": 4.207862854003906, + "ce_orig": 0.602934718132019, + "epoch": 0.33934862319361564, + "kl_loss": 0.13584521412849426, + "loss_ib": 0.0017792383441701531, + "step": 1180 + }, + { + "ce_ib": 7.836092472076416, + "ce_orig": 1.3347225189208984, + "epoch": 0.33963620677259326, + "kl_loss": 0.1234862357378006, + "loss_ib": 0.0020184717141091824, + "step": 1181 + }, + { + "ce_ib": 5.5221757888793945, + "ce_orig": 0.4668470323085785, + "epoch": 0.33963620677259326, + "kl_loss": 0.1307273656129837, + "loss_ib": 0.0018594911089166999, + "step": 1181 + }, + { + "ce_ib": 8.858410835266113, + "ce_orig": 1.2801547050476074, + "epoch": 0.33963620677259326, + "kl_loss": 0.14932915568351746, + "loss_ib": 0.0023791324347257614, + "step": 1181 + }, + { + "ce_ib": 4.417026996612549, + "ce_orig": 0.6737905144691467, + "epoch": 0.33963620677259326, + "kl_loss": 0.08919131010770798, + "loss_ib": 0.001333615742623806, + "step": 1181 + }, + { + "ce_ib": 5.903581619262695, + "ce_orig": 0.5304061770439148, + "epoch": 0.33992379035157094, + "kl_loss": 0.09965941309928894, + "loss_ib": 0.0015869521303102374, + "step": 1182 + }, + { + "ce_ib": 9.489951133728027, + "ce_orig": 1.0668960809707642, + "epoch": 0.33992379035157094, + "kl_loss": 0.13894206285476685, + "loss_ib": 0.0023384157102555037, + "step": 1182 + }, + { + "ce_ib": 8.417776107788086, + "ce_orig": 1.4453734159469604, + "epoch": 0.33992379035157094, + "kl_loss": 0.15153531730175018, + "loss_ib": 0.0023571306373924017, + "step": 1182 + }, + { + "ce_ib": 9.380576133728027, + "ce_orig": 1.4487861394882202, + "epoch": 0.33992379035157094, + "kl_loss": 0.07837877422571182, + "loss_ib": 0.0017218452412635088, + "step": 1182 + }, + { + "ce_ib": 5.527102470397949, + "ce_orig": 0.7645937204360962, + "epoch": 0.34021137393054857, + "kl_loss": 0.3171248435974121, + "loss_ib": 0.0037239587400108576, + "step": 1183 + }, + { + "ce_ib": 5.285163879394531, + "ce_orig": 0.40946123003959656, + "epoch": 0.34021137393054857, + "kl_loss": 0.11538106203079224, + "loss_ib": 0.0016823268961161375, + "step": 1183 + }, + { + "ce_ib": 6.348681449890137, + "ce_orig": 1.2779086828231812, + "epoch": 0.34021137393054857, + "kl_loss": 0.11431214213371277, + "loss_ib": 0.0017779895570129156, + "step": 1183 + }, + { + "ce_ib": 7.705173969268799, + "ce_orig": 1.1170061826705933, + "epoch": 0.34021137393054857, + "kl_loss": 0.12195007503032684, + "loss_ib": 0.001990018179640174, + "step": 1183 + }, + { + "ce_ib": 6.010852813720703, + "ce_orig": 0.9313013553619385, + "epoch": 0.3404989575095262, + "kl_loss": 0.10329874604940414, + "loss_ib": 0.0016340726288035512, + "step": 1184 + }, + { + "ce_ib": 6.481108665466309, + "ce_orig": 0.8383347392082214, + "epoch": 0.3404989575095262, + "kl_loss": 0.12615548074245453, + "loss_ib": 0.0019096657633781433, + "step": 1184 + }, + { + "ce_ib": 5.928338527679443, + "ce_orig": 0.5312626361846924, + "epoch": 0.3404989575095262, + "kl_loss": 0.16932004690170288, + "loss_ib": 0.002286034170538187, + "step": 1184 + }, + { + "ce_ib": 6.648979187011719, + "ce_orig": 1.1185437440872192, + "epoch": 0.3404989575095262, + "kl_loss": 0.12974053621292114, + "loss_ib": 0.0019623031839728355, + "step": 1184 + }, + { + "epoch": 0.34078654108850387, + "grad_norm": 0.09255577623844147, + "learning_rate": 4.9104078872652356e-05, + "loss": 0.8955, + "step": 1185 + }, + { + "ce_ib": 6.759406089782715, + "ce_orig": 0.7852493524551392, + "epoch": 0.34078654108850387, + "kl_loss": 0.10004919767379761, + "loss_ib": 0.0016764324391260743, + "step": 1185 + }, + { + "ce_ib": 5.248275279998779, + "ce_orig": 0.7133815288543701, + "epoch": 0.34078654108850387, + "kl_loss": 0.07941492646932602, + "loss_ib": 0.0013189767487347126, + "step": 1185 + }, + { + "ce_ib": 6.77577543258667, + "ce_orig": 1.3345876932144165, + "epoch": 0.34078654108850387, + "kl_loss": 0.18256714940071106, + "loss_ib": 0.0025032490957528353, + "step": 1185 + }, + { + "ce_ib": 5.814952373504639, + "ce_orig": 0.705886960029602, + "epoch": 0.34078654108850387, + "kl_loss": 0.17423757910728455, + "loss_ib": 0.00232387101277709, + "step": 1185 + }, + { + "ce_ib": 7.930660247802734, + "ce_orig": 0.6901426911354065, + "epoch": 0.3410741246674815, + "kl_loss": 0.121292844414711, + "loss_ib": 0.0020059943199157715, + "step": 1186 + }, + { + "ce_ib": 7.607260227203369, + "ce_orig": 0.8918116092681885, + "epoch": 0.3410741246674815, + "kl_loss": 0.12813860177993774, + "loss_ib": 0.0020421119406819344, + "step": 1186 + }, + { + "ce_ib": 4.995139122009277, + "ce_orig": 0.812433123588562, + "epoch": 0.3410741246674815, + "kl_loss": 0.08000831305980682, + "loss_ib": 0.001299596973694861, + "step": 1186 + }, + { + "ce_ib": 7.650750160217285, + "ce_orig": 1.2010334730148315, + "epoch": 0.3410741246674815, + "kl_loss": 0.12894907593727112, + "loss_ib": 0.002054565818980336, + "step": 1186 + }, + { + "ce_ib": 8.260749816894531, + "ce_orig": 1.1590336561203003, + "epoch": 0.3413617082464591, + "kl_loss": 0.10733570158481598, + "loss_ib": 0.0018994319252669811, + "step": 1187 + }, + { + "ce_ib": 2.059155225753784, + "ce_orig": 0.17005686461925507, + "epoch": 0.3413617082464591, + "kl_loss": 0.29403194785118103, + "loss_ib": 0.0031462348997592926, + "step": 1187 + }, + { + "ce_ib": 5.506524085998535, + "ce_orig": 0.7021316885948181, + "epoch": 0.3413617082464591, + "kl_loss": 0.10228325426578522, + "loss_ib": 0.001573484973050654, + "step": 1187 + }, + { + "ce_ib": 7.717523574829102, + "ce_orig": 1.5595512390136719, + "epoch": 0.3413617082464591, + "kl_loss": 0.22363264858722687, + "loss_ib": 0.003008078783750534, + "step": 1187 + }, + { + "ce_ib": 5.724874973297119, + "ce_orig": 0.5891698598861694, + "epoch": 0.34164929182543674, + "kl_loss": 0.27257198095321655, + "loss_ib": 0.0032982071861624718, + "step": 1188 + }, + { + "ce_ib": 6.959388732910156, + "ce_orig": 0.865048348903656, + "epoch": 0.34164929182543674, + "kl_loss": 0.1387251764535904, + "loss_ib": 0.002083190716803074, + "step": 1188 + }, + { + "ce_ib": 5.347512722015381, + "ce_orig": 0.6758795976638794, + "epoch": 0.34164929182543674, + "kl_loss": 0.09549650549888611, + "loss_ib": 0.00148971623275429, + "step": 1188 + }, + { + "ce_ib": 5.358241081237793, + "ce_orig": 0.5190110802650452, + "epoch": 0.34164929182543674, + "kl_loss": 0.14641422033309937, + "loss_ib": 0.001999966334551573, + "step": 1188 + }, + { + "ce_ib": 6.069690704345703, + "ce_orig": 0.5385083556175232, + "epoch": 0.3419368754044144, + "kl_loss": 0.14202791452407837, + "loss_ib": 0.0020272480323910713, + "step": 1189 + }, + { + "ce_ib": 4.275310039520264, + "ce_orig": 0.9987708330154419, + "epoch": 0.3419368754044144, + "kl_loss": 0.09483664482831955, + "loss_ib": 0.0013758974382653832, + "step": 1189 + }, + { + "ce_ib": 6.6289262771606445, + "ce_orig": 0.8859413862228394, + "epoch": 0.3419368754044144, + "kl_loss": 0.14262329041957855, + "loss_ib": 0.002089125569909811, + "step": 1189 + }, + { + "ce_ib": 7.901646614074707, + "ce_orig": 1.298345685005188, + "epoch": 0.3419368754044144, + "kl_loss": 0.1411547064781189, + "loss_ib": 0.0022017115261405706, + "step": 1189 + }, + { + "epoch": 0.34222445898339204, + "grad_norm": 0.09175170212984085, + "learning_rate": 4.9093754682109474e-05, + "loss": 0.8886, + "step": 1190 + }, + { + "ce_ib": 8.60566234588623, + "ce_orig": 1.3086930513381958, + "epoch": 0.34222445898339204, + "kl_loss": 0.12472963333129883, + "loss_ib": 0.0021078623831272125, + "step": 1190 + }, + { + "ce_ib": 5.091987133026123, + "ce_orig": 0.3752136528491974, + "epoch": 0.34222445898339204, + "kl_loss": 0.16557569801807404, + "loss_ib": 0.0021649557165801525, + "step": 1190 + }, + { + "ce_ib": 5.3551025390625, + "ce_orig": 0.884888231754303, + "epoch": 0.34222445898339204, + "kl_loss": 0.22206942737102509, + "loss_ib": 0.002756204456090927, + "step": 1190 + }, + { + "ce_ib": 5.993772029876709, + "ce_orig": 0.8438685536384583, + "epoch": 0.34222445898339204, + "kl_loss": 0.11380597203969955, + "loss_ib": 0.001737436861731112, + "step": 1190 + }, + { + "ce_ib": 4.851998329162598, + "ce_orig": 0.5593371987342834, + "epoch": 0.34251204256236967, + "kl_loss": 0.16617000102996826, + "loss_ib": 0.002146899700164795, + "step": 1191 + }, + { + "ce_ib": 5.660830974578857, + "ce_orig": 0.27298951148986816, + "epoch": 0.34251204256236967, + "kl_loss": 0.20350384712219238, + "loss_ib": 0.0026011215522885323, + "step": 1191 + }, + { + "ce_ib": 10.260374069213867, + "ce_orig": 1.6395344734191895, + "epoch": 0.34251204256236967, + "kl_loss": 0.17721888422966003, + "loss_ib": 0.002798226196318865, + "step": 1191 + }, + { + "ce_ib": 7.967227935791016, + "ce_orig": 0.7440553307533264, + "epoch": 0.34251204256236967, + "kl_loss": 0.14830437302589417, + "loss_ib": 0.002279766369611025, + "step": 1191 + }, + { + "ce_ib": 13.623431205749512, + "ce_orig": 0.7643979787826538, + "epoch": 0.34279962614134735, + "kl_loss": 0.3118378520011902, + "loss_ib": 0.004480721428990364, + "step": 1192 + }, + { + "ce_ib": 5.249303817749023, + "ce_orig": 0.8421902060508728, + "epoch": 0.34279962614134735, + "kl_loss": 0.08752257376909256, + "loss_ib": 0.001400155946612358, + "step": 1192 + }, + { + "ce_ib": 3.79552960395813, + "ce_orig": 0.5759400129318237, + "epoch": 0.34279962614134735, + "kl_loss": 0.11850893497467041, + "loss_ib": 0.0015646422980353236, + "step": 1192 + }, + { + "ce_ib": 6.008509635925293, + "ce_orig": 0.8309887647628784, + "epoch": 0.34279962614134735, + "kl_loss": 0.14160123467445374, + "loss_ib": 0.0020168630871921778, + "step": 1192 + }, + { + "ce_ib": 5.897069454193115, + "ce_orig": 0.6842920780181885, + "epoch": 0.34308720972032497, + "kl_loss": 0.15098875761032104, + "loss_ib": 0.00209959433414042, + "step": 1193 + }, + { + "ce_ib": 5.2511305809021, + "ce_orig": 0.5834655165672302, + "epoch": 0.34308720972032497, + "kl_loss": 0.08498527109622955, + "loss_ib": 0.0013749657664448023, + "step": 1193 + }, + { + "ce_ib": 3.6320414543151855, + "ce_orig": 0.3882826268672943, + "epoch": 0.34308720972032497, + "kl_loss": 0.13914722204208374, + "loss_ib": 0.0017546763410791755, + "step": 1193 + }, + { + "ce_ib": 7.605522632598877, + "ce_orig": 1.264225721359253, + "epoch": 0.34308720972032497, + "kl_loss": 0.1367885321378708, + "loss_ib": 0.0021284373942762613, + "step": 1193 + }, + { + "ce_ib": 7.945181846618652, + "ce_orig": 1.2413365840911865, + "epoch": 0.3433747932993026, + "kl_loss": 0.13250866532325745, + "loss_ib": 0.002119604730978608, + "step": 1194 + }, + { + "ce_ib": 5.88801383972168, + "ce_orig": 0.8613604307174683, + "epoch": 0.3433747932993026, + "kl_loss": 0.07940033078193665, + "loss_ib": 0.001382804592140019, + "step": 1194 + }, + { + "ce_ib": 8.324530601501465, + "ce_orig": 1.506277084350586, + "epoch": 0.3433747932993026, + "kl_loss": 0.1109517514705658, + "loss_ib": 0.0019419705495238304, + "step": 1194 + }, + { + "ce_ib": 6.082563400268555, + "ce_orig": 0.7858896255493164, + "epoch": 0.3433747932993026, + "kl_loss": 0.09026671200990677, + "loss_ib": 0.001510923495516181, + "step": 1194 + }, + { + "epoch": 0.3436623768782803, + "grad_norm": 0.09013213962316513, + "learning_rate": 4.908337244411927e-05, + "loss": 0.9225, + "step": 1195 + }, + { + "ce_ib": 5.888381004333496, + "ce_orig": 0.5251995921134949, + "epoch": 0.3436623768782803, + "kl_loss": 0.09415371716022491, + "loss_ib": 0.0015303750988095999, + "step": 1195 + }, + { + "ce_ib": 3.702908754348755, + "ce_orig": 0.6380143165588379, + "epoch": 0.3436623768782803, + "kl_loss": 0.09093981981277466, + "loss_ib": 0.0012796890223398805, + "step": 1195 + }, + { + "ce_ib": 9.148548126220703, + "ce_orig": 0.8597902059555054, + "epoch": 0.3436623768782803, + "kl_loss": 0.12271426618099213, + "loss_ib": 0.0021419974509626627, + "step": 1195 + }, + { + "ce_ib": 5.375368118286133, + "ce_orig": 0.5654981732368469, + "epoch": 0.3436623768782803, + "kl_loss": 0.09991110116243362, + "loss_ib": 0.0015366477891802788, + "step": 1195 + }, + { + "ce_ib": 5.33026647567749, + "ce_orig": 0.785590648651123, + "epoch": 0.3439499604572579, + "kl_loss": 0.10818203538656235, + "loss_ib": 0.0016148469876497984, + "step": 1196 + }, + { + "ce_ib": 6.909395694732666, + "ce_orig": 0.8492533564567566, + "epoch": 0.3439499604572579, + "kl_loss": 0.14566785097122192, + "loss_ib": 0.002147617982700467, + "step": 1196 + }, + { + "ce_ib": 6.664641380310059, + "ce_orig": 0.9143601059913635, + "epoch": 0.3439499604572579, + "kl_loss": 0.0853910744190216, + "loss_ib": 0.0015203747898340225, + "step": 1196 + }, + { + "ce_ib": 4.416428565979004, + "ce_orig": 0.30311673879623413, + "epoch": 0.3439499604572579, + "kl_loss": 0.09974046051502228, + "loss_ib": 0.0014390473952516913, + "step": 1196 + }, + { + "ce_ib": 4.570541858673096, + "ce_orig": 0.5030437111854553, + "epoch": 0.3442375440362355, + "kl_loss": 0.08337417244911194, + "loss_ib": 0.0012907959753647447, + "step": 1197 + }, + { + "ce_ib": 4.714263916015625, + "ce_orig": 0.5458657145500183, + "epoch": 0.3442375440362355, + "kl_loss": 0.07602685689926147, + "loss_ib": 0.0012316949432715774, + "step": 1197 + }, + { + "ce_ib": 5.780970096588135, + "ce_orig": 0.5303636789321899, + "epoch": 0.3442375440362355, + "kl_loss": 0.12981534004211426, + "loss_ib": 0.0018762502586469054, + "step": 1197 + }, + { + "ce_ib": 5.22108793258667, + "ce_orig": 0.7108772993087769, + "epoch": 0.3442375440362355, + "kl_loss": 0.08436115086078644, + "loss_ib": 0.0013657202944159508, + "step": 1197 + }, + { + "ce_ib": 8.01385498046875, + "ce_orig": 1.0963972806930542, + "epoch": 0.34452512761521314, + "kl_loss": 0.13373345136642456, + "loss_ib": 0.002138719893991947, + "step": 1198 + }, + { + "ce_ib": 6.439573287963867, + "ce_orig": 1.1356852054595947, + "epoch": 0.34452512761521314, + "kl_loss": 0.11461237072944641, + "loss_ib": 0.0017900809179991484, + "step": 1198 + }, + { + "ce_ib": 6.095149517059326, + "ce_orig": 0.6653095483779907, + "epoch": 0.34452512761521314, + "kl_loss": 0.1102496087551117, + "loss_ib": 0.0017120110569521785, + "step": 1198 + }, + { + "ce_ib": 8.758551597595215, + "ce_orig": 1.2035597562789917, + "epoch": 0.34452512761521314, + "kl_loss": 0.1343771070241928, + "loss_ib": 0.0022196262143552303, + "step": 1198 + }, + { + "ce_ib": 6.242119789123535, + "ce_orig": 0.6952025294303894, + "epoch": 0.3448127111941908, + "kl_loss": 0.09257640689611435, + "loss_ib": 0.0015499759465456009, + "step": 1199 + }, + { + "ce_ib": 6.205290794372559, + "ce_orig": 0.7195900678634644, + "epoch": 0.3448127111941908, + "kl_loss": 0.19770804047584534, + "loss_ib": 0.0025976093020290136, + "step": 1199 + }, + { + "ce_ib": 7.515991687774658, + "ce_orig": 0.9152024388313293, + "epoch": 0.3448127111941908, + "kl_loss": 0.0929107517004013, + "loss_ib": 0.001680706744082272, + "step": 1199 + }, + { + "ce_ib": 6.326588153839111, + "ce_orig": 0.9795824885368347, + "epoch": 0.3448127111941908, + "kl_loss": 0.13213500380516052, + "loss_ib": 0.00195400882512331, + "step": 1199 + }, + { + "epoch": 0.34510029477316845, + "grad_norm": 0.09391340613365173, + "learning_rate": 4.907293218369499e-05, + "loss": 0.8149, + "step": 1200 + }, + { + "ce_ib": 2.401818037033081, + "ce_orig": 0.34821420907974243, + "epoch": 0.34510029477316845, + "kl_loss": 0.2819208800792694, + "loss_ib": 0.003059390466660261, + "step": 1200 + }, + { + "ce_ib": 3.6997857093811035, + "ce_orig": 0.4937102794647217, + "epoch": 0.34510029477316845, + "kl_loss": 0.07171545922756195, + "loss_ib": 0.0010871330741792917, + "step": 1200 + }, + { + "ce_ib": 2.2247281074523926, + "ce_orig": 0.21438711881637573, + "epoch": 0.34510029477316845, + "kl_loss": 0.10463137924671173, + "loss_ib": 0.0012687866110354662, + "step": 1200 + }, + { + "ce_ib": 3.6803174018859863, + "ce_orig": 0.5729905366897583, + "epoch": 0.34510029477316845, + "kl_loss": 0.06283712387084961, + "loss_ib": 0.0009964029304683208, + "step": 1200 + }, + { + "ce_ib": 6.305063247680664, + "ce_orig": 0.8172087073326111, + "epoch": 0.34538787835214607, + "kl_loss": 0.13369494676589966, + "loss_ib": 0.0019674557261168957, + "step": 1201 + }, + { + "ce_ib": 5.51954984664917, + "ce_orig": 1.072908878326416, + "epoch": 0.34538787835214607, + "kl_loss": 0.07258590310811996, + "loss_ib": 0.001277813920751214, + "step": 1201 + }, + { + "ce_ib": 8.108406066894531, + "ce_orig": 1.3059308528900146, + "epoch": 0.34538787835214607, + "kl_loss": 0.1217356026172638, + "loss_ib": 0.002028196584433317, + "step": 1201 + }, + { + "ce_ib": 7.712138652801514, + "ce_orig": 0.9945723414421082, + "epoch": 0.34538787835214607, + "kl_loss": 0.08213340491056442, + "loss_ib": 0.0015925478655844927, + "step": 1201 + }, + { + "ce_ib": 8.866911888122559, + "ce_orig": 0.5718355774879456, + "epoch": 0.34567546193112375, + "kl_loss": 0.17121396958827972, + "loss_ib": 0.002598830731585622, + "step": 1202 + }, + { + "ce_ib": 6.687133312225342, + "ce_orig": 1.0171817541122437, + "epoch": 0.34567546193112375, + "kl_loss": 0.1398601233959198, + "loss_ib": 0.002067314460873604, + "step": 1202 + }, + { + "ce_ib": 5.847391128540039, + "ce_orig": 0.6118772625923157, + "epoch": 0.34567546193112375, + "kl_loss": 0.18488171696662903, + "loss_ib": 0.00243355636484921, + "step": 1202 + }, + { + "ce_ib": 5.880655765533447, + "ce_orig": 0.7155951261520386, + "epoch": 0.34567546193112375, + "kl_loss": 0.1624474674463272, + "loss_ib": 0.0022125402465462685, + "step": 1202 + }, + { + "ce_ib": 3.520111083984375, + "ce_orig": 0.8310386538505554, + "epoch": 0.3459630455101014, + "kl_loss": 0.08313309401273727, + "loss_ib": 0.0011833419557660818, + "step": 1203 + }, + { + "ce_ib": 3.7372233867645264, + "ce_orig": 0.7661170959472656, + "epoch": 0.3459630455101014, + "kl_loss": 0.07800716161727905, + "loss_ib": 0.0011537938844412565, + "step": 1203 + }, + { + "ce_ib": 9.536810874938965, + "ce_orig": 1.6414639949798584, + "epoch": 0.3459630455101014, + "kl_loss": 0.12650559842586517, + "loss_ib": 0.002218737034127116, + "step": 1203 + }, + { + "ce_ib": 8.819865226745605, + "ce_orig": 1.2084544897079468, + "epoch": 0.3459630455101014, + "kl_loss": 0.12717044353485107, + "loss_ib": 0.002153690904378891, + "step": 1203 + }, + { + "ce_ib": 6.483260631561279, + "ce_orig": 0.8722451329231262, + "epoch": 0.346250629089079, + "kl_loss": 0.12065816670656204, + "loss_ib": 0.0018549077212810516, + "step": 1204 + }, + { + "ce_ib": 8.621737480163574, + "ce_orig": 1.4052873849868774, + "epoch": 0.346250629089079, + "kl_loss": 0.1244899183511734, + "loss_ib": 0.0021070728544145823, + "step": 1204 + }, + { + "ce_ib": 7.135779857635498, + "ce_orig": 0.940719723701477, + "epoch": 0.346250629089079, + "kl_loss": 0.1017652228474617, + "loss_ib": 0.001731230178847909, + "step": 1204 + }, + { + "ce_ib": 5.0328803062438965, + "ce_orig": 0.23511050641536713, + "epoch": 0.346250629089079, + "kl_loss": 0.06364941596984863, + "loss_ib": 0.0011397822527214885, + "step": 1204 + }, + { + "epoch": 0.3465382126680567, + "grad_norm": 0.08434471487998962, + "learning_rate": 4.906243392598962e-05, + "loss": 0.8444, + "step": 1205 + }, + { + "ce_ib": 5.679015636444092, + "ce_orig": 0.5664239525794983, + "epoch": 0.3465382126680567, + "kl_loss": 0.0821281373500824, + "loss_ib": 0.0013891828712075949, + "step": 1205 + }, + { + "ce_ib": 9.240987777709961, + "ce_orig": 1.2426046133041382, + "epoch": 0.3465382126680567, + "kl_loss": 0.130295991897583, + "loss_ib": 0.002227058634161949, + "step": 1205 + }, + { + "ce_ib": 6.624649524688721, + "ce_orig": 1.208556056022644, + "epoch": 0.3465382126680567, + "kl_loss": 0.06771036982536316, + "loss_ib": 0.0013395686401054263, + "step": 1205 + }, + { + "ce_ib": 7.402683258056641, + "ce_orig": 0.836148202419281, + "epoch": 0.3465382126680567, + "kl_loss": 0.17013391852378845, + "loss_ib": 0.002441607415676117, + "step": 1205 + }, + { + "ce_ib": 9.6786527633667, + "ce_orig": 1.5588017702102661, + "epoch": 0.3468257962470343, + "kl_loss": 0.12188644707202911, + "loss_ib": 0.0021867298055440187, + "step": 1206 + }, + { + "ce_ib": 6.250092506408691, + "ce_orig": 0.7235032320022583, + "epoch": 0.3468257962470343, + "kl_loss": 0.10161735117435455, + "loss_ib": 0.0016411826945841312, + "step": 1206 + }, + { + "ce_ib": 5.281203746795654, + "ce_orig": 0.6332097053527832, + "epoch": 0.3468257962470343, + "kl_loss": 0.1234881728887558, + "loss_ib": 0.0017630021320655942, + "step": 1206 + }, + { + "ce_ib": 6.397225856781006, + "ce_orig": 0.7792978286743164, + "epoch": 0.3468257962470343, + "kl_loss": 0.12359906733036041, + "loss_ib": 0.001875713118351996, + "step": 1206 + }, + { + "ce_ib": 5.468921661376953, + "ce_orig": 0.6424912214279175, + "epoch": 0.3471133798260119, + "kl_loss": 0.1228179931640625, + "loss_ib": 0.0017750720726326108, + "step": 1207 + }, + { + "ce_ib": 5.05369758605957, + "ce_orig": 0.4094921946525574, + "epoch": 0.3471133798260119, + "kl_loss": 0.10941329598426819, + "loss_ib": 0.0015995026333257556, + "step": 1207 + }, + { + "ce_ib": 7.615618705749512, + "ce_orig": 0.823706865310669, + "epoch": 0.3471133798260119, + "kl_loss": 0.11898506432771683, + "loss_ib": 0.0019514125306159258, + "step": 1207 + }, + { + "ce_ib": 5.8422112464904785, + "ce_orig": 0.4948480427265167, + "epoch": 0.3471133798260119, + "kl_loss": 0.0846252590417862, + "loss_ib": 0.0014304736396297812, + "step": 1207 + }, + { + "ce_ib": 8.193414688110352, + "ce_orig": 0.8717488646507263, + "epoch": 0.34740096340498955, + "kl_loss": 0.10458209365606308, + "loss_ib": 0.0018651623977348208, + "step": 1208 + }, + { + "ce_ib": 6.99772834777832, + "ce_orig": 0.9259209632873535, + "epoch": 0.34740096340498955, + "kl_loss": 0.12178865075111389, + "loss_ib": 0.001917659305036068, + "step": 1208 + }, + { + "ce_ib": 7.931736946105957, + "ce_orig": 1.271628975868225, + "epoch": 0.34740096340498955, + "kl_loss": 0.08694090694189072, + "loss_ib": 0.0016625827411189675, + "step": 1208 + }, + { + "ce_ib": 5.559422969818115, + "ce_orig": 0.6151393055915833, + "epoch": 0.34740096340498955, + "kl_loss": 0.07467767596244812, + "loss_ib": 0.0013027191162109375, + "step": 1208 + }, + { + "ce_ib": 3.7163591384887695, + "ce_orig": 0.48353731632232666, + "epoch": 0.34768854698396723, + "kl_loss": 0.11771485209465027, + "loss_ib": 0.0015487843193113804, + "step": 1209 + }, + { + "ce_ib": 8.291979789733887, + "ce_orig": 1.2653006315231323, + "epoch": 0.34768854698396723, + "kl_loss": 0.12108492106199265, + "loss_ib": 0.002040047198534012, + "step": 1209 + }, + { + "ce_ib": 7.506729602813721, + "ce_orig": 0.45977285504341125, + "epoch": 0.34768854698396723, + "kl_loss": 0.10264462232589722, + "loss_ib": 0.0017771191196516156, + "step": 1209 + }, + { + "ce_ib": 8.623528480529785, + "ce_orig": 1.3400567770004272, + "epoch": 0.34768854698396723, + "kl_loss": 0.13698822259902954, + "loss_ib": 0.002232234925031662, + "step": 1209 + }, + { + "epoch": 0.34797613056294485, + "grad_norm": 0.0997847318649292, + "learning_rate": 4.905187769629592e-05, + "loss": 0.8703, + "step": 1210 + }, + { + "ce_ib": 6.054376125335693, + "ce_orig": 0.675933301448822, + "epoch": 0.34797613056294485, + "kl_loss": 0.1163405105471611, + "loss_ib": 0.0017688425723463297, + "step": 1210 + }, + { + "ce_ib": 5.936535835266113, + "ce_orig": 0.8529510498046875, + "epoch": 0.34797613056294485, + "kl_loss": 0.08462969213724136, + "loss_ib": 0.0014399504289031029, + "step": 1210 + }, + { + "ce_ib": 5.637547492980957, + "ce_orig": 0.47358766198158264, + "epoch": 0.34797613056294485, + "kl_loss": 0.12910223007202148, + "loss_ib": 0.00185477698687464, + "step": 1210 + }, + { + "ce_ib": 7.844357490539551, + "ce_orig": 1.2808501720428467, + "epoch": 0.34797613056294485, + "kl_loss": 0.09163414686918259, + "loss_ib": 0.001700777094811201, + "step": 1210 + }, + { + "ce_ib": 4.482351779937744, + "ce_orig": 0.7072798013687134, + "epoch": 0.3482637141419225, + "kl_loss": 0.08652029931545258, + "loss_ib": 0.0013134380569681525, + "step": 1211 + }, + { + "ce_ib": 8.130898475646973, + "ce_orig": 1.357262372970581, + "epoch": 0.3482637141419225, + "kl_loss": 0.1302678883075714, + "loss_ib": 0.0021157688461244106, + "step": 1211 + }, + { + "ce_ib": 5.954352378845215, + "ce_orig": 0.5915926694869995, + "epoch": 0.3482637141419225, + "kl_loss": 0.19226759672164917, + "loss_ib": 0.002518111141398549, + "step": 1211 + }, + { + "ce_ib": 6.65979528427124, + "ce_orig": 0.9146237969398499, + "epoch": 0.3482637141419225, + "kl_loss": 0.15383297204971313, + "loss_ib": 0.002204309217631817, + "step": 1211 + }, + { + "ce_ib": 4.272495269775391, + "ce_orig": 0.6837877035140991, + "epoch": 0.34855129772090016, + "kl_loss": 0.0908074602484703, + "loss_ib": 0.0013353240210562944, + "step": 1212 + }, + { + "ce_ib": 4.43595552444458, + "ce_orig": 0.7194166779518127, + "epoch": 0.34855129772090016, + "kl_loss": 0.11665447056293488, + "loss_ib": 0.0016101401997730136, + "step": 1212 + }, + { + "ce_ib": 4.019759654998779, + "ce_orig": 0.540570080280304, + "epoch": 0.34855129772090016, + "kl_loss": 0.08123409003019333, + "loss_ib": 0.001214316813275218, + "step": 1212 + }, + { + "ce_ib": 7.188493728637695, + "ce_orig": 1.0330560207366943, + "epoch": 0.34855129772090016, + "kl_loss": 0.07747453451156616, + "loss_ib": 0.0014935946092009544, + "step": 1212 + }, + { + "ce_ib": 4.260969638824463, + "ce_orig": 0.3584219813346863, + "epoch": 0.3488388812998778, + "kl_loss": 0.08526185154914856, + "loss_ib": 0.0012787154410034418, + "step": 1213 + }, + { + "ce_ib": 4.712890148162842, + "ce_orig": 0.8325070142745972, + "epoch": 0.3488388812998778, + "kl_loss": 0.07010379433631897, + "loss_ib": 0.001172326970845461, + "step": 1213 + }, + { + "ce_ib": 6.134801387786865, + "ce_orig": 0.7156159281730652, + "epoch": 0.3488388812998778, + "kl_loss": 0.08078397810459137, + "loss_ib": 0.0014213197864592075, + "step": 1213 + }, + { + "ce_ib": 7.529421329498291, + "ce_orig": 0.9373047947883606, + "epoch": 0.3488388812998778, + "kl_loss": 0.16475126147270203, + "loss_ib": 0.0024004545994102955, + "step": 1213 + }, + { + "ce_ib": 5.597670078277588, + "ce_orig": 0.8782711625099182, + "epoch": 0.3491264648788554, + "kl_loss": 0.0708613395690918, + "loss_ib": 0.001268380437977612, + "step": 1214 + }, + { + "ce_ib": 7.250506401062012, + "ce_orig": 0.8862557411193848, + "epoch": 0.3491264648788554, + "kl_loss": 0.1195152997970581, + "loss_ib": 0.0019202035618945956, + "step": 1214 + }, + { + "ce_ib": 6.910298824310303, + "ce_orig": 1.1094839572906494, + "epoch": 0.3491264648788554, + "kl_loss": 0.11096344888210297, + "loss_ib": 0.0018006643513217568, + "step": 1214 + }, + { + "ce_ib": 7.909558296203613, + "ce_orig": 1.0560638904571533, + "epoch": 0.3491264648788554, + "kl_loss": 0.10399037599563599, + "loss_ib": 0.0018308594590052962, + "step": 1214 + }, + { + "epoch": 0.3494140484578331, + "grad_norm": 0.0876988098025322, + "learning_rate": 4.9041263520046286e-05, + "loss": 0.8914, + "step": 1215 + }, + { + "ce_ib": 7.24706506729126, + "ce_orig": 1.0320097208023071, + "epoch": 0.3494140484578331, + "kl_loss": 0.10548631846904755, + "loss_ib": 0.0017795696621760726, + "step": 1215 + }, + { + "ce_ib": 5.6692304611206055, + "ce_orig": 0.7191046476364136, + "epoch": 0.3494140484578331, + "kl_loss": 0.1607440710067749, + "loss_ib": 0.0021743637043982744, + "step": 1215 + }, + { + "ce_ib": 4.234683513641357, + "ce_orig": 0.5094934701919556, + "epoch": 0.3494140484578331, + "kl_loss": 0.09779174625873566, + "loss_ib": 0.0014013857580721378, + "step": 1215 + }, + { + "ce_ib": 5.873825550079346, + "ce_orig": 0.7927730679512024, + "epoch": 0.3494140484578331, + "kl_loss": 0.2151471972465515, + "loss_ib": 0.0027388546150177717, + "step": 1215 + }, + { + "ce_ib": 4.758713245391846, + "ce_orig": 0.526610255241394, + "epoch": 0.3497016320368107, + "kl_loss": 0.09265977144241333, + "loss_ib": 0.0014024690026417375, + "step": 1216 + }, + { + "ce_ib": 6.536386489868164, + "ce_orig": 0.8642744421958923, + "epoch": 0.3497016320368107, + "kl_loss": 0.12214571237564087, + "loss_ib": 0.0018750956514850259, + "step": 1216 + }, + { + "ce_ib": 6.256294250488281, + "ce_orig": 0.831453800201416, + "epoch": 0.3497016320368107, + "kl_loss": 0.2257319986820221, + "loss_ib": 0.002882949309423566, + "step": 1216 + }, + { + "ce_ib": 5.987385272979736, + "ce_orig": 0.956009566783905, + "epoch": 0.3497016320368107, + "kl_loss": 0.09195905178785324, + "loss_ib": 0.0015183290233835578, + "step": 1216 + }, + { + "ce_ib": 3.4908852577209473, + "ce_orig": 0.4334961473941803, + "epoch": 0.34998921561578833, + "kl_loss": 0.07544069737195969, + "loss_ib": 0.001103495480492711, + "step": 1217 + }, + { + "ce_ib": 5.128939628601074, + "ce_orig": 0.9095044136047363, + "epoch": 0.34998921561578833, + "kl_loss": 0.10558851063251495, + "loss_ib": 0.001568779000081122, + "step": 1217 + }, + { + "ce_ib": 3.9634666442871094, + "ce_orig": 0.8367429971694946, + "epoch": 0.34998921561578833, + "kl_loss": 0.05537325143814087, + "loss_ib": 0.0009500791784375906, + "step": 1217 + }, + { + "ce_ib": 3.037557601928711, + "ce_orig": 0.17842040956020355, + "epoch": 0.34998921561578833, + "kl_loss": 0.1399865448474884, + "loss_ib": 0.0017036211211234331, + "step": 1217 + }, + { + "ce_ib": 5.171171188354492, + "ce_orig": 0.7285898923873901, + "epoch": 0.35027679919476595, + "kl_loss": 0.07729800045490265, + "loss_ib": 0.0012900970177724957, + "step": 1218 + }, + { + "ce_ib": 7.920377254486084, + "ce_orig": 1.0457279682159424, + "epoch": 0.35027679919476595, + "kl_loss": 0.13923516869544983, + "loss_ib": 0.00218438939191401, + "step": 1218 + }, + { + "ce_ib": 2.397667646408081, + "ce_orig": 0.20700818300247192, + "epoch": 0.35027679919476595, + "kl_loss": 0.27611932158470154, + "loss_ib": 0.003000959986820817, + "step": 1218 + }, + { + "ce_ib": 6.343082904815674, + "ce_orig": 0.9448485374450684, + "epoch": 0.35027679919476595, + "kl_loss": 0.1542467325925827, + "loss_ib": 0.002176775597035885, + "step": 1218 + }, + { + "ce_ib": 5.304510593414307, + "ce_orig": 0.5575739741325378, + "epoch": 0.35056438277374363, + "kl_loss": 0.17677997052669525, + "loss_ib": 0.0022982507944107056, + "step": 1219 + }, + { + "ce_ib": 11.7191801071167, + "ce_orig": 1.7555348873138428, + "epoch": 0.35056438277374363, + "kl_loss": 0.1374063789844513, + "loss_ib": 0.0025459816679358482, + "step": 1219 + }, + { + "ce_ib": 6.514389991760254, + "ce_orig": 1.0503309965133667, + "epoch": 0.35056438277374363, + "kl_loss": 0.09809248894453049, + "loss_ib": 0.0016323637682944536, + "step": 1219 + }, + { + "ce_ib": 7.660717487335205, + "ce_orig": 1.3491915464401245, + "epoch": 0.35056438277374363, + "kl_loss": 0.11068889498710632, + "loss_ib": 0.00187296059448272, + "step": 1219 + }, + { + "epoch": 0.35085196635272126, + "grad_norm": 0.09520356357097626, + "learning_rate": 4.903059142281273e-05, + "loss": 0.8888, + "step": 1220 + }, + { + "ce_ib": 5.612930774688721, + "ce_orig": 1.0689162015914917, + "epoch": 0.35085196635272126, + "kl_loss": 0.09737086296081543, + "loss_ib": 0.001535001676529646, + "step": 1220 + }, + { + "ce_ib": 7.617819786071777, + "ce_orig": 0.7557920813560486, + "epoch": 0.35085196635272126, + "kl_loss": 0.10410600900650024, + "loss_ib": 0.0018028420163318515, + "step": 1220 + }, + { + "ce_ib": 5.905435085296631, + "ce_orig": 0.7370005249977112, + "epoch": 0.35085196635272126, + "kl_loss": 0.14456571638584137, + "loss_ib": 0.0020362006034702063, + "step": 1220 + }, + { + "ce_ib": 4.493112087249756, + "ce_orig": 0.5472113490104675, + "epoch": 0.35085196635272126, + "kl_loss": 0.40910571813583374, + "loss_ib": 0.0045403684489429, + "step": 1220 + }, + { + "ce_ib": 4.242155075073242, + "ce_orig": 0.4515751004219055, + "epoch": 0.3511395499316989, + "kl_loss": 0.0820990800857544, + "loss_ib": 0.0012452062219381332, + "step": 1221 + }, + { + "ce_ib": 8.886751174926758, + "ce_orig": 1.421873927116394, + "epoch": 0.3511395499316989, + "kl_loss": 0.11533261835575104, + "loss_ib": 0.002042001113295555, + "step": 1221 + }, + { + "ce_ib": 6.462634563446045, + "ce_orig": 0.9847761392593384, + "epoch": 0.3511395499316989, + "kl_loss": 0.2458612620830536, + "loss_ib": 0.0031048760283738375, + "step": 1221 + }, + { + "ce_ib": 6.834673881530762, + "ce_orig": 0.8003082871437073, + "epoch": 0.3511395499316989, + "kl_loss": 0.1408001333475113, + "loss_ib": 0.0020914687775075436, + "step": 1221 + }, + { + "ce_ib": 4.3196868896484375, + "ce_orig": 0.7693087458610535, + "epoch": 0.35142713351067656, + "kl_loss": 0.05976279079914093, + "loss_ib": 0.0010295965475961566, + "step": 1222 + }, + { + "ce_ib": 9.042092323303223, + "ce_orig": 1.5040372610092163, + "epoch": 0.35142713351067656, + "kl_loss": 0.1564899981021881, + "loss_ib": 0.0024691091384738684, + "step": 1222 + }, + { + "ce_ib": 5.826852798461914, + "ce_orig": 0.7353617548942566, + "epoch": 0.35142713351067656, + "kl_loss": 0.1619482934474945, + "loss_ib": 0.002202168107032776, + "step": 1222 + }, + { + "ce_ib": 5.496313571929932, + "ce_orig": 0.5784755349159241, + "epoch": 0.35142713351067656, + "kl_loss": 0.15220040082931519, + "loss_ib": 0.0020716353319585323, + "step": 1222 + }, + { + "ce_ib": 9.001765251159668, + "ce_orig": 1.6193560361862183, + "epoch": 0.3517147170896542, + "kl_loss": 0.12370666861534119, + "loss_ib": 0.0021372430492192507, + "step": 1223 + }, + { + "ce_ib": 7.717227935791016, + "ce_orig": 1.40751314163208, + "epoch": 0.3517147170896542, + "kl_loss": 0.12144547700881958, + "loss_ib": 0.0019861774053424597, + "step": 1223 + }, + { + "ce_ib": 3.555147409439087, + "ce_orig": 0.661032497882843, + "epoch": 0.3517147170896542, + "kl_loss": 0.09536592662334442, + "loss_ib": 0.0013091739965602756, + "step": 1223 + }, + { + "ce_ib": 6.92410945892334, + "ce_orig": 0.7192381024360657, + "epoch": 0.3517147170896542, + "kl_loss": 0.1294439435005188, + "loss_ib": 0.0019868502859026194, + "step": 1223 + }, + { + "ce_ib": 7.191586494445801, + "ce_orig": 1.0977661609649658, + "epoch": 0.3520023006686318, + "kl_loss": 0.11238360404968262, + "loss_ib": 0.001842994592152536, + "step": 1224 + }, + { + "ce_ib": 4.655055046081543, + "ce_orig": 0.49468541145324707, + "epoch": 0.3520023006686318, + "kl_loss": 0.06912495940923691, + "loss_ib": 0.0011567550245672464, + "step": 1224 + }, + { + "ce_ib": 7.1955413818359375, + "ce_orig": 0.7798792719841003, + "epoch": 0.3520023006686318, + "kl_loss": 0.13946586847305298, + "loss_ib": 0.002114212838932872, + "step": 1224 + }, + { + "ce_ib": 6.654889106750488, + "ce_orig": 0.7581393718719482, + "epoch": 0.3520023006686318, + "kl_loss": 0.1490647792816162, + "loss_ib": 0.0021561365574598312, + "step": 1224 + }, + { + "epoch": 0.3522898842476095, + "grad_norm": 0.09010814130306244, + "learning_rate": 4.9019861430306826e-05, + "loss": 0.8805, + "step": 1225 + }, + { + "ce_ib": 7.411494255065918, + "ce_orig": 1.2153384685516357, + "epoch": 0.3522898842476095, + "kl_loss": 0.15417200326919556, + "loss_ib": 0.0022828695364296436, + "step": 1225 + }, + { + "ce_ib": 8.146852493286133, + "ce_orig": 1.3873924016952515, + "epoch": 0.3522898842476095, + "kl_loss": 0.43568554520606995, + "loss_ib": 0.005171540658921003, + "step": 1225 + }, + { + "ce_ib": 7.1912007331848145, + "ce_orig": 1.285669207572937, + "epoch": 0.3522898842476095, + "kl_loss": 0.10966229438781738, + "loss_ib": 0.0018157429294660687, + "step": 1225 + }, + { + "ce_ib": 8.135693550109863, + "ce_orig": 1.074797511100769, + "epoch": 0.3522898842476095, + "kl_loss": 0.09935620427131653, + "loss_ib": 0.0018071314552798867, + "step": 1225 + }, + { + "ce_ib": 4.371519088745117, + "ce_orig": 0.5803431868553162, + "epoch": 0.3525774678265871, + "kl_loss": 0.077149398624897, + "loss_ib": 0.0012086458737030625, + "step": 1226 + }, + { + "ce_ib": 5.3036208152771, + "ce_orig": 0.7989436388015747, + "epoch": 0.3525774678265871, + "kl_loss": 0.06289440393447876, + "loss_ib": 0.00115930603351444, + "step": 1226 + }, + { + "ce_ib": 6.5967912673950195, + "ce_orig": 0.8013715744018555, + "epoch": 0.3525774678265871, + "kl_loss": 0.12518391013145447, + "loss_ib": 0.0019115182803943753, + "step": 1226 + }, + { + "ce_ib": 8.87879467010498, + "ce_orig": 1.0425429344177246, + "epoch": 0.3525774678265871, + "kl_loss": 0.1504276543855667, + "loss_ib": 0.0023921558167785406, + "step": 1226 + }, + { + "ce_ib": 4.603278160095215, + "ce_orig": 0.5779634118080139, + "epoch": 0.35286505140556473, + "kl_loss": 0.11667022109031677, + "loss_ib": 0.0016270300839096308, + "step": 1227 + }, + { + "ce_ib": 7.099244594573975, + "ce_orig": 1.0896610021591187, + "epoch": 0.35286505140556473, + "kl_loss": 0.0980406403541565, + "loss_ib": 0.0016903307987377048, + "step": 1227 + }, + { + "ce_ib": 6.287186145782471, + "ce_orig": 0.5730931758880615, + "epoch": 0.35286505140556473, + "kl_loss": 0.1500208079814911, + "loss_ib": 0.002128926571458578, + "step": 1227 + }, + { + "ce_ib": 8.6245756149292, + "ce_orig": 1.530518651008606, + "epoch": 0.35286505140556473, + "kl_loss": 0.141280397772789, + "loss_ib": 0.00227526156231761, + "step": 1227 + }, + { + "ce_ib": 3.7510311603546143, + "ce_orig": 0.5379764437675476, + "epoch": 0.35315263498454236, + "kl_loss": 0.09724898636341095, + "loss_ib": 0.0013475929154083133, + "step": 1228 + }, + { + "ce_ib": 8.606772422790527, + "ce_orig": 1.2441927194595337, + "epoch": 0.35315263498454236, + "kl_loss": 0.12222611159086227, + "loss_ib": 0.0020829380955547094, + "step": 1228 + }, + { + "ce_ib": 6.924900054931641, + "ce_orig": 1.4320727586746216, + "epoch": 0.35315263498454236, + "kl_loss": 0.08893856406211853, + "loss_ib": 0.0015818756073713303, + "step": 1228 + }, + { + "ce_ib": 11.847168922424316, + "ce_orig": 2.392171859741211, + "epoch": 0.35315263498454236, + "kl_loss": 0.08733272552490234, + "loss_ib": 0.0020580440759658813, + "step": 1228 + }, + { + "ce_ib": 6.087507724761963, + "ce_orig": 0.901356041431427, + "epoch": 0.35344021856352004, + "kl_loss": 0.1012146919965744, + "loss_ib": 0.0016208975575864315, + "step": 1229 + }, + { + "ce_ib": 3.3782029151916504, + "ce_orig": 0.6678668260574341, + "epoch": 0.35344021856352004, + "kl_loss": 0.0639922171831131, + "loss_ib": 0.000977742369286716, + "step": 1229 + }, + { + "ce_ib": 8.152597427368164, + "ce_orig": 1.268547534942627, + "epoch": 0.35344021856352004, + "kl_loss": 0.11389666050672531, + "loss_ib": 0.001954226288944483, + "step": 1229 + }, + { + "ce_ib": 7.217771053314209, + "ce_orig": 1.30826997756958, + "epoch": 0.35344021856352004, + "kl_loss": 0.09987150877714157, + "loss_ib": 0.0017204922623932362, + "step": 1229 + }, + { + "epoch": 0.35372780214249766, + "grad_norm": 0.13646991550922394, + "learning_rate": 4.900907356837961e-05, + "loss": 0.8732, + "step": 1230 + }, + { + "ce_ib": 8.69139575958252, + "ce_orig": 1.6489487886428833, + "epoch": 0.35372780214249766, + "kl_loss": 0.09994374215602875, + "loss_ib": 0.0018685769755393267, + "step": 1230 + }, + { + "ce_ib": 6.952277660369873, + "ce_orig": 1.0789859294891357, + "epoch": 0.35372780214249766, + "kl_loss": 0.08018951863050461, + "loss_ib": 0.0014971229247748852, + "step": 1230 + }, + { + "ce_ib": 6.629817485809326, + "ce_orig": 0.8269151449203491, + "epoch": 0.35372780214249766, + "kl_loss": 0.07821352034807205, + "loss_ib": 0.0014451169408857822, + "step": 1230 + }, + { + "ce_ib": 10.143780708312988, + "ce_orig": 1.7797720432281494, + "epoch": 0.35372780214249766, + "kl_loss": 0.1310126781463623, + "loss_ib": 0.002324504777789116, + "step": 1230 + }, + { + "ce_ib": 6.090595722198486, + "ce_orig": 1.0558695793151855, + "epoch": 0.3540153857214753, + "kl_loss": 0.10666792094707489, + "loss_ib": 0.0016757386038079858, + "step": 1231 + }, + { + "ce_ib": 6.059688091278076, + "ce_orig": 0.46506354212760925, + "epoch": 0.3540153857214753, + "kl_loss": 0.1932515949010849, + "loss_ib": 0.0025384845212101936, + "step": 1231 + }, + { + "ce_ib": 7.274968147277832, + "ce_orig": 0.6159886717796326, + "epoch": 0.3540153857214753, + "kl_loss": 0.08063336461782455, + "loss_ib": 0.0015338304219767451, + "step": 1231 + }, + { + "ce_ib": 6.708252906799316, + "ce_orig": 1.0086669921875, + "epoch": 0.3540153857214753, + "kl_loss": 0.07794315367937088, + "loss_ib": 0.0014502566773444414, + "step": 1231 + }, + { + "ce_ib": 3.0718331336975098, + "ce_orig": 0.4599364697933197, + "epoch": 0.35430296930045296, + "kl_loss": 0.08010546863079071, + "loss_ib": 0.001108237891457975, + "step": 1232 + }, + { + "ce_ib": 8.709245681762695, + "ce_orig": 1.4117929935455322, + "epoch": 0.35430296930045296, + "kl_loss": 0.09973221272230148, + "loss_ib": 0.0018682465888559818, + "step": 1232 + }, + { + "ce_ib": 6.9291229248046875, + "ce_orig": 0.7602047920227051, + "epoch": 0.35430296930045296, + "kl_loss": 0.1128532811999321, + "loss_ib": 0.0018214450683444738, + "step": 1232 + }, + { + "ce_ib": 9.679916381835938, + "ce_orig": 1.7955446243286133, + "epoch": 0.35430296930045296, + "kl_loss": 0.1334252804517746, + "loss_ib": 0.0023022443056106567, + "step": 1232 + }, + { + "ce_ib": 3.906406879425049, + "ce_orig": 0.3601726293563843, + "epoch": 0.3545905528794306, + "kl_loss": 0.15083280205726624, + "loss_ib": 0.00189896859228611, + "step": 1233 + }, + { + "ce_ib": 5.880161285400391, + "ce_orig": 0.9220426678657532, + "epoch": 0.3545905528794306, + "kl_loss": 0.12105061113834381, + "loss_ib": 0.0017985220765694976, + "step": 1233 + }, + { + "ce_ib": 5.015464782714844, + "ce_orig": 0.5734464526176453, + "epoch": 0.3545905528794306, + "kl_loss": 0.1499069184064865, + "loss_ib": 0.00200061546638608, + "step": 1233 + }, + { + "ce_ib": 8.755268096923828, + "ce_orig": 0.9812235832214355, + "epoch": 0.3545905528794306, + "kl_loss": 0.10746465623378754, + "loss_ib": 0.001950173289515078, + "step": 1233 + }, + { + "ce_ib": 5.069394111633301, + "ce_orig": 0.8159754276275635, + "epoch": 0.3548781364584082, + "kl_loss": 0.07964402437210083, + "loss_ib": 0.0013033796567469835, + "step": 1234 + }, + { + "ce_ib": 7.249039173126221, + "ce_orig": 0.8200325965881348, + "epoch": 0.3548781364584082, + "kl_loss": 0.13815820217132568, + "loss_ib": 0.002106485888361931, + "step": 1234 + }, + { + "ce_ib": 5.429012298583984, + "ce_orig": 0.533208966255188, + "epoch": 0.3548781364584082, + "kl_loss": 0.11810462921857834, + "loss_ib": 0.0017239474691450596, + "step": 1234 + }, + { + "ce_ib": 5.718509674072266, + "ce_orig": 0.4275233745574951, + "epoch": 0.3548781364584082, + "kl_loss": 0.2195826917886734, + "loss_ib": 0.0027676778845489025, + "step": 1234 + }, + { + "epoch": 0.3551657200373859, + "grad_norm": 0.11201301217079163, + "learning_rate": 4.899822786302154e-05, + "loss": 0.8977, + "step": 1235 + }, + { + "ce_ib": 8.761982917785645, + "ce_orig": 1.4217331409454346, + "epoch": 0.3551657200373859, + "kl_loss": 0.15910804271697998, + "loss_ib": 0.0024672786239534616, + "step": 1235 + }, + { + "ce_ib": 7.589676380157471, + "ce_orig": 0.7130719423294067, + "epoch": 0.3551657200373859, + "kl_loss": 0.10391992330551147, + "loss_ib": 0.0017981668934226036, + "step": 1235 + }, + { + "ce_ib": 7.034261703491211, + "ce_orig": 1.073976993560791, + "epoch": 0.3551657200373859, + "kl_loss": 0.10549747198820114, + "loss_ib": 0.0017584008164703846, + "step": 1235 + }, + { + "ce_ib": 6.313925743103027, + "ce_orig": 0.8495429754257202, + "epoch": 0.3551657200373859, + "kl_loss": 0.13002285361289978, + "loss_ib": 0.001931621110998094, + "step": 1235 + }, + { + "ce_ib": 9.567757606506348, + "ce_orig": 1.6718113422393799, + "epoch": 0.3554533036163635, + "kl_loss": 0.12206310778856277, + "loss_ib": 0.0021774068009108305, + "step": 1236 + }, + { + "ce_ib": 6.441582202911377, + "ce_orig": 0.9020083546638489, + "epoch": 0.3554533036163635, + "kl_loss": 0.1499362736940384, + "loss_ib": 0.00214352086186409, + "step": 1236 + }, + { + "ce_ib": 7.211685657501221, + "ce_orig": 0.8299764394760132, + "epoch": 0.3554533036163635, + "kl_loss": 0.11877041310071945, + "loss_ib": 0.0019088726257905364, + "step": 1236 + }, + { + "ce_ib": 6.965940475463867, + "ce_orig": 1.1821565628051758, + "epoch": 0.3554533036163635, + "kl_loss": 0.25949639081954956, + "loss_ib": 0.0032915580086410046, + "step": 1236 + }, + { + "ce_ib": 6.2936930656433105, + "ce_orig": 1.240807294845581, + "epoch": 0.35574088719534114, + "kl_loss": 0.11591099947690964, + "loss_ib": 0.0017884793924167752, + "step": 1237 + }, + { + "ce_ib": 7.929975986480713, + "ce_orig": 0.9705590605735779, + "epoch": 0.35574088719534114, + "kl_loss": 0.09377407282590866, + "loss_ib": 0.0017307382076978683, + "step": 1237 + }, + { + "ce_ib": 7.808045864105225, + "ce_orig": 0.6784370541572571, + "epoch": 0.35574088719534114, + "kl_loss": 0.1167113184928894, + "loss_ib": 0.0019479177426546812, + "step": 1237 + }, + { + "ce_ib": 5.054869651794434, + "ce_orig": 0.3603076636791229, + "epoch": 0.35574088719534114, + "kl_loss": 0.10277507454156876, + "loss_ib": 0.0015332376351580024, + "step": 1237 + }, + { + "ce_ib": 5.274099826812744, + "ce_orig": 0.7418103218078613, + "epoch": 0.35602847077431876, + "kl_loss": 0.15691673755645752, + "loss_ib": 0.002096577314659953, + "step": 1238 + }, + { + "ce_ib": 7.625789165496826, + "ce_orig": 1.1013562679290771, + "epoch": 0.35602847077431876, + "kl_loss": 0.1414494514465332, + "loss_ib": 0.002177073387429118, + "step": 1238 + }, + { + "ce_ib": 7.515286445617676, + "ce_orig": 0.9005677700042725, + "epoch": 0.35602847077431876, + "kl_loss": 0.11921834945678711, + "loss_ib": 0.0019437120063230395, + "step": 1238 + }, + { + "ce_ib": 4.873547554016113, + "ce_orig": 0.8680614829063416, + "epoch": 0.35602847077431876, + "kl_loss": 0.09163566678762436, + "loss_ib": 0.0014037113869562745, + "step": 1238 + }, + { + "ce_ib": 5.0864763259887695, + "ce_orig": 0.6006460189819336, + "epoch": 0.35631605435329644, + "kl_loss": 0.12252326309680939, + "loss_ib": 0.0017338802572339773, + "step": 1239 + }, + { + "ce_ib": 6.825660705566406, + "ce_orig": 1.0597938299179077, + "epoch": 0.35631605435329644, + "kl_loss": 0.12392257153987885, + "loss_ib": 0.0019217916997149587, + "step": 1239 + }, + { + "ce_ib": 6.830639839172363, + "ce_orig": 0.7523980140686035, + "epoch": 0.35631605435329644, + "kl_loss": 0.18000862002372742, + "loss_ib": 0.002483149990439415, + "step": 1239 + }, + { + "ce_ib": 7.899068355560303, + "ce_orig": 0.7786166071891785, + "epoch": 0.35631605435329644, + "kl_loss": 0.1359153687953949, + "loss_ib": 0.002149060368537903, + "step": 1239 + }, + { + "epoch": 0.35660363793227406, + "grad_norm": 0.11122456192970276, + "learning_rate": 4.898732434036244e-05, + "loss": 0.8958, + "step": 1240 + }, + { + "ce_ib": 6.576417446136475, + "ce_orig": 1.028070330619812, + "epoch": 0.35660363793227406, + "kl_loss": 0.08930405229330063, + "loss_ib": 0.001550682121887803, + "step": 1240 + }, + { + "ce_ib": 4.134368419647217, + "ce_orig": 0.7497093677520752, + "epoch": 0.35660363793227406, + "kl_loss": 0.07175867259502411, + "loss_ib": 0.0011310235131531954, + "step": 1240 + }, + { + "ce_ib": 5.499692440032959, + "ce_orig": 0.8722239136695862, + "epoch": 0.35660363793227406, + "kl_loss": 0.24425196647644043, + "loss_ib": 0.0029924886766821146, + "step": 1240 + }, + { + "ce_ib": 5.032848358154297, + "ce_orig": 0.7244812250137329, + "epoch": 0.35660363793227406, + "kl_loss": 0.11990936845541, + "loss_ib": 0.0017023785039782524, + "step": 1240 + }, + { + "ce_ib": 5.378225803375244, + "ce_orig": 0.744158148765564, + "epoch": 0.3568912215112517, + "kl_loss": 0.09403784573078156, + "loss_ib": 0.0014782010111957788, + "step": 1241 + }, + { + "ce_ib": 8.463665008544922, + "ce_orig": 1.0507760047912598, + "epoch": 0.3568912215112517, + "kl_loss": 0.16647399961948395, + "loss_ib": 0.0025111064314842224, + "step": 1241 + }, + { + "ce_ib": 4.823368549346924, + "ce_orig": 0.5231988430023193, + "epoch": 0.3568912215112517, + "kl_loss": 0.11122848093509674, + "loss_ib": 0.0015946216881275177, + "step": 1241 + }, + { + "ce_ib": 4.179252624511719, + "ce_orig": 0.29963740706443787, + "epoch": 0.3568912215112517, + "kl_loss": 0.11958669126033783, + "loss_ib": 0.0016137921484187245, + "step": 1241 + }, + { + "ce_ib": 3.577817440032959, + "ce_orig": 0.49526020884513855, + "epoch": 0.35717880509022937, + "kl_loss": 0.08421307802200317, + "loss_ib": 0.0011999125126749277, + "step": 1242 + }, + { + "ce_ib": 6.855809688568115, + "ce_orig": 0.7919394373893738, + "epoch": 0.35717880509022937, + "kl_loss": 0.12446132302284241, + "loss_ib": 0.0019301942083984613, + "step": 1242 + }, + { + "ce_ib": 7.013069152832031, + "ce_orig": 1.062800645828247, + "epoch": 0.35717880509022937, + "kl_loss": 0.14006005227565765, + "loss_ib": 0.0021019072737544775, + "step": 1242 + }, + { + "ce_ib": 3.7290921211242676, + "ce_orig": 0.384665846824646, + "epoch": 0.35717880509022937, + "kl_loss": 0.10171931236982346, + "loss_ib": 0.001390102319419384, + "step": 1242 + }, + { + "ce_ib": 7.695999622344971, + "ce_orig": 1.3372113704681396, + "epoch": 0.357466388669207, + "kl_loss": 0.13230851292610168, + "loss_ib": 0.0020926850847899914, + "step": 1243 + }, + { + "ce_ib": 5.93403959274292, + "ce_orig": 0.6903248429298401, + "epoch": 0.357466388669207, + "kl_loss": 0.09127384424209595, + "loss_ib": 0.0015061423182487488, + "step": 1243 + }, + { + "ce_ib": 3.7722675800323486, + "ce_orig": 0.5317506790161133, + "epoch": 0.357466388669207, + "kl_loss": 0.0804959237575531, + "loss_ib": 0.0011821859516203403, + "step": 1243 + }, + { + "ce_ib": 6.712879657745361, + "ce_orig": 0.9313479661941528, + "epoch": 0.357466388669207, + "kl_loss": 0.13765740394592285, + "loss_ib": 0.0020478619262576103, + "step": 1243 + }, + { + "ce_ib": 5.3466033935546875, + "ce_orig": 0.5497527718544006, + "epoch": 0.3577539722481846, + "kl_loss": 0.17618891596794128, + "loss_ib": 0.0022965495008975267, + "step": 1244 + }, + { + "ce_ib": 3.2458081245422363, + "ce_orig": 0.4139023721218109, + "epoch": 0.3577539722481846, + "kl_loss": 0.1082507073879242, + "loss_ib": 0.001407087896950543, + "step": 1244 + }, + { + "ce_ib": 6.442051410675049, + "ce_orig": 0.7312730550765991, + "epoch": 0.3577539722481846, + "kl_loss": 0.22959205508232117, + "loss_ib": 0.0029401257634162903, + "step": 1244 + }, + { + "ce_ib": 7.719517230987549, + "ce_orig": 0.7423241138458252, + "epoch": 0.3577539722481846, + "kl_loss": 0.05368277058005333, + "loss_ib": 0.0013087793486192822, + "step": 1244 + }, + { + "epoch": 0.3580415558271623, + "grad_norm": 0.09787463396787643, + "learning_rate": 4.897636302667142e-05, + "loss": 0.7935, + "step": 1245 + }, + { + "ce_ib": 4.224658012390137, + "ce_orig": 0.7047666311264038, + "epoch": 0.3580415558271623, + "kl_loss": 0.056395336985588074, + "loss_ib": 0.000986419152468443, + "step": 1245 + }, + { + "ce_ib": 10.30202579498291, + "ce_orig": 1.5900565385818481, + "epoch": 0.3580415558271623, + "kl_loss": 0.09452299028635025, + "loss_ib": 0.0019754325039684772, + "step": 1245 + }, + { + "ce_ib": 6.778504848480225, + "ce_orig": 0.9461814761161804, + "epoch": 0.3580415558271623, + "kl_loss": 0.1325336992740631, + "loss_ib": 0.002003187546506524, + "step": 1245 + }, + { + "ce_ib": 3.336076021194458, + "ce_orig": 0.41546472907066345, + "epoch": 0.3580415558271623, + "kl_loss": 0.08192800730466843, + "loss_ib": 0.0011528875911608338, + "step": 1245 + }, + { + "ce_ib": 7.632260322570801, + "ce_orig": 0.6931596398353577, + "epoch": 0.3583291394061399, + "kl_loss": 0.11511077731847763, + "loss_ib": 0.001914333668537438, + "step": 1246 + }, + { + "ce_ib": 7.651729106903076, + "ce_orig": 0.8976790308952332, + "epoch": 0.3583291394061399, + "kl_loss": 0.10843627899885178, + "loss_ib": 0.001849535619840026, + "step": 1246 + }, + { + "ce_ib": 5.129650592803955, + "ce_orig": 0.5657326579093933, + "epoch": 0.3583291394061399, + "kl_loss": 0.18685322999954224, + "loss_ib": 0.0023814972955733538, + "step": 1246 + }, + { + "ce_ib": 5.636063098907471, + "ce_orig": 0.6586743593215942, + "epoch": 0.3583291394061399, + "kl_loss": 0.06884510815143585, + "loss_ib": 0.0012520573800429702, + "step": 1246 + }, + { + "ce_ib": 3.2437944412231445, + "ce_orig": 0.5597171187400818, + "epoch": 0.35861672298511754, + "kl_loss": 0.09059131145477295, + "loss_ib": 0.0012302924878895283, + "step": 1247 + }, + { + "ce_ib": 5.134467601776123, + "ce_orig": 0.5335696339607239, + "epoch": 0.35861672298511754, + "kl_loss": 0.14224691689014435, + "loss_ib": 0.0019359159050509334, + "step": 1247 + }, + { + "ce_ib": 7.443212509155273, + "ce_orig": 0.22147001326084137, + "epoch": 0.35861672298511754, + "kl_loss": 0.07838533818721771, + "loss_ib": 0.0015281744999811053, + "step": 1247 + }, + { + "ce_ib": 3.532418966293335, + "ce_orig": 0.4406964182853699, + "epoch": 0.35861672298511754, + "kl_loss": 0.05399385839700699, + "loss_ib": 0.0008931804914027452, + "step": 1247 + }, + { + "ce_ib": 5.0010881423950195, + "ce_orig": 0.7583406567573547, + "epoch": 0.35890430656409517, + "kl_loss": 0.11531706154346466, + "loss_ib": 0.0016532792942598462, + "step": 1248 + }, + { + "ce_ib": 3.3550872802734375, + "ce_orig": 0.5256078243255615, + "epoch": 0.35890430656409517, + "kl_loss": 0.04886241257190704, + "loss_ib": 0.0008241328177973628, + "step": 1248 + }, + { + "ce_ib": 1.7382667064666748, + "ce_orig": 0.11116386950016022, + "epoch": 0.35890430656409517, + "kl_loss": 0.23131829500198364, + "loss_ib": 0.002487009624019265, + "step": 1248 + }, + { + "ce_ib": 6.645051002502441, + "ce_orig": 1.1898750066757202, + "epoch": 0.35890430656409517, + "kl_loss": 0.084653839468956, + "loss_ib": 0.0015110434032976627, + "step": 1248 + }, + { + "ce_ib": 5.560726642608643, + "ce_orig": 0.7055838704109192, + "epoch": 0.35919189014307285, + "kl_loss": 0.12825129926204681, + "loss_ib": 0.0018385857110843062, + "step": 1249 + }, + { + "ce_ib": 4.6536760330200195, + "ce_orig": 0.4988435208797455, + "epoch": 0.35919189014307285, + "kl_loss": 0.10604903101921082, + "loss_ib": 0.0015258578350767493, + "step": 1249 + }, + { + "ce_ib": 6.926509857177734, + "ce_orig": 0.18700924515724182, + "epoch": 0.35919189014307285, + "kl_loss": 0.22794826328754425, + "loss_ib": 0.0029721336904913187, + "step": 1249 + }, + { + "ce_ib": 9.14644718170166, + "ce_orig": 1.0003561973571777, + "epoch": 0.35919189014307285, + "kl_loss": 0.150540292263031, + "loss_ib": 0.0024200475309044123, + "step": 1249 + }, + { + "epoch": 0.35947947372205047, + "grad_norm": 0.0926978662610054, + "learning_rate": 4.8965343948356846e-05, + "loss": 0.8247, + "step": 1250 + }, + { + "ce_ib": 4.405333042144775, + "ce_orig": 0.620575487613678, + "epoch": 0.35947947372205047, + "kl_loss": 0.07144202291965485, + "loss_ib": 0.0011549534974619746, + "step": 1250 + }, + { + "ce_ib": 10.016497611999512, + "ce_orig": 1.3082729578018188, + "epoch": 0.35947947372205047, + "kl_loss": 0.10396544635295868, + "loss_ib": 0.0020413040183484554, + "step": 1250 + }, + { + "ce_ib": 5.4515485763549805, + "ce_orig": 0.6093593835830688, + "epoch": 0.35947947372205047, + "kl_loss": 0.10453015565872192, + "loss_ib": 0.00159045634791255, + "step": 1250 + }, + { + "ce_ib": 3.653608560562134, + "ce_orig": 0.5887627005577087, + "epoch": 0.35947947372205047, + "kl_loss": 0.13053785264492035, + "loss_ib": 0.00167073926422745, + "step": 1250 + }, + { + "ce_ib": 7.036374092102051, + "ce_orig": 0.972096860408783, + "epoch": 0.3597670573010281, + "kl_loss": 0.12440192699432373, + "loss_ib": 0.0019476565066725016, + "step": 1251 + }, + { + "ce_ib": 5.862993240356445, + "ce_orig": 0.731389582157135, + "epoch": 0.3597670573010281, + "kl_loss": 0.09715241193771362, + "loss_ib": 0.0015578233869746327, + "step": 1251 + }, + { + "ce_ib": 6.658885478973389, + "ce_orig": 0.7261188626289368, + "epoch": 0.3597670573010281, + "kl_loss": 0.17306064069271088, + "loss_ib": 0.0023964950814843178, + "step": 1251 + }, + { + "ce_ib": 4.695159435272217, + "ce_orig": 0.7654480934143066, + "epoch": 0.3597670573010281, + "kl_loss": 0.08340594172477722, + "loss_ib": 0.0013035753509029746, + "step": 1251 + }, + { + "ce_ib": 4.917471408843994, + "ce_orig": 0.7266609072685242, + "epoch": 0.36005464088000577, + "kl_loss": 0.1739560216665268, + "loss_ib": 0.002231307327747345, + "step": 1252 + }, + { + "ce_ib": 6.264862060546875, + "ce_orig": 0.6531610488891602, + "epoch": 0.36005464088000577, + "kl_loss": 0.13514098525047302, + "loss_ib": 0.001977896085008979, + "step": 1252 + }, + { + "ce_ib": 9.444012641906738, + "ce_orig": 1.688317894935608, + "epoch": 0.36005464088000577, + "kl_loss": 0.11596217751502991, + "loss_ib": 0.00210402300581336, + "step": 1252 + }, + { + "ce_ib": 7.3551716804504395, + "ce_orig": 1.08235502243042, + "epoch": 0.36005464088000577, + "kl_loss": 0.09125493466854095, + "loss_ib": 0.0016480664489790797, + "step": 1252 + }, + { + "ce_ib": 4.903650760650635, + "ce_orig": 0.46829381585121155, + "epoch": 0.3603422244589834, + "kl_loss": 0.18105095624923706, + "loss_ib": 0.002300874562934041, + "step": 1253 + }, + { + "ce_ib": 6.1150221824646, + "ce_orig": 0.7645502686500549, + "epoch": 0.3603422244589834, + "kl_loss": 0.11290599405765533, + "loss_ib": 0.001740562147460878, + "step": 1253 + }, + { + "ce_ib": 3.758120059967041, + "ce_orig": 0.5434147119522095, + "epoch": 0.3603422244589834, + "kl_loss": 0.04552270844578743, + "loss_ib": 0.0008310390985570848, + "step": 1253 + }, + { + "ce_ib": 8.314358711242676, + "ce_orig": 1.216098427772522, + "epoch": 0.3603422244589834, + "kl_loss": 0.09088920801877975, + "loss_ib": 0.001740327919833362, + "step": 1253 + }, + { + "ce_ib": 8.22514820098877, + "ce_orig": 1.1783727407455444, + "epoch": 0.360629808037961, + "kl_loss": 0.1134648323059082, + "loss_ib": 0.0019571632146835327, + "step": 1254 + }, + { + "ce_ib": 4.879001140594482, + "ce_orig": 0.7157498002052307, + "epoch": 0.360629808037961, + "kl_loss": 0.09442845731973648, + "loss_ib": 0.0014321847120299935, + "step": 1254 + }, + { + "ce_ib": 4.070014476776123, + "ce_orig": 0.5319107174873352, + "epoch": 0.360629808037961, + "kl_loss": 0.16356313228607178, + "loss_ib": 0.0020426325500011444, + "step": 1254 + }, + { + "ce_ib": 5.681893825531006, + "ce_orig": 0.7256168723106384, + "epoch": 0.360629808037961, + "kl_loss": 0.10619882494211197, + "loss_ib": 0.0016301776049658656, + "step": 1254 + }, + { + "epoch": 0.3609173916169387, + "grad_norm": 0.10951834917068481, + "learning_rate": 4.8954267131966225e-05, + "loss": 0.9201, + "step": 1255 + }, + { + "ce_ib": 4.51348352432251, + "ce_orig": 0.5521302223205566, + "epoch": 0.3609173916169387, + "kl_loss": 0.07769614458084106, + "loss_ib": 0.0012283098185434937, + "step": 1255 + }, + { + "ce_ib": 7.691596984863281, + "ce_orig": 0.7468867897987366, + "epoch": 0.3609173916169387, + "kl_loss": 0.12418323755264282, + "loss_ib": 0.002010992029681802, + "step": 1255 + }, + { + "ce_ib": 6.768327236175537, + "ce_orig": 0.841541051864624, + "epoch": 0.3609173916169387, + "kl_loss": 0.10890116542577744, + "loss_ib": 0.001765844295732677, + "step": 1255 + }, + { + "ce_ib": 4.823737144470215, + "ce_orig": 0.7781549692153931, + "epoch": 0.3609173916169387, + "kl_loss": 0.1455298513174057, + "loss_ib": 0.0019376721465960145, + "step": 1255 + }, + { + "ce_ib": 5.633728504180908, + "ce_orig": 0.8247250914573669, + "epoch": 0.3612049751959163, + "kl_loss": 0.11697879433631897, + "loss_ib": 0.001733160694129765, + "step": 1256 + }, + { + "ce_ib": 6.972712993621826, + "ce_orig": 0.6569220423698425, + "epoch": 0.3612049751959163, + "kl_loss": 0.09403789043426514, + "loss_ib": 0.001637650071643293, + "step": 1256 + }, + { + "ce_ib": 5.785305500030518, + "ce_orig": 0.9212448596954346, + "epoch": 0.3612049751959163, + "kl_loss": 0.11507164686918259, + "loss_ib": 0.0017292469274252653, + "step": 1256 + }, + { + "ce_ib": 4.723819255828857, + "ce_orig": 0.3406326472759247, + "epoch": 0.3612049751959163, + "kl_loss": 0.13277126848697662, + "loss_ib": 0.001800094498321414, + "step": 1256 + }, + { + "ce_ib": 6.737639427185059, + "ce_orig": 0.9714068174362183, + "epoch": 0.36149255877489395, + "kl_loss": 0.11174871772527695, + "loss_ib": 0.0017912510083988309, + "step": 1257 + }, + { + "ce_ib": 3.7057688236236572, + "ce_orig": 0.6621173024177551, + "epoch": 0.36149255877489395, + "kl_loss": 0.07731989026069641, + "loss_ib": 0.0011437757639214396, + "step": 1257 + }, + { + "ce_ib": 7.429227828979492, + "ce_orig": 1.174642562866211, + "epoch": 0.36149255877489395, + "kl_loss": 0.21665500104427338, + "loss_ib": 0.0029094729106873274, + "step": 1257 + }, + { + "ce_ib": 4.477489471435547, + "ce_orig": 0.6718473434448242, + "epoch": 0.36149255877489395, + "kl_loss": 0.16298699378967285, + "loss_ib": 0.002077618846669793, + "step": 1257 + }, + { + "ce_ib": 8.133766174316406, + "ce_orig": 0.9104800820350647, + "epoch": 0.36178014235387157, + "kl_loss": 0.16997173428535461, + "loss_ib": 0.002513093873858452, + "step": 1258 + }, + { + "ce_ib": 8.973519325256348, + "ce_orig": 1.3371100425720215, + "epoch": 0.36178014235387157, + "kl_loss": 0.07697136700153351, + "loss_ib": 0.0016670655459165573, + "step": 1258 + }, + { + "ce_ib": 7.888665676116943, + "ce_orig": 0.5700468420982361, + "epoch": 0.36178014235387157, + "kl_loss": 0.1357317864894867, + "loss_ib": 0.0021461844444274902, + "step": 1258 + }, + { + "ce_ib": 6.661389350891113, + "ce_orig": 0.5237452387809753, + "epoch": 0.36178014235387157, + "kl_loss": 0.09358532726764679, + "loss_ib": 0.0016019921749830246, + "step": 1258 + }, + { + "ce_ib": 5.645505905151367, + "ce_orig": 0.7846198678016663, + "epoch": 0.36206772593284925, + "kl_loss": 0.17019706964492798, + "loss_ib": 0.002266521332785487, + "step": 1259 + }, + { + "ce_ib": 5.187131881713867, + "ce_orig": 0.6325237154960632, + "epoch": 0.36206772593284925, + "kl_loss": 0.13489633798599243, + "loss_ib": 0.0018676765030249953, + "step": 1259 + }, + { + "ce_ib": 5.370206832885742, + "ce_orig": 0.6260020732879639, + "epoch": 0.36206772593284925, + "kl_loss": 0.11543045192956924, + "loss_ib": 0.0016913251020014286, + "step": 1259 + }, + { + "ce_ib": 8.244893074035645, + "ce_orig": 0.9257451295852661, + "epoch": 0.36206772593284925, + "kl_loss": 0.21367326378822327, + "loss_ib": 0.0029612218495458364, + "step": 1259 + }, + { + "epoch": 0.3623553095118269, + "grad_norm": 0.0853675901889801, + "learning_rate": 4.894313260418617e-05, + "loss": 0.8574, + "step": 1260 + }, + { + "ce_ib": 5.996172904968262, + "ce_orig": 0.9969424605369568, + "epoch": 0.3623553095118269, + "kl_loss": 0.07828725129365921, + "loss_ib": 0.001382489805109799, + "step": 1260 + }, + { + "ce_ib": 7.6174139976501465, + "ce_orig": 1.1654242277145386, + "epoch": 0.3623553095118269, + "kl_loss": 0.11603623628616333, + "loss_ib": 0.0019221036927774549, + "step": 1260 + }, + { + "ce_ib": 6.490176200866699, + "ce_orig": 0.9987762570381165, + "epoch": 0.3623553095118269, + "kl_loss": 0.07767187803983688, + "loss_ib": 0.0014257363509386778, + "step": 1260 + }, + { + "ce_ib": 7.6730122566223145, + "ce_orig": 1.5139427185058594, + "epoch": 0.3623553095118269, + "kl_loss": 0.0912584513425827, + "loss_ib": 0.0016798856668174267, + "step": 1260 + }, + { + "ce_ib": 7.314441680908203, + "ce_orig": 1.2183951139450073, + "epoch": 0.3626428930908045, + "kl_loss": 0.10375961661338806, + "loss_ib": 0.0017690402455627918, + "step": 1261 + }, + { + "ce_ib": 9.13668155670166, + "ce_orig": 1.484241008758545, + "epoch": 0.3626428930908045, + "kl_loss": 0.1495942771434784, + "loss_ib": 0.0024096108973026276, + "step": 1261 + }, + { + "ce_ib": 4.436420440673828, + "ce_orig": 0.7606337070465088, + "epoch": 0.3626428930908045, + "kl_loss": 0.09683829545974731, + "loss_ib": 0.0014120249543339014, + "step": 1261 + }, + { + "ce_ib": 5.428145885467529, + "ce_orig": 0.5036700963973999, + "epoch": 0.3626428930908045, + "kl_loss": 0.150125652551651, + "loss_ib": 0.002044070977717638, + "step": 1261 + }, + { + "ce_ib": 2.7986819744110107, + "ce_orig": 0.2716585695743561, + "epoch": 0.3629304766697822, + "kl_loss": 0.11242157220840454, + "loss_ib": 0.0014040839159861207, + "step": 1262 + }, + { + "ce_ib": 7.776880264282227, + "ce_orig": 1.1999222040176392, + "epoch": 0.3629304766697822, + "kl_loss": 0.08300190418958664, + "loss_ib": 0.0016077071195468307, + "step": 1262 + }, + { + "ce_ib": 7.587123870849609, + "ce_orig": 1.1394379138946533, + "epoch": 0.3629304766697822, + "kl_loss": 0.12824639678001404, + "loss_ib": 0.0020411761943250895, + "step": 1262 + }, + { + "ce_ib": 6.979325771331787, + "ce_orig": 0.7309539914131165, + "epoch": 0.3629304766697822, + "kl_loss": 0.08371274173259735, + "loss_ib": 0.0015350598841905594, + "step": 1262 + }, + { + "ce_ib": 4.161574840545654, + "ce_orig": 0.5990742444992065, + "epoch": 0.3632180602487598, + "kl_loss": 0.11062033474445343, + "loss_ib": 0.0015223607188090682, + "step": 1263 + }, + { + "ce_ib": 3.989654779434204, + "ce_orig": 0.4041389524936676, + "epoch": 0.3632180602487598, + "kl_loss": 0.08145566284656525, + "loss_ib": 0.0012135220458731055, + "step": 1263 + }, + { + "ce_ib": 6.267369270324707, + "ce_orig": 0.9718990921974182, + "epoch": 0.3632180602487598, + "kl_loss": 0.14130017161369324, + "loss_ib": 0.002039738465100527, + "step": 1263 + }, + { + "ce_ib": 6.479578971862793, + "ce_orig": 0.5610483288764954, + "epoch": 0.3632180602487598, + "kl_loss": 0.15332993865013123, + "loss_ib": 0.002181257354095578, + "step": 1263 + }, + { + "ce_ib": 5.573829650878906, + "ce_orig": 0.5869829058647156, + "epoch": 0.3635056438277374, + "kl_loss": 0.10969609767198563, + "loss_ib": 0.0016543439123779535, + "step": 1264 + }, + { + "ce_ib": 6.148459434509277, + "ce_orig": 0.9820066690444946, + "epoch": 0.3635056438277374, + "kl_loss": 0.1340855062007904, + "loss_ib": 0.0019557008054107428, + "step": 1264 + }, + { + "ce_ib": 7.5752763748168945, + "ce_orig": 1.008094072341919, + "epoch": 0.3635056438277374, + "kl_loss": 0.09246852993965149, + "loss_ib": 0.001682212925516069, + "step": 1264 + }, + { + "ce_ib": 6.046870231628418, + "ce_orig": 0.8507830500602722, + "epoch": 0.3635056438277374, + "kl_loss": 0.10141552239656448, + "loss_ib": 0.0016188421286642551, + "step": 1264 + }, + { + "epoch": 0.3637932274067151, + "grad_norm": 0.1048649325966835, + "learning_rate": 4.893194039184236e-05, + "loss": 0.8718, + "step": 1265 + }, + { + "ce_ib": 5.948637962341309, + "ce_orig": 0.7164708971977234, + "epoch": 0.3637932274067151, + "kl_loss": 0.18087129294872284, + "loss_ib": 0.0024035766255110502, + "step": 1265 + }, + { + "ce_ib": 4.5438103675842285, + "ce_orig": 0.716766357421875, + "epoch": 0.3637932274067151, + "kl_loss": 0.0770442932844162, + "loss_ib": 0.0012248239945620298, + "step": 1265 + }, + { + "ce_ib": 6.885209560394287, + "ce_orig": 1.285949468612671, + "epoch": 0.3637932274067151, + "kl_loss": 0.11101078242063522, + "loss_ib": 0.0017986288294196129, + "step": 1265 + }, + { + "ce_ib": 8.132999420166016, + "ce_orig": 0.8165818452835083, + "epoch": 0.3637932274067151, + "kl_loss": 0.10078869760036469, + "loss_ib": 0.0018211867427453399, + "step": 1265 + }, + { + "ce_ib": 4.9748334884643555, + "ce_orig": 0.5936683416366577, + "epoch": 0.3640808109856927, + "kl_loss": 0.09034372121095657, + "loss_ib": 0.0014009205624461174, + "step": 1266 + }, + { + "ce_ib": 4.65391206741333, + "ce_orig": 0.8400774002075195, + "epoch": 0.3640808109856927, + "kl_loss": 0.09988667815923691, + "loss_ib": 0.0014642579481005669, + "step": 1266 + }, + { + "ce_ib": 5.725048065185547, + "ce_orig": 0.9536218047142029, + "epoch": 0.3640808109856927, + "kl_loss": 0.11655110120773315, + "loss_ib": 0.0017380157951265574, + "step": 1266 + }, + { + "ce_ib": 7.574244976043701, + "ce_orig": 1.4102758169174194, + "epoch": 0.3640808109856927, + "kl_loss": 0.12130621075630188, + "loss_ib": 0.001970486482605338, + "step": 1266 + }, + { + "ce_ib": 2.4335663318634033, + "ce_orig": 0.11583693325519562, + "epoch": 0.36436839456467035, + "kl_loss": 0.32097506523132324, + "loss_ib": 0.0034531070850789547, + "step": 1267 + }, + { + "ce_ib": 6.081480026245117, + "ce_orig": 0.8565067052841187, + "epoch": 0.36436839456467035, + "kl_loss": 0.16416233777999878, + "loss_ib": 0.002249771263450384, + "step": 1267 + }, + { + "ce_ib": 6.017972946166992, + "ce_orig": 0.7665581107139587, + "epoch": 0.36436839456467035, + "kl_loss": 0.11499577015638351, + "loss_ib": 0.0017517550149932504, + "step": 1267 + }, + { + "ce_ib": 5.2015838623046875, + "ce_orig": 0.6542770862579346, + "epoch": 0.36436839456467035, + "kl_loss": 0.11964156478643417, + "loss_ib": 0.0017165739554911852, + "step": 1267 + }, + { + "ce_ib": 5.111125946044922, + "ce_orig": 0.7856875061988831, + "epoch": 0.364655978143648, + "kl_loss": 0.11653504520654678, + "loss_ib": 0.0016764630563557148, + "step": 1268 + }, + { + "ce_ib": 7.048427104949951, + "ce_orig": 0.9540700912475586, + "epoch": 0.364655978143648, + "kl_loss": 0.0664190873503685, + "loss_ib": 0.0013690335908904672, + "step": 1268 + }, + { + "ce_ib": 7.886468410491943, + "ce_orig": 1.0301730632781982, + "epoch": 0.364655978143648, + "kl_loss": 0.08469361811876297, + "loss_ib": 0.0016355830011889338, + "step": 1268 + }, + { + "ce_ib": 6.763402462005615, + "ce_orig": 1.0440094470977783, + "epoch": 0.364655978143648, + "kl_loss": 0.1510259360074997, + "loss_ib": 0.0021865996532142162, + "step": 1268 + }, + { + "ce_ib": 5.486627578735352, + "ce_orig": 0.4695490002632141, + "epoch": 0.36494356172262565, + "kl_loss": 0.15088656544685364, + "loss_ib": 0.002057528356090188, + "step": 1269 + }, + { + "ce_ib": 5.369198799133301, + "ce_orig": 0.9851084351539612, + "epoch": 0.36494356172262565, + "kl_loss": 0.11704091727733612, + "loss_ib": 0.001707328949123621, + "step": 1269 + }, + { + "ce_ib": 5.675687313079834, + "ce_orig": 0.6099631190299988, + "epoch": 0.36494356172262565, + "kl_loss": 0.0743517056107521, + "loss_ib": 0.0013110857689753175, + "step": 1269 + }, + { + "ce_ib": 5.100560665130615, + "ce_orig": 0.9225177764892578, + "epoch": 0.36494356172262565, + "kl_loss": 0.08952625840902328, + "loss_ib": 0.0014053186168894172, + "step": 1269 + }, + { + "epoch": 0.3652311453016033, + "grad_norm": 0.08690284192562103, + "learning_rate": 4.8920690521899425e-05, + "loss": 0.8222, + "step": 1270 + }, + { + "ce_ib": 4.783676624298096, + "ce_orig": 0.7209357023239136, + "epoch": 0.3652311453016033, + "kl_loss": 0.10517607629299164, + "loss_ib": 0.0015301284147426486, + "step": 1270 + }, + { + "ce_ib": 4.74617338180542, + "ce_orig": 0.6954101324081421, + "epoch": 0.3652311453016033, + "kl_loss": 0.11000341176986694, + "loss_ib": 0.0015746514545753598, + "step": 1270 + }, + { + "ce_ib": 7.062074661254883, + "ce_orig": 1.0939521789550781, + "epoch": 0.3652311453016033, + "kl_loss": 0.10517151653766632, + "loss_ib": 0.0017579225823283195, + "step": 1270 + }, + { + "ce_ib": 6.2351484298706055, + "ce_orig": 1.0175594091415405, + "epoch": 0.3652311453016033, + "kl_loss": 0.11980479955673218, + "loss_ib": 0.0018215627642348409, + "step": 1270 + }, + { + "ce_ib": 5.415085315704346, + "ce_orig": 0.7584806084632874, + "epoch": 0.3655187288805809, + "kl_loss": 0.08196678757667542, + "loss_ib": 0.0013611763715744019, + "step": 1271 + }, + { + "ce_ib": 5.287896633148193, + "ce_orig": 0.7736698985099792, + "epoch": 0.3655187288805809, + "kl_loss": 0.11232677102088928, + "loss_ib": 0.0016520572826266289, + "step": 1271 + }, + { + "ce_ib": 4.119640350341797, + "ce_orig": 0.7556843757629395, + "epoch": 0.3655187288805809, + "kl_loss": 0.09644928574562073, + "loss_ib": 0.0013764569303020835, + "step": 1271 + }, + { + "ce_ib": 7.895566940307617, + "ce_orig": 1.4006571769714355, + "epoch": 0.3655187288805809, + "kl_loss": 0.10593666881322861, + "loss_ib": 0.0018489232752472162, + "step": 1271 + }, + { + "ce_ib": 7.084376335144043, + "ce_orig": 1.3110722303390503, + "epoch": 0.3658063124595586, + "kl_loss": 0.10005828738212585, + "loss_ib": 0.0017090203473344445, + "step": 1272 + }, + { + "ce_ib": 2.2814648151397705, + "ce_orig": 0.22170490026474, + "epoch": 0.3658063124595586, + "kl_loss": 0.2755497395992279, + "loss_ib": 0.0029836439061909914, + "step": 1272 + }, + { + "ce_ib": 10.033036231994629, + "ce_orig": 1.714808464050293, + "epoch": 0.3658063124595586, + "kl_loss": 0.2861132025718689, + "loss_ib": 0.0038644354790449142, + "step": 1272 + }, + { + "ce_ib": 5.259953498840332, + "ce_orig": 0.7123859524726868, + "epoch": 0.3658063124595586, + "kl_loss": 0.11281996965408325, + "loss_ib": 0.0016541950171813369, + "step": 1272 + }, + { + "ce_ib": 4.626567840576172, + "ce_orig": 0.5473626852035522, + "epoch": 0.3660938960385362, + "kl_loss": 0.15294522047042847, + "loss_ib": 0.0019921089988201857, + "step": 1273 + }, + { + "ce_ib": 8.993037223815918, + "ce_orig": 1.4961568117141724, + "epoch": 0.3660938960385362, + "kl_loss": 0.20619803667068481, + "loss_ib": 0.0029612837824970484, + "step": 1273 + }, + { + "ce_ib": 9.032368659973145, + "ce_orig": 1.423782229423523, + "epoch": 0.3660938960385362, + "kl_loss": 0.20301368832588196, + "loss_ib": 0.002933373674750328, + "step": 1273 + }, + { + "ce_ib": 6.045455455780029, + "ce_orig": 0.7214930057525635, + "epoch": 0.3660938960385362, + "kl_loss": 0.0861063301563263, + "loss_ib": 0.0014656087150797248, + "step": 1273 + }, + { + "ce_ib": 5.263514518737793, + "ce_orig": 0.6097703576087952, + "epoch": 0.3663814796175138, + "kl_loss": 0.10200537741184235, + "loss_ib": 0.0015464052557945251, + "step": 1274 + }, + { + "ce_ib": 7.112468242645264, + "ce_orig": 0.9531702995300293, + "epoch": 0.3663814796175138, + "kl_loss": 0.14293800294399261, + "loss_ib": 0.0021406267769634724, + "step": 1274 + }, + { + "ce_ib": 5.52689790725708, + "ce_orig": 0.868090808391571, + "epoch": 0.3663814796175138, + "kl_loss": 0.1180892065167427, + "loss_ib": 0.0017335818847641349, + "step": 1274 + }, + { + "ce_ib": 4.249706268310547, + "ce_orig": 0.4917110800743103, + "epoch": 0.3663814796175138, + "kl_loss": 0.0959208756685257, + "loss_ib": 0.0013841792242601514, + "step": 1274 + }, + { + "epoch": 0.3666690631964915, + "grad_norm": 0.09557251632213593, + "learning_rate": 4.890938302146091e-05, + "loss": 0.8762, + "step": 1275 + }, + { + "ce_ib": 6.830130577087402, + "ce_orig": 0.6520479321479797, + "epoch": 0.3666690631964915, + "kl_loss": 0.07018201053142548, + "loss_ib": 0.0013848331291228533, + "step": 1275 + }, + { + "ce_ib": 6.835606098175049, + "ce_orig": 0.4915773570537567, + "epoch": 0.3666690631964915, + "kl_loss": 0.11724641919136047, + "loss_ib": 0.001856024842709303, + "step": 1275 + }, + { + "ce_ib": 6.633254528045654, + "ce_orig": 0.8117356300354004, + "epoch": 0.3666690631964915, + "kl_loss": 0.19811943173408508, + "loss_ib": 0.0026445197872817516, + "step": 1275 + }, + { + "ce_ib": 4.69884729385376, + "ce_orig": 0.622662365436554, + "epoch": 0.3666690631964915, + "kl_loss": 0.1123548075556755, + "loss_ib": 0.0015934327384456992, + "step": 1275 + }, + { + "ce_ib": 5.555670261383057, + "ce_orig": 0.9462684988975525, + "epoch": 0.36695664677546913, + "kl_loss": 0.18677571415901184, + "loss_ib": 0.002423324156552553, + "step": 1276 + }, + { + "ce_ib": 6.706098556518555, + "ce_orig": 1.0116491317749023, + "epoch": 0.36695664677546913, + "kl_loss": 0.16157668828964233, + "loss_ib": 0.002286376664415002, + "step": 1276 + }, + { + "ce_ib": 5.311933517456055, + "ce_orig": 0.6050148010253906, + "epoch": 0.36695664677546913, + "kl_loss": 0.11970527470111847, + "loss_ib": 0.0017282459884881973, + "step": 1276 + }, + { + "ce_ib": 9.499307632446289, + "ce_orig": 1.2046724557876587, + "epoch": 0.36695664677546913, + "kl_loss": 0.14970532059669495, + "loss_ib": 0.002446983940899372, + "step": 1276 + }, + { + "ce_ib": 7.650247097015381, + "ce_orig": 1.1416409015655518, + "epoch": 0.36724423035444675, + "kl_loss": 0.1492568850517273, + "loss_ib": 0.0022575934417545795, + "step": 1277 + }, + { + "ce_ib": 8.05804443359375, + "ce_orig": 1.2605984210968018, + "epoch": 0.36724423035444675, + "kl_loss": 0.06794284284114838, + "loss_ib": 0.0014852328458800912, + "step": 1277 + }, + { + "ce_ib": 5.465357303619385, + "ce_orig": 0.6775657534599304, + "epoch": 0.36724423035444675, + "kl_loss": 0.11054760962724686, + "loss_ib": 0.0016520118806511164, + "step": 1277 + }, + { + "ce_ib": 8.604063034057617, + "ce_orig": 0.8220978379249573, + "epoch": 0.36724423035444675, + "kl_loss": 0.08851812779903412, + "loss_ib": 0.001745587564073503, + "step": 1277 + }, + { + "ce_ib": 4.675428867340088, + "ce_orig": 0.9593977332115173, + "epoch": 0.3675318139334244, + "kl_loss": 0.10727477818727493, + "loss_ib": 0.0015402906574308872, + "step": 1278 + }, + { + "ce_ib": 5.886958122253418, + "ce_orig": 0.9332735538482666, + "epoch": 0.3675318139334244, + "kl_loss": 0.11322903633117676, + "loss_ib": 0.0017209862126037478, + "step": 1278 + }, + { + "ce_ib": 4.394830703735352, + "ce_orig": 0.9940218329429626, + "epoch": 0.3675318139334244, + "kl_loss": 0.07776137441396713, + "loss_ib": 0.0012170968111604452, + "step": 1278 + }, + { + "ce_ib": 6.783079624176025, + "ce_orig": 0.7369851469993591, + "epoch": 0.3675318139334244, + "kl_loss": 0.09048350155353546, + "loss_ib": 0.0015831427881494164, + "step": 1278 + }, + { + "ce_ib": 3.8256616592407227, + "ce_orig": 0.6405181884765625, + "epoch": 0.36781939751240206, + "kl_loss": 0.09209297597408295, + "loss_ib": 0.0013034958392381668, + "step": 1279 + }, + { + "ce_ib": 6.023566722869873, + "ce_orig": 0.6345555186271667, + "epoch": 0.36781939751240206, + "kl_loss": 0.11298112571239471, + "loss_ib": 0.001732167904265225, + "step": 1279 + }, + { + "ce_ib": 2.3995261192321777, + "ce_orig": 0.2504364252090454, + "epoch": 0.36781939751240206, + "kl_loss": 0.26038891077041626, + "loss_ib": 0.0028438414447009563, + "step": 1279 + }, + { + "ce_ib": 5.019238471984863, + "ce_orig": 0.7604730725288391, + "epoch": 0.36781939751240206, + "kl_loss": 0.08353875577449799, + "loss_ib": 0.001337311347015202, + "step": 1279 + }, + { + "epoch": 0.3681069810913797, + "grad_norm": 0.08959182351827621, + "learning_rate": 4.889801791776921e-05, + "loss": 0.879, + "step": 1280 + }, + { + "ce_ib": 5.5621867179870605, + "ce_orig": 0.7939660549163818, + "epoch": 0.3681069810913797, + "kl_loss": 0.16857171058654785, + "loss_ib": 0.002241935580968857, + "step": 1280 + }, + { + "ce_ib": 7.465915203094482, + "ce_orig": 0.9574486613273621, + "epoch": 0.3681069810913797, + "kl_loss": 0.09049826860427856, + "loss_ib": 0.001651574159041047, + "step": 1280 + }, + { + "ce_ib": 5.6863579750061035, + "ce_orig": 0.8255658745765686, + "epoch": 0.3681069810913797, + "kl_loss": 0.09053034335374832, + "loss_ib": 0.0014739392790943384, + "step": 1280 + }, + { + "ce_ib": 3.539290428161621, + "ce_orig": 0.4773739278316498, + "epoch": 0.3681069810913797, + "kl_loss": 0.13628683984279633, + "loss_ib": 0.001716797356493771, + "step": 1280 + }, + { + "ce_ib": 8.499439239501953, + "ce_orig": 1.0162585973739624, + "epoch": 0.3683945646703573, + "kl_loss": 0.16252519190311432, + "loss_ib": 0.002475195797160268, + "step": 1281 + }, + { + "ce_ib": 5.038724899291992, + "ce_orig": 1.0485836267471313, + "epoch": 0.3683945646703573, + "kl_loss": 0.07839176803827286, + "loss_ib": 0.001287790248170495, + "step": 1281 + }, + { + "ce_ib": 4.911864280700684, + "ce_orig": 0.656204104423523, + "epoch": 0.3683945646703573, + "kl_loss": 0.08212752640247345, + "loss_ib": 0.0013124615652486682, + "step": 1281 + }, + { + "ce_ib": 3.5285279750823975, + "ce_orig": 0.5432829260826111, + "epoch": 0.3683945646703573, + "kl_loss": 0.07672290503978729, + "loss_ib": 0.0011200818698853254, + "step": 1281 + }, + { + "ce_ib": 8.837839126586914, + "ce_orig": 1.1560882329940796, + "epoch": 0.368682148249335, + "kl_loss": 0.15169808268547058, + "loss_ib": 0.0024007647298276424, + "step": 1282 + }, + { + "ce_ib": 6.747598171234131, + "ce_orig": 0.9385497570037842, + "epoch": 0.368682148249335, + "kl_loss": 0.15202166140079498, + "loss_ib": 0.002194976434111595, + "step": 1282 + }, + { + "ce_ib": 5.9851531982421875, + "ce_orig": 0.6511201858520508, + "epoch": 0.368682148249335, + "kl_loss": 0.16669416427612305, + "loss_ib": 0.0022654568310827017, + "step": 1282 + }, + { + "ce_ib": 7.009302139282227, + "ce_orig": 1.0727838277816772, + "epoch": 0.368682148249335, + "kl_loss": 0.14475645124912262, + "loss_ib": 0.002148494590073824, + "step": 1282 + }, + { + "ce_ib": 7.189665794372559, + "ce_orig": 0.7218466401100159, + "epoch": 0.3689697318283126, + "kl_loss": 0.1025504320859909, + "loss_ib": 0.001744470908306539, + "step": 1283 + }, + { + "ce_ib": 4.639631748199463, + "ce_orig": 0.551937997341156, + "epoch": 0.3689697318283126, + "kl_loss": 0.07025028765201569, + "loss_ib": 0.0011664660414680839, + "step": 1283 + }, + { + "ce_ib": 6.67887544631958, + "ce_orig": 1.0466539859771729, + "epoch": 0.3689697318283126, + "kl_loss": 0.07730047404766083, + "loss_ib": 0.0014408922288566828, + "step": 1283 + }, + { + "ce_ib": 6.689807891845703, + "ce_orig": 0.7414644956588745, + "epoch": 0.3689697318283126, + "kl_loss": 0.15120118856430054, + "loss_ib": 0.0021809926256537437, + "step": 1283 + }, + { + "ce_ib": 5.785625457763672, + "ce_orig": 0.6918303370475769, + "epoch": 0.36925731540729023, + "kl_loss": 0.10686799883842468, + "loss_ib": 0.0016472425777465105, + "step": 1284 + }, + { + "ce_ib": 6.988125801086426, + "ce_orig": 0.45072489976882935, + "epoch": 0.36925731540729023, + "kl_loss": 0.15907561779022217, + "loss_ib": 0.0022895687725394964, + "step": 1284 + }, + { + "ce_ib": 5.028069972991943, + "ce_orig": 0.7821161150932312, + "epoch": 0.36925731540729023, + "kl_loss": 0.11405050754547119, + "loss_ib": 0.0016433119308203459, + "step": 1284 + }, + { + "ce_ib": 7.529943466186523, + "ce_orig": 1.2662677764892578, + "epoch": 0.36925731540729023, + "kl_loss": 0.0978567972779274, + "loss_ib": 0.0017315623117610812, + "step": 1284 + }, + { + "epoch": 0.3695448989862679, + "grad_norm": 0.09508899599313736, + "learning_rate": 4.888659523820549e-05, + "loss": 0.8243, + "step": 1285 + }, + { + "ce_ib": 8.22745418548584, + "ce_orig": 1.2493830919265747, + "epoch": 0.3695448989862679, + "kl_loss": 0.13128282129764557, + "loss_ib": 0.0021355736535042524, + "step": 1285 + }, + { + "ce_ib": 7.42917537689209, + "ce_orig": 0.8488349318504333, + "epoch": 0.3695448989862679, + "kl_loss": 0.09238451719284058, + "loss_ib": 0.0016667626332491636, + "step": 1285 + }, + { + "ce_ib": 7.670847415924072, + "ce_orig": 0.818599283695221, + "epoch": 0.3695448989862679, + "kl_loss": 0.1114754006266594, + "loss_ib": 0.0018818386597558856, + "step": 1285 + }, + { + "ce_ib": 4.175411701202393, + "ce_orig": 0.6316226720809937, + "epoch": 0.3695448989862679, + "kl_loss": 0.07252389937639236, + "loss_ib": 0.0011427801800891757, + "step": 1285 + }, + { + "ce_ib": 6.936399459838867, + "ce_orig": 0.947404682636261, + "epoch": 0.36983248256524553, + "kl_loss": 0.15562579035758972, + "loss_ib": 0.002249897923320532, + "step": 1286 + }, + { + "ce_ib": 3.784386396408081, + "ce_orig": 0.6171741485595703, + "epoch": 0.36983248256524553, + "kl_loss": 0.06299932301044464, + "loss_ib": 0.0010084318928420544, + "step": 1286 + }, + { + "ce_ib": 9.482007026672363, + "ce_orig": 1.793513298034668, + "epoch": 0.36983248256524553, + "kl_loss": 0.1756105124950409, + "loss_ib": 0.002704305574297905, + "step": 1286 + }, + { + "ce_ib": 8.821845054626465, + "ce_orig": 1.5388833284378052, + "epoch": 0.36983248256524553, + "kl_loss": 0.12496484071016312, + "loss_ib": 0.0021318327635526657, + "step": 1286 + }, + { + "ce_ib": 3.470980405807495, + "ce_orig": 0.477110356092453, + "epoch": 0.37012006614422316, + "kl_loss": 0.11538689583539963, + "loss_ib": 0.0015009669587016106, + "step": 1287 + }, + { + "ce_ib": 7.616337776184082, + "ce_orig": 0.6729000210762024, + "epoch": 0.37012006614422316, + "kl_loss": 0.12497438490390778, + "loss_ib": 0.002011377364397049, + "step": 1287 + }, + { + "ce_ib": 6.450989723205566, + "ce_orig": 1.0107957124710083, + "epoch": 0.37012006614422316, + "kl_loss": 0.08995261788368225, + "loss_ib": 0.0015446251491084695, + "step": 1287 + }, + { + "ce_ib": 8.401474952697754, + "ce_orig": 1.377687692642212, + "epoch": 0.37012006614422316, + "kl_loss": 0.11534081399440765, + "loss_ib": 0.001993555575609207, + "step": 1287 + }, + { + "ce_ib": 4.6463518142700195, + "ce_orig": 0.6928462386131287, + "epoch": 0.3704076497232008, + "kl_loss": 0.07001467049121857, + "loss_ib": 0.0011647818610072136, + "step": 1288 + }, + { + "ce_ib": 5.731445789337158, + "ce_orig": 0.7224513292312622, + "epoch": 0.3704076497232008, + "kl_loss": 0.08494200557470322, + "loss_ib": 0.001422564615495503, + "step": 1288 + }, + { + "ce_ib": 6.065194606781006, + "ce_orig": 0.7397277355194092, + "epoch": 0.3704076497232008, + "kl_loss": 0.06865018606185913, + "loss_ib": 0.0012930212542414665, + "step": 1288 + }, + { + "ce_ib": 8.368396759033203, + "ce_orig": 1.4819865226745605, + "epoch": 0.3704076497232008, + "kl_loss": 0.09231487661600113, + "loss_ib": 0.0017599883722141385, + "step": 1288 + }, + { + "ce_ib": 5.247856616973877, + "ce_orig": 0.9851112961769104, + "epoch": 0.37069523330217846, + "kl_loss": 0.10900059342384338, + "loss_ib": 0.0016147915739566088, + "step": 1289 + }, + { + "ce_ib": 5.296111583709717, + "ce_orig": 0.8326603770256042, + "epoch": 0.37069523330217846, + "kl_loss": 0.09622007608413696, + "loss_ib": 0.0014918118249624968, + "step": 1289 + }, + { + "ce_ib": 7.173306465148926, + "ce_orig": 0.4392928183078766, + "epoch": 0.37069523330217846, + "kl_loss": 0.14689943194389343, + "loss_ib": 0.0021863249130547047, + "step": 1289 + }, + { + "ce_ib": 6.673140525817871, + "ce_orig": 0.8259045481681824, + "epoch": 0.37069523330217846, + "kl_loss": 0.15683668851852417, + "loss_ib": 0.0022356808185577393, + "step": 1289 + }, + { + "epoch": 0.3709828168811561, + "grad_norm": 0.10334688425064087, + "learning_rate": 4.887511501028965e-05, + "loss": 0.8809, + "step": 1290 + }, + { + "ce_ib": 6.785250186920166, + "ce_orig": 0.667524516582489, + "epoch": 0.3709828168811561, + "kl_loss": 0.10752973705530167, + "loss_ib": 0.0017538222018629313, + "step": 1290 + }, + { + "ce_ib": 5.8398003578186035, + "ce_orig": 0.6426234841346741, + "epoch": 0.3709828168811561, + "kl_loss": 0.10408969968557358, + "loss_ib": 0.0016248769825324416, + "step": 1290 + }, + { + "ce_ib": 6.559090614318848, + "ce_orig": 0.9128428101539612, + "epoch": 0.3709828168811561, + "kl_loss": 0.1208682581782341, + "loss_ib": 0.0018645914969965816, + "step": 1290 + }, + { + "ce_ib": 5.339390754699707, + "ce_orig": 0.8618748784065247, + "epoch": 0.3709828168811561, + "kl_loss": 0.1005004346370697, + "loss_ib": 0.0015389432664960623, + "step": 1290 + }, + { + "ce_ib": 6.498045921325684, + "ce_orig": 0.6593946814537048, + "epoch": 0.3712704004601337, + "kl_loss": 0.10259787738323212, + "loss_ib": 0.0016757833072915673, + "step": 1291 + }, + { + "ce_ib": 3.9394936561584473, + "ce_orig": 0.6524549126625061, + "epoch": 0.3712704004601337, + "kl_loss": 0.0753093808889389, + "loss_ib": 0.0011470431927591562, + "step": 1291 + }, + { + "ce_ib": 4.098540782928467, + "ce_orig": 0.4942637085914612, + "epoch": 0.3712704004601337, + "kl_loss": 0.07822375744581223, + "loss_ib": 0.0011920916149392724, + "step": 1291 + }, + { + "ce_ib": 5.597634792327881, + "ce_orig": 0.8362662196159363, + "epoch": 0.3712704004601337, + "kl_loss": 0.15009665489196777, + "loss_ib": 0.0020607300102710724, + "step": 1291 + }, + { + "ce_ib": 7.818037986755371, + "ce_orig": 0.9996674060821533, + "epoch": 0.3715579840391114, + "kl_loss": 0.10779958218336105, + "loss_ib": 0.0018597996095195413, + "step": 1292 + }, + { + "ce_ib": 3.643066167831421, + "ce_orig": 0.5499973297119141, + "epoch": 0.3715579840391114, + "kl_loss": 0.10857464373111725, + "loss_ib": 0.0014500529505312443, + "step": 1292 + }, + { + "ce_ib": 3.60650634765625, + "ce_orig": 0.46445924043655396, + "epoch": 0.3715579840391114, + "kl_loss": 0.10870900750160217, + "loss_ib": 0.0014477407094091177, + "step": 1292 + }, + { + "ce_ib": 3.7332096099853516, + "ce_orig": 0.6267498731613159, + "epoch": 0.3715579840391114, + "kl_loss": 0.11489161849021912, + "loss_ib": 0.001522237085737288, + "step": 1292 + }, + { + "ce_ib": 4.233822345733643, + "ce_orig": 0.5855693817138672, + "epoch": 0.371845567618089, + "kl_loss": 0.11481663584709167, + "loss_ib": 0.0015715485205873847, + "step": 1293 + }, + { + "ce_ib": 8.881464958190918, + "ce_orig": 1.2566783428192139, + "epoch": 0.371845567618089, + "kl_loss": 0.12536683678627014, + "loss_ib": 0.0021418146789073944, + "step": 1293 + }, + { + "ce_ib": 5.271763801574707, + "ce_orig": 0.42172160744667053, + "epoch": 0.371845567618089, + "kl_loss": 0.1555880904197693, + "loss_ib": 0.00208305730484426, + "step": 1293 + }, + { + "ce_ib": 5.407679080963135, + "ce_orig": 0.797361433506012, + "epoch": 0.371845567618089, + "kl_loss": 0.0833439826965332, + "loss_ib": 0.0013742076698690653, + "step": 1293 + }, + { + "ce_ib": 6.48065710067749, + "ce_orig": 0.9504364132881165, + "epoch": 0.37213315119706664, + "kl_loss": 0.07627981901168823, + "loss_ib": 0.0014108639443293214, + "step": 1294 + }, + { + "ce_ib": 5.477166652679443, + "ce_orig": 0.8126649856567383, + "epoch": 0.37213315119706664, + "kl_loss": 0.10854905098676682, + "loss_ib": 0.0016332071973010898, + "step": 1294 + }, + { + "ce_ib": 2.3084990978240967, + "ce_orig": 0.2607325613498688, + "epoch": 0.37213315119706664, + "kl_loss": 0.16034573316574097, + "loss_ib": 0.0018343072151765227, + "step": 1294 + }, + { + "ce_ib": 5.476607799530029, + "ce_orig": 0.621596097946167, + "epoch": 0.37213315119706664, + "kl_loss": 0.11543017625808716, + "loss_ib": 0.001701962435618043, + "step": 1294 + }, + { + "epoch": 0.3724207347760443, + "grad_norm": 0.091304711997509, + "learning_rate": 4.8863577261680226e-05, + "loss": 0.8258, + "step": 1295 + }, + { + "ce_ib": 4.283220291137695, + "ce_orig": 0.6745275259017944, + "epoch": 0.3724207347760443, + "kl_loss": 0.0820574015378952, + "loss_ib": 0.001248896005563438, + "step": 1295 + }, + { + "ce_ib": 3.2045159339904785, + "ce_orig": 0.529264509677887, + "epoch": 0.3724207347760443, + "kl_loss": 0.08010916411876678, + "loss_ib": 0.001121543231420219, + "step": 1295 + }, + { + "ce_ib": 6.051677703857422, + "ce_orig": 0.7465357780456543, + "epoch": 0.3724207347760443, + "kl_loss": 0.21004080772399902, + "loss_ib": 0.002705575665459037, + "step": 1295 + }, + { + "ce_ib": 6.0474324226379395, + "ce_orig": 1.0841017961502075, + "epoch": 0.3724207347760443, + "kl_loss": 0.12120488286018372, + "loss_ib": 0.0018167919479310513, + "step": 1295 + }, + { + "ce_ib": 6.126406669616699, + "ce_orig": 1.0412760972976685, + "epoch": 0.37270831835502194, + "kl_loss": 0.10262041538953781, + "loss_ib": 0.001638844725675881, + "step": 1296 + }, + { + "ce_ib": 5.098903179168701, + "ce_orig": 0.6683371067047119, + "epoch": 0.37270831835502194, + "kl_loss": 0.11465729773044586, + "loss_ib": 0.0016564632533118129, + "step": 1296 + }, + { + "ce_ib": 4.434388160705566, + "ce_orig": 0.4733821749687195, + "epoch": 0.37270831835502194, + "kl_loss": 0.13180866837501526, + "loss_ib": 0.0017615255201235414, + "step": 1296 + }, + { + "ce_ib": 4.306613445281982, + "ce_orig": 0.6650025844573975, + "epoch": 0.37270831835502194, + "kl_loss": 0.08112086355686188, + "loss_ib": 0.0012418698752298951, + "step": 1296 + }, + { + "ce_ib": 7.580479621887207, + "ce_orig": 1.093042254447937, + "epoch": 0.37299590193399956, + "kl_loss": 0.09305058419704437, + "loss_ib": 0.0016885536024346948, + "step": 1297 + }, + { + "ce_ib": 4.302249908447266, + "ce_orig": 0.537747859954834, + "epoch": 0.37299590193399956, + "kl_loss": 0.2276405543088913, + "loss_ib": 0.002706630388274789, + "step": 1297 + }, + { + "ce_ib": 4.3821821212768555, + "ce_orig": 0.8077664971351624, + "epoch": 0.37299590193399956, + "kl_loss": 0.1235668808221817, + "loss_ib": 0.0016738870181143284, + "step": 1297 + }, + { + "ce_ib": 6.15134859085083, + "ce_orig": 0.8116844892501831, + "epoch": 0.37299590193399956, + "kl_loss": 0.13009323179721832, + "loss_ib": 0.001916067092679441, + "step": 1297 + }, + { + "ce_ib": 6.408975124359131, + "ce_orig": 1.0396615266799927, + "epoch": 0.3732834855129772, + "kl_loss": 0.1137128621339798, + "loss_ib": 0.001778026227839291, + "step": 1298 + }, + { + "ce_ib": 5.81378698348999, + "ce_orig": 0.9238652586936951, + "epoch": 0.3732834855129772, + "kl_loss": 0.11552828550338745, + "loss_ib": 0.0017366614192724228, + "step": 1298 + }, + { + "ce_ib": 4.731773376464844, + "ce_orig": 0.8088876008987427, + "epoch": 0.3732834855129772, + "kl_loss": 0.1280672252178192, + "loss_ib": 0.0017538494430482388, + "step": 1298 + }, + { + "ce_ib": 7.179605484008789, + "ce_orig": 0.9171648621559143, + "epoch": 0.3732834855129772, + "kl_loss": 0.08370693027973175, + "loss_ib": 0.0015550297684967518, + "step": 1298 + }, + { + "ce_ib": 9.204768180847168, + "ce_orig": 1.7088639736175537, + "epoch": 0.37357106909195487, + "kl_loss": 0.14535124599933624, + "loss_ib": 0.0023739892058074474, + "step": 1299 + }, + { + "ce_ib": 7.621387958526611, + "ce_orig": 0.7997857332229614, + "epoch": 0.37357106909195487, + "kl_loss": 0.5037106871604919, + "loss_ib": 0.0057992455549538136, + "step": 1299 + }, + { + "ce_ib": 4.995852947235107, + "ce_orig": 0.3401322066783905, + "epoch": 0.37357106909195487, + "kl_loss": 0.09861315041780472, + "loss_ib": 0.001485716667957604, + "step": 1299 + }, + { + "ce_ib": 7.1057305335998535, + "ce_orig": 0.9840693473815918, + "epoch": 0.37357106909195487, + "kl_loss": 0.10749038308858871, + "loss_ib": 0.001785476808436215, + "step": 1299 + }, + { + "epoch": 0.3738586526709325, + "grad_norm": 0.11028212308883667, + "learning_rate": 4.8851982020174316e-05, + "loss": 0.881, + "step": 1300 + }, + { + "ce_ib": 8.377426147460938, + "ce_orig": 1.3471934795379639, + "epoch": 0.3738586526709325, + "kl_loss": 0.09995093941688538, + "loss_ib": 0.0018372520571574569, + "step": 1300 + }, + { + "ce_ib": 4.688452243804932, + "ce_orig": 0.5826649069786072, + "epoch": 0.3738586526709325, + "kl_loss": 0.12547636032104492, + "loss_ib": 0.001723608816973865, + "step": 1300 + }, + { + "ce_ib": 3.959388017654419, + "ce_orig": 0.4809862971305847, + "epoch": 0.3738586526709325, + "kl_loss": 0.06880328059196472, + "loss_ib": 0.0010839715832844377, + "step": 1300 + }, + { + "ce_ib": 7.195967197418213, + "ce_orig": 0.8778854012489319, + "epoch": 0.3738586526709325, + "kl_loss": 0.1450991928577423, + "loss_ib": 0.0021705885883420706, + "step": 1300 + }, + { + "ce_ib": 5.772943019866943, + "ce_orig": 0.7366096377372742, + "epoch": 0.3741462362499101, + "kl_loss": 0.17550821602344513, + "loss_ib": 0.0023323765490204096, + "step": 1301 + }, + { + "ce_ib": 5.280375003814697, + "ce_orig": 0.5117489695549011, + "epoch": 0.3741462362499101, + "kl_loss": 0.12708105146884918, + "loss_ib": 0.0017988479230552912, + "step": 1301 + }, + { + "ce_ib": 6.551858901977539, + "ce_orig": 1.004411220550537, + "epoch": 0.3741462362499101, + "kl_loss": 0.16514693200588226, + "loss_ib": 0.0023066550493240356, + "step": 1301 + }, + { + "ce_ib": 7.748233318328857, + "ce_orig": 1.162596344947815, + "epoch": 0.3741462362499101, + "kl_loss": 0.11069048941135406, + "loss_ib": 0.0018817281816154718, + "step": 1301 + }, + { + "ce_ib": 2.3434817790985107, + "ce_orig": 0.2155807465314865, + "epoch": 0.3744338198288878, + "kl_loss": 0.24002450704574585, + "loss_ib": 0.0026345932856202126, + "step": 1302 + }, + { + "ce_ib": 4.517918586730957, + "ce_orig": 0.6924988627433777, + "epoch": 0.3744338198288878, + "kl_loss": 0.06375230848789215, + "loss_ib": 0.0010893149301409721, + "step": 1302 + }, + { + "ce_ib": 5.591329097747803, + "ce_orig": 0.7973899841308594, + "epoch": 0.3744338198288878, + "kl_loss": 0.11718559265136719, + "loss_ib": 0.00173098873347044, + "step": 1302 + }, + { + "ce_ib": 6.910216331481934, + "ce_orig": 0.9021272659301758, + "epoch": 0.3744338198288878, + "kl_loss": 0.1621193140745163, + "loss_ib": 0.0023122145794332027, + "step": 1302 + }, + { + "ce_ib": 4.825242042541504, + "ce_orig": 0.5692858099937439, + "epoch": 0.3747214034078654, + "kl_loss": 0.12797698378562927, + "loss_ib": 0.001762293977662921, + "step": 1303 + }, + { + "ce_ib": 3.828768730163574, + "ce_orig": 0.7801523208618164, + "epoch": 0.3747214034078654, + "kl_loss": 0.11873648315668106, + "loss_ib": 0.0015702417585998774, + "step": 1303 + }, + { + "ce_ib": 3.9736738204956055, + "ce_orig": 0.7081290483474731, + "epoch": 0.3747214034078654, + "kl_loss": 0.06060061603784561, + "loss_ib": 0.0010033735306933522, + "step": 1303 + }, + { + "ce_ib": 9.061676979064941, + "ce_orig": 1.2129696607589722, + "epoch": 0.3747214034078654, + "kl_loss": 0.0726584941148758, + "loss_ib": 0.0016327527118846774, + "step": 1303 + }, + { + "ce_ib": 4.078707218170166, + "ce_orig": 0.42766043543815613, + "epoch": 0.37500898698684304, + "kl_loss": 0.0907704085111618, + "loss_ib": 0.001315574743784964, + "step": 1304 + }, + { + "ce_ib": 8.35204792022705, + "ce_orig": 1.4698388576507568, + "epoch": 0.37500898698684304, + "kl_loss": 0.13164952397346497, + "loss_ib": 0.0021516999695450068, + "step": 1304 + }, + { + "ce_ib": 7.965769290924072, + "ce_orig": 1.2763580083847046, + "epoch": 0.37500898698684304, + "kl_loss": 0.09261095523834229, + "loss_ib": 0.0017226864583790302, + "step": 1304 + }, + { + "ce_ib": 6.983380317687988, + "ce_orig": 1.0021127462387085, + "epoch": 0.37500898698684304, + "kl_loss": 0.13547343015670776, + "loss_ib": 0.0020530722104012966, + "step": 1304 + }, + { + "epoch": 0.37529657056582066, + "grad_norm": 0.08439778536558151, + "learning_rate": 4.8840329313707556e-05, + "loss": 0.859, + "step": 1305 + }, + { + "ce_ib": 7.7243733406066895, + "ce_orig": 1.0866235494613647, + "epoch": 0.37529657056582066, + "kl_loss": 0.11019238829612732, + "loss_ib": 0.0018743611872196198, + "step": 1305 + }, + { + "ce_ib": 5.882345676422119, + "ce_orig": 0.7346667647361755, + "epoch": 0.37529657056582066, + "kl_loss": 0.09288327395915985, + "loss_ib": 0.0015170671977102757, + "step": 1305 + }, + { + "ce_ib": 3.667390823364258, + "ce_orig": 0.9194813966751099, + "epoch": 0.37529657056582066, + "kl_loss": 0.05089962109923363, + "loss_ib": 0.0008757352479733527, + "step": 1305 + }, + { + "ce_ib": 7.411571502685547, + "ce_orig": 0.9807973504066467, + "epoch": 0.37529657056582066, + "kl_loss": 0.08906038105487823, + "loss_ib": 0.001631760853342712, + "step": 1305 + }, + { + "ce_ib": 8.256385803222656, + "ce_orig": 1.1893174648284912, + "epoch": 0.37558415414479834, + "kl_loss": 0.1135597825050354, + "loss_ib": 0.0019612363539636135, + "step": 1306 + }, + { + "ce_ib": 6.674210071563721, + "ce_orig": 0.8289363980293274, + "epoch": 0.37558415414479834, + "kl_loss": 0.137071430683136, + "loss_ib": 0.0020381351932883263, + "step": 1306 + }, + { + "ce_ib": 7.589658260345459, + "ce_orig": 1.0683447122573853, + "epoch": 0.37558415414479834, + "kl_loss": 0.1060531884431839, + "loss_ib": 0.001819497556425631, + "step": 1306 + }, + { + "ce_ib": 8.912439346313477, + "ce_orig": 1.4486478567123413, + "epoch": 0.37558415414479834, + "kl_loss": 0.15647874772548676, + "loss_ib": 0.002456031274050474, + "step": 1306 + }, + { + "ce_ib": 9.321609497070312, + "ce_orig": 1.840979814529419, + "epoch": 0.37587173772377597, + "kl_loss": 0.0922217071056366, + "loss_ib": 0.0018543779151514173, + "step": 1307 + }, + { + "ce_ib": 4.2626566886901855, + "ce_orig": 0.5894846320152283, + "epoch": 0.37587173772377597, + "kl_loss": 0.14015614986419678, + "loss_ib": 0.0018278270727023482, + "step": 1307 + }, + { + "ce_ib": 10.738929748535156, + "ce_orig": 1.6773637533187866, + "epoch": 0.37587173772377597, + "kl_loss": 0.0642920583486557, + "loss_ib": 0.0017168134218081832, + "step": 1307 + }, + { + "ce_ib": 3.526549816131592, + "ce_orig": 0.6496335864067078, + "epoch": 0.37587173772377597, + "kl_loss": 0.08111678808927536, + "loss_ib": 0.0011638228315860033, + "step": 1307 + }, + { + "ce_ib": 6.587804317474365, + "ce_orig": 0.6036332845687866, + "epoch": 0.3761593213027536, + "kl_loss": 0.10333004593849182, + "loss_ib": 0.0016920807538554072, + "step": 1308 + }, + { + "ce_ib": 6.432805061340332, + "ce_orig": 0.7364359498023987, + "epoch": 0.3761593213027536, + "kl_loss": 0.07528705894947052, + "loss_ib": 0.0013961511431261897, + "step": 1308 + }, + { + "ce_ib": 4.427136421203613, + "ce_orig": 0.6185834407806396, + "epoch": 0.3761593213027536, + "kl_loss": 0.09119290858507156, + "loss_ib": 0.0013546427944675088, + "step": 1308 + }, + { + "ce_ib": 7.926487922668457, + "ce_orig": 1.2750617265701294, + "epoch": 0.3761593213027536, + "kl_loss": 0.10837486386299133, + "loss_ib": 0.0018763974076136947, + "step": 1308 + }, + { + "ce_ib": 7.6131205558776855, + "ce_orig": 1.4484436511993408, + "epoch": 0.37644690488173127, + "kl_loss": 0.12636443972587585, + "loss_ib": 0.0020249562803655863, + "step": 1309 + }, + { + "ce_ib": 5.368390083312988, + "ce_orig": 0.9395446181297302, + "epoch": 0.37644690488173127, + "kl_loss": 0.09168052673339844, + "loss_ib": 0.0014536442467942834, + "step": 1309 + }, + { + "ce_ib": 5.244813442230225, + "ce_orig": 1.0128648281097412, + "epoch": 0.37644690488173127, + "kl_loss": 0.07855847477912903, + "loss_ib": 0.0013100660871714354, + "step": 1309 + }, + { + "ce_ib": 5.962604999542236, + "ce_orig": 0.9374620914459229, + "epoch": 0.37644690488173127, + "kl_loss": 0.11523380875587463, + "loss_ib": 0.0017485985299572349, + "step": 1309 + }, + { + "epoch": 0.3767344884607089, + "grad_norm": 0.10580755770206451, + "learning_rate": 4.882861917035402e-05, + "loss": 0.8392, + "step": 1310 + }, + { + "ce_ib": 9.31829833984375, + "ce_orig": 1.440765142440796, + "epoch": 0.3767344884607089, + "kl_loss": 0.19183677434921265, + "loss_ib": 0.0028501974884420633, + "step": 1310 + }, + { + "ce_ib": 10.160751342773438, + "ce_orig": 1.4808740615844727, + "epoch": 0.3767344884607089, + "kl_loss": 0.1334453672170639, + "loss_ib": 0.0023505287244915962, + "step": 1310 + }, + { + "ce_ib": 5.5851545333862305, + "ce_orig": 1.0320905447006226, + "epoch": 0.3767344884607089, + "kl_loss": 0.1124086007475853, + "loss_ib": 0.0016826014034450054, + "step": 1310 + }, + { + "ce_ib": 6.999680519104004, + "ce_orig": 0.9789513349533081, + "epoch": 0.3767344884607089, + "kl_loss": 0.17336848378181458, + "loss_ib": 0.0024336527567356825, + "step": 1310 + }, + { + "ce_ib": 5.470460891723633, + "ce_orig": 0.8240450024604797, + "epoch": 0.3770220720396865, + "kl_loss": 0.05858692526817322, + "loss_ib": 0.001132915262132883, + "step": 1311 + }, + { + "ce_ib": 6.502416133880615, + "ce_orig": 0.784637451171875, + "epoch": 0.3770220720396865, + "kl_loss": 0.09532777965068817, + "loss_ib": 0.0016035193111747503, + "step": 1311 + }, + { + "ce_ib": 4.977993488311768, + "ce_orig": 0.3519165515899658, + "epoch": 0.3770220720396865, + "kl_loss": 0.13049781322479248, + "loss_ib": 0.0018027774058282375, + "step": 1311 + }, + { + "ce_ib": 5.470664978027344, + "ce_orig": 0.5675799250602722, + "epoch": 0.3770220720396865, + "kl_loss": 0.09750883281230927, + "loss_ib": 0.0015221547801047564, + "step": 1311 + }, + { + "ce_ib": 5.6255574226379395, + "ce_orig": 0.4978528320789337, + "epoch": 0.3773096556186642, + "kl_loss": 0.13040006160736084, + "loss_ib": 0.0018665563547983766, + "step": 1312 + }, + { + "ce_ib": 6.08099365234375, + "ce_orig": 0.6202380657196045, + "epoch": 0.3773096556186642, + "kl_loss": 0.12318846583366394, + "loss_ib": 0.0018399839755147696, + "step": 1312 + }, + { + "ce_ib": 5.541018486022949, + "ce_orig": 0.7439128160476685, + "epoch": 0.3773096556186642, + "kl_loss": 0.07084212452173233, + "loss_ib": 0.0012625230010598898, + "step": 1312 + }, + { + "ce_ib": 4.4165730476379395, + "ce_orig": 0.5543098449707031, + "epoch": 0.3773096556186642, + "kl_loss": 0.07426542043685913, + "loss_ib": 0.0011843114625662565, + "step": 1312 + }, + { + "ce_ib": 3.938666820526123, + "ce_orig": 0.7918110489845276, + "epoch": 0.3775972391976418, + "kl_loss": 0.0728941410779953, + "loss_ib": 0.0011228080838918686, + "step": 1313 + }, + { + "ce_ib": 8.127522468566895, + "ce_orig": 1.214548110961914, + "epoch": 0.3775972391976418, + "kl_loss": 0.12012702226638794, + "loss_ib": 0.002014022320508957, + "step": 1313 + }, + { + "ce_ib": 6.598063945770264, + "ce_orig": 1.1606128215789795, + "epoch": 0.3775972391976418, + "kl_loss": 0.11751188337802887, + "loss_ib": 0.0018349250312894583, + "step": 1313 + }, + { + "ce_ib": 6.119964122772217, + "ce_orig": 0.8887215256690979, + "epoch": 0.3775972391976418, + "kl_loss": 0.11058682203292847, + "loss_ib": 0.0017178645357489586, + "step": 1313 + }, + { + "ce_ib": 5.292278289794922, + "ce_orig": 0.8706098794937134, + "epoch": 0.37788482277661944, + "kl_loss": 0.11289675533771515, + "loss_ib": 0.0016581953968852758, + "step": 1314 + }, + { + "ce_ib": 4.4833784103393555, + "ce_orig": 0.7065367698669434, + "epoch": 0.37788482277661944, + "kl_loss": 0.08428777754306793, + "loss_ib": 0.001291215536184609, + "step": 1314 + }, + { + "ce_ib": 5.184488773345947, + "ce_orig": 0.5441928505897522, + "epoch": 0.37788482277661944, + "kl_loss": 0.11040713638067245, + "loss_ib": 0.0016225201543420553, + "step": 1314 + }, + { + "ce_ib": 7.428981304168701, + "ce_orig": 1.2260949611663818, + "epoch": 0.37788482277661944, + "kl_loss": 0.11685201525688171, + "loss_ib": 0.001911418279632926, + "step": 1314 + }, + { + "epoch": 0.37817240635559707, + "grad_norm": 0.093324214220047, + "learning_rate": 4.881685161832617e-05, + "loss": 0.8512, + "step": 1315 + }, + { + "ce_ib": 3.4923665523529053, + "ce_orig": 0.4155826270580292, + "epoch": 0.37817240635559707, + "kl_loss": 0.11499406397342682, + "loss_ib": 0.0014991771895438433, + "step": 1315 + }, + { + "ce_ib": 8.04870891571045, + "ce_orig": 1.0350520610809326, + "epoch": 0.37817240635559707, + "kl_loss": 0.24932055175304413, + "loss_ib": 0.0032980763353407383, + "step": 1315 + }, + { + "ce_ib": 6.75008487701416, + "ce_orig": 1.0719481706619263, + "epoch": 0.37817240635559707, + "kl_loss": 0.12588632106781006, + "loss_ib": 0.0019338716519996524, + "step": 1315 + }, + { + "ce_ib": 5.137867450714111, + "ce_orig": 0.939078152179718, + "epoch": 0.37817240635559707, + "kl_loss": 0.08320405334234238, + "loss_ib": 0.0013458272442221642, + "step": 1315 + }, + { + "ce_ib": 5.601523399353027, + "ce_orig": 0.730479896068573, + "epoch": 0.37845998993457475, + "kl_loss": 0.06329482793807983, + "loss_ib": 0.0011931005865335464, + "step": 1316 + }, + { + "ce_ib": 3.6545348167419434, + "ce_orig": 0.6439960598945618, + "epoch": 0.37845998993457475, + "kl_loss": 0.08682240545749664, + "loss_ib": 0.00123367749620229, + "step": 1316 + }, + { + "ce_ib": 6.024953842163086, + "ce_orig": 0.6438689231872559, + "epoch": 0.37845998993457475, + "kl_loss": 0.16080550849437714, + "loss_ib": 0.0022105504758656025, + "step": 1316 + }, + { + "ce_ib": 4.515985012054443, + "ce_orig": 0.544121265411377, + "epoch": 0.37845998993457475, + "kl_loss": 0.09402793645858765, + "loss_ib": 0.0013918777694925666, + "step": 1316 + }, + { + "ce_ib": 5.014019012451172, + "ce_orig": 0.4737093150615692, + "epoch": 0.37874757351355237, + "kl_loss": 0.1738872081041336, + "loss_ib": 0.002240273868665099, + "step": 1317 + }, + { + "ce_ib": 5.783313751220703, + "ce_orig": 0.6481950879096985, + "epoch": 0.37874757351355237, + "kl_loss": 0.14250211417675018, + "loss_ib": 0.002003352390602231, + "step": 1317 + }, + { + "ce_ib": 6.43743371963501, + "ce_orig": 0.850814163684845, + "epoch": 0.37874757351355237, + "kl_loss": 0.11499704420566559, + "loss_ib": 0.0017937137745320797, + "step": 1317 + }, + { + "ce_ib": 4.943159580230713, + "ce_orig": 0.6938974857330322, + "epoch": 0.37874757351355237, + "kl_loss": 0.1488955020904541, + "loss_ib": 0.0019832709804177284, + "step": 1317 + }, + { + "ce_ib": 4.182210922241211, + "ce_orig": 0.5845946669578552, + "epoch": 0.37903515709253, + "kl_loss": 0.0654899999499321, + "loss_ib": 0.0010731210932135582, + "step": 1318 + }, + { + "ce_ib": 9.34213924407959, + "ce_orig": 1.4825302362442017, + "epoch": 0.37903515709253, + "kl_loss": 0.18803195655345917, + "loss_ib": 0.002814533421769738, + "step": 1318 + }, + { + "ce_ib": 6.903960227966309, + "ce_orig": 1.253632664680481, + "epoch": 0.37903515709253, + "kl_loss": 0.1699225902557373, + "loss_ib": 0.0023896219208836555, + "step": 1318 + }, + { + "ce_ib": 6.289240837097168, + "ce_orig": 1.0074694156646729, + "epoch": 0.37903515709253, + "kl_loss": 0.0989120751619339, + "loss_ib": 0.0016180448001250625, + "step": 1318 + }, + { + "ce_ib": 5.321438789367676, + "ce_orig": 0.6448070406913757, + "epoch": 0.3793227406715077, + "kl_loss": 0.10022042691707611, + "loss_ib": 0.0015343481209129095, + "step": 1319 + }, + { + "ce_ib": 4.7291975021362305, + "ce_orig": 0.8807885050773621, + "epoch": 0.3793227406715077, + "kl_loss": 0.08091727644205093, + "loss_ib": 0.0012820924166589975, + "step": 1319 + }, + { + "ce_ib": 3.3131628036499023, + "ce_orig": 0.6549236178398132, + "epoch": 0.3793227406715077, + "kl_loss": 0.07724500447511673, + "loss_ib": 0.0011037662625312805, + "step": 1319 + }, + { + "ce_ib": 7.72067403793335, + "ce_orig": 1.068918228149414, + "epoch": 0.3793227406715077, + "kl_loss": 0.08838605880737305, + "loss_ib": 0.0016559278592467308, + "step": 1319 + }, + { + "epoch": 0.3796103242504853, + "grad_norm": 0.10508622229099274, + "learning_rate": 4.880502668597475e-05, + "loss": 0.867, + "step": 1320 + }, + { + "ce_ib": 3.1229352951049805, + "ce_orig": 0.6481002569198608, + "epoch": 0.3796103242504853, + "kl_loss": 0.07972665131092072, + "loss_ib": 0.0011095600202679634, + "step": 1320 + }, + { + "ce_ib": 5.122760772705078, + "ce_orig": 0.7584824562072754, + "epoch": 0.3796103242504853, + "kl_loss": 0.1352473497390747, + "loss_ib": 0.0018647494725883007, + "step": 1320 + }, + { + "ce_ib": 4.91263484954834, + "ce_orig": 0.8516531586647034, + "epoch": 0.3796103242504853, + "kl_loss": 0.08866020292043686, + "loss_ib": 0.0013778654392808676, + "step": 1320 + }, + { + "ce_ib": 4.272454738616943, + "ce_orig": 0.5512766242027283, + "epoch": 0.3796103242504853, + "kl_loss": 0.0826980322599411, + "loss_ib": 0.0012542258482426405, + "step": 1320 + }, + { + "ce_ib": 5.513332366943359, + "ce_orig": 0.5332241654396057, + "epoch": 0.3798979078294629, + "kl_loss": 0.10527944564819336, + "loss_ib": 0.0016041276976466179, + "step": 1321 + }, + { + "ce_ib": 7.259631633758545, + "ce_orig": 1.1701531410217285, + "epoch": 0.3798979078294629, + "kl_loss": 0.1175379604101181, + "loss_ib": 0.0019013426499441266, + "step": 1321 + }, + { + "ce_ib": 5.761963844299316, + "ce_orig": 1.0812888145446777, + "epoch": 0.3798979078294629, + "kl_loss": 0.06597635895013809, + "loss_ib": 0.001235959935002029, + "step": 1321 + }, + { + "ce_ib": 4.677454948425293, + "ce_orig": 0.7489356994628906, + "epoch": 0.3798979078294629, + "kl_loss": 0.12764066457748413, + "loss_ib": 0.001744152163155377, + "step": 1321 + }, + { + "ce_ib": 5.295727252960205, + "ce_orig": 0.7943225502967834, + "epoch": 0.3801854914084406, + "kl_loss": 0.121961809694767, + "loss_ib": 0.0017491908511146903, + "step": 1322 + }, + { + "ce_ib": 4.072883129119873, + "ce_orig": 0.8747122883796692, + "epoch": 0.3801854914084406, + "kl_loss": 0.1996590495109558, + "loss_ib": 0.002403878839686513, + "step": 1322 + }, + { + "ce_ib": 4.075475215911865, + "ce_orig": 0.5803650617599487, + "epoch": 0.3801854914084406, + "kl_loss": 0.05399131029844284, + "loss_ib": 0.0009474605903960764, + "step": 1322 + }, + { + "ce_ib": 6.995948791503906, + "ce_orig": 1.0964949131011963, + "epoch": 0.3801854914084406, + "kl_loss": 0.11446275562047958, + "loss_ib": 0.0018442223081365228, + "step": 1322 + }, + { + "ce_ib": 2.323742389678955, + "ce_orig": 0.1683950126171112, + "epoch": 0.3804730749874182, + "kl_loss": 0.312514990568161, + "loss_ib": 0.00335752428509295, + "step": 1323 + }, + { + "ce_ib": 7.685324192047119, + "ce_orig": 1.3328412771224976, + "epoch": 0.3804730749874182, + "kl_loss": 0.14166969060897827, + "loss_ib": 0.0021852292120456696, + "step": 1323 + }, + { + "ce_ib": 7.503167629241943, + "ce_orig": 0.979636549949646, + "epoch": 0.3804730749874182, + "kl_loss": 0.10989056527614594, + "loss_ib": 0.0018492224626243114, + "step": 1323 + }, + { + "ce_ib": 4.438648223876953, + "ce_orig": 0.644780158996582, + "epoch": 0.3804730749874182, + "kl_loss": 0.11196550726890564, + "loss_ib": 0.0015635198215022683, + "step": 1323 + }, + { + "ce_ib": 7.35436487197876, + "ce_orig": 1.2329034805297852, + "epoch": 0.38076065856639585, + "kl_loss": 0.08079203963279724, + "loss_ib": 0.0015433566877618432, + "step": 1324 + }, + { + "ce_ib": 5.28987979888916, + "ce_orig": 0.5954443216323853, + "epoch": 0.38076065856639585, + "kl_loss": 0.10509554296731949, + "loss_ib": 0.0015799434622749686, + "step": 1324 + }, + { + "ce_ib": 3.9158504009246826, + "ce_orig": 0.6997315287590027, + "epoch": 0.38076065856639585, + "kl_loss": 0.08156973123550415, + "loss_ib": 0.0012072824174538255, + "step": 1324 + }, + { + "ce_ib": 5.5528340339660645, + "ce_orig": 0.7494857907295227, + "epoch": 0.38076065856639585, + "kl_loss": 0.09654660522937775, + "loss_ib": 0.0015207494143396616, + "step": 1324 + }, + { + "epoch": 0.3810482421453735, + "grad_norm": 0.09944023191928864, + "learning_rate": 4.879314440178879e-05, + "loss": 0.866, + "step": 1325 + }, + { + "ce_ib": 6.527103424072266, + "ce_orig": 1.413140892982483, + "epoch": 0.3810482421453735, + "kl_loss": 0.10402053594589233, + "loss_ib": 0.00169291568454355, + "step": 1325 + }, + { + "ce_ib": 7.178508281707764, + "ce_orig": 0.9281395077705383, + "epoch": 0.3810482421453735, + "kl_loss": 0.12966975569725037, + "loss_ib": 0.0020145485177636147, + "step": 1325 + }, + { + "ce_ib": 5.939761161804199, + "ce_orig": 0.7400992512702942, + "epoch": 0.3810482421453735, + "kl_loss": 0.11570632457733154, + "loss_ib": 0.0017510392935946584, + "step": 1325 + }, + { + "ce_ib": 6.276197910308838, + "ce_orig": 0.8784988522529602, + "epoch": 0.3810482421453735, + "kl_loss": 0.11074468493461609, + "loss_ib": 0.0017350665293633938, + "step": 1325 + }, + { + "ce_ib": 7.449951648712158, + "ce_orig": 0.5432068705558777, + "epoch": 0.38133582572435115, + "kl_loss": 0.14821617305278778, + "loss_ib": 0.002227156888693571, + "step": 1326 + }, + { + "ce_ib": 6.362620830535889, + "ce_orig": 0.872480034828186, + "epoch": 0.38133582572435115, + "kl_loss": 0.11232534050941467, + "loss_ib": 0.0017595153767615557, + "step": 1326 + }, + { + "ce_ib": 4.248157978057861, + "ce_orig": 0.6296955943107605, + "epoch": 0.38133582572435115, + "kl_loss": 0.10103049874305725, + "loss_ib": 0.0014351207064464688, + "step": 1326 + }, + { + "ce_ib": 5.756939888000488, + "ce_orig": 0.7265815138816833, + "epoch": 0.38133582572435115, + "kl_loss": 0.1828758269548416, + "loss_ib": 0.0024044523015618324, + "step": 1326 + }, + { + "ce_ib": 7.763768196105957, + "ce_orig": 1.4668439626693726, + "epoch": 0.3816234093033288, + "kl_loss": 0.12431351840496063, + "loss_ib": 0.002019512001425028, + "step": 1327 + }, + { + "ce_ib": 4.5980119705200195, + "ce_orig": 0.7858704328536987, + "epoch": 0.3816234093033288, + "kl_loss": 0.09927660971879959, + "loss_ib": 0.0014525672886520624, + "step": 1327 + }, + { + "ce_ib": 4.350864887237549, + "ce_orig": 0.5971255898475647, + "epoch": 0.3816234093033288, + "kl_loss": 0.05036499351263046, + "loss_ib": 0.000938736426178366, + "step": 1327 + }, + { + "ce_ib": 12.01596736907959, + "ce_orig": 2.1367225646972656, + "epoch": 0.3816234093033288, + "kl_loss": 0.1739673614501953, + "loss_ib": 0.0029412703588604927, + "step": 1327 + }, + { + "ce_ib": 4.798478603363037, + "ce_orig": 0.35993748903274536, + "epoch": 0.3819109928823064, + "kl_loss": 0.14714062213897705, + "loss_ib": 0.0019512539729475975, + "step": 1328 + }, + { + "ce_ib": 4.909856796264648, + "ce_orig": 0.7560886740684509, + "epoch": 0.3819109928823064, + "kl_loss": 0.09511459618806839, + "loss_ib": 0.0014421317027881742, + "step": 1328 + }, + { + "ce_ib": 5.837655544281006, + "ce_orig": 0.7816391587257385, + "epoch": 0.3819109928823064, + "kl_loss": 0.16186535358428955, + "loss_ib": 0.0022024190984666348, + "step": 1328 + }, + { + "ce_ib": 5.54555606842041, + "ce_orig": 0.6967433094978333, + "epoch": 0.3819109928823064, + "kl_loss": 0.1746070683002472, + "loss_ib": 0.002300626365467906, + "step": 1328 + }, + { + "ce_ib": 5.7812042236328125, + "ce_orig": 0.5719181299209595, + "epoch": 0.3821985764612841, + "kl_loss": 0.11337076872587204, + "loss_ib": 0.0017118280520662665, + "step": 1329 + }, + { + "ce_ib": 4.787136077880859, + "ce_orig": 0.44207286834716797, + "epoch": 0.3821985764612841, + "kl_loss": 0.1073162704706192, + "loss_ib": 0.0015518763102591038, + "step": 1329 + }, + { + "ce_ib": 6.563467979431152, + "ce_orig": 0.9291197657585144, + "epoch": 0.3821985764612841, + "kl_loss": 0.07843677699565887, + "loss_ib": 0.0014407145790755749, + "step": 1329 + }, + { + "ce_ib": 4.12862491607666, + "ce_orig": 0.8059925436973572, + "epoch": 0.3821985764612841, + "kl_loss": 0.06666150689125061, + "loss_ib": 0.0010794774862006307, + "step": 1329 + }, + { + "epoch": 0.3824861600402617, + "grad_norm": 0.08576754480600357, + "learning_rate": 4.878120479439545e-05, + "loss": 0.8664, + "step": 1330 + }, + { + "ce_ib": 7.024020195007324, + "ce_orig": 1.1852842569351196, + "epoch": 0.3824861600402617, + "kl_loss": 0.08465856313705444, + "loss_ib": 0.0015489875804632902, + "step": 1330 + }, + { + "ce_ib": 4.564798355102539, + "ce_orig": 0.5595995187759399, + "epoch": 0.3824861600402617, + "kl_loss": 0.2181047797203064, + "loss_ib": 0.0026375274173915386, + "step": 1330 + }, + { + "ce_ib": 3.40017032623291, + "ce_orig": 0.4371066987514496, + "epoch": 0.3824861600402617, + "kl_loss": 0.13506951928138733, + "loss_ib": 0.0016907122917473316, + "step": 1330 + }, + { + "ce_ib": 7.465134143829346, + "ce_orig": 0.8330971002578735, + "epoch": 0.3824861600402617, + "kl_loss": 0.12516441941261292, + "loss_ib": 0.001998157473281026, + "step": 1330 + }, + { + "ce_ib": 7.266862392425537, + "ce_orig": 0.905171811580658, + "epoch": 0.3827737436192393, + "kl_loss": 0.08720521628856659, + "loss_ib": 0.0015987383667379618, + "step": 1331 + }, + { + "ce_ib": 5.786927700042725, + "ce_orig": 0.7975708246231079, + "epoch": 0.3827737436192393, + "kl_loss": 0.08120322972536087, + "loss_ib": 0.0013907250249758363, + "step": 1331 + }, + { + "ce_ib": 3.518803358078003, + "ce_orig": 0.5128543376922607, + "epoch": 0.3827737436192393, + "kl_loss": 0.06692732125520706, + "loss_ib": 0.0010211535263806581, + "step": 1331 + }, + { + "ce_ib": 4.056680679321289, + "ce_orig": 0.5318570137023926, + "epoch": 0.3827737436192393, + "kl_loss": 0.1076204776763916, + "loss_ib": 0.0014818727504462004, + "step": 1331 + }, + { + "ce_ib": 3.84483003616333, + "ce_orig": 0.6785795092582703, + "epoch": 0.383061327198217, + "kl_loss": 0.09602093696594238, + "loss_ib": 0.0013446924276649952, + "step": 1332 + }, + { + "ce_ib": 5.861329078674316, + "ce_orig": 0.8904538750648499, + "epoch": 0.383061327198217, + "kl_loss": 0.07035691291093826, + "loss_ib": 0.001289702020585537, + "step": 1332 + }, + { + "ce_ib": 6.79622220993042, + "ce_orig": 0.944312572479248, + "epoch": 0.383061327198217, + "kl_loss": 0.11347562074661255, + "loss_ib": 0.0018143784254789352, + "step": 1332 + }, + { + "ce_ib": 7.302602767944336, + "ce_orig": 1.311141848564148, + "epoch": 0.383061327198217, + "kl_loss": 0.07018446922302246, + "loss_ib": 0.001432104967534542, + "step": 1332 + }, + { + "ce_ib": 5.1223578453063965, + "ce_orig": 0.6385131478309631, + "epoch": 0.38334891077719463, + "kl_loss": 0.12935715913772583, + "loss_ib": 0.0018058073474094272, + "step": 1333 + }, + { + "ce_ib": 4.770997047424316, + "ce_orig": 0.5485048890113831, + "epoch": 0.38334891077719463, + "kl_loss": 0.1274576187133789, + "loss_ib": 0.001751675852574408, + "step": 1333 + }, + { + "ce_ib": 4.000513553619385, + "ce_orig": 0.7967947125434875, + "epoch": 0.38334891077719463, + "kl_loss": 0.08080033212900162, + "loss_ib": 0.0012080547166988254, + "step": 1333 + }, + { + "ce_ib": 8.564045906066895, + "ce_orig": 1.0766440629959106, + "epoch": 0.38334891077719463, + "kl_loss": 0.12121468037366867, + "loss_ib": 0.002068551490083337, + "step": 1333 + }, + { + "ce_ib": 6.580170631408691, + "ce_orig": 1.1447159051895142, + "epoch": 0.38363649435617225, + "kl_loss": 0.16854263842105865, + "loss_ib": 0.0023434432223439217, + "step": 1334 + }, + { + "ce_ib": 9.237170219421387, + "ce_orig": 1.8113460540771484, + "epoch": 0.38363649435617225, + "kl_loss": 0.14716824889183044, + "loss_ib": 0.0023953993804752827, + "step": 1334 + }, + { + "ce_ib": 3.421854019165039, + "ce_orig": 0.6700695753097534, + "epoch": 0.38363649435617225, + "kl_loss": 0.04630805179476738, + "loss_ib": 0.0008052659104578197, + "step": 1334 + }, + { + "ce_ib": 6.679248332977295, + "ce_orig": 1.0066481828689575, + "epoch": 0.38363649435617225, + "kl_loss": 0.1421959400177002, + "loss_ib": 0.0020898841321468353, + "step": 1334 + }, + { + "epoch": 0.3839240779351499, + "grad_norm": 0.09364533424377441, + "learning_rate": 4.876920789256003e-05, + "loss": 0.8202, + "step": 1335 + }, + { + "ce_ib": 3.789226531982422, + "ce_orig": 0.7831275463104248, + "epoch": 0.3839240779351499, + "kl_loss": 0.1242968887090683, + "loss_ib": 0.0016218915116041899, + "step": 1335 + }, + { + "ce_ib": 3.81172776222229, + "ce_orig": 0.7005083560943604, + "epoch": 0.3839240779351499, + "kl_loss": 0.06589465588331223, + "loss_ib": 0.0010401193285360932, + "step": 1335 + }, + { + "ce_ib": 7.375043869018555, + "ce_orig": 1.4221928119659424, + "epoch": 0.3839240779351499, + "kl_loss": 0.11741450428962708, + "loss_ib": 0.0019116493640467525, + "step": 1335 + }, + { + "ce_ib": 8.176351547241211, + "ce_orig": 1.0930140018463135, + "epoch": 0.3839240779351499, + "kl_loss": 0.08987772464752197, + "loss_ib": 0.0017164122546091676, + "step": 1335 + }, + { + "ce_ib": 8.336294174194336, + "ce_orig": 0.8998260498046875, + "epoch": 0.38421166151412756, + "kl_loss": 0.09345562011003494, + "loss_ib": 0.0017681854078546166, + "step": 1336 + }, + { + "ce_ib": 4.029426574707031, + "ce_orig": 0.7282453179359436, + "epoch": 0.38421166151412756, + "kl_loss": 0.09021305292844772, + "loss_ib": 0.0013050731504336, + "step": 1336 + }, + { + "ce_ib": 8.017088890075684, + "ce_orig": 0.8555032014846802, + "epoch": 0.38421166151412756, + "kl_loss": 0.12558424472808838, + "loss_ib": 0.002057551173493266, + "step": 1336 + }, + { + "ce_ib": 7.581797122955322, + "ce_orig": 1.004011631011963, + "epoch": 0.38421166151412756, + "kl_loss": 0.10867396742105484, + "loss_ib": 0.0018449192866683006, + "step": 1336 + }, + { + "ce_ib": 6.137843132019043, + "ce_orig": 1.3972002267837524, + "epoch": 0.3844992450931052, + "kl_loss": 0.11302444338798523, + "loss_ib": 0.0017440287629142404, + "step": 1337 + }, + { + "ce_ib": 5.292674541473389, + "ce_orig": 0.5526427626609802, + "epoch": 0.3844992450931052, + "kl_loss": 0.1625136137008667, + "loss_ib": 0.0021544035989791155, + "step": 1337 + }, + { + "ce_ib": 4.300665855407715, + "ce_orig": 1.0206716060638428, + "epoch": 0.3844992450931052, + "kl_loss": 0.07899816334247589, + "loss_ib": 0.0012200481723994017, + "step": 1337 + }, + { + "ce_ib": 5.7134904861450195, + "ce_orig": 1.067671537399292, + "epoch": 0.3844992450931052, + "kl_loss": 0.09659279882907867, + "loss_ib": 0.0015372770139947534, + "step": 1337 + }, + { + "ce_ib": 5.606064796447754, + "ce_orig": 0.9170923829078674, + "epoch": 0.3847868286720828, + "kl_loss": 0.10247627645730972, + "loss_ib": 0.0015853692311793566, + "step": 1338 + }, + { + "ce_ib": 5.497860908508301, + "ce_orig": 0.44184476137161255, + "epoch": 0.3847868286720828, + "kl_loss": 0.13341489434242249, + "loss_ib": 0.0018839349504560232, + "step": 1338 + }, + { + "ce_ib": 7.231921195983887, + "ce_orig": 0.9874067902565002, + "epoch": 0.3847868286720828, + "kl_loss": 0.2059246450662613, + "loss_ib": 0.0027824384160339832, + "step": 1338 + }, + { + "ce_ib": 7.948070049285889, + "ce_orig": 1.2974672317504883, + "epoch": 0.3847868286720828, + "kl_loss": 0.14574149250984192, + "loss_ib": 0.0022522220388054848, + "step": 1338 + }, + { + "ce_ib": 7.4506072998046875, + "ce_orig": 0.5894262790679932, + "epoch": 0.3850744122510605, + "kl_loss": 0.138429194688797, + "loss_ib": 0.0021293526515364647, + "step": 1339 + }, + { + "ce_ib": 8.624794006347656, + "ce_orig": 1.0650362968444824, + "epoch": 0.3850744122510605, + "kl_loss": 0.07382220774888992, + "loss_ib": 0.0016007014783099294, + "step": 1339 + }, + { + "ce_ib": 5.549061298370361, + "ce_orig": 0.5607146620750427, + "epoch": 0.3850744122510605, + "kl_loss": 0.12252309173345566, + "loss_ib": 0.001780137070454657, + "step": 1339 + }, + { + "ce_ib": 7.076119422912598, + "ce_orig": 0.8423207998275757, + "epoch": 0.3850744122510605, + "kl_loss": 0.10627346485853195, + "loss_ib": 0.0017703465418890119, + "step": 1339 + }, + { + "epoch": 0.3853619958300381, + "grad_norm": 0.08929329365491867, + "learning_rate": 4.875715372518585e-05, + "loss": 0.838, + "step": 1340 + }, + { + "ce_ib": 7.541855812072754, + "ce_orig": 0.8731908202171326, + "epoch": 0.3853619958300381, + "kl_loss": 0.1492196023464203, + "loss_ib": 0.0022463814821094275, + "step": 1340 + }, + { + "ce_ib": 8.010886192321777, + "ce_orig": 1.08771550655365, + "epoch": 0.3853619958300381, + "kl_loss": 0.10336001217365265, + "loss_ib": 0.0018346887081861496, + "step": 1340 + }, + { + "ce_ib": 5.893514156341553, + "ce_orig": 0.9211190938949585, + "epoch": 0.3853619958300381, + "kl_loss": 0.13187764585018158, + "loss_ib": 0.0019081278005614877, + "step": 1340 + }, + { + "ce_ib": 5.321976184844971, + "ce_orig": 0.9493192434310913, + "epoch": 0.3853619958300381, + "kl_loss": 0.1045265644788742, + "loss_ib": 0.0015774632338434458, + "step": 1340 + }, + { + "ce_ib": 11.51617431640625, + "ce_orig": 2.110943078994751, + "epoch": 0.38564957940901573, + "kl_loss": 0.1284325271844864, + "loss_ib": 0.0024359426461160183, + "step": 1341 + }, + { + "ce_ib": 7.60202169418335, + "ce_orig": 1.3231353759765625, + "epoch": 0.38564957940901573, + "kl_loss": 0.13365906476974487, + "loss_ib": 0.002096792683005333, + "step": 1341 + }, + { + "ce_ib": 7.612880229949951, + "ce_orig": 1.0295933485031128, + "epoch": 0.38564957940901573, + "kl_loss": 0.13664115965366364, + "loss_ib": 0.002127699553966522, + "step": 1341 + }, + { + "ce_ib": 3.26138973236084, + "ce_orig": 0.49690404534339905, + "epoch": 0.38564957940901573, + "kl_loss": 0.13129015266895294, + "loss_ib": 0.001639040419831872, + "step": 1341 + }, + { + "ce_ib": 5.174814701080322, + "ce_orig": 0.7611823081970215, + "epoch": 0.3859371629879934, + "kl_loss": 0.12216943502426147, + "loss_ib": 0.001739175757393241, + "step": 1342 + }, + { + "ce_ib": 3.6963768005371094, + "ce_orig": 0.6149874329566956, + "epoch": 0.3859371629879934, + "kl_loss": 0.10835998505353928, + "loss_ib": 0.00145323749165982, + "step": 1342 + }, + { + "ce_ib": 6.840673923492432, + "ce_orig": 0.946303129196167, + "epoch": 0.3859371629879934, + "kl_loss": 0.1355583667755127, + "loss_ib": 0.002039650920778513, + "step": 1342 + }, + { + "ce_ib": 6.208271026611328, + "ce_orig": 0.9422585368156433, + "epoch": 0.3859371629879934, + "kl_loss": 0.14368371665477753, + "loss_ib": 0.0020576640963554382, + "step": 1342 + }, + { + "ce_ib": 5.676513195037842, + "ce_orig": 0.9785840511322021, + "epoch": 0.38622474656697103, + "kl_loss": 0.12683354318141937, + "loss_ib": 0.001835986622609198, + "step": 1343 + }, + { + "ce_ib": 6.242606163024902, + "ce_orig": 0.5287706851959229, + "epoch": 0.38622474656697103, + "kl_loss": 0.10230138152837753, + "loss_ib": 0.001647274475544691, + "step": 1343 + }, + { + "ce_ib": 4.061772346496582, + "ce_orig": 0.5952426195144653, + "epoch": 0.38622474656697103, + "kl_loss": 0.05828586965799332, + "loss_ib": 0.0009890359360724688, + "step": 1343 + }, + { + "ce_ib": 6.636641025543213, + "ce_orig": 1.2166383266448975, + "epoch": 0.38622474656697103, + "kl_loss": 0.13114970922470093, + "loss_ib": 0.001975161023437977, + "step": 1343 + }, + { + "ce_ib": 5.310240268707275, + "ce_orig": 0.640544056892395, + "epoch": 0.38651233014594866, + "kl_loss": 0.23128513991832733, + "loss_ib": 0.002843875205144286, + "step": 1344 + }, + { + "ce_ib": 3.497588872909546, + "ce_orig": 0.634738028049469, + "epoch": 0.38651233014594866, + "kl_loss": 0.07952392846345901, + "loss_ib": 0.0011449981248006225, + "step": 1344 + }, + { + "ce_ib": 4.5079827308654785, + "ce_orig": 0.8078311681747437, + "epoch": 0.38651233014594866, + "kl_loss": 0.10402487218379974, + "loss_ib": 0.0014910469762980938, + "step": 1344 + }, + { + "ce_ib": 6.0893473625183105, + "ce_orig": 0.9539284110069275, + "epoch": 0.38651233014594866, + "kl_loss": 0.10625547170639038, + "loss_ib": 0.0016714894445613027, + "step": 1344 + }, + { + "epoch": 0.3867999137249263, + "grad_norm": 0.09451144933700562, + "learning_rate": 4.8745042321314186e-05, + "loss": 0.8364, + "step": 1345 + }, + { + "ce_ib": 4.948427200317383, + "ce_orig": 0.993834376335144, + "epoch": 0.3867999137249263, + "kl_loss": 0.10724367201328278, + "loss_ib": 0.0015672793379053473, + "step": 1345 + }, + { + "ce_ib": 4.9981207847595215, + "ce_orig": 0.7428358793258667, + "epoch": 0.3867999137249263, + "kl_loss": 0.10282860696315765, + "loss_ib": 0.0015280981315299869, + "step": 1345 + }, + { + "ce_ib": 6.816267013549805, + "ce_orig": 1.0093276500701904, + "epoch": 0.3867999137249263, + "kl_loss": 0.08815869688987732, + "loss_ib": 0.0015632137656211853, + "step": 1345 + }, + { + "ce_ib": 3.8050029277801514, + "ce_orig": 0.6337125897407532, + "epoch": 0.3867999137249263, + "kl_loss": 0.149078831076622, + "loss_ib": 0.0018712885212153196, + "step": 1345 + }, + { + "ce_ib": 4.451251029968262, + "ce_orig": 0.7932842969894409, + "epoch": 0.38708749730390396, + "kl_loss": 0.06703363358974457, + "loss_ib": 0.0011154614621773362, + "step": 1346 + }, + { + "ce_ib": 5.360795974731445, + "ce_orig": 0.6554229259490967, + "epoch": 0.38708749730390396, + "kl_loss": 0.08724645525217056, + "loss_ib": 0.001408544136211276, + "step": 1346 + }, + { + "ce_ib": 5.607513427734375, + "ce_orig": 0.7316446304321289, + "epoch": 0.38708749730390396, + "kl_loss": 0.12154290080070496, + "loss_ib": 0.0017761802300810814, + "step": 1346 + }, + { + "ce_ib": 3.820282220840454, + "ce_orig": 0.7290668487548828, + "epoch": 0.38708749730390396, + "kl_loss": 0.07640884816646576, + "loss_ib": 0.0011461166432127357, + "step": 1346 + }, + { + "ce_ib": 4.426151752471924, + "ce_orig": 0.4975670278072357, + "epoch": 0.3873750808828816, + "kl_loss": 0.09992469847202301, + "loss_ib": 0.001441862084902823, + "step": 1347 + }, + { + "ce_ib": 3.601679563522339, + "ce_orig": 0.6812854409217834, + "epoch": 0.3873750808828816, + "kl_loss": 0.07636852562427521, + "loss_ib": 0.001123853144235909, + "step": 1347 + }, + { + "ce_ib": 4.288147449493408, + "ce_orig": 0.7270475029945374, + "epoch": 0.3873750808828816, + "kl_loss": 0.09309622645378113, + "loss_ib": 0.0013597769429907203, + "step": 1347 + }, + { + "ce_ib": 6.769469261169434, + "ce_orig": 0.5788599252700806, + "epoch": 0.3873750808828816, + "kl_loss": 0.09748753160238266, + "loss_ib": 0.0016518222400918603, + "step": 1347 + }, + { + "ce_ib": 5.6365180015563965, + "ce_orig": 0.9563528895378113, + "epoch": 0.3876626644618592, + "kl_loss": 0.22810500860214233, + "loss_ib": 0.002844701986759901, + "step": 1348 + }, + { + "ce_ib": 6.9823126792907715, + "ce_orig": 0.7459795475006104, + "epoch": 0.3876626644618592, + "kl_loss": 0.16720719635486603, + "loss_ib": 0.002370303263887763, + "step": 1348 + }, + { + "ce_ib": 4.878572940826416, + "ce_orig": 0.6489146947860718, + "epoch": 0.3876626644618592, + "kl_loss": 0.07554194331169128, + "loss_ib": 0.0012432767543941736, + "step": 1348 + }, + { + "ce_ib": 5.692221641540527, + "ce_orig": 0.9350115060806274, + "epoch": 0.3876626644618592, + "kl_loss": 0.09622718393802643, + "loss_ib": 0.0015314939664676785, + "step": 1348 + }, + { + "ce_ib": 7.5320258140563965, + "ce_orig": 1.023134469985962, + "epoch": 0.3879502480408369, + "kl_loss": 0.13200296461582184, + "loss_ib": 0.0020732320845127106, + "step": 1349 + }, + { + "ce_ib": 6.651448726654053, + "ce_orig": 0.8953782320022583, + "epoch": 0.3879502480408369, + "kl_loss": 0.11706017702817917, + "loss_ib": 0.001835746574215591, + "step": 1349 + }, + { + "ce_ib": 4.424673080444336, + "ce_orig": 0.5250123739242554, + "epoch": 0.3879502480408369, + "kl_loss": 0.09768466651439667, + "loss_ib": 0.0014193139504641294, + "step": 1349 + }, + { + "ce_ib": 5.916868209838867, + "ce_orig": 0.8698412179946899, + "epoch": 0.3879502480408369, + "kl_loss": 0.05977011099457741, + "loss_ib": 0.0011893878690898418, + "step": 1349 + }, + { + "epoch": 0.3882378316198145, + "grad_norm": 0.10131075978279114, + "learning_rate": 4.8732873710124235e-05, + "loss": 0.8555, + "step": 1350 + }, + { + "ce_ib": 5.2028398513793945, + "ce_orig": 0.9036562442779541, + "epoch": 0.3882378316198145, + "kl_loss": 0.08852796256542206, + "loss_ib": 0.0014055636711418629, + "step": 1350 + }, + { + "ce_ib": 8.196189880371094, + "ce_orig": 1.0980186462402344, + "epoch": 0.3882378316198145, + "kl_loss": 0.14874669909477234, + "loss_ib": 0.002307086018845439, + "step": 1350 + }, + { + "ce_ib": 7.3839263916015625, + "ce_orig": 0.9047736525535583, + "epoch": 0.3882378316198145, + "kl_loss": 0.09531474858522415, + "loss_ib": 0.001691540121100843, + "step": 1350 + }, + { + "ce_ib": 10.06218433380127, + "ce_orig": 1.7880454063415527, + "epoch": 0.3882378316198145, + "kl_loss": 0.15133428573608398, + "loss_ib": 0.0025195612106472254, + "step": 1350 + }, + { + "ce_ib": 8.778315544128418, + "ce_orig": 1.6668694019317627, + "epoch": 0.38852541519879213, + "kl_loss": 0.11268506199121475, + "loss_ib": 0.0020046820864081383, + "step": 1351 + }, + { + "ce_ib": 7.385611057281494, + "ce_orig": 0.7190949320793152, + "epoch": 0.38852541519879213, + "kl_loss": 0.1424417793750763, + "loss_ib": 0.002162978984415531, + "step": 1351 + }, + { + "ce_ib": 4.4950337409973145, + "ce_orig": 0.4972643554210663, + "epoch": 0.38852541519879213, + "kl_loss": 0.0660613402724266, + "loss_ib": 0.0011101167183369398, + "step": 1351 + }, + { + "ce_ib": 6.403132915496826, + "ce_orig": 1.0407614707946777, + "epoch": 0.38852541519879213, + "kl_loss": 0.12615454196929932, + "loss_ib": 0.0019018587190657854, + "step": 1351 + }, + { + "ce_ib": 6.487383842468262, + "ce_orig": 0.7886338829994202, + "epoch": 0.3888129987777698, + "kl_loss": 0.12929624319076538, + "loss_ib": 0.0019417008152231574, + "step": 1352 + }, + { + "ce_ib": 7.565323829650879, + "ce_orig": 0.9060402512550354, + "epoch": 0.3888129987777698, + "kl_loss": 0.1719561070203781, + "loss_ib": 0.0024760933592915535, + "step": 1352 + }, + { + "ce_ib": 10.652222633361816, + "ce_orig": 2.0009143352508545, + "epoch": 0.3888129987777698, + "kl_loss": 0.12192914634943008, + "loss_ib": 0.0022845135536044836, + "step": 1352 + }, + { + "ce_ib": 3.8408257961273193, + "ce_orig": 0.7704099416732788, + "epoch": 0.3888129987777698, + "kl_loss": 0.2431485503911972, + "loss_ib": 0.002815568121150136, + "step": 1352 + }, + { + "ce_ib": 7.328763961791992, + "ce_orig": 1.0332540273666382, + "epoch": 0.38910058235674744, + "kl_loss": 0.16994734108448029, + "loss_ib": 0.0024323496036231518, + "step": 1353 + }, + { + "ce_ib": 6.36226224899292, + "ce_orig": 0.5826173424720764, + "epoch": 0.38910058235674744, + "kl_loss": 0.1299600452184677, + "loss_ib": 0.001935826614499092, + "step": 1353 + }, + { + "ce_ib": 7.506458282470703, + "ce_orig": 1.0630770921707153, + "epoch": 0.38910058235674744, + "kl_loss": 0.1475929617881775, + "loss_ib": 0.0022265755105763674, + "step": 1353 + }, + { + "ce_ib": 6.903659820556641, + "ce_orig": 0.50584876537323, + "epoch": 0.38910058235674744, + "kl_loss": 0.12740664184093475, + "loss_ib": 0.0019644321873784065, + "step": 1353 + }, + { + "ce_ib": 3.8798747062683105, + "ce_orig": 0.5197550058364868, + "epoch": 0.38938816593572506, + "kl_loss": 0.11910569667816162, + "loss_ib": 0.0015790443867444992, + "step": 1354 + }, + { + "ce_ib": 5.564952850341797, + "ce_orig": 0.758701741695404, + "epoch": 0.38938816593572506, + "kl_loss": 0.13943016529083252, + "loss_ib": 0.0019507968099787831, + "step": 1354 + }, + { + "ce_ib": 5.663549423217773, + "ce_orig": 0.8459624648094177, + "epoch": 0.38938816593572506, + "kl_loss": 0.1469283103942871, + "loss_ib": 0.0020356380846351385, + "step": 1354 + }, + { + "ce_ib": 4.190431118011475, + "ce_orig": 0.8275007009506226, + "epoch": 0.38938816593572506, + "kl_loss": 0.09464414417743683, + "loss_ib": 0.0013654845533892512, + "step": 1354 + }, + { + "epoch": 0.3896757495147027, + "grad_norm": 0.0893436148762703, + "learning_rate": 4.872064792093299e-05, + "loss": 0.8674, + "step": 1355 + }, + { + "ce_ib": 6.213205337524414, + "ce_orig": 1.2385234832763672, + "epoch": 0.3896757495147027, + "kl_loss": 0.10977844893932343, + "loss_ib": 0.0017191049410030246, + "step": 1355 + }, + { + "ce_ib": 2.949284553527832, + "ce_orig": 0.5540933012962341, + "epoch": 0.3896757495147027, + "kl_loss": 0.06217679753899574, + "loss_ib": 0.0009166963864117861, + "step": 1355 + }, + { + "ce_ib": 8.168116569519043, + "ce_orig": 1.1756985187530518, + "epoch": 0.3896757495147027, + "kl_loss": 0.1437525898218155, + "loss_ib": 0.002254337538033724, + "step": 1355 + }, + { + "ce_ib": 8.252790451049805, + "ce_orig": 1.310275673866272, + "epoch": 0.3896757495147027, + "kl_loss": 0.14780230820178986, + "loss_ib": 0.0023033020552247763, + "step": 1355 + }, + { + "ce_ib": 4.930200099945068, + "ce_orig": 1.1736384630203247, + "epoch": 0.38996333309368036, + "kl_loss": 0.11331868171691895, + "loss_ib": 0.0016262067947536707, + "step": 1356 + }, + { + "ce_ib": 5.806600093841553, + "ce_orig": 0.9677255749702454, + "epoch": 0.38996333309368036, + "kl_loss": 0.10435892641544342, + "loss_ib": 0.001624249154701829, + "step": 1356 + }, + { + "ce_ib": 4.6264801025390625, + "ce_orig": 0.6200498342514038, + "epoch": 0.38996333309368036, + "kl_loss": 0.08878219872713089, + "loss_ib": 0.0013504700036719441, + "step": 1356 + }, + { + "ce_ib": 5.591928005218506, + "ce_orig": 1.0976577997207642, + "epoch": 0.38996333309368036, + "kl_loss": 0.10171683132648468, + "loss_ib": 0.0015763610135763884, + "step": 1356 + }, + { + "ce_ib": 6.655910491943359, + "ce_orig": 1.143782615661621, + "epoch": 0.390250916672658, + "kl_loss": 0.07978525012731552, + "loss_ib": 0.0014634436229243875, + "step": 1357 + }, + { + "ce_ib": 5.03174352645874, + "ce_orig": 0.8346872925758362, + "epoch": 0.390250916672658, + "kl_loss": 0.11731187999248505, + "loss_ib": 0.0016762930899858475, + "step": 1357 + }, + { + "ce_ib": 8.140095710754395, + "ce_orig": 1.4168970584869385, + "epoch": 0.390250916672658, + "kl_loss": 0.1354292333126068, + "loss_ib": 0.002168301958590746, + "step": 1357 + }, + { + "ce_ib": 4.460680961608887, + "ce_orig": 0.4250698983669281, + "epoch": 0.390250916672658, + "kl_loss": 0.10067728161811829, + "loss_ib": 0.0014528408646583557, + "step": 1357 + }, + { + "ce_ib": 5.446596622467041, + "ce_orig": 0.7209237217903137, + "epoch": 0.3905385002516356, + "kl_loss": 0.13299855589866638, + "loss_ib": 0.0018746451241895556, + "step": 1358 + }, + { + "ce_ib": 4.268868446350098, + "ce_orig": 0.6162483096122742, + "epoch": 0.3905385002516356, + "kl_loss": 0.10890024900436401, + "loss_ib": 0.0015158893074840307, + "step": 1358 + }, + { + "ce_ib": 6.677936553955078, + "ce_orig": 1.0623646974563599, + "epoch": 0.3905385002516356, + "kl_loss": 0.263736367225647, + "loss_ib": 0.0033051574137061834, + "step": 1358 + }, + { + "ce_ib": 6.475733757019043, + "ce_orig": 1.0443817377090454, + "epoch": 0.3905385002516356, + "kl_loss": 0.11893180012702942, + "loss_ib": 0.001836891402490437, + "step": 1358 + }, + { + "ce_ib": 7.569817066192627, + "ce_orig": 1.2050386667251587, + "epoch": 0.3908260838306133, + "kl_loss": 0.16140955686569214, + "loss_ib": 0.0023710771929472685, + "step": 1359 + }, + { + "ce_ib": 7.5049567222595215, + "ce_orig": 0.826344907283783, + "epoch": 0.3908260838306133, + "kl_loss": 0.14760777354240417, + "loss_ib": 0.0022265734151005745, + "step": 1359 + }, + { + "ce_ib": 7.747564792633057, + "ce_orig": 0.8755878210067749, + "epoch": 0.3908260838306133, + "kl_loss": 0.10878226161003113, + "loss_ib": 0.0018625789089128375, + "step": 1359 + }, + { + "ce_ib": 8.582348823547363, + "ce_orig": 1.2040704488754272, + "epoch": 0.3908260838306133, + "kl_loss": 0.09434117376804352, + "loss_ib": 0.0018016466638073325, + "step": 1359 + }, + { + "epoch": 0.3911136674095909, + "grad_norm": 0.08629854023456573, + "learning_rate": 4.870836498319523e-05, + "loss": 0.8496, + "step": 1360 + }, + { + "ce_ib": 5.060675144195557, + "ce_orig": 0.8511158227920532, + "epoch": 0.3911136674095909, + "kl_loss": 0.05880500376224518, + "loss_ib": 0.0010941175278276205, + "step": 1360 + }, + { + "ce_ib": 1.9455045461654663, + "ce_orig": 0.21738861501216888, + "epoch": 0.3911136674095909, + "kl_loss": 0.2701791524887085, + "loss_ib": 0.00289634196087718, + "step": 1360 + }, + { + "ce_ib": 5.370924472808838, + "ce_orig": 1.0326734781265259, + "epoch": 0.3911136674095909, + "kl_loss": 0.08216007053852081, + "loss_ib": 0.0013586931163445115, + "step": 1360 + }, + { + "ce_ib": 6.79683256149292, + "ce_orig": 1.3858628273010254, + "epoch": 0.3911136674095909, + "kl_loss": 0.1067119687795639, + "loss_ib": 0.0017468029400333762, + "step": 1360 + }, + { + "ce_ib": 4.886713981628418, + "ce_orig": 0.43234968185424805, + "epoch": 0.39140125098856854, + "kl_loss": 0.11526788771152496, + "loss_ib": 0.0016413502162322402, + "step": 1361 + }, + { + "ce_ib": 3.7346255779266357, + "ce_orig": 0.4888816475868225, + "epoch": 0.39140125098856854, + "kl_loss": 0.049753397703170776, + "loss_ib": 0.0008709965622983873, + "step": 1361 + }, + { + "ce_ib": 3.8233001232147217, + "ce_orig": 0.5764029026031494, + "epoch": 0.39140125098856854, + "kl_loss": 0.0692904144525528, + "loss_ib": 0.0010752341477200389, + "step": 1361 + }, + { + "ce_ib": 3.1981613636016846, + "ce_orig": 0.5599377751350403, + "epoch": 0.39140125098856854, + "kl_loss": 0.07236847281455994, + "loss_ib": 0.0010435008443892002, + "step": 1361 + }, + { + "ce_ib": 7.6928181648254395, + "ce_orig": 1.2115129232406616, + "epoch": 0.3916888345675462, + "kl_loss": 0.11177465319633484, + "loss_ib": 0.0018870283383876085, + "step": 1362 + }, + { + "ce_ib": 5.256848335266113, + "ce_orig": 0.43086475133895874, + "epoch": 0.3916888345675462, + "kl_loss": 0.09716189652681351, + "loss_ib": 0.0014973038341850042, + "step": 1362 + }, + { + "ce_ib": 5.027642250061035, + "ce_orig": 0.5509925484657288, + "epoch": 0.3916888345675462, + "kl_loss": 0.185089111328125, + "loss_ib": 0.002353655407205224, + "step": 1362 + }, + { + "ce_ib": 6.421619892120361, + "ce_orig": 1.2486196756362915, + "epoch": 0.3916888345675462, + "kl_loss": 0.0833258405327797, + "loss_ib": 0.0014754203148186207, + "step": 1362 + }, + { + "ce_ib": 5.488348007202148, + "ce_orig": 0.9962913990020752, + "epoch": 0.39197641814652384, + "kl_loss": 0.08890173584222794, + "loss_ib": 0.0014378520427271724, + "step": 1363 + }, + { + "ce_ib": 4.584591865539551, + "ce_orig": 0.6659078598022461, + "epoch": 0.39197641814652384, + "kl_loss": 0.07413169741630554, + "loss_ib": 0.0011997760739177465, + "step": 1363 + }, + { + "ce_ib": 6.793995380401611, + "ce_orig": 0.8062725067138672, + "epoch": 0.39197641814652384, + "kl_loss": 0.12296256422996521, + "loss_ib": 0.0019090251298621297, + "step": 1363 + }, + { + "ce_ib": 12.83924674987793, + "ce_orig": 2.1623098850250244, + "epoch": 0.39197641814652384, + "kl_loss": 0.11011422425508499, + "loss_ib": 0.0023850665893405676, + "step": 1363 + }, + { + "ce_ib": 8.598384857177734, + "ce_orig": 1.3614503145217896, + "epoch": 0.39226400172550147, + "kl_loss": 0.13396115601062775, + "loss_ib": 0.0021994500420987606, + "step": 1364 + }, + { + "ce_ib": 6.677243232727051, + "ce_orig": 0.9641056656837463, + "epoch": 0.39226400172550147, + "kl_loss": 0.1300760805606842, + "loss_ib": 0.0019684850703924894, + "step": 1364 + }, + { + "ce_ib": 8.595512390136719, + "ce_orig": 1.4280240535736084, + "epoch": 0.39226400172550147, + "kl_loss": 0.09861013293266296, + "loss_ib": 0.0018456524703651667, + "step": 1364 + }, + { + "ce_ib": 4.735291004180908, + "ce_orig": 0.9403985738754272, + "epoch": 0.39226400172550147, + "kl_loss": 0.08944929391145706, + "loss_ib": 0.001368021941743791, + "step": 1364 + }, + { + "epoch": 0.3925515853044791, + "grad_norm": 0.09735099226236343, + "learning_rate": 4.8696024926503396e-05, + "loss": 0.8484, + "step": 1365 + }, + { + "ce_ib": 3.710118293762207, + "ce_orig": 0.45066648721694946, + "epoch": 0.3925515853044791, + "kl_loss": 0.14439572393894196, + "loss_ib": 0.0018149690004065633, + "step": 1365 + }, + { + "ce_ib": 3.6635892391204834, + "ce_orig": 0.4009067416191101, + "epoch": 0.3925515853044791, + "kl_loss": 0.06153864786028862, + "loss_ib": 0.000981745426543057, + "step": 1365 + }, + { + "ce_ib": 4.0791144371032715, + "ce_orig": 0.5721445083618164, + "epoch": 0.3925515853044791, + "kl_loss": 0.25153499841690063, + "loss_ib": 0.002923261374235153, + "step": 1365 + }, + { + "ce_ib": 3.123744487762451, + "ce_orig": 0.5085943341255188, + "epoch": 0.3925515853044791, + "kl_loss": 0.04723348468542099, + "loss_ib": 0.0007847092929296196, + "step": 1365 + }, + { + "ce_ib": 8.169525146484375, + "ce_orig": 1.6943016052246094, + "epoch": 0.39283916888345677, + "kl_loss": 0.08591040968894958, + "loss_ib": 0.001676056650467217, + "step": 1366 + }, + { + "ce_ib": 8.289563179016113, + "ce_orig": 1.3593651056289673, + "epoch": 0.39283916888345677, + "kl_loss": 0.14579978585243225, + "loss_ib": 0.0022869540844112635, + "step": 1366 + }, + { + "ce_ib": 4.006882190704346, + "ce_orig": 0.5923643112182617, + "epoch": 0.39283916888345677, + "kl_loss": 0.10397645831108093, + "loss_ib": 0.001440452761016786, + "step": 1366 + }, + { + "ce_ib": 6.16815710067749, + "ce_orig": 1.0465580224990845, + "epoch": 0.39283916888345677, + "kl_loss": 0.07608169317245483, + "loss_ib": 0.0013776326086372137, + "step": 1366 + }, + { + "ce_ib": 3.9323885440826416, + "ce_orig": 0.6568378210067749, + "epoch": 0.3931267524624344, + "kl_loss": 0.12474965304136276, + "loss_ib": 0.0016407354269176722, + "step": 1367 + }, + { + "ce_ib": 2.930619955062866, + "ce_orig": 0.5585191249847412, + "epoch": 0.3931267524624344, + "kl_loss": 0.06461334228515625, + "loss_ib": 0.0009391954517923295, + "step": 1367 + }, + { + "ce_ib": 3.9692986011505127, + "ce_orig": 0.5245909690856934, + "epoch": 0.3931267524624344, + "kl_loss": 0.14356094598770142, + "loss_ib": 0.001832539215683937, + "step": 1367 + }, + { + "ce_ib": 4.616350173950195, + "ce_orig": 0.6103743314743042, + "epoch": 0.3931267524624344, + "kl_loss": 0.1619236171245575, + "loss_ib": 0.0020808710251003504, + "step": 1367 + }, + { + "ce_ib": 5.887413501739502, + "ce_orig": 0.722166121006012, + "epoch": 0.393414336041412, + "kl_loss": 0.06748103350400925, + "loss_ib": 0.0012635516468435526, + "step": 1368 + }, + { + "ce_ib": 5.695414066314697, + "ce_orig": 0.6787561774253845, + "epoch": 0.393414336041412, + "kl_loss": 0.1149643212556839, + "loss_ib": 0.0017191844526678324, + "step": 1368 + }, + { + "ce_ib": 5.008102893829346, + "ce_orig": 0.9535930752754211, + "epoch": 0.393414336041412, + "kl_loss": 0.07903116941452026, + "loss_ib": 0.00129112193826586, + "step": 1368 + }, + { + "ce_ib": 3.252168893814087, + "ce_orig": 0.3988747000694275, + "epoch": 0.393414336041412, + "kl_loss": 0.13532951474189758, + "loss_ib": 0.0016785120824351907, + "step": 1368 + }, + { + "ce_ib": 5.28767728805542, + "ce_orig": 0.7203333377838135, + "epoch": 0.3937019196203897, + "kl_loss": 0.13053059577941895, + "loss_ib": 0.001834073569625616, + "step": 1369 + }, + { + "ce_ib": 7.581104278564453, + "ce_orig": 1.2444112300872803, + "epoch": 0.3937019196203897, + "kl_loss": 0.09139028191566467, + "loss_ib": 0.001672013197094202, + "step": 1369 + }, + { + "ce_ib": 3.3103346824645996, + "ce_orig": 0.5839555263519287, + "epoch": 0.3937019196203897, + "kl_loss": 0.05729393661022186, + "loss_ib": 0.0009039728320203722, + "step": 1369 + }, + { + "ce_ib": 3.8431239128112793, + "ce_orig": 0.6491223573684692, + "epoch": 0.3937019196203897, + "kl_loss": 0.09113702923059464, + "loss_ib": 0.001295682624913752, + "step": 1369 + }, + { + "epoch": 0.3939895031993673, + "grad_norm": 0.0974932387471199, + "learning_rate": 4.8683627780587546e-05, + "loss": 0.7966, + "step": 1370 + }, + { + "ce_ib": 4.4268012046813965, + "ce_orig": 0.7763320803642273, + "epoch": 0.3939895031993673, + "kl_loss": 0.11070152372121811, + "loss_ib": 0.001549695385619998, + "step": 1370 + }, + { + "ce_ib": 5.235965728759766, + "ce_orig": 0.7106630206108093, + "epoch": 0.3939895031993673, + "kl_loss": 0.05476393550634384, + "loss_ib": 0.0010712358634918928, + "step": 1370 + }, + { + "ce_ib": 7.041452407836914, + "ce_orig": 1.3121201992034912, + "epoch": 0.3939895031993673, + "kl_loss": 0.08947965502738953, + "loss_ib": 0.0015989416278898716, + "step": 1370 + }, + { + "ce_ib": 9.253966331481934, + "ce_orig": 1.2900464534759521, + "epoch": 0.3939895031993673, + "kl_loss": 0.10270275175571442, + "loss_ib": 0.00195242406334728, + "step": 1370 + }, + { + "ce_ib": 6.555151462554932, + "ce_orig": 0.9583280086517334, + "epoch": 0.39427708677834494, + "kl_loss": 0.14792247116565704, + "loss_ib": 0.0021347396541386843, + "step": 1371 + }, + { + "ce_ib": 5.8640851974487305, + "ce_orig": 1.2533522844314575, + "epoch": 0.39427708677834494, + "kl_loss": 0.12330082803964615, + "loss_ib": 0.0018194166477769613, + "step": 1371 + }, + { + "ce_ib": 5.276181697845459, + "ce_orig": 0.9645226001739502, + "epoch": 0.39427708677834494, + "kl_loss": 0.04578051716089249, + "loss_ib": 0.0009854233358055353, + "step": 1371 + }, + { + "ce_ib": 3.6213691234588623, + "ce_orig": 0.7831363081932068, + "epoch": 0.39427708677834494, + "kl_loss": 0.04129724204540253, + "loss_ib": 0.000775109336245805, + "step": 1371 + }, + { + "ce_ib": 5.293245315551758, + "ce_orig": 1.1399786472320557, + "epoch": 0.3945646703573226, + "kl_loss": 0.11456447839736938, + "loss_ib": 0.0016749693313613534, + "step": 1372 + }, + { + "ce_ib": 4.88385009765625, + "ce_orig": 0.5638843178749084, + "epoch": 0.3945646703573226, + "kl_loss": 0.1422284096479416, + "loss_ib": 0.0019106690306216478, + "step": 1372 + }, + { + "ce_ib": 3.2278645038604736, + "ce_orig": 0.5578948259353638, + "epoch": 0.3945646703573226, + "kl_loss": 0.06911308318376541, + "loss_ib": 0.0010139172663912177, + "step": 1372 + }, + { + "ce_ib": 6.677456855773926, + "ce_orig": 1.0336394309997559, + "epoch": 0.3945646703573226, + "kl_loss": 0.12147732079029083, + "loss_ib": 0.0018825187580659986, + "step": 1372 + }, + { + "ce_ib": 6.061744213104248, + "ce_orig": 0.6529628038406372, + "epoch": 0.39485225393630025, + "kl_loss": 0.09120302647352219, + "loss_ib": 0.0015182045754045248, + "step": 1373 + }, + { + "ce_ib": 7.936959266662598, + "ce_orig": 1.052087664604187, + "epoch": 0.39485225393630025, + "kl_loss": 0.11748115718364716, + "loss_ib": 0.00196850742213428, + "step": 1373 + }, + { + "ce_ib": 4.3993821144104, + "ce_orig": 0.602247953414917, + "epoch": 0.39485225393630025, + "kl_loss": 0.08443524688482285, + "loss_ib": 0.0012842906871810555, + "step": 1373 + }, + { + "ce_ib": 4.7374091148376465, + "ce_orig": 0.9693533182144165, + "epoch": 0.39485225393630025, + "kl_loss": 0.09733524918556213, + "loss_ib": 0.0014470933238044381, + "step": 1373 + }, + { + "ce_ib": 4.107109069824219, + "ce_orig": 0.6020128726959229, + "epoch": 0.39513983751527787, + "kl_loss": 0.09796494990587234, + "loss_ib": 0.0013903604121878743, + "step": 1374 + }, + { + "ce_ib": 4.63660192489624, + "ce_orig": 0.5517060160636902, + "epoch": 0.39513983751527787, + "kl_loss": 0.09080956876277924, + "loss_ib": 0.0013717558467760682, + "step": 1374 + }, + { + "ce_ib": 5.575448989868164, + "ce_orig": 0.6968877911567688, + "epoch": 0.39513983751527787, + "kl_loss": 0.14009396731853485, + "loss_ib": 0.0019584845285862684, + "step": 1374 + }, + { + "ce_ib": 8.282516479492188, + "ce_orig": 1.5205111503601074, + "epoch": 0.39513983751527787, + "kl_loss": 0.1510375738143921, + "loss_ib": 0.0023386271204799414, + "step": 1374 + }, + { + "epoch": 0.3954274210942555, + "grad_norm": 0.07745600491762161, + "learning_rate": 4.867117357531529e-05, + "loss": 0.8143, + "step": 1375 + }, + { + "ce_ib": 8.65802001953125, + "ce_orig": 1.6579588651657104, + "epoch": 0.3954274210942555, + "kl_loss": 0.1071658581495285, + "loss_ib": 0.0019374605035409331, + "step": 1375 + }, + { + "ce_ib": 7.021322727203369, + "ce_orig": 1.399658203125, + "epoch": 0.3954274210942555, + "kl_loss": 0.12391219288110733, + "loss_ib": 0.0019412541296333075, + "step": 1375 + }, + { + "ce_ib": 6.552249908447266, + "ce_orig": 1.0461649894714355, + "epoch": 0.3954274210942555, + "kl_loss": 0.10464093089103699, + "loss_ib": 0.001701634144410491, + "step": 1375 + }, + { + "ce_ib": 4.81926965713501, + "ce_orig": 0.7743741869926453, + "epoch": 0.3954274210942555, + "kl_loss": 0.09038940817117691, + "loss_ib": 0.0013858210295438766, + "step": 1375 + }, + { + "ce_ib": 5.60307502746582, + "ce_orig": 0.7910189628601074, + "epoch": 0.3957150046732332, + "kl_loss": 0.08102002739906311, + "loss_ib": 0.0013705077581107616, + "step": 1376 + }, + { + "ce_ib": 5.879729747772217, + "ce_orig": 0.9454348087310791, + "epoch": 0.3957150046732332, + "kl_loss": 0.10077565163373947, + "loss_ib": 0.001595729379914701, + "step": 1376 + }, + { + "ce_ib": 4.007662296295166, + "ce_orig": 0.909389078617096, + "epoch": 0.3957150046732332, + "kl_loss": 0.07688228785991669, + "loss_ib": 0.001169589115306735, + "step": 1376 + }, + { + "ce_ib": 5.555829048156738, + "ce_orig": 0.8984038233757019, + "epoch": 0.3957150046732332, + "kl_loss": 0.11498790234327316, + "loss_ib": 0.0017054618801921606, + "step": 1376 + }, + { + "ce_ib": 6.684864044189453, + "ce_orig": 1.4516613483428955, + "epoch": 0.3960025882522108, + "kl_loss": 0.08933814615011215, + "loss_ib": 0.0015618678880855441, + "step": 1377 + }, + { + "ce_ib": 7.712717533111572, + "ce_orig": 0.7735072374343872, + "epoch": 0.3960025882522108, + "kl_loss": 0.130492702126503, + "loss_ib": 0.002076198812574148, + "step": 1377 + }, + { + "ce_ib": 4.936470985412598, + "ce_orig": 0.5509382486343384, + "epoch": 0.3960025882522108, + "kl_loss": 0.10004114359617233, + "loss_ib": 0.0014940585242584348, + "step": 1377 + }, + { + "ce_ib": 3.2147631645202637, + "ce_orig": 0.4478761851787567, + "epoch": 0.3960025882522108, + "kl_loss": 0.092786505818367, + "loss_ib": 0.001249341294169426, + "step": 1377 + }, + { + "ce_ib": 5.846179962158203, + "ce_orig": 0.7358298897743225, + "epoch": 0.3962901718311884, + "kl_loss": 0.10214491933584213, + "loss_ib": 0.0016060670604929328, + "step": 1378 + }, + { + "ce_ib": 4.022353649139404, + "ce_orig": 0.6607013940811157, + "epoch": 0.3962901718311884, + "kl_loss": 0.0745079442858696, + "loss_ib": 0.0011473146732896566, + "step": 1378 + }, + { + "ce_ib": 5.024077892303467, + "ce_orig": 0.9707644581794739, + "epoch": 0.3962901718311884, + "kl_loss": 0.08207986503839493, + "loss_ib": 0.0013232063502073288, + "step": 1378 + }, + { + "ce_ib": 8.740952491760254, + "ce_orig": 1.6276335716247559, + "epoch": 0.3962901718311884, + "kl_loss": 0.10734099894762039, + "loss_ib": 0.0019475051667541265, + "step": 1378 + }, + { + "ce_ib": 7.33891487121582, + "ce_orig": 1.0861576795578003, + "epoch": 0.3965777554101661, + "kl_loss": 0.11862307786941528, + "loss_ib": 0.0019201221875846386, + "step": 1379 + }, + { + "ce_ib": 5.514404773712158, + "ce_orig": 0.8761352896690369, + "epoch": 0.3965777554101661, + "kl_loss": 0.10926657915115356, + "loss_ib": 0.0016441061161458492, + "step": 1379 + }, + { + "ce_ib": 2.858499765396118, + "ce_orig": 0.1799149066209793, + "epoch": 0.3965777554101661, + "kl_loss": 0.48103827238082886, + "loss_ib": 0.005096232984215021, + "step": 1379 + }, + { + "ce_ib": 6.853128433227539, + "ce_orig": 1.1733317375183105, + "epoch": 0.3965777554101661, + "kl_loss": 0.12556175887584686, + "loss_ib": 0.0019409304950386286, + "step": 1379 + }, + { + "epoch": 0.3968653389891437, + "grad_norm": 0.0979207307100296, + "learning_rate": 4.865866234069169e-05, + "loss": 0.9198, + "step": 1380 + }, + { + "ce_ib": 5.861191272735596, + "ce_orig": 0.8538046479225159, + "epoch": 0.3968653389891437, + "kl_loss": 0.16768991947174072, + "loss_ib": 0.0022630183957517147, + "step": 1380 + }, + { + "ce_ib": 4.218904972076416, + "ce_orig": 0.6871565580368042, + "epoch": 0.3968653389891437, + "kl_loss": 0.1207076832652092, + "loss_ib": 0.0016289673512801528, + "step": 1380 + }, + { + "ce_ib": 4.815241813659668, + "ce_orig": 0.8256959915161133, + "epoch": 0.3968653389891437, + "kl_loss": 0.08690144121646881, + "loss_ib": 0.0013505385722965002, + "step": 1380 + }, + { + "ce_ib": 5.396557807922363, + "ce_orig": 0.6450198292732239, + "epoch": 0.3968653389891437, + "kl_loss": 0.08772458136081696, + "loss_ib": 0.0014169015921652317, + "step": 1380 + }, + { + "ce_ib": 7.949460983276367, + "ce_orig": 1.2881579399108887, + "epoch": 0.39715292256812135, + "kl_loss": 0.11858942359685898, + "loss_ib": 0.001980840228497982, + "step": 1381 + }, + { + "ce_ib": 9.407849311828613, + "ce_orig": 1.7857203483581543, + "epoch": 0.39715292256812135, + "kl_loss": 0.11156527698040009, + "loss_ib": 0.0020564377773553133, + "step": 1381 + }, + { + "ce_ib": 4.317052841186523, + "ce_orig": 0.4395979344844818, + "epoch": 0.39715292256812135, + "kl_loss": 0.12274263799190521, + "loss_ib": 0.001659131608903408, + "step": 1381 + }, + { + "ce_ib": 4.266213893890381, + "ce_orig": 0.46749892830848694, + "epoch": 0.39715292256812135, + "kl_loss": 0.08208754658699036, + "loss_ib": 0.0012474968098104, + "step": 1381 + }, + { + "ce_ib": 3.835118293762207, + "ce_orig": 0.36215564608573914, + "epoch": 0.397440506147099, + "kl_loss": 0.1314554661512375, + "loss_ib": 0.001698066364042461, + "step": 1382 + }, + { + "ce_ib": 7.1647233963012695, + "ce_orig": 0.9199727773666382, + "epoch": 0.397440506147099, + "kl_loss": 0.07061807811260223, + "loss_ib": 0.0014226532075554132, + "step": 1382 + }, + { + "ce_ib": 6.599220275878906, + "ce_orig": 0.9260650277137756, + "epoch": 0.397440506147099, + "kl_loss": 0.08752088993787766, + "loss_ib": 0.0015351308975368738, + "step": 1382 + }, + { + "ce_ib": 6.4383392333984375, + "ce_orig": 0.8768721222877502, + "epoch": 0.397440506147099, + "kl_loss": 0.13634315133094788, + "loss_ib": 0.002007265342399478, + "step": 1382 + }, + { + "ce_ib": 5.870787143707275, + "ce_orig": 1.382786750793457, + "epoch": 0.39772808972607665, + "kl_loss": 0.17133145034313202, + "loss_ib": 0.0023003933019936085, + "step": 1383 + }, + { + "ce_ib": 2.237919569015503, + "ce_orig": 0.5899192094802856, + "epoch": 0.39772808972607665, + "kl_loss": 0.3677208423614502, + "loss_ib": 0.0039010001346468925, + "step": 1383 + }, + { + "ce_ib": 10.419187545776367, + "ce_orig": 1.5305267572402954, + "epoch": 0.39772808972607665, + "kl_loss": 0.13531950116157532, + "loss_ib": 0.0023951136972755194, + "step": 1383 + }, + { + "ce_ib": 4.558192729949951, + "ce_orig": 0.5665358304977417, + "epoch": 0.39772808972607665, + "kl_loss": 0.0764642208814621, + "loss_ib": 0.0012204614467918873, + "step": 1383 + }, + { + "ce_ib": 7.549960136413574, + "ce_orig": 1.4197278022766113, + "epoch": 0.3980156733050543, + "kl_loss": 0.110089972615242, + "loss_ib": 0.0018558957381173968, + "step": 1384 + }, + { + "ce_ib": 4.553320407867432, + "ce_orig": 0.8299973011016846, + "epoch": 0.3980156733050543, + "kl_loss": 0.07558947056531906, + "loss_ib": 0.0012112266849726439, + "step": 1384 + }, + { + "ce_ib": 6.357949733734131, + "ce_orig": 1.0268474817276, + "epoch": 0.3980156733050543, + "kl_loss": 0.1523449420928955, + "loss_ib": 0.002159244380891323, + "step": 1384 + }, + { + "ce_ib": 4.918845176696777, + "ce_orig": 0.48912161588668823, + "epoch": 0.3980156733050543, + "kl_loss": 0.1415548324584961, + "loss_ib": 0.0019074328010901809, + "step": 1384 + }, + { + "epoch": 0.3983032568840319, + "grad_norm": 0.09846891462802887, + "learning_rate": 4.864609410685922e-05, + "loss": 0.8625, + "step": 1385 + }, + { + "ce_ib": 5.032691478729248, + "ce_orig": 0.9307861328125, + "epoch": 0.3983032568840319, + "kl_loss": 0.10518186539411545, + "loss_ib": 0.0015550878597423434, + "step": 1385 + }, + { + "ce_ib": 7.9908952713012695, + "ce_orig": 1.7475234270095825, + "epoch": 0.3983032568840319, + "kl_loss": 0.09814848750829697, + "loss_ib": 0.001780574326403439, + "step": 1385 + }, + { + "ce_ib": 9.869513511657715, + "ce_orig": 1.627759575843811, + "epoch": 0.3983032568840319, + "kl_loss": 0.10387978702783585, + "loss_ib": 0.0020257493015378714, + "step": 1385 + }, + { + "ce_ib": 5.364109992980957, + "ce_orig": 0.8500359654426575, + "epoch": 0.3983032568840319, + "kl_loss": 0.1431843340396881, + "loss_ib": 0.0019682543352246284, + "step": 1385 + }, + { + "ce_ib": 6.705972194671631, + "ce_orig": 1.0989729166030884, + "epoch": 0.3985908404630096, + "kl_loss": 0.12062980979681015, + "loss_ib": 0.0018768951995298266, + "step": 1386 + }, + { + "ce_ib": 4.8373613357543945, + "ce_orig": 0.650296688079834, + "epoch": 0.3985908404630096, + "kl_loss": 0.10792292654514313, + "loss_ib": 0.0015629653353244066, + "step": 1386 + }, + { + "ce_ib": 5.477198123931885, + "ce_orig": 0.9474713206291199, + "epoch": 0.3985908404630096, + "kl_loss": 0.12362131476402283, + "loss_ib": 0.0017839329084381461, + "step": 1386 + }, + { + "ce_ib": 6.538940906524658, + "ce_orig": 1.1148537397384644, + "epoch": 0.3985908404630096, + "kl_loss": 0.07181133329868317, + "loss_ib": 0.0013720074202865362, + "step": 1386 + }, + { + "ce_ib": 4.820619583129883, + "ce_orig": 0.3477015793323517, + "epoch": 0.3988784240419872, + "kl_loss": 0.10620693862438202, + "loss_ib": 0.0015441313153132796, + "step": 1387 + }, + { + "ce_ib": 3.6405653953552246, + "ce_orig": 0.7365078926086426, + "epoch": 0.3988784240419872, + "kl_loss": 0.07146044075489044, + "loss_ib": 0.0010786609491333365, + "step": 1387 + }, + { + "ce_ib": 6.429567813873291, + "ce_orig": 1.3540785312652588, + "epoch": 0.3988784240419872, + "kl_loss": 0.08094966411590576, + "loss_ib": 0.0014524534344673157, + "step": 1387 + }, + { + "ce_ib": 4.190028667449951, + "ce_orig": 0.5769890546798706, + "epoch": 0.3988784240419872, + "kl_loss": 0.11371330171823502, + "loss_ib": 0.0015561358304694295, + "step": 1387 + }, + { + "ce_ib": 7.87295389175415, + "ce_orig": 1.2537925243377686, + "epoch": 0.3991660076209648, + "kl_loss": 0.16727596521377563, + "loss_ib": 0.0024600550532341003, + "step": 1388 + }, + { + "ce_ib": 6.019086837768555, + "ce_orig": 1.0837427377700806, + "epoch": 0.3991660076209648, + "kl_loss": 0.09157024323940277, + "loss_ib": 0.001517611090093851, + "step": 1388 + }, + { + "ce_ib": 5.586955547332764, + "ce_orig": 0.8558295369148254, + "epoch": 0.3991660076209648, + "kl_loss": 0.1290510892868042, + "loss_ib": 0.0018492063973098993, + "step": 1388 + }, + { + "ce_ib": 5.672644138336182, + "ce_orig": 0.9201558232307434, + "epoch": 0.3991660076209648, + "kl_loss": 0.12830939888954163, + "loss_ib": 0.0018503583269193769, + "step": 1388 + }, + { + "ce_ib": 4.650105953216553, + "ce_orig": 0.7557408809661865, + "epoch": 0.3994535911999425, + "kl_loss": 0.12210938334465027, + "loss_ib": 0.0016861043404787779, + "step": 1389 + }, + { + "ce_ib": 6.44442892074585, + "ce_orig": 1.0067079067230225, + "epoch": 0.3994535911999425, + "kl_loss": 0.10133783519268036, + "loss_ib": 0.001657821238040924, + "step": 1389 + }, + { + "ce_ib": 7.7318902015686035, + "ce_orig": 1.4324907064437866, + "epoch": 0.3994535911999425, + "kl_loss": 0.08357816934585571, + "loss_ib": 0.0016089706914499402, + "step": 1389 + }, + { + "ce_ib": 7.588091850280762, + "ce_orig": 1.3219072818756104, + "epoch": 0.3994535911999425, + "kl_loss": 0.08463755249977112, + "loss_ib": 0.0016051846323534846, + "step": 1389 + }, + { + "epoch": 0.3997411747789201, + "grad_norm": 0.11438465863466263, + "learning_rate": 4.863346890409767e-05, + "loss": 0.9591, + "step": 1390 + }, + { + "ce_ib": 5.451303005218506, + "ce_orig": 0.9760143160820007, + "epoch": 0.3997411747789201, + "kl_loss": 0.11309187114238739, + "loss_ib": 0.0016760488506406546, + "step": 1390 + }, + { + "ce_ib": 9.699860572814941, + "ce_orig": 1.3062336444854736, + "epoch": 0.3997411747789201, + "kl_loss": 0.11411392688751221, + "loss_ib": 0.002111125271767378, + "step": 1390 + }, + { + "ce_ib": 4.81472110748291, + "ce_orig": 0.6785033345222473, + "epoch": 0.3997411747789201, + "kl_loss": 0.07530829310417175, + "loss_ib": 0.0012345550348982215, + "step": 1390 + }, + { + "ce_ib": 4.894866466522217, + "ce_orig": 0.5927574634552002, + "epoch": 0.3997411747789201, + "kl_loss": 0.13816197216510773, + "loss_ib": 0.0018711063312366605, + "step": 1390 + }, + { + "ce_ib": 3.740232467651367, + "ce_orig": 0.7145044803619385, + "epoch": 0.40002875835789775, + "kl_loss": 0.07702134549617767, + "loss_ib": 0.0011442366521805525, + "step": 1391 + }, + { + "ce_ib": 6.7156219482421875, + "ce_orig": 1.1468040943145752, + "epoch": 0.40002875835789775, + "kl_loss": 0.07577596604824066, + "loss_ib": 0.0014293217100203037, + "step": 1391 + }, + { + "ce_ib": 4.911206245422363, + "ce_orig": 0.6374475359916687, + "epoch": 0.40002875835789775, + "kl_loss": 0.11240187287330627, + "loss_ib": 0.0016151393065229058, + "step": 1391 + }, + { + "ce_ib": 4.137016773223877, + "ce_orig": 0.5586443543434143, + "epoch": 0.40002875835789775, + "kl_loss": 0.09084071218967438, + "loss_ib": 0.0013221087865531445, + "step": 1391 + }, + { + "ce_ib": 7.145791530609131, + "ce_orig": 0.7220593690872192, + "epoch": 0.40031634193687543, + "kl_loss": 0.18535080552101135, + "loss_ib": 0.0025680873077362776, + "step": 1392 + }, + { + "ce_ib": 8.202098846435547, + "ce_orig": 0.8149229884147644, + "epoch": 0.40031634193687543, + "kl_loss": 0.1332991123199463, + "loss_ib": 0.0021532007958739996, + "step": 1392 + }, + { + "ce_ib": 10.058478355407715, + "ce_orig": 1.2291303873062134, + "epoch": 0.40031634193687543, + "kl_loss": 0.10170267522335052, + "loss_ib": 0.002022874541580677, + "step": 1392 + }, + { + "ce_ib": 6.339056015014648, + "ce_orig": 0.8793673515319824, + "epoch": 0.40031634193687543, + "kl_loss": 0.10560115426778793, + "loss_ib": 0.0016899170586839318, + "step": 1392 + }, + { + "ce_ib": 6.754205226898193, + "ce_orig": 0.8358240723609924, + "epoch": 0.40060392551585305, + "kl_loss": 0.08306322246789932, + "loss_ib": 0.001506052678450942, + "step": 1393 + }, + { + "ce_ib": 3.8460471630096436, + "ce_orig": 0.5023829340934753, + "epoch": 0.40060392551585305, + "kl_loss": 0.12775641679763794, + "loss_ib": 0.001662168768234551, + "step": 1393 + }, + { + "ce_ib": 5.579369068145752, + "ce_orig": 0.6354689598083496, + "epoch": 0.40060392551585305, + "kl_loss": 0.22461557388305664, + "loss_ib": 0.0028040925972163677, + "step": 1393 + }, + { + "ce_ib": 3.25400972366333, + "ce_orig": 0.5061582326889038, + "epoch": 0.40060392551585305, + "kl_loss": 0.23202760517597198, + "loss_ib": 0.0026456769555807114, + "step": 1393 + }, + { + "ce_ib": 5.32595682144165, + "ce_orig": 1.127187967300415, + "epoch": 0.4008915090948307, + "kl_loss": 0.12879681587219238, + "loss_ib": 0.001820563804358244, + "step": 1394 + }, + { + "ce_ib": 6.9711737632751465, + "ce_orig": 1.1765919923782349, + "epoch": 0.4008915090948307, + "kl_loss": 0.0906248465180397, + "loss_ib": 0.0016033657593652606, + "step": 1394 + }, + { + "ce_ib": 4.378665447235107, + "ce_orig": 0.5809182524681091, + "epoch": 0.4008915090948307, + "kl_loss": 0.11743704974651337, + "loss_ib": 0.0016122370725497603, + "step": 1394 + }, + { + "ce_ib": 5.15740442276001, + "ce_orig": 0.9412955641746521, + "epoch": 0.4008915090948307, + "kl_loss": 0.080172598361969, + "loss_ib": 0.0013174664927646518, + "step": 1394 + }, + { + "epoch": 0.4011790926738083, + "grad_norm": 0.09315615147352219, + "learning_rate": 4.862078676282409e-05, + "loss": 0.8388, + "step": 1395 + }, + { + "ce_ib": 8.188400268554688, + "ce_orig": 1.2357627153396606, + "epoch": 0.4011790926738083, + "kl_loss": 0.1002885177731514, + "loss_ib": 0.0018217251636087894, + "step": 1395 + }, + { + "ce_ib": 6.079811096191406, + "ce_orig": 1.1144137382507324, + "epoch": 0.4011790926738083, + "kl_loss": 0.07685106992721558, + "loss_ib": 0.0013764917384833097, + "step": 1395 + }, + { + "ce_ib": 7.778738021850586, + "ce_orig": 1.4507184028625488, + "epoch": 0.4011790926738083, + "kl_loss": 0.12209288775920868, + "loss_ib": 0.0019988026469945908, + "step": 1395 + }, + { + "ce_ib": 7.983975887298584, + "ce_orig": 1.0864462852478027, + "epoch": 0.4011790926738083, + "kl_loss": 0.11116814613342285, + "loss_ib": 0.001910079037770629, + "step": 1395 + }, + { + "ce_ib": 9.038493156433105, + "ce_orig": 1.6814976930618286, + "epoch": 0.401466676252786, + "kl_loss": 0.12096206843852997, + "loss_ib": 0.0021134698763489723, + "step": 1396 + }, + { + "ce_ib": 4.606503963470459, + "ce_orig": 0.38909873366355896, + "epoch": 0.401466676252786, + "kl_loss": 0.13731586933135986, + "loss_ib": 0.0018338089575991035, + "step": 1396 + }, + { + "ce_ib": 4.8945770263671875, + "ce_orig": 0.6191275119781494, + "epoch": 0.401466676252786, + "kl_loss": 0.0936412438750267, + "loss_ib": 0.0014258699957281351, + "step": 1396 + }, + { + "ce_ib": 7.423152923583984, + "ce_orig": 0.8506887555122375, + "epoch": 0.401466676252786, + "kl_loss": 0.14094209671020508, + "loss_ib": 0.0021517362911254168, + "step": 1396 + }, + { + "ce_ib": 9.513692855834961, + "ce_orig": 1.5091493129730225, + "epoch": 0.4017542598317636, + "kl_loss": 0.09959740936756134, + "loss_ib": 0.0019473433494567871, + "step": 1397 + }, + { + "ce_ib": 6.524710178375244, + "ce_orig": 0.8636010885238647, + "epoch": 0.4017542598317636, + "kl_loss": 0.13173584640026093, + "loss_ib": 0.001969829434528947, + "step": 1397 + }, + { + "ce_ib": 4.396295070648193, + "ce_orig": 0.619438648223877, + "epoch": 0.4017542598317636, + "kl_loss": 0.09558691829442978, + "loss_ib": 0.0013954986352473497, + "step": 1397 + }, + { + "ce_ib": 4.203965663909912, + "ce_orig": 0.7036296129226685, + "epoch": 0.4017542598317636, + "kl_loss": 0.07829403877258301, + "loss_ib": 0.001203336869366467, + "step": 1397 + }, + { + "ce_ib": 5.514439105987549, + "ce_orig": 0.520076334476471, + "epoch": 0.40204184341074123, + "kl_loss": 0.08803772926330566, + "loss_ib": 0.001431821146979928, + "step": 1398 + }, + { + "ce_ib": 1.4410609006881714, + "ce_orig": 0.13030670583248138, + "epoch": 0.40204184341074123, + "kl_loss": 0.16449351608753204, + "loss_ib": 0.001789041212759912, + "step": 1398 + }, + { + "ce_ib": 5.51670503616333, + "ce_orig": 0.3515918254852295, + "epoch": 0.40204184341074123, + "kl_loss": 0.19496026635169983, + "loss_ib": 0.002501273062080145, + "step": 1398 + }, + { + "ce_ib": 5.9502973556518555, + "ce_orig": 1.3472964763641357, + "epoch": 0.40204184341074123, + "kl_loss": 0.08377566188573837, + "loss_ib": 0.0014327862299978733, + "step": 1398 + }, + { + "ce_ib": 5.6048688888549805, + "ce_orig": 1.193996787071228, + "epoch": 0.4023294269897189, + "kl_loss": 0.0680387020111084, + "loss_ib": 0.0012408739421516657, + "step": 1399 + }, + { + "ce_ib": 7.806824684143066, + "ce_orig": 0.9458127021789551, + "epoch": 0.4023294269897189, + "kl_loss": 0.13807079195976257, + "loss_ib": 0.002161390380933881, + "step": 1399 + }, + { + "ce_ib": 4.314865589141846, + "ce_orig": 0.6677300930023193, + "epoch": 0.4023294269897189, + "kl_loss": 0.1180049255490303, + "loss_ib": 0.0016115357866510749, + "step": 1399 + }, + { + "ce_ib": 8.716057777404785, + "ce_orig": 1.3581658601760864, + "epoch": 0.4023294269897189, + "kl_loss": 0.09681472927331924, + "loss_ib": 0.001839753007516265, + "step": 1399 + }, + { + "epoch": 0.40261701056869653, + "grad_norm": 0.09289965033531189, + "learning_rate": 4.86080477135927e-05, + "loss": 0.9395, + "step": 1400 + }, + { + "ce_ib": 5.28957462310791, + "ce_orig": 0.6641263365745544, + "epoch": 0.40261701056869653, + "kl_loss": 0.1141863688826561, + "loss_ib": 0.0016708211041986942, + "step": 1400 + }, + { + "ce_ib": 5.174437046051025, + "ce_orig": 0.3216610848903656, + "epoch": 0.40261701056869653, + "kl_loss": 0.14265108108520508, + "loss_ib": 0.001943954499438405, + "step": 1400 + }, + { + "ce_ib": 4.932038307189941, + "ce_orig": 0.6975332498550415, + "epoch": 0.40261701056869653, + "kl_loss": 0.08903782814741135, + "loss_ib": 0.0013835820136591792, + "step": 1400 + }, + { + "ce_ib": 6.784277439117432, + "ce_orig": 1.5330122709274292, + "epoch": 0.40261701056869653, + "kl_loss": 0.08088655024766922, + "loss_ib": 0.0014872931642457843, + "step": 1400 + }, + { + "ce_ib": 5.346994876861572, + "ce_orig": 1.0070792436599731, + "epoch": 0.40290459414767416, + "kl_loss": 0.05356618016958237, + "loss_ib": 0.001070361235179007, + "step": 1401 + }, + { + "ce_ib": 1.9412345886230469, + "ce_orig": 0.25424134731292725, + "epoch": 0.40290459414767416, + "kl_loss": 0.1259712427854538, + "loss_ib": 0.0014538359828293324, + "step": 1401 + }, + { + "ce_ib": 4.817016124725342, + "ce_orig": 0.9583339095115662, + "epoch": 0.40290459414767416, + "kl_loss": 0.06185237318277359, + "loss_ib": 0.0011002252576872706, + "step": 1401 + }, + { + "ce_ib": 5.685846328735352, + "ce_orig": 0.7392550706863403, + "epoch": 0.40290459414767416, + "kl_loss": 0.07985693216323853, + "loss_ib": 0.0013671539491042495, + "step": 1401 + }, + { + "ce_ib": 7.376675605773926, + "ce_orig": 0.9400675892829895, + "epoch": 0.40319217772665183, + "kl_loss": 0.10595303773880005, + "loss_ib": 0.0017971978522837162, + "step": 1402 + }, + { + "ce_ib": 4.847853183746338, + "ce_orig": 0.5265023708343506, + "epoch": 0.40319217772665183, + "kl_loss": 0.08615823090076447, + "loss_ib": 0.001346367527730763, + "step": 1402 + }, + { + "ce_ib": 6.800249099731445, + "ce_orig": 0.6501719951629639, + "epoch": 0.40319217772665183, + "kl_loss": 0.12006954848766327, + "loss_ib": 0.001880720374174416, + "step": 1402 + }, + { + "ce_ib": 2.961667060852051, + "ce_orig": 0.4114344120025635, + "epoch": 0.40319217772665183, + "kl_loss": 0.13707970082759857, + "loss_ib": 0.0016669636825099587, + "step": 1402 + }, + { + "ce_ib": 10.904898643493652, + "ce_orig": 2.0329442024230957, + "epoch": 0.40347976130562946, + "kl_loss": 0.11201053857803345, + "loss_ib": 0.002210595179349184, + "step": 1403 + }, + { + "ce_ib": 4.756246566772461, + "ce_orig": 0.6134840846061707, + "epoch": 0.40347976130562946, + "kl_loss": 0.08142095059156418, + "loss_ib": 0.0012898340355604887, + "step": 1403 + }, + { + "ce_ib": 6.535433292388916, + "ce_orig": 0.9841384291648865, + "epoch": 0.40347976130562946, + "kl_loss": 0.08830951154232025, + "loss_ib": 0.0015366383595392108, + "step": 1403 + }, + { + "ce_ib": 3.6593120098114014, + "ce_orig": 0.5448381304740906, + "epoch": 0.40347976130562946, + "kl_loss": 0.11145228147506714, + "loss_ib": 0.0014804539969190955, + "step": 1403 + }, + { + "ce_ib": 4.8199782371521, + "ce_orig": 0.7326803207397461, + "epoch": 0.4037673448846071, + "kl_loss": 0.12226551026105881, + "loss_ib": 0.0017046529101207852, + "step": 1404 + }, + { + "ce_ib": 6.233541965484619, + "ce_orig": 0.5725308060646057, + "epoch": 0.4037673448846071, + "kl_loss": 0.1417505443096161, + "loss_ib": 0.00204085954464972, + "step": 1404 + }, + { + "ce_ib": 9.850833892822266, + "ce_orig": 1.7082631587982178, + "epoch": 0.4037673448846071, + "kl_loss": 0.12727659940719604, + "loss_ib": 0.002257849322631955, + "step": 1404 + }, + { + "ce_ib": 6.463018894195557, + "ce_orig": 0.44856294989585876, + "epoch": 0.4037673448846071, + "kl_loss": 0.09182668477296829, + "loss_ib": 0.001564568723551929, + "step": 1404 + }, + { + "epoch": 0.4040549284635847, + "grad_norm": 0.08630051463842392, + "learning_rate": 4.859525178709481e-05, + "loss": 0.877, + "step": 1405 + }, + { + "ce_ib": 5.258861064910889, + "ce_orig": 0.7958999872207642, + "epoch": 0.4040549284635847, + "kl_loss": 0.09843520820140839, + "loss_ib": 0.0015102381585165858, + "step": 1405 + }, + { + "ce_ib": 5.847157955169678, + "ce_orig": 0.846228301525116, + "epoch": 0.4040549284635847, + "kl_loss": 0.09086570888757706, + "loss_ib": 0.0014933728380128741, + "step": 1405 + }, + { + "ce_ib": 8.750494956970215, + "ce_orig": 1.657193899154663, + "epoch": 0.4040549284635847, + "kl_loss": 0.21177685260772705, + "loss_ib": 0.002992817899212241, + "step": 1405 + }, + { + "ce_ib": 5.047918796539307, + "ce_orig": 0.783190906047821, + "epoch": 0.4040549284635847, + "kl_loss": 0.10703468322753906, + "loss_ib": 0.0015751386526972055, + "step": 1405 + }, + { + "ce_ib": 7.651825428009033, + "ce_orig": 0.9800135493278503, + "epoch": 0.4043425120425624, + "kl_loss": 0.13863235712051392, + "loss_ib": 0.002151506021618843, + "step": 1406 + }, + { + "ce_ib": 4.351835250854492, + "ce_orig": 0.6803432106971741, + "epoch": 0.4043425120425624, + "kl_loss": 0.10353352129459381, + "loss_ib": 0.0014705186476930976, + "step": 1406 + }, + { + "ce_ib": 9.203539848327637, + "ce_orig": 1.6314523220062256, + "epoch": 0.4043425120425624, + "kl_loss": 0.10844020545482635, + "loss_ib": 0.0020047558937221766, + "step": 1406 + }, + { + "ce_ib": 5.073543071746826, + "ce_orig": 0.6256998777389526, + "epoch": 0.4043425120425624, + "kl_loss": 0.12684963643550873, + "loss_ib": 0.0017758506583049893, + "step": 1406 + }, + { + "ce_ib": 2.1065125465393066, + "ce_orig": 0.45548203587532043, + "epoch": 0.40463009562154, + "kl_loss": 0.047039005905389786, + "loss_ib": 0.0006810413324274123, + "step": 1407 + }, + { + "ce_ib": 7.553897857666016, + "ce_orig": 0.9205471873283386, + "epoch": 0.40463009562154, + "kl_loss": 0.17443448305130005, + "loss_ib": 0.00249973451718688, + "step": 1407 + }, + { + "ce_ib": 6.973599433898926, + "ce_orig": 0.46554118394851685, + "epoch": 0.40463009562154, + "kl_loss": 0.15451180934906006, + "loss_ib": 0.0022424780763685703, + "step": 1407 + }, + { + "ce_ib": 7.436211109161377, + "ce_orig": 1.1756176948547363, + "epoch": 0.40463009562154, + "kl_loss": 0.10663871467113495, + "loss_ib": 0.0018100081942975521, + "step": 1407 + }, + { + "ce_ib": 6.034943580627441, + "ce_orig": 0.30098381638526917, + "epoch": 0.40491767920051763, + "kl_loss": 0.15259245038032532, + "loss_ib": 0.0021294187754392624, + "step": 1408 + }, + { + "ce_ib": 7.934663772583008, + "ce_orig": 0.7295154929161072, + "epoch": 0.40491767920051763, + "kl_loss": 0.12445910274982452, + "loss_ib": 0.002038057427853346, + "step": 1408 + }, + { + "ce_ib": 7.308053493499756, + "ce_orig": 1.1165850162506104, + "epoch": 0.40491767920051763, + "kl_loss": 0.10732346028089523, + "loss_ib": 0.0018040399299934506, + "step": 1408 + }, + { + "ce_ib": 6.947267532348633, + "ce_orig": 0.9952074885368347, + "epoch": 0.40491767920051763, + "kl_loss": 0.12138228863477707, + "loss_ib": 0.0019085495732724667, + "step": 1408 + }, + { + "ce_ib": 5.599508285522461, + "ce_orig": 0.9172304272651672, + "epoch": 0.4052052627794953, + "kl_loss": 0.11137841641902924, + "loss_ib": 0.0016737348632887006, + "step": 1409 + }, + { + "ce_ib": 8.022232055664062, + "ce_orig": 1.4531203508377075, + "epoch": 0.4052052627794953, + "kl_loss": 0.12734998762607574, + "loss_ib": 0.0020757231395691633, + "step": 1409 + }, + { + "ce_ib": 7.1103196144104, + "ce_orig": 1.23030686378479, + "epoch": 0.4052052627794953, + "kl_loss": 0.09444105625152588, + "loss_ib": 0.0016554424073547125, + "step": 1409 + }, + { + "ce_ib": 5.490854263305664, + "ce_orig": 0.6522265672683716, + "epoch": 0.4052052627794953, + "kl_loss": 0.06643573939800262, + "loss_ib": 0.0012134427670389414, + "step": 1409 + }, + { + "epoch": 0.40549284635847294, + "grad_norm": 0.0967244878411293, + "learning_rate": 4.8582399014158794e-05, + "loss": 0.8162, + "step": 1410 + }, + { + "ce_ib": 5.121457576751709, + "ce_orig": 0.7949561476707458, + "epoch": 0.40549284635847294, + "kl_loss": 0.12705135345458984, + "loss_ib": 0.0017826592084020376, + "step": 1410 + }, + { + "ce_ib": 4.26038122177124, + "ce_orig": 0.7149118781089783, + "epoch": 0.40549284635847294, + "kl_loss": 0.10282117128372192, + "loss_ib": 0.001454249839298427, + "step": 1410 + }, + { + "ce_ib": 4.0952372550964355, + "ce_orig": 0.8095517158508301, + "epoch": 0.40549284635847294, + "kl_loss": 0.10190194100141525, + "loss_ib": 0.0014285431243479252, + "step": 1410 + }, + { + "ce_ib": 6.364243507385254, + "ce_orig": 0.6983405947685242, + "epoch": 0.40549284635847294, + "kl_loss": 0.09122467041015625, + "loss_ib": 0.0015486710472032428, + "step": 1410 + }, + { + "ce_ib": 6.581660747528076, + "ce_orig": 0.8682206869125366, + "epoch": 0.40578042993745056, + "kl_loss": 0.1153961569070816, + "loss_ib": 0.0018121275352314115, + "step": 1411 + }, + { + "ce_ib": 8.94925594329834, + "ce_orig": 1.347611665725708, + "epoch": 0.40578042993745056, + "kl_loss": 0.17726653814315796, + "loss_ib": 0.0026675909757614136, + "step": 1411 + }, + { + "ce_ib": 3.1764252185821533, + "ce_orig": 0.5812899470329285, + "epoch": 0.40578042993745056, + "kl_loss": 0.09140671789646149, + "loss_ib": 0.0012317097280174494, + "step": 1411 + }, + { + "ce_ib": 5.011621952056885, + "ce_orig": 0.7421811819076538, + "epoch": 0.40578042993745056, + "kl_loss": 0.08518759906291962, + "loss_ib": 0.0013530382420867682, + "step": 1411 + }, + { + "ce_ib": 5.735240459442139, + "ce_orig": 0.9989059567451477, + "epoch": 0.40606801351642824, + "kl_loss": 0.08036477863788605, + "loss_ib": 0.0013771718367934227, + "step": 1412 + }, + { + "ce_ib": 6.586531639099121, + "ce_orig": 0.8133410811424255, + "epoch": 0.40606801351642824, + "kl_loss": 0.09770262241363525, + "loss_ib": 0.0016356792766600847, + "step": 1412 + }, + { + "ce_ib": 5.184510707855225, + "ce_orig": 0.8797780871391296, + "epoch": 0.40606801351642824, + "kl_loss": 0.10436460375785828, + "loss_ib": 0.0015620969934388995, + "step": 1412 + }, + { + "ce_ib": 5.362227916717529, + "ce_orig": 1.1667356491088867, + "epoch": 0.40606801351642824, + "kl_loss": 0.09264987707138062, + "loss_ib": 0.001462721498683095, + "step": 1412 + }, + { + "ce_ib": 2.8289527893066406, + "ce_orig": 0.6173007488250732, + "epoch": 0.40635559709540586, + "kl_loss": 0.060516029596328735, + "loss_ib": 0.0008880555396899581, + "step": 1413 + }, + { + "ce_ib": 7.956472396850586, + "ce_orig": 1.0977833271026611, + "epoch": 0.40635559709540586, + "kl_loss": 0.12055166065692902, + "loss_ib": 0.0020011637825518847, + "step": 1413 + }, + { + "ce_ib": 4.129430770874023, + "ce_orig": 0.6660087704658508, + "epoch": 0.40635559709540586, + "kl_loss": 0.0664260983467102, + "loss_ib": 0.0010772040113806725, + "step": 1413 + }, + { + "ce_ib": 5.287503719329834, + "ce_orig": 0.6029794216156006, + "epoch": 0.40635559709540586, + "kl_loss": 0.1367393434047699, + "loss_ib": 0.0018961437745019794, + "step": 1413 + }, + { + "ce_ib": 3.3757805824279785, + "ce_orig": 0.5221848487854004, + "epoch": 0.4066431806743835, + "kl_loss": 0.11435914784669876, + "loss_ib": 0.0014811694854870439, + "step": 1414 + }, + { + "ce_ib": 9.696231842041016, + "ce_orig": 1.6972432136535645, + "epoch": 0.4066431806743835, + "kl_loss": 0.08618634939193726, + "loss_ib": 0.001831486588343978, + "step": 1414 + }, + { + "ce_ib": 4.770078659057617, + "ce_orig": 0.8775630593299866, + "epoch": 0.4066431806743835, + "kl_loss": 0.07194533944129944, + "loss_ib": 0.0011964612640440464, + "step": 1414 + }, + { + "ce_ib": 1.6736482381820679, + "ce_orig": 0.18917334079742432, + "epoch": 0.4066431806743835, + "kl_loss": 0.22798651456832886, + "loss_ib": 0.0024472298100590706, + "step": 1414 + }, + { + "epoch": 0.4069307642533611, + "grad_norm": 0.09320499002933502, + "learning_rate": 4.856948942574997e-05, + "loss": 0.8688, + "step": 1415 + }, + { + "ce_ib": 6.214433193206787, + "ce_orig": 0.8773539662361145, + "epoch": 0.4069307642533611, + "kl_loss": 0.10065476596355438, + "loss_ib": 0.0016279908595606685, + "step": 1415 + }, + { + "ce_ib": 5.722317218780518, + "ce_orig": 1.2242968082427979, + "epoch": 0.4069307642533611, + "kl_loss": 0.10353957861661911, + "loss_ib": 0.001607627491466701, + "step": 1415 + }, + { + "ce_ib": 7.316473007202148, + "ce_orig": 1.4600270986557007, + "epoch": 0.4069307642533611, + "kl_loss": 0.146986186504364, + "loss_ib": 0.0022015091963112354, + "step": 1415 + }, + { + "ce_ib": 4.994661331176758, + "ce_orig": 0.562883198261261, + "epoch": 0.4069307642533611, + "kl_loss": 0.09666267037391663, + "loss_ib": 0.0014660927699878812, + "step": 1415 + }, + { + "ce_ib": 3.781491994857788, + "ce_orig": 0.6928848624229431, + "epoch": 0.4072183478323388, + "kl_loss": 0.05718432366847992, + "loss_ib": 0.0009499923908151686, + "step": 1416 + }, + { + "ce_ib": 9.2673978805542, + "ce_orig": 1.2964565753936768, + "epoch": 0.4072183478323388, + "kl_loss": 0.10217346251010895, + "loss_ib": 0.0019484743243083358, + "step": 1416 + }, + { + "ce_ib": 6.029436111450195, + "ce_orig": 0.9657694697380066, + "epoch": 0.4072183478323388, + "kl_loss": 0.1299174427986145, + "loss_ib": 0.001902117975987494, + "step": 1416 + }, + { + "ce_ib": 5.017213821411133, + "ce_orig": 0.7008000612258911, + "epoch": 0.4072183478323388, + "kl_loss": 0.09423135966062546, + "loss_ib": 0.0014440348604694009, + "step": 1416 + }, + { + "ce_ib": 4.500516891479492, + "ce_orig": 0.34214717149734497, + "epoch": 0.4075059314113164, + "kl_loss": 0.11767271906137466, + "loss_ib": 0.0016267788596451283, + "step": 1417 + }, + { + "ce_ib": 3.1806514263153076, + "ce_orig": 0.6471096873283386, + "epoch": 0.4075059314113164, + "kl_loss": 0.0917370468378067, + "loss_ib": 0.0012354356003925204, + "step": 1417 + }, + { + "ce_ib": 4.803709983825684, + "ce_orig": 1.184767484664917, + "epoch": 0.4075059314113164, + "kl_loss": 0.056204065680503845, + "loss_ib": 0.0010424115462228656, + "step": 1417 + }, + { + "ce_ib": 4.624536514282227, + "ce_orig": 0.8192501664161682, + "epoch": 0.4075059314113164, + "kl_loss": 0.11898795515298843, + "loss_ib": 0.0016523330705240369, + "step": 1417 + }, + { + "ce_ib": 4.678039073944092, + "ce_orig": 0.4023797810077667, + "epoch": 0.40779351499029404, + "kl_loss": 0.09496867656707764, + "loss_ib": 0.001417490653693676, + "step": 1418 + }, + { + "ce_ib": 5.111543655395508, + "ce_orig": 0.7812928557395935, + "epoch": 0.40779351499029404, + "kl_loss": 0.08022376894950867, + "loss_ib": 0.0013133920729160309, + "step": 1418 + }, + { + "ce_ib": 5.8873138427734375, + "ce_orig": 0.6747121810913086, + "epoch": 0.40779351499029404, + "kl_loss": 0.13775616884231567, + "loss_ib": 0.0019662929698824883, + "step": 1418 + }, + { + "ce_ib": 8.242557525634766, + "ce_orig": 1.7442784309387207, + "epoch": 0.40779351499029404, + "kl_loss": 0.10227921605110168, + "loss_ib": 0.0018470477079972625, + "step": 1418 + }, + { + "ce_ib": 4.78331995010376, + "ce_orig": 0.8966225981712341, + "epoch": 0.4080810985692717, + "kl_loss": 0.1043175607919693, + "loss_ib": 0.0015215075109153986, + "step": 1419 + }, + { + "ce_ib": 5.467410087585449, + "ce_orig": 1.2548538446426392, + "epoch": 0.4080810985692717, + "kl_loss": 0.05958162248134613, + "loss_ib": 0.0011425572447478771, + "step": 1419 + }, + { + "ce_ib": 5.79073429107666, + "ce_orig": 0.5919792652130127, + "epoch": 0.4080810985692717, + "kl_loss": 0.08852129429578781, + "loss_ib": 0.0014642864698544145, + "step": 1419 + }, + { + "ce_ib": 8.342966079711914, + "ce_orig": 0.7671210169792175, + "epoch": 0.4080810985692717, + "kl_loss": 0.38292115926742554, + "loss_ib": 0.004663507919758558, + "step": 1419 + }, + { + "epoch": 0.40836868214824934, + "grad_norm": 0.10169852524995804, + "learning_rate": 4.855652305297052e-05, + "loss": 0.8024, + "step": 1420 + }, + { + "ce_ib": 6.110755443572998, + "ce_orig": 1.0325820446014404, + "epoch": 0.40836868214824934, + "kl_loss": 0.06489177793264389, + "loss_ib": 0.0012599932961165905, + "step": 1420 + }, + { + "ce_ib": 5.731692790985107, + "ce_orig": 1.1643919944763184, + "epoch": 0.40836868214824934, + "kl_loss": 0.11741343140602112, + "loss_ib": 0.001747303525917232, + "step": 1420 + }, + { + "ce_ib": 6.906783580780029, + "ce_orig": 0.9419347047805786, + "epoch": 0.40836868214824934, + "kl_loss": 0.1597922444343567, + "loss_ib": 0.0022886006627231836, + "step": 1420 + }, + { + "ce_ib": 3.428995132446289, + "ce_orig": 0.46887871623039246, + "epoch": 0.40836868214824934, + "kl_loss": 0.05160287767648697, + "loss_ib": 0.0008589282515458763, + "step": 1420 + }, + { + "ce_ib": 7.4091644287109375, + "ce_orig": 1.4213844537734985, + "epoch": 0.40865626572722696, + "kl_loss": 0.16700538992881775, + "loss_ib": 0.002410970162600279, + "step": 1421 + }, + { + "ce_ib": 7.73345947265625, + "ce_orig": 0.7962655425071716, + "epoch": 0.40865626572722696, + "kl_loss": 0.13736525177955627, + "loss_ib": 0.0021469981875270605, + "step": 1421 + }, + { + "ce_ib": 6.495778560638428, + "ce_orig": 1.1647603511810303, + "epoch": 0.40865626572722696, + "kl_loss": 0.07768432796001434, + "loss_ib": 0.0014264211058616638, + "step": 1421 + }, + { + "ce_ib": 5.779698848724365, + "ce_orig": 0.8063614964485168, + "epoch": 0.40865626572722696, + "kl_loss": 0.08508419990539551, + "loss_ib": 0.0014288118109107018, + "step": 1421 + }, + { + "ce_ib": 4.216153621673584, + "ce_orig": 0.5847137570381165, + "epoch": 0.40894384930620464, + "kl_loss": 0.06408952176570892, + "loss_ib": 0.001062510535120964, + "step": 1422 + }, + { + "ce_ib": 7.842023849487305, + "ce_orig": 1.3211596012115479, + "epoch": 0.40894384930620464, + "kl_loss": 0.12538115680217743, + "loss_ib": 0.0020380138885229826, + "step": 1422 + }, + { + "ce_ib": 6.368346691131592, + "ce_orig": 1.4015376567840576, + "epoch": 0.40894384930620464, + "kl_loss": 0.1733558028936386, + "loss_ib": 0.002370392670854926, + "step": 1422 + }, + { + "ce_ib": 4.551137924194336, + "ce_orig": 0.6162861585617065, + "epoch": 0.40894384930620464, + "kl_loss": 0.10174643993377686, + "loss_ib": 0.001472578151151538, + "step": 1422 + }, + { + "ce_ib": 6.886654376983643, + "ce_orig": 0.9630997180938721, + "epoch": 0.40923143288518227, + "kl_loss": 0.12046155333518982, + "loss_ib": 0.0018932810053229332, + "step": 1423 + }, + { + "ce_ib": 7.230597019195557, + "ce_orig": 1.1543656587600708, + "epoch": 0.40923143288518227, + "kl_loss": 0.0784279853105545, + "loss_ib": 0.0015073394170030951, + "step": 1423 + }, + { + "ce_ib": 5.933324813842773, + "ce_orig": 0.7128235697746277, + "epoch": 0.40923143288518227, + "kl_loss": 0.1509019285440445, + "loss_ib": 0.0021023517474532127, + "step": 1423 + }, + { + "ce_ib": 5.982076644897461, + "ce_orig": 1.2698583602905273, + "epoch": 0.40923143288518227, + "kl_loss": 0.06490836292505264, + "loss_ib": 0.0012472912203520536, + "step": 1423 + }, + { + "ce_ib": 3.7193098068237305, + "ce_orig": 0.6517233848571777, + "epoch": 0.4095190164641599, + "kl_loss": 0.08744284510612488, + "loss_ib": 0.0012463594321161509, + "step": 1424 + }, + { + "ce_ib": 6.195068359375, + "ce_orig": 1.2152502536773682, + "epoch": 0.4095190164641599, + "kl_loss": 0.11578704416751862, + "loss_ib": 0.001777377212420106, + "step": 1424 + }, + { + "ce_ib": 9.438179016113281, + "ce_orig": 1.714571475982666, + "epoch": 0.4095190164641599, + "kl_loss": 0.09612904489040375, + "loss_ib": 0.0019051083363592625, + "step": 1424 + }, + { + "ce_ib": 5.258744716644287, + "ce_orig": 0.7220935225486755, + "epoch": 0.4095190164641599, + "kl_loss": 0.08531811088323593, + "loss_ib": 0.0013790555531159043, + "step": 1424 + }, + { + "epoch": 0.4098066000431375, + "grad_norm": 0.1047549843788147, + "learning_rate": 4.8543499927059445e-05, + "loss": 0.8855, + "step": 1425 + }, + { + "ce_ib": 5.544464588165283, + "ce_orig": 0.5749992728233337, + "epoch": 0.4098066000431375, + "kl_loss": 0.15693159401416779, + "loss_ib": 0.0021237623877823353, + "step": 1425 + }, + { + "ce_ib": 2.2752645015716553, + "ce_orig": 0.35088467597961426, + "epoch": 0.4098066000431375, + "kl_loss": 0.07889710366725922, + "loss_ib": 0.00101649749558419, + "step": 1425 + }, + { + "ce_ib": 8.617973327636719, + "ce_orig": 1.2295658588409424, + "epoch": 0.4098066000431375, + "kl_loss": 0.08672799915075302, + "loss_ib": 0.0017290773103013635, + "step": 1425 + }, + { + "ce_ib": 4.017047882080078, + "ce_orig": 0.2988843619823456, + "epoch": 0.4098066000431375, + "kl_loss": 0.3063962459564209, + "loss_ib": 0.0034656673669815063, + "step": 1425 + }, + { + "ce_ib": 6.636280059814453, + "ce_orig": 0.8577524423599243, + "epoch": 0.4100941836221152, + "kl_loss": 0.13353979587554932, + "loss_ib": 0.001999025931581855, + "step": 1426 + }, + { + "ce_ib": 6.605571269989014, + "ce_orig": 0.9017672538757324, + "epoch": 0.4100941836221152, + "kl_loss": 0.057967811822891235, + "loss_ib": 0.0012402351712808013, + "step": 1426 + }, + { + "ce_ib": 5.672203540802002, + "ce_orig": 0.9474138021469116, + "epoch": 0.4100941836221152, + "kl_loss": 0.09774202108383179, + "loss_ib": 0.0015446405159309506, + "step": 1426 + }, + { + "ce_ib": 5.820497512817383, + "ce_orig": 0.7509312629699707, + "epoch": 0.4100941836221152, + "kl_loss": 0.12614545226097107, + "loss_ib": 0.0018435042584314942, + "step": 1426 + }, + { + "ce_ib": 5.530405521392822, + "ce_orig": 0.7405644059181213, + "epoch": 0.4103817672010928, + "kl_loss": 0.12352365255355835, + "loss_ib": 0.0017882769461721182, + "step": 1427 + }, + { + "ce_ib": 4.921144962310791, + "ce_orig": 0.7050065994262695, + "epoch": 0.4103817672010928, + "kl_loss": 0.08236676454544067, + "loss_ib": 0.0013157820794731379, + "step": 1427 + }, + { + "ce_ib": 5.164494514465332, + "ce_orig": 0.8321438431739807, + "epoch": 0.4103817672010928, + "kl_loss": 0.09443796426057816, + "loss_ib": 0.0014608290512114763, + "step": 1427 + }, + { + "ce_ib": 7.48508358001709, + "ce_orig": 1.5289044380187988, + "epoch": 0.4103817672010928, + "kl_loss": 0.09666682779788971, + "loss_ib": 0.0017151766223832965, + "step": 1427 + }, + { + "ce_ib": 8.054293632507324, + "ce_orig": 1.314834713935852, + "epoch": 0.41066935078007044, + "kl_loss": 0.07268555462360382, + "loss_ib": 0.0015322848921641707, + "step": 1428 + }, + { + "ce_ib": 5.46734619140625, + "ce_orig": 0.7332492470741272, + "epoch": 0.41066935078007044, + "kl_loss": 0.18445497751235962, + "loss_ib": 0.002391284331679344, + "step": 1428 + }, + { + "ce_ib": 6.619436264038086, + "ce_orig": 1.3106836080551147, + "epoch": 0.41066935078007044, + "kl_loss": 0.0980033203959465, + "loss_ib": 0.0016419767634943128, + "step": 1428 + }, + { + "ce_ib": 8.549188613891602, + "ce_orig": 1.5234109163284302, + "epoch": 0.41066935078007044, + "kl_loss": 0.10396061837673187, + "loss_ib": 0.001894524903036654, + "step": 1428 + }, + { + "ce_ib": 5.3329386711120605, + "ce_orig": 0.9647698402404785, + "epoch": 0.4109569343590481, + "kl_loss": 0.12533004581928253, + "loss_ib": 0.0017865943955257535, + "step": 1429 + }, + { + "ce_ib": 7.284860134124756, + "ce_orig": 0.9161247611045837, + "epoch": 0.4109569343590481, + "kl_loss": 0.08901812136173248, + "loss_ib": 0.0016186671564355493, + "step": 1429 + }, + { + "ce_ib": 4.861490249633789, + "ce_orig": 0.8348015546798706, + "epoch": 0.4109569343590481, + "kl_loss": 0.08867591619491577, + "loss_ib": 0.0013729081256315112, + "step": 1429 + }, + { + "ce_ib": 4.184719562530518, + "ce_orig": 0.5124539136886597, + "epoch": 0.4109569343590481, + "kl_loss": 0.1092284768819809, + "loss_ib": 0.001510756672360003, + "step": 1429 + }, + { + "epoch": 0.41124451793802574, + "grad_norm": 0.0929122045636177, + "learning_rate": 4.853042007939248e-05, + "loss": 0.8756, + "step": 1430 + }, + { + "ce_ib": 5.132282257080078, + "ce_orig": 0.7177113890647888, + "epoch": 0.41124451793802574, + "kl_loss": 0.11695947498083115, + "loss_ib": 0.001682822941802442, + "step": 1430 + }, + { + "ce_ib": 5.9083943367004395, + "ce_orig": 0.6927317380905151, + "epoch": 0.41124451793802574, + "kl_loss": 0.13192662596702576, + "loss_ib": 0.0019101055804640055, + "step": 1430 + }, + { + "ce_ib": 7.165446758270264, + "ce_orig": 0.8966021537780762, + "epoch": 0.41124451793802574, + "kl_loss": 0.12918083369731903, + "loss_ib": 0.002008352894335985, + "step": 1430 + }, + { + "ce_ib": 6.5472588539123535, + "ce_orig": 1.065467357635498, + "epoch": 0.41124451793802574, + "kl_loss": 0.1149042397737503, + "loss_ib": 0.0018037682166323066, + "step": 1430 + }, + { + "ce_ib": 4.255852222442627, + "ce_orig": 0.44100260734558105, + "epoch": 0.41153210151700337, + "kl_loss": 0.12202838808298111, + "loss_ib": 0.0016458689933642745, + "step": 1431 + }, + { + "ce_ib": 4.779837131500244, + "ce_orig": 0.7545100450515747, + "epoch": 0.41153210151700337, + "kl_loss": 0.12677210569381714, + "loss_ib": 0.001745704677887261, + "step": 1431 + }, + { + "ce_ib": 7.361011981964111, + "ce_orig": 0.9949771761894226, + "epoch": 0.41153210151700337, + "kl_loss": 0.07376141101121902, + "loss_ib": 0.0014737152960151434, + "step": 1431 + }, + { + "ce_ib": 6.311118125915527, + "ce_orig": 0.9260743856430054, + "epoch": 0.41153210151700337, + "kl_loss": 0.13274039328098297, + "loss_ib": 0.0019585154950618744, + "step": 1431 + }, + { + "ce_ib": 3.5083696842193604, + "ce_orig": 0.1979556828737259, + "epoch": 0.411819685095981, + "kl_loss": 0.1574898660182953, + "loss_ib": 0.0019257356179878116, + "step": 1432 + }, + { + "ce_ib": 5.359910011291504, + "ce_orig": 1.0081112384796143, + "epoch": 0.411819685095981, + "kl_loss": 0.06903597712516785, + "loss_ib": 0.00122635078150779, + "step": 1432 + }, + { + "ce_ib": 5.4759521484375, + "ce_orig": 0.9224492907524109, + "epoch": 0.411819685095981, + "kl_loss": 0.07184388488531113, + "loss_ib": 0.0012660340871661901, + "step": 1432 + }, + { + "ce_ib": 4.915720462799072, + "ce_orig": 0.6493841409683228, + "epoch": 0.411819685095981, + "kl_loss": 0.09995627403259277, + "loss_ib": 0.0014911347534507513, + "step": 1432 + }, + { + "ce_ib": 4.013033390045166, + "ce_orig": 0.6558687686920166, + "epoch": 0.41210726867495867, + "kl_loss": 0.18210524320602417, + "loss_ib": 0.0022223556879907846, + "step": 1433 + }, + { + "ce_ib": 7.42601203918457, + "ce_orig": 1.2959660291671753, + "epoch": 0.41210726867495867, + "kl_loss": 0.09777391701936722, + "loss_ib": 0.0017203402239829302, + "step": 1433 + }, + { + "ce_ib": 5.428860664367676, + "ce_orig": 0.4534249007701874, + "epoch": 0.41210726867495867, + "kl_loss": 0.1439604014158249, + "loss_ib": 0.0019824900664389133, + "step": 1433 + }, + { + "ce_ib": 7.018229007720947, + "ce_orig": 1.1912530660629272, + "epoch": 0.41210726867495867, + "kl_loss": 0.13741852343082428, + "loss_ib": 0.002076007891446352, + "step": 1433 + }, + { + "ce_ib": 5.00704288482666, + "ce_orig": 0.5389499068260193, + "epoch": 0.4123948522539363, + "kl_loss": 0.09618522226810455, + "loss_ib": 0.0014625564217567444, + "step": 1434 + }, + { + "ce_ib": 5.019461154937744, + "ce_orig": 0.7763354778289795, + "epoch": 0.4123948522539363, + "kl_loss": 0.08913825452327728, + "loss_ib": 0.0013933285372331738, + "step": 1434 + }, + { + "ce_ib": 5.427734375, + "ce_orig": 0.8100016713142395, + "epoch": 0.4123948522539363, + "kl_loss": 0.0638791173696518, + "loss_ib": 0.00118156464304775, + "step": 1434 + }, + { + "ce_ib": 4.386058330535889, + "ce_orig": 0.6597939133644104, + "epoch": 0.4123948522539363, + "kl_loss": 0.10749074816703796, + "loss_ib": 0.001513513270765543, + "step": 1434 + }, + { + "epoch": 0.4126824358329139, + "grad_norm": 0.08337324112653732, + "learning_rate": 4.851728354148203e-05, + "loss": 0.849, + "step": 1435 + }, + { + "ce_ib": 4.55735969543457, + "ce_orig": 0.6752052307128906, + "epoch": 0.4126824358329139, + "kl_loss": 0.0997617095708847, + "loss_ib": 0.0014533529756590724, + "step": 1435 + }, + { + "ce_ib": 5.787301540374756, + "ce_orig": 1.0791867971420288, + "epoch": 0.4126824358329139, + "kl_loss": 0.053296104073524475, + "loss_ib": 0.0011116911191493273, + "step": 1435 + }, + { + "ce_ib": 6.046157360076904, + "ce_orig": 0.8505937457084656, + "epoch": 0.4126824358329139, + "kl_loss": 0.07980112731456757, + "loss_ib": 0.0014026268618181348, + "step": 1435 + }, + { + "ce_ib": 4.417842388153076, + "ce_orig": 0.8308053612709045, + "epoch": 0.4126824358329139, + "kl_loss": 0.12036478519439697, + "loss_ib": 0.001645432086661458, + "step": 1435 + }, + { + "ce_ib": 9.31179428100586, + "ce_orig": 1.4481607675552368, + "epoch": 0.4129700194118916, + "kl_loss": 0.07177025079727173, + "loss_ib": 0.0016488818218931556, + "step": 1436 + }, + { + "ce_ib": 4.3464484214782715, + "ce_orig": 0.518915057182312, + "epoch": 0.4129700194118916, + "kl_loss": 0.07961948215961456, + "loss_ib": 0.0012308396399021149, + "step": 1436 + }, + { + "ce_ib": 4.7582173347473145, + "ce_orig": 0.32292982935905457, + "epoch": 0.4129700194118916, + "kl_loss": 0.13897094130516052, + "loss_ib": 0.0018655312014743686, + "step": 1436 + }, + { + "ce_ib": 6.521214962005615, + "ce_orig": 1.1188610792160034, + "epoch": 0.4129700194118916, + "kl_loss": 0.09975160658359528, + "loss_ib": 0.0016496374737471342, + "step": 1436 + }, + { + "ce_ib": 3.012716293334961, + "ce_orig": 0.3423144519329071, + "epoch": 0.4132576029908692, + "kl_loss": 0.20307299494743347, + "loss_ib": 0.002332001458853483, + "step": 1437 + }, + { + "ce_ib": 6.234538555145264, + "ce_orig": 0.7571339011192322, + "epoch": 0.4132576029908692, + "kl_loss": 0.10930953174829483, + "loss_ib": 0.0017165490426123142, + "step": 1437 + }, + { + "ce_ib": 7.31367301940918, + "ce_orig": 1.5435678958892822, + "epoch": 0.4132576029908692, + "kl_loss": 0.09054261445999146, + "loss_ib": 0.0016367933712899685, + "step": 1437 + }, + { + "ce_ib": 4.15863561630249, + "ce_orig": 0.6962539553642273, + "epoch": 0.4132576029908692, + "kl_loss": 0.09438318014144897, + "loss_ib": 0.0013596953358501196, + "step": 1437 + }, + { + "ce_ib": 3.0213708877563477, + "ce_orig": 0.6388891935348511, + "epoch": 0.41354518656984685, + "kl_loss": 0.07304428517818451, + "loss_ib": 0.0010325799230486155, + "step": 1438 + }, + { + "ce_ib": 2.410836935043335, + "ce_orig": 0.4856458008289337, + "epoch": 0.41354518656984685, + "kl_loss": 0.06359301507472992, + "loss_ib": 0.000877013779245317, + "step": 1438 + }, + { + "ce_ib": 6.516482353210449, + "ce_orig": 1.2926971912384033, + "epoch": 0.41354518656984685, + "kl_loss": 0.0939096137881279, + "loss_ib": 0.001590744243003428, + "step": 1438 + }, + { + "ce_ib": 6.705051898956299, + "ce_orig": 0.9979714751243591, + "epoch": 0.41354518656984685, + "kl_loss": 0.1320842057466507, + "loss_ib": 0.00199134717695415, + "step": 1438 + }, + { + "ce_ib": 5.880115985870361, + "ce_orig": 0.6562256813049316, + "epoch": 0.4138327701488245, + "kl_loss": 0.11906502395868301, + "loss_ib": 0.0017786618554964662, + "step": 1439 + }, + { + "ce_ib": 3.5629148483276367, + "ce_orig": 0.3119569420814514, + "epoch": 0.4138327701488245, + "kl_loss": 0.08793976902961731, + "loss_ib": 0.0012356891529634595, + "step": 1439 + }, + { + "ce_ib": 6.078202247619629, + "ce_orig": 0.9515578746795654, + "epoch": 0.4138327701488245, + "kl_loss": 0.10293766856193542, + "loss_ib": 0.0016371967503800988, + "step": 1439 + }, + { + "ce_ib": 5.050149917602539, + "ce_orig": 0.7735735774040222, + "epoch": 0.4138327701488245, + "kl_loss": 0.06617474555969238, + "loss_ib": 0.0011667624348774552, + "step": 1439 + }, + { + "epoch": 0.41412035372780215, + "grad_norm": 0.07793393731117249, + "learning_rate": 4.850409034497704e-05, + "loss": 0.8629, + "step": 1440 + }, + { + "ce_ib": 6.743407726287842, + "ce_orig": 1.0928657054901123, + "epoch": 0.41412035372780215, + "kl_loss": 0.08591713011264801, + "loss_ib": 0.0015335120260715485, + "step": 1440 + }, + { + "ce_ib": 5.080863952636719, + "ce_orig": 1.1487590074539185, + "epoch": 0.41412035372780215, + "kl_loss": 0.09081675112247467, + "loss_ib": 0.0014162538573145866, + "step": 1440 + }, + { + "ce_ib": 4.980000019073486, + "ce_orig": 0.7465852499008179, + "epoch": 0.41412035372780215, + "kl_loss": 0.07600586861371994, + "loss_ib": 0.0012580587062984705, + "step": 1440 + }, + { + "ce_ib": 5.498541831970215, + "ce_orig": 0.5793271660804749, + "epoch": 0.41412035372780215, + "kl_loss": 0.08694162964820862, + "loss_ib": 0.0014192704111337662, + "step": 1440 + }, + { + "ce_ib": 3.5841479301452637, + "ce_orig": 0.6023969054222107, + "epoch": 0.4144079373067798, + "kl_loss": 0.08362072706222534, + "loss_ib": 0.0011946220183745027, + "step": 1441 + }, + { + "ce_ib": 7.423813343048096, + "ce_orig": 1.302383303642273, + "epoch": 0.4144079373067798, + "kl_loss": 0.1895519644021988, + "loss_ib": 0.0026379008777439594, + "step": 1441 + }, + { + "ce_ib": 4.287861347198486, + "ce_orig": 0.546518862247467, + "epoch": 0.4144079373067798, + "kl_loss": 0.10317136347293854, + "loss_ib": 0.001460499712266028, + "step": 1441 + }, + { + "ce_ib": 3.742234945297241, + "ce_orig": 0.41573965549468994, + "epoch": 0.4144079373067798, + "kl_loss": 0.0966222956776619, + "loss_ib": 0.0013404464116320014, + "step": 1441 + }, + { + "ce_ib": 3.696802854537964, + "ce_orig": 0.582406759262085, + "epoch": 0.4146955208857574, + "kl_loss": 0.07445007562637329, + "loss_ib": 0.0011141810100525618, + "step": 1442 + }, + { + "ce_ib": 5.3360466957092285, + "ce_orig": 0.6130356192588806, + "epoch": 0.4146955208857574, + "kl_loss": 0.1057562604546547, + "loss_ib": 0.001591167296282947, + "step": 1442 + }, + { + "ce_ib": 5.148367404937744, + "ce_orig": 0.6853485107421875, + "epoch": 0.4146955208857574, + "kl_loss": 0.12549643218517303, + "loss_ib": 0.0017698010196909308, + "step": 1442 + }, + { + "ce_ib": 7.778355121612549, + "ce_orig": 1.2386947870254517, + "epoch": 0.4146955208857574, + "kl_loss": 0.11003492772579193, + "loss_ib": 0.0018781846156343818, + "step": 1442 + }, + { + "ce_ib": 4.685573577880859, + "ce_orig": 0.6230754852294922, + "epoch": 0.4149831044647351, + "kl_loss": 0.0866067036986351, + "loss_ib": 0.0013346243649721146, + "step": 1443 + }, + { + "ce_ib": 4.70703649520874, + "ce_orig": 0.354888379573822, + "epoch": 0.4149831044647351, + "kl_loss": 0.07406913489103317, + "loss_ib": 0.0012113949051126838, + "step": 1443 + }, + { + "ce_ib": 4.947773456573486, + "ce_orig": 0.8348574042320251, + "epoch": 0.4149831044647351, + "kl_loss": 0.221211776137352, + "loss_ib": 0.0027068951167166233, + "step": 1443 + }, + { + "ce_ib": 5.788531303405762, + "ce_orig": 0.5687450766563416, + "epoch": 0.4149831044647351, + "kl_loss": 0.0781509131193161, + "loss_ib": 0.0013603621628135443, + "step": 1443 + }, + { + "ce_ib": 4.952359199523926, + "ce_orig": 0.5754139423370361, + "epoch": 0.4152706880437127, + "kl_loss": 0.07508834451436996, + "loss_ib": 0.0012461193837225437, + "step": 1444 + }, + { + "ce_ib": 3.1481313705444336, + "ce_orig": 0.5248778462409973, + "epoch": 0.4152706880437127, + "kl_loss": 0.05129199102520943, + "loss_ib": 0.0008277330198325217, + "step": 1444 + }, + { + "ce_ib": 3.468945026397705, + "ce_orig": 0.585468590259552, + "epoch": 0.4152706880437127, + "kl_loss": 0.06292411684989929, + "loss_ib": 0.0009761356632225215, + "step": 1444 + }, + { + "ce_ib": 6.467720985412598, + "ce_orig": 0.6333016753196716, + "epoch": 0.4152706880437127, + "kl_loss": 0.15747122466564178, + "loss_ib": 0.002221484202891588, + "step": 1444 + }, + { + "epoch": 0.4155582716226903, + "grad_norm": 0.09773839265108109, + "learning_rate": 4.8490840521663e-05, + "loss": 0.831, + "step": 1445 + }, + { + "ce_ib": 4.575643539428711, + "ce_orig": 0.7352006435394287, + "epoch": 0.4155582716226903, + "kl_loss": 0.06750188022851944, + "loss_ib": 0.0011325831292197108, + "step": 1445 + }, + { + "ce_ib": 7.829094886779785, + "ce_orig": 1.3825410604476929, + "epoch": 0.4155582716226903, + "kl_loss": 0.10775604844093323, + "loss_ib": 0.001860469812527299, + "step": 1445 + }, + { + "ce_ib": 4.150346755981445, + "ce_orig": 0.8056705594062805, + "epoch": 0.4155582716226903, + "kl_loss": 0.06260480731725693, + "loss_ib": 0.0010410826653242111, + "step": 1445 + }, + { + "ce_ib": 3.7523932456970215, + "ce_orig": 0.6832635998725891, + "epoch": 0.4155582716226903, + "kl_loss": 0.04519576579332352, + "loss_ib": 0.0008271969854831696, + "step": 1445 + }, + { + "ce_ib": 4.2690205574035645, + "ce_orig": 0.7793135046958923, + "epoch": 0.415845855201668, + "kl_loss": 0.07240147888660431, + "loss_ib": 0.0011509167961776257, + "step": 1446 + }, + { + "ce_ib": 5.752465724945068, + "ce_orig": 0.8700240850448608, + "epoch": 0.415845855201668, + "kl_loss": 0.11336810141801834, + "loss_ib": 0.0017089275643229485, + "step": 1446 + }, + { + "ce_ib": 4.555610179901123, + "ce_orig": 0.3389473855495453, + "epoch": 0.415845855201668, + "kl_loss": 0.141608327627182, + "loss_ib": 0.001871644170023501, + "step": 1446 + }, + { + "ce_ib": 7.882659435272217, + "ce_orig": 1.145242691040039, + "epoch": 0.415845855201668, + "kl_loss": 0.08191496878862381, + "loss_ib": 0.001607415615580976, + "step": 1446 + }, + { + "ce_ib": 7.144163131713867, + "ce_orig": 0.7978856563568115, + "epoch": 0.4161334387806456, + "kl_loss": 0.14888674020767212, + "loss_ib": 0.0022032835986465216, + "step": 1447 + }, + { + "ce_ib": 4.994200706481934, + "ce_orig": 0.43828389048576355, + "epoch": 0.4161334387806456, + "kl_loss": 0.08625173568725586, + "loss_ib": 0.0013619373785331845, + "step": 1447 + }, + { + "ce_ib": 3.9171884059906006, + "ce_orig": 0.45851317048072815, + "epoch": 0.4161334387806456, + "kl_loss": 0.10175397992134094, + "loss_ib": 0.0014092584606260061, + "step": 1447 + }, + { + "ce_ib": 5.007274150848389, + "ce_orig": 0.895894467830658, + "epoch": 0.4161334387806456, + "kl_loss": 0.10107745230197906, + "loss_ib": 0.001511501963250339, + "step": 1447 + }, + { + "ce_ib": 10.265788078308105, + "ce_orig": 1.6524277925491333, + "epoch": 0.41642102235962325, + "kl_loss": 0.1213127076625824, + "loss_ib": 0.0022397057618945837, + "step": 1448 + }, + { + "ce_ib": 5.410793781280518, + "ce_orig": 0.3247361183166504, + "epoch": 0.41642102235962325, + "kl_loss": 0.15680810809135437, + "loss_ib": 0.002109160413965583, + "step": 1448 + }, + { + "ce_ib": 5.622844219207764, + "ce_orig": 0.9446114301681519, + "epoch": 0.41642102235962325, + "kl_loss": 0.08880884200334549, + "loss_ib": 0.0014503727434203029, + "step": 1448 + }, + { + "ce_ib": 2.635361909866333, + "ce_orig": 0.2937851846218109, + "epoch": 0.41642102235962325, + "kl_loss": 0.3179281949996948, + "loss_ib": 0.0034428180661052465, + "step": 1448 + }, + { + "ce_ib": 3.701805830001831, + "ce_orig": 0.5746484398841858, + "epoch": 0.41670860593860093, + "kl_loss": 0.13422581553459167, + "loss_ib": 0.0017124387668445706, + "step": 1449 + }, + { + "ce_ib": 4.573523044586182, + "ce_orig": 0.7001649737358093, + "epoch": 0.41670860593860093, + "kl_loss": 0.09929852932691574, + "loss_ib": 0.0014503375859931111, + "step": 1449 + }, + { + "ce_ib": 6.455227375030518, + "ce_orig": 0.9670495986938477, + "epoch": 0.41670860593860093, + "kl_loss": 0.11830205470323563, + "loss_ib": 0.0018285432597622275, + "step": 1449 + }, + { + "ce_ib": 4.639206409454346, + "ce_orig": 0.7645363211631775, + "epoch": 0.41670860593860093, + "kl_loss": 0.10847769677639008, + "loss_ib": 0.0015486975898966193, + "step": 1449 + }, + { + "epoch": 0.41699618951757855, + "grad_norm": 0.0861210972070694, + "learning_rate": 4.84775341034618e-05, + "loss": 0.823, + "step": 1450 + }, + { + "ce_ib": 6.17034912109375, + "ce_orig": 0.8687017560005188, + "epoch": 0.41699618951757855, + "kl_loss": 0.16763222217559814, + "loss_ib": 0.0022933571599423885, + "step": 1450 + }, + { + "ce_ib": 5.7684783935546875, + "ce_orig": 0.8275676965713501, + "epoch": 0.41699618951757855, + "kl_loss": 0.1634751409292221, + "loss_ib": 0.0022115991450846195, + "step": 1450 + }, + { + "ce_ib": 3.9640069007873535, + "ce_orig": 0.4531187117099762, + "epoch": 0.41699618951757855, + "kl_loss": 0.14157013595104218, + "loss_ib": 0.0018121020402759314, + "step": 1450 + }, + { + "ce_ib": 4.608716011047363, + "ce_orig": 0.34708335995674133, + "epoch": 0.41699618951757855, + "kl_loss": 0.10207530856132507, + "loss_ib": 0.0014816246693953872, + "step": 1450 + }, + { + "ce_ib": 5.21584939956665, + "ce_orig": 0.856076717376709, + "epoch": 0.4172837730965562, + "kl_loss": 0.09565088152885437, + "loss_ib": 0.0014780936762690544, + "step": 1451 + }, + { + "ce_ib": 5.448686599731445, + "ce_orig": 1.2174465656280518, + "epoch": 0.4172837730965562, + "kl_loss": 0.11567234247922897, + "loss_ib": 0.0017015920020639896, + "step": 1451 + }, + { + "ce_ib": 6.26071310043335, + "ce_orig": 0.8002637624740601, + "epoch": 0.4172837730965562, + "kl_loss": 0.12488710135221481, + "loss_ib": 0.0018749423325061798, + "step": 1451 + }, + { + "ce_ib": 5.369015216827393, + "ce_orig": 0.6488451957702637, + "epoch": 0.4172837730965562, + "kl_loss": 0.11009375005960464, + "loss_ib": 0.001637839013710618, + "step": 1451 + }, + { + "ce_ib": 7.470041275024414, + "ce_orig": 1.25552499294281, + "epoch": 0.4175713566755338, + "kl_loss": 0.08985939621925354, + "loss_ib": 0.0016455980949103832, + "step": 1452 + }, + { + "ce_ib": 6.61988639831543, + "ce_orig": 0.6664535403251648, + "epoch": 0.4175713566755338, + "kl_loss": 0.15661829710006714, + "loss_ib": 0.0022281715646386147, + "step": 1452 + }, + { + "ce_ib": 4.991513729095459, + "ce_orig": 0.8073134422302246, + "epoch": 0.4175713566755338, + "kl_loss": 0.11219026893377304, + "loss_ib": 0.001621054019778967, + "step": 1452 + }, + { + "ce_ib": 5.777252197265625, + "ce_orig": 0.6050467491149902, + "epoch": 0.4175713566755338, + "kl_loss": 0.10480629652738571, + "loss_ib": 0.001625788165256381, + "step": 1452 + }, + { + "ce_ib": 4.126240253448486, + "ce_orig": 0.8339105844497681, + "epoch": 0.4178589402545115, + "kl_loss": 0.11453627049922943, + "loss_ib": 0.001557986717671156, + "step": 1453 + }, + { + "ce_ib": 5.761976718902588, + "ce_orig": 0.6732054352760315, + "epoch": 0.4178589402545115, + "kl_loss": 0.08584851771593094, + "loss_ib": 0.0014346828684210777, + "step": 1453 + }, + { + "ce_ib": 6.47689962387085, + "ce_orig": 0.8545829653739929, + "epoch": 0.4178589402545115, + "kl_loss": 0.09251880645751953, + "loss_ib": 0.0015728779835626483, + "step": 1453 + }, + { + "ce_ib": 6.084139823913574, + "ce_orig": 0.8071439862251282, + "epoch": 0.4178589402545115, + "kl_loss": 0.13854151964187622, + "loss_ib": 0.0019938291516155005, + "step": 1453 + }, + { + "ce_ib": 5.937831401824951, + "ce_orig": 0.8069619536399841, + "epoch": 0.4181465238334891, + "kl_loss": 0.15720096230506897, + "loss_ib": 0.002165792742744088, + "step": 1454 + }, + { + "ce_ib": 3.6552929878234863, + "ce_orig": 0.6291875243186951, + "epoch": 0.4181465238334891, + "kl_loss": 0.07416625320911407, + "loss_ib": 0.001107191783376038, + "step": 1454 + }, + { + "ce_ib": 6.729911804199219, + "ce_orig": 1.3609896898269653, + "epoch": 0.4181465238334891, + "kl_loss": 0.11908746510744095, + "loss_ib": 0.0018638657638803124, + "step": 1454 + }, + { + "ce_ib": 6.7187981605529785, + "ce_orig": 1.2945940494537354, + "epoch": 0.4181465238334891, + "kl_loss": 0.10286684334278107, + "loss_ib": 0.0017005482222884893, + "step": 1454 + }, + { + "epoch": 0.4184341074124667, + "grad_norm": 0.09489905834197998, + "learning_rate": 4.8464171122431684e-05, + "loss": 0.8488, + "step": 1455 + }, + { + "ce_ib": 2.7830166816711426, + "ce_orig": 0.545401394367218, + "epoch": 0.4184341074124667, + "kl_loss": 0.052842650562524796, + "loss_ib": 0.0008067281451076269, + "step": 1455 + }, + { + "ce_ib": 4.6558709144592285, + "ce_orig": 0.6611546277999878, + "epoch": 0.4184341074124667, + "kl_loss": 0.11962183564901352, + "loss_ib": 0.001661805436015129, + "step": 1455 + }, + { + "ce_ib": 5.181333541870117, + "ce_orig": 0.7933509349822998, + "epoch": 0.4184341074124667, + "kl_loss": 0.09340114146471024, + "loss_ib": 0.0014521447010338306, + "step": 1455 + }, + { + "ce_ib": 5.011409282684326, + "ce_orig": 0.5729467868804932, + "epoch": 0.4184341074124667, + "kl_loss": 0.12782812118530273, + "loss_ib": 0.0017794221639633179, + "step": 1455 + }, + { + "ce_ib": 5.055473327636719, + "ce_orig": 0.7648478746414185, + "epoch": 0.4187216909914444, + "kl_loss": 0.1019359678030014, + "loss_ib": 0.0015249070711433887, + "step": 1456 + }, + { + "ce_ib": 4.663490295410156, + "ce_orig": 0.49775955080986023, + "epoch": 0.4187216909914444, + "kl_loss": 0.07127943634986877, + "loss_ib": 0.0011791433207690716, + "step": 1456 + }, + { + "ce_ib": 7.593672275543213, + "ce_orig": 1.1730624437332153, + "epoch": 0.4187216909914444, + "kl_loss": 0.14229080080986023, + "loss_ib": 0.0021822750568389893, + "step": 1456 + }, + { + "ce_ib": 4.291922569274902, + "ce_orig": 0.6264102458953857, + "epoch": 0.4187216909914444, + "kl_loss": 0.09957338124513626, + "loss_ib": 0.0014249259838834405, + "step": 1456 + }, + { + "ce_ib": 3.189561367034912, + "ce_orig": 0.4278847277164459, + "epoch": 0.41900927457042203, + "kl_loss": 0.13700458407402039, + "loss_ib": 0.0016890019178390503, + "step": 1457 + }, + { + "ce_ib": 3.4097647666931152, + "ce_orig": 0.49093571305274963, + "epoch": 0.41900927457042203, + "kl_loss": 0.09303900599479675, + "loss_ib": 0.001271366490982473, + "step": 1457 + }, + { + "ce_ib": 7.352541446685791, + "ce_orig": 0.9452694058418274, + "epoch": 0.41900927457042203, + "kl_loss": 0.08397386968135834, + "loss_ib": 0.0015749927842989564, + "step": 1457 + }, + { + "ce_ib": 4.712612152099609, + "ce_orig": 0.8468739986419678, + "epoch": 0.41900927457042203, + "kl_loss": 0.12789814174175262, + "loss_ib": 0.0017502426635473967, + "step": 1457 + }, + { + "ce_ib": 5.61114501953125, + "ce_orig": 0.8121333718299866, + "epoch": 0.41929685814939965, + "kl_loss": 0.10803937166929245, + "loss_ib": 0.0016415081918239594, + "step": 1458 + }, + { + "ce_ib": 4.783021450042725, + "ce_orig": 0.9681260585784912, + "epoch": 0.41929685814939965, + "kl_loss": 0.12566494941711426, + "loss_ib": 0.0017349515110254288, + "step": 1458 + }, + { + "ce_ib": 4.220062255859375, + "ce_orig": 0.6777926683425903, + "epoch": 0.41929685814939965, + "kl_loss": 0.10959968715906143, + "loss_ib": 0.0015180030604824424, + "step": 1458 + }, + { + "ce_ib": 8.752851486206055, + "ce_orig": 1.2470048666000366, + "epoch": 0.41929685814939965, + "kl_loss": 0.1120050922036171, + "loss_ib": 0.0019953360315412283, + "step": 1458 + }, + { + "ce_ib": 5.158299446105957, + "ce_orig": 0.9586217999458313, + "epoch": 0.41958444172837733, + "kl_loss": 0.08502523601055145, + "loss_ib": 0.0013660822296515107, + "step": 1459 + }, + { + "ce_ib": 5.161741256713867, + "ce_orig": 0.7361714243888855, + "epoch": 0.41958444172837733, + "kl_loss": 0.0770580917596817, + "loss_ib": 0.00128675508312881, + "step": 1459 + }, + { + "ce_ib": 4.91823673248291, + "ce_orig": 0.7018568515777588, + "epoch": 0.41958444172837733, + "kl_loss": 0.10236355662345886, + "loss_ib": 0.001515459269285202, + "step": 1459 + }, + { + "ce_ib": 4.538360118865967, + "ce_orig": 0.7272351980209351, + "epoch": 0.41958444172837733, + "kl_loss": 0.09524193406105042, + "loss_ib": 0.0014062552945688367, + "step": 1459 + }, + { + "epoch": 0.41987202530735496, + "grad_norm": 0.10186373442411423, + "learning_rate": 4.8450751610767194e-05, + "loss": 0.8158, + "step": 1460 + }, + { + "ce_ib": 4.1273512840271, + "ce_orig": 0.7310515642166138, + "epoch": 0.41987202530735496, + "kl_loss": 0.058533839881420135, + "loss_ib": 0.0009980734903365374, + "step": 1460 + }, + { + "ce_ib": 4.629617691040039, + "ce_orig": 0.4766985774040222, + "epoch": 0.41987202530735496, + "kl_loss": 0.1433141827583313, + "loss_ib": 0.001896103611215949, + "step": 1460 + }, + { + "ce_ib": 5.644496440887451, + "ce_orig": 0.7028821706771851, + "epoch": 0.41987202530735496, + "kl_loss": 0.09563258290290833, + "loss_ib": 0.001520775374956429, + "step": 1460 + }, + { + "ce_ib": 6.520239353179932, + "ce_orig": 1.4824178218841553, + "epoch": 0.41987202530735496, + "kl_loss": 0.07369743287563324, + "loss_ib": 0.0013889983529224992, + "step": 1460 + }, + { + "ce_ib": 2.592207193374634, + "ce_orig": 0.26936668157577515, + "epoch": 0.4201596088863326, + "kl_loss": 0.3481166660785675, + "loss_ib": 0.0037403872702270746, + "step": 1461 + }, + { + "ce_ib": 4.518808364868164, + "ce_orig": 0.6325695514678955, + "epoch": 0.4201596088863326, + "kl_loss": 0.11003083735704422, + "loss_ib": 0.0015521892346441746, + "step": 1461 + }, + { + "ce_ib": 3.3300020694732666, + "ce_orig": 0.5329961180686951, + "epoch": 0.4201596088863326, + "kl_loss": 0.0928843691945076, + "loss_ib": 0.0012618438340723515, + "step": 1461 + }, + { + "ce_ib": 3.862823963165283, + "ce_orig": 0.6123619675636292, + "epoch": 0.4201596088863326, + "kl_loss": 0.11784958839416504, + "loss_ib": 0.0015647781547158957, + "step": 1461 + }, + { + "ce_ib": 5.960676670074463, + "ce_orig": 1.0435458421707153, + "epoch": 0.4204471924653102, + "kl_loss": 0.10175187885761261, + "loss_ib": 0.0016135863261297345, + "step": 1462 + }, + { + "ce_ib": 4.466385364532471, + "ce_orig": 0.7635088562965393, + "epoch": 0.4204471924653102, + "kl_loss": 0.08558037132024765, + "loss_ib": 0.001302442280575633, + "step": 1462 + }, + { + "ce_ib": 3.277053117752075, + "ce_orig": 0.6193379759788513, + "epoch": 0.4204471924653102, + "kl_loss": 0.1026398092508316, + "loss_ib": 0.0013541033258661628, + "step": 1462 + }, + { + "ce_ib": 6.897516250610352, + "ce_orig": 1.2636864185333252, + "epoch": 0.4204471924653102, + "kl_loss": 0.10778491199016571, + "loss_ib": 0.0017676007701084018, + "step": 1462 + }, + { + "ce_ib": 7.378346920013428, + "ce_orig": 1.2946105003356934, + "epoch": 0.4207347760442879, + "kl_loss": 0.11343882977962494, + "loss_ib": 0.0018722229870036244, + "step": 1463 + }, + { + "ce_ib": 5.065412998199463, + "ce_orig": 0.7953489422798157, + "epoch": 0.4207347760442879, + "kl_loss": 0.14171633124351501, + "loss_ib": 0.001923704519867897, + "step": 1463 + }, + { + "ce_ib": 9.015141487121582, + "ce_orig": 0.8755373358726501, + "epoch": 0.4207347760442879, + "kl_loss": 0.20373690128326416, + "loss_ib": 0.0029388831462711096, + "step": 1463 + }, + { + "ce_ib": 1.8639222383499146, + "ce_orig": 0.34190768003463745, + "epoch": 0.4207347760442879, + "kl_loss": 0.035271838307380676, + "loss_ib": 0.0005391105660237372, + "step": 1463 + }, + { + "ce_ib": 6.010770797729492, + "ce_orig": 1.1248222589492798, + "epoch": 0.4210223596232655, + "kl_loss": 0.11656366288661957, + "loss_ib": 0.0017667136853560805, + "step": 1464 + }, + { + "ce_ib": 4.576229572296143, + "ce_orig": 0.4586426019668579, + "epoch": 0.4210223596232655, + "kl_loss": 0.13751602172851562, + "loss_ib": 0.0018327832221984863, + "step": 1464 + }, + { + "ce_ib": 4.836610317230225, + "ce_orig": 0.3597474992275238, + "epoch": 0.4210223596232655, + "kl_loss": 0.11420424282550812, + "loss_ib": 0.001625703414902091, + "step": 1464 + }, + { + "ce_ib": 4.152197360992432, + "ce_orig": 0.6041322946548462, + "epoch": 0.4210223596232655, + "kl_loss": 0.07142791152000427, + "loss_ib": 0.001129498821683228, + "step": 1464 + }, + { + "epoch": 0.42130994320224313, + "grad_norm": 0.08001025766134262, + "learning_rate": 4.8437275600799036e-05, + "loss": 0.8504, + "step": 1465 + }, + { + "ce_ib": 7.085323810577393, + "ce_orig": 1.2465083599090576, + "epoch": 0.42130994320224313, + "kl_loss": 0.08729834854602814, + "loss_ib": 0.0015815157676115632, + "step": 1465 + }, + { + "ce_ib": 8.220247268676758, + "ce_orig": 0.6453145146369934, + "epoch": 0.42130994320224313, + "kl_loss": 0.14087852835655212, + "loss_ib": 0.0022308097686618567, + "step": 1465 + }, + { + "ce_ib": 6.629300594329834, + "ce_orig": 1.267394781112671, + "epoch": 0.42130994320224313, + "kl_loss": 0.07615265995264053, + "loss_ib": 0.0014244564808905125, + "step": 1465 + }, + { + "ce_ib": 4.753538131713867, + "ce_orig": 0.4846392571926117, + "epoch": 0.42130994320224313, + "kl_loss": 0.07593982666730881, + "loss_ib": 0.0012347520096227527, + "step": 1465 + }, + { + "ce_ib": 2.3658440113067627, + "ce_orig": 0.26698946952819824, + "epoch": 0.4215975267812208, + "kl_loss": 0.23476958274841309, + "loss_ib": 0.002584280213341117, + "step": 1466 + }, + { + "ce_ib": 3.0916457176208496, + "ce_orig": 0.5870619416236877, + "epoch": 0.4215975267812208, + "kl_loss": 0.07413533329963684, + "loss_ib": 0.0010505177779123187, + "step": 1466 + }, + { + "ce_ib": 5.169697284698486, + "ce_orig": 1.0351004600524902, + "epoch": 0.4215975267812208, + "kl_loss": 0.09100858867168427, + "loss_ib": 0.0014270555693656206, + "step": 1466 + }, + { + "ce_ib": 9.962007522583008, + "ce_orig": 1.7318432331085205, + "epoch": 0.4215975267812208, + "kl_loss": 0.20365050435066223, + "loss_ib": 0.003032705746591091, + "step": 1466 + }, + { + "ce_ib": 6.981663227081299, + "ce_orig": 0.7525008320808411, + "epoch": 0.42188511036019843, + "kl_loss": 0.16806934773921967, + "loss_ib": 0.0023788597900420427, + "step": 1467 + }, + { + "ce_ib": 4.773881912231445, + "ce_orig": 0.6788235306739807, + "epoch": 0.42188511036019843, + "kl_loss": 0.13860733807086945, + "loss_ib": 0.0018634615698829293, + "step": 1467 + }, + { + "ce_ib": 7.603765964508057, + "ce_orig": 0.9796539545059204, + "epoch": 0.42188511036019843, + "kl_loss": 0.09102034568786621, + "loss_ib": 0.0016705800080671906, + "step": 1467 + }, + { + "ce_ib": 4.365150451660156, + "ce_orig": 0.2997058629989624, + "epoch": 0.42188511036019843, + "kl_loss": 0.1265721321105957, + "loss_ib": 0.0017022363608703017, + "step": 1467 + }, + { + "ce_ib": 6.895716667175293, + "ce_orig": 1.152829885482788, + "epoch": 0.42217269393917606, + "kl_loss": 0.09192080050706863, + "loss_ib": 0.0016087796539068222, + "step": 1468 + }, + { + "ce_ib": 6.535571575164795, + "ce_orig": 0.9287649393081665, + "epoch": 0.42217269393917606, + "kl_loss": 0.15220746397972107, + "loss_ib": 0.0021756317000836134, + "step": 1468 + }, + { + "ce_ib": 3.891815423965454, + "ce_orig": 0.3581300973892212, + "epoch": 0.42217269393917606, + "kl_loss": 0.1889212429523468, + "loss_ib": 0.0022783938329666853, + "step": 1468 + }, + { + "ce_ib": 2.487725019454956, + "ce_orig": 0.4007653594017029, + "epoch": 0.42217269393917606, + "kl_loss": 0.23008421063423157, + "loss_ib": 0.0025496145244687796, + "step": 1468 + }, + { + "ce_ib": 4.7796525955200195, + "ce_orig": 0.8040958642959595, + "epoch": 0.42246027751815374, + "kl_loss": 0.11338557302951813, + "loss_ib": 0.0016118210041895509, + "step": 1469 + }, + { + "ce_ib": 5.699153900146484, + "ce_orig": 0.7264449000358582, + "epoch": 0.42246027751815374, + "kl_loss": 0.1126704216003418, + "loss_ib": 0.0016966195544227958, + "step": 1469 + }, + { + "ce_ib": 5.746955394744873, + "ce_orig": 0.7303146123886108, + "epoch": 0.42246027751815374, + "kl_loss": 0.08956634998321533, + "loss_ib": 0.001470358925871551, + "step": 1469 + }, + { + "ce_ib": 4.594834804534912, + "ce_orig": 0.4276607632637024, + "epoch": 0.42246027751815374, + "kl_loss": 0.30717772245407104, + "loss_ib": 0.003531260648742318, + "step": 1469 + }, + { + "epoch": 0.42274786109713136, + "grad_norm": 0.09342991560697556, + "learning_rate": 4.842374312499405e-05, + "loss": 0.8164, + "step": 1470 + }, + { + "ce_ib": 7.28029203414917, + "ce_orig": 1.1571022272109985, + "epoch": 0.42274786109713136, + "kl_loss": 0.11070867627859116, + "loss_ib": 0.0018351158360019326, + "step": 1470 + }, + { + "ce_ib": 3.672571897506714, + "ce_orig": 0.4727495312690735, + "epoch": 0.42274786109713136, + "kl_loss": 0.09738650918006897, + "loss_ib": 0.0013411222025752068, + "step": 1470 + }, + { + "ce_ib": 7.971317768096924, + "ce_orig": 1.2943000793457031, + "epoch": 0.42274786109713136, + "kl_loss": 0.13336224853992462, + "loss_ib": 0.002130754292011261, + "step": 1470 + }, + { + "ce_ib": 4.77113151550293, + "ce_orig": 0.7445866465568542, + "epoch": 0.42274786109713136, + "kl_loss": 0.07389845699071884, + "loss_ib": 0.0012160976184532046, + "step": 1470 + }, + { + "ce_ib": 6.376780986785889, + "ce_orig": 1.117029070854187, + "epoch": 0.423035444676109, + "kl_loss": 0.11569513380527496, + "loss_ib": 0.0017946293810382485, + "step": 1471 + }, + { + "ce_ib": 7.349169731140137, + "ce_orig": 1.4872691631317139, + "epoch": 0.423035444676109, + "kl_loss": 0.09389686584472656, + "loss_ib": 0.0016738855047151446, + "step": 1471 + }, + { + "ce_ib": 3.8162693977355957, + "ce_orig": 0.4793437421321869, + "epoch": 0.423035444676109, + "kl_loss": 0.1309661865234375, + "loss_ib": 0.0016912887804210186, + "step": 1471 + }, + { + "ce_ib": 9.036438941955566, + "ce_orig": 1.5297104120254517, + "epoch": 0.423035444676109, + "kl_loss": 0.12823426723480225, + "loss_ib": 0.0021859866101294756, + "step": 1471 + }, + { + "ce_ib": 4.2849249839782715, + "ce_orig": 0.8074371218681335, + "epoch": 0.4233230282550866, + "kl_loss": 0.11331785470247269, + "loss_ib": 0.0015616710297763348, + "step": 1472 + }, + { + "ce_ib": 6.1896491050720215, + "ce_orig": 0.9406371712684631, + "epoch": 0.4233230282550866, + "kl_loss": 0.09268368035554886, + "loss_ib": 0.0015458017587661743, + "step": 1472 + }, + { + "ce_ib": 5.543100833892822, + "ce_orig": 0.6453739404678345, + "epoch": 0.4233230282550866, + "kl_loss": 0.13290368020534515, + "loss_ib": 0.0018833468202501535, + "step": 1472 + }, + { + "ce_ib": 5.75667667388916, + "ce_orig": 0.6321004033088684, + "epoch": 0.4233230282550866, + "kl_loss": 0.1326783448457718, + "loss_ib": 0.0019024510402232409, + "step": 1472 + }, + { + "ce_ib": 4.381680965423584, + "ce_orig": 0.7493016123771667, + "epoch": 0.4236106118340643, + "kl_loss": 0.07192018628120422, + "loss_ib": 0.0011573699302971363, + "step": 1473 + }, + { + "ce_ib": 3.2324516773223877, + "ce_orig": 0.4105515480041504, + "epoch": 0.4236106118340643, + "kl_loss": 0.09375610947608948, + "loss_ib": 0.0012608063407242298, + "step": 1473 + }, + { + "ce_ib": 5.543922424316406, + "ce_orig": 0.5035163164138794, + "epoch": 0.4236106118340643, + "kl_loss": 0.17082586884498596, + "loss_ib": 0.0022626507561653852, + "step": 1473 + }, + { + "ce_ib": 8.907859802246094, + "ce_orig": 1.4666497707366943, + "epoch": 0.4236106118340643, + "kl_loss": 0.07999315112829208, + "loss_ib": 0.001690717414021492, + "step": 1473 + }, + { + "ce_ib": 6.575271129608154, + "ce_orig": 0.5658308267593384, + "epoch": 0.4238981954130419, + "kl_loss": 0.3247229754924774, + "loss_ib": 0.0039047568570822477, + "step": 1474 + }, + { + "ce_ib": 3.7774455547332764, + "ce_orig": 0.7648996114730835, + "epoch": 0.4238981954130419, + "kl_loss": 0.06962984800338745, + "loss_ib": 0.0010740429861471057, + "step": 1474 + }, + { + "ce_ib": 5.892938137054443, + "ce_orig": 0.8106856942176819, + "epoch": 0.4238981954130419, + "kl_loss": 0.12563520669937134, + "loss_ib": 0.0018456458346918225, + "step": 1474 + }, + { + "ce_ib": 6.486325740814209, + "ce_orig": 0.46294519305229187, + "epoch": 0.4238981954130419, + "kl_loss": 0.28996336460113525, + "loss_ib": 0.0035482661332935095, + "step": 1474 + }, + { + "epoch": 0.42418577899201954, + "grad_norm": 0.09277452528476715, + "learning_rate": 4.841015421595511e-05, + "loss": 0.8851, + "step": 1475 + }, + { + "ce_ib": 6.460165977478027, + "ce_orig": 1.3669565916061401, + "epoch": 0.42418577899201954, + "kl_loss": 0.10979809612035751, + "loss_ib": 0.0017439975636079907, + "step": 1475 + }, + { + "ce_ib": 5.125516414642334, + "ce_orig": 0.6385084390640259, + "epoch": 0.42418577899201954, + "kl_loss": 0.09865008294582367, + "loss_ib": 0.0014990525087341666, + "step": 1475 + }, + { + "ce_ib": 5.793264865875244, + "ce_orig": 0.972436785697937, + "epoch": 0.42418577899201954, + "kl_loss": 0.08221779763698578, + "loss_ib": 0.0014015043852850795, + "step": 1475 + }, + { + "ce_ib": 4.9234795570373535, + "ce_orig": 0.5103410482406616, + "epoch": 0.42418577899201954, + "kl_loss": 0.12610337138175964, + "loss_ib": 0.0017533815698698163, + "step": 1475 + }, + { + "ce_ib": 6.956478595733643, + "ce_orig": 0.7556769251823425, + "epoch": 0.4244733625709972, + "kl_loss": 0.08710888028144836, + "loss_ib": 0.0015667366096749902, + "step": 1476 + }, + { + "ce_ib": 4.732303142547607, + "ce_orig": 0.6311543583869934, + "epoch": 0.4244733625709972, + "kl_loss": 0.11427406221628189, + "loss_ib": 0.0016159708611667156, + "step": 1476 + }, + { + "ce_ib": 8.348984718322754, + "ce_orig": 1.5093717575073242, + "epoch": 0.4244733625709972, + "kl_loss": 0.13694071769714355, + "loss_ib": 0.0022043054923415184, + "step": 1476 + }, + { + "ce_ib": 8.642187118530273, + "ce_orig": 1.3205996751785278, + "epoch": 0.4244733625709972, + "kl_loss": 0.12525543570518494, + "loss_ib": 0.0021167730446904898, + "step": 1476 + }, + { + "ce_ib": 3.667728900909424, + "ce_orig": 0.6192456483840942, + "epoch": 0.42476094614997484, + "kl_loss": 0.07596628367900848, + "loss_ib": 0.0011264357017353177, + "step": 1477 + }, + { + "ce_ib": 5.8655500411987305, + "ce_orig": 0.716063916683197, + "epoch": 0.42476094614997484, + "kl_loss": 0.12598375976085663, + "loss_ib": 0.0018463925225660205, + "step": 1477 + }, + { + "ce_ib": 8.43331241607666, + "ce_orig": 1.214815378189087, + "epoch": 0.42476094614997484, + "kl_loss": 0.09582893550395966, + "loss_ib": 0.0018016205867752433, + "step": 1477 + }, + { + "ce_ib": 3.889341354370117, + "ce_orig": 0.9948440194129944, + "epoch": 0.42476094614997484, + "kl_loss": 0.05919331684708595, + "loss_ib": 0.0009808673057705164, + "step": 1477 + }, + { + "ce_ib": 6.710433483123779, + "ce_orig": 1.4087427854537964, + "epoch": 0.42504852972895246, + "kl_loss": 0.08481179177761078, + "loss_ib": 0.0015191611601039767, + "step": 1478 + }, + { + "ce_ib": 5.514074325561523, + "ce_orig": 0.6335970163345337, + "epoch": 0.42504852972895246, + "kl_loss": 0.14356637001037598, + "loss_ib": 0.0019870710093528032, + "step": 1478 + }, + { + "ce_ib": 6.760642051696777, + "ce_orig": 0.7269102931022644, + "epoch": 0.42504852972895246, + "kl_loss": 0.12913690507411957, + "loss_ib": 0.0019674331415444613, + "step": 1478 + }, + { + "ce_ib": 5.520792484283447, + "ce_orig": 0.8305492997169495, + "epoch": 0.42504852972895246, + "kl_loss": 0.09097446501255035, + "loss_ib": 0.0014618238201364875, + "step": 1478 + }, + { + "ce_ib": 7.382885456085205, + "ce_orig": 0.9886298775672913, + "epoch": 0.42533611330793014, + "kl_loss": 0.09761233627796173, + "loss_ib": 0.0017144118901342154, + "step": 1479 + }, + { + "ce_ib": 3.056349039077759, + "ce_orig": 0.44829756021499634, + "epoch": 0.42533611330793014, + "kl_loss": 0.1604885309934616, + "loss_ib": 0.0019105201354250312, + "step": 1479 + }, + { + "ce_ib": 4.808676719665527, + "ce_orig": 0.206171452999115, + "epoch": 0.42533611330793014, + "kl_loss": 0.16769975423812866, + "loss_ib": 0.0021578650921583176, + "step": 1479 + }, + { + "ce_ib": 3.821026563644409, + "ce_orig": 0.6127325892448425, + "epoch": 0.42533611330793014, + "kl_loss": 0.054024383425712585, + "loss_ib": 0.0009223464876413345, + "step": 1479 + }, + { + "epoch": 0.42562369688690777, + "grad_norm": 0.09873180091381073, + "learning_rate": 4.839650890642104e-05, + "loss": 0.863, + "step": 1480 + }, + { + "ce_ib": 5.923498153686523, + "ce_orig": 0.9746879935264587, + "epoch": 0.42562369688690777, + "kl_loss": 0.10275821387767792, + "loss_ib": 0.001619931892491877, + "step": 1480 + }, + { + "ce_ib": 5.409031391143799, + "ce_orig": 0.6504145264625549, + "epoch": 0.42562369688690777, + "kl_loss": 0.14318981766700745, + "loss_ib": 0.001972801284864545, + "step": 1480 + }, + { + "ce_ib": 5.768213272094727, + "ce_orig": 1.4572311639785767, + "epoch": 0.42562369688690777, + "kl_loss": 0.07351214438676834, + "loss_ib": 0.0013119427021592855, + "step": 1480 + }, + { + "ce_ib": 7.788840293884277, + "ce_orig": 1.3698606491088867, + "epoch": 0.42562369688690777, + "kl_loss": 0.086025670170784, + "loss_ib": 0.001639140653423965, + "step": 1480 + }, + { + "ce_ib": 6.753962516784668, + "ce_orig": 0.8590308427810669, + "epoch": 0.4259112804658854, + "kl_loss": 0.20286300778388977, + "loss_ib": 0.002704026410356164, + "step": 1481 + }, + { + "ce_ib": 6.63479471206665, + "ce_orig": 1.1812267303466797, + "epoch": 0.4259112804658854, + "kl_loss": 0.13220015168190002, + "loss_ib": 0.001985481008887291, + "step": 1481 + }, + { + "ce_ib": 5.007169246673584, + "ce_orig": 0.7880942821502686, + "epoch": 0.4259112804658854, + "kl_loss": 0.07814208418130875, + "loss_ib": 0.0012821377022191882, + "step": 1481 + }, + { + "ce_ib": 5.508900165557861, + "ce_orig": 0.8839089870452881, + "epoch": 0.4259112804658854, + "kl_loss": 0.12156786024570465, + "loss_ib": 0.0017665685154497623, + "step": 1481 + }, + { + "ce_ib": 8.623146057128906, + "ce_orig": 1.9662206172943115, + "epoch": 0.426198864044863, + "kl_loss": 0.10497733950614929, + "loss_ib": 0.0019120879005640745, + "step": 1482 + }, + { + "ce_ib": 5.697625160217285, + "ce_orig": 0.9555112719535828, + "epoch": 0.426198864044863, + "kl_loss": 0.10451681166887283, + "loss_ib": 0.0016149305738508701, + "step": 1482 + }, + { + "ce_ib": 6.330455303192139, + "ce_orig": 0.987522304058075, + "epoch": 0.426198864044863, + "kl_loss": 0.11000921577215195, + "loss_ib": 0.0017331376438960433, + "step": 1482 + }, + { + "ce_ib": 4.160543441772461, + "ce_orig": 0.27671387791633606, + "epoch": 0.426198864044863, + "kl_loss": 0.17393755912780762, + "loss_ib": 0.0021554299164563417, + "step": 1482 + }, + { + "ce_ib": 4.079864025115967, + "ce_orig": 1.0547031164169312, + "epoch": 0.4264864476238407, + "kl_loss": 0.06679850071668625, + "loss_ib": 0.0010759714059531689, + "step": 1483 + }, + { + "ce_ib": 3.6775362491607666, + "ce_orig": 0.5392860770225525, + "epoch": 0.4264864476238407, + "kl_loss": 0.1283085197210312, + "loss_ib": 0.0016508387634530663, + "step": 1483 + }, + { + "ce_ib": 5.188068866729736, + "ce_orig": 0.8815830945968628, + "epoch": 0.4264864476238407, + "kl_loss": 0.07818719744682312, + "loss_ib": 0.0013006788212805986, + "step": 1483 + }, + { + "ce_ib": 5.105358123779297, + "ce_orig": 0.7038376331329346, + "epoch": 0.4264864476238407, + "kl_loss": 0.10735287517309189, + "loss_ib": 0.0015840644482523203, + "step": 1483 + }, + { + "ce_ib": 3.7244279384613037, + "ce_orig": 0.7728710174560547, + "epoch": 0.4267740312028183, + "kl_loss": 0.0835123211145401, + "loss_ib": 0.001207565888762474, + "step": 1484 + }, + { + "ce_ib": 5.630345821380615, + "ce_orig": 0.779137372970581, + "epoch": 0.4267740312028183, + "kl_loss": 0.14693334698677063, + "loss_ib": 0.00203236797824502, + "step": 1484 + }, + { + "ce_ib": 4.299570560455322, + "ce_orig": 1.070716142654419, + "epoch": 0.4267740312028183, + "kl_loss": 0.0551174022257328, + "loss_ib": 0.0009811309864744544, + "step": 1484 + }, + { + "ce_ib": 5.630002498626709, + "ce_orig": 1.0188994407653809, + "epoch": 0.4267740312028183, + "kl_loss": 0.10909400880336761, + "loss_ib": 0.0016539403004571795, + "step": 1484 + }, + { + "epoch": 0.42706161478179594, + "grad_norm": 0.10764925181865692, + "learning_rate": 4.8382807229266583e-05, + "loss": 0.9377, + "step": 1485 + }, + { + "ce_ib": 8.589742660522461, + "ce_orig": 1.2172328233718872, + "epoch": 0.42706161478179594, + "kl_loss": 0.12123409658670425, + "loss_ib": 0.0020713151898235083, + "step": 1485 + }, + { + "ce_ib": 8.910140991210938, + "ce_orig": 0.8911302089691162, + "epoch": 0.42706161478179594, + "kl_loss": 0.1684650480747223, + "loss_ib": 0.002575664548203349, + "step": 1485 + }, + { + "ce_ib": 6.011847019195557, + "ce_orig": 1.0063878297805786, + "epoch": 0.42706161478179594, + "kl_loss": 0.13005661964416504, + "loss_ib": 0.0019017508020624518, + "step": 1485 + }, + { + "ce_ib": 3.877028703689575, + "ce_orig": 0.4349795877933502, + "epoch": 0.42706161478179594, + "kl_loss": 0.07950502634048462, + "loss_ib": 0.0011827531270682812, + "step": 1485 + }, + { + "ce_ib": 3.6345038414001465, + "ce_orig": 0.6873906850814819, + "epoch": 0.4273491983607736, + "kl_loss": 0.090648353099823, + "loss_ib": 0.0012699338840320706, + "step": 1486 + }, + { + "ce_ib": 3.8409159183502197, + "ce_orig": 0.5949610471725464, + "epoch": 0.4273491983607736, + "kl_loss": 0.07319621741771698, + "loss_ib": 0.0011160537833347917, + "step": 1486 + }, + { + "ce_ib": 3.623453140258789, + "ce_orig": 0.7704918384552002, + "epoch": 0.4273491983607736, + "kl_loss": 0.12237702310085297, + "loss_ib": 0.0015861154533922672, + "step": 1486 + }, + { + "ce_ib": 4.723622798919678, + "ce_orig": 0.487240195274353, + "epoch": 0.4273491983607736, + "kl_loss": 0.17967435717582703, + "loss_ib": 0.002269105752930045, + "step": 1486 + }, + { + "ce_ib": 3.312293529510498, + "ce_orig": 0.7620286345481873, + "epoch": 0.42763678193975124, + "kl_loss": 0.0706988275051117, + "loss_ib": 0.0010382175678387284, + "step": 1487 + }, + { + "ce_ib": 3.4072983264923096, + "ce_orig": 0.6434142589569092, + "epoch": 0.42763678193975124, + "kl_loss": 0.09892557561397552, + "loss_ib": 0.0013299855636432767, + "step": 1487 + }, + { + "ce_ib": 3.0196757316589355, + "ce_orig": 0.33321601152420044, + "epoch": 0.42763678193975124, + "kl_loss": 0.22260043025016785, + "loss_ib": 0.002527971751987934, + "step": 1487 + }, + { + "ce_ib": 7.22377347946167, + "ce_orig": 1.249780297279358, + "epoch": 0.42763678193975124, + "kl_loss": 0.0948280617594719, + "loss_ib": 0.0016706580063328147, + "step": 1487 + }, + { + "ce_ib": 6.389316082000732, + "ce_orig": 0.8940539956092834, + "epoch": 0.42792436551872887, + "kl_loss": 0.11902444809675217, + "loss_ib": 0.0018291760934516788, + "step": 1488 + }, + { + "ce_ib": 7.263855457305908, + "ce_orig": 0.7269363403320312, + "epoch": 0.42792436551872887, + "kl_loss": 0.13229572772979736, + "loss_ib": 0.002049342729151249, + "step": 1488 + }, + { + "ce_ib": 4.953929901123047, + "ce_orig": 1.0663037300109863, + "epoch": 0.42792436551872887, + "kl_loss": 0.06149699166417122, + "loss_ib": 0.001110362820327282, + "step": 1488 + }, + { + "ce_ib": 8.461508750915527, + "ce_orig": 1.434740662574768, + "epoch": 0.42792436551872887, + "kl_loss": 0.15363231301307678, + "loss_ib": 0.002382473787292838, + "step": 1488 + }, + { + "ce_ib": 4.225872039794922, + "ce_orig": 0.5934211611747742, + "epoch": 0.42821194909770655, + "kl_loss": 0.11377574503421783, + "loss_ib": 0.0015603447100147605, + "step": 1489 + }, + { + "ce_ib": 6.648335933685303, + "ce_orig": 1.411804437637329, + "epoch": 0.42821194909770655, + "kl_loss": 0.07212527096271515, + "loss_ib": 0.0013860863400623202, + "step": 1489 + }, + { + "ce_ib": 6.111649513244629, + "ce_orig": 0.8437939286231995, + "epoch": 0.42821194909770655, + "kl_loss": 0.06988925486803055, + "loss_ib": 0.0013100574724376202, + "step": 1489 + }, + { + "ce_ib": 3.841252326965332, + "ce_orig": 0.5593860149383545, + "epoch": 0.42821194909770655, + "kl_loss": 0.09108671545982361, + "loss_ib": 0.0012949923984706402, + "step": 1489 + }, + { + "epoch": 0.42849953267668417, + "grad_norm": 0.10317344963550568, + "learning_rate": 4.836904921750223e-05, + "loss": 0.8231, + "step": 1490 + }, + { + "ce_ib": 5.85347318649292, + "ce_orig": 1.0941354036331177, + "epoch": 0.42849953267668417, + "kl_loss": 0.09418511390686035, + "loss_ib": 0.0015271983575075865, + "step": 1490 + }, + { + "ce_ib": 4.57334041595459, + "ce_orig": 0.8032926917076111, + "epoch": 0.42849953267668417, + "kl_loss": 0.06270309537649155, + "loss_ib": 0.0010843649506568909, + "step": 1490 + }, + { + "ce_ib": 3.4598300457000732, + "ce_orig": 0.3299408555030823, + "epoch": 0.42849953267668417, + "kl_loss": 0.07001639157533646, + "loss_ib": 0.0010461468482390046, + "step": 1490 + }, + { + "ce_ib": 4.194902420043945, + "ce_orig": 0.5485158562660217, + "epoch": 0.42849953267668417, + "kl_loss": 0.06838849186897278, + "loss_ib": 0.001103375107049942, + "step": 1490 + }, + { + "ce_ib": 4.9279961585998535, + "ce_orig": 0.8593989610671997, + "epoch": 0.4287871162556618, + "kl_loss": 0.09136204421520233, + "loss_ib": 0.0014064200222492218, + "step": 1491 + }, + { + "ce_ib": 5.720012664794922, + "ce_orig": 1.2061270475387573, + "epoch": 0.4287871162556618, + "kl_loss": 0.1045733168721199, + "loss_ib": 0.0016177344368770719, + "step": 1491 + }, + { + "ce_ib": 3.831967353820801, + "ce_orig": 0.6567938327789307, + "epoch": 0.4287871162556618, + "kl_loss": 0.07468820363283157, + "loss_ib": 0.001130078686401248, + "step": 1491 + }, + { + "ce_ib": 3.877784013748169, + "ce_orig": 0.8739439249038696, + "epoch": 0.4287871162556618, + "kl_loss": 0.08189292252063751, + "loss_ib": 0.001206707675009966, + "step": 1491 + }, + { + "ce_ib": 4.748837471008301, + "ce_orig": 1.0048332214355469, + "epoch": 0.4290746998346394, + "kl_loss": 0.06992149353027344, + "loss_ib": 0.001174098695628345, + "step": 1492 + }, + { + "ce_ib": 4.89517879486084, + "ce_orig": 0.5884690880775452, + "epoch": 0.4290746998346394, + "kl_loss": 0.07238373160362244, + "loss_ib": 0.0012133552227169275, + "step": 1492 + }, + { + "ce_ib": 7.266257286071777, + "ce_orig": 1.2749794721603394, + "epoch": 0.4290746998346394, + "kl_loss": 0.20466673374176025, + "loss_ib": 0.0027732928283512592, + "step": 1492 + }, + { + "ce_ib": 5.501678466796875, + "ce_orig": 0.3813422918319702, + "epoch": 0.4290746998346394, + "kl_loss": 0.10594455152750015, + "loss_ib": 0.0016096133040264249, + "step": 1492 + }, + { + "ce_ib": 6.912803649902344, + "ce_orig": 0.9841716289520264, + "epoch": 0.4293622834136171, + "kl_loss": 0.2044220119714737, + "loss_ib": 0.0027355002239346504, + "step": 1493 + }, + { + "ce_ib": 4.345367908477783, + "ce_orig": 0.7242448329925537, + "epoch": 0.4293622834136171, + "kl_loss": 0.0738978162407875, + "loss_ib": 0.001173514872789383, + "step": 1493 + }, + { + "ce_ib": 7.523437976837158, + "ce_orig": 0.5210906863212585, + "epoch": 0.4293622834136171, + "kl_loss": 0.1008140817284584, + "loss_ib": 0.001760484534315765, + "step": 1493 + }, + { + "ce_ib": 4.733566761016846, + "ce_orig": 0.6056400537490845, + "epoch": 0.4293622834136171, + "kl_loss": 0.1043701171875, + "loss_ib": 0.0015170578844845295, + "step": 1493 + }, + { + "ce_ib": 5.4491868019104, + "ce_orig": 0.9924566745758057, + "epoch": 0.4296498669925947, + "kl_loss": 0.07613148540258408, + "loss_ib": 0.0013062335783615708, + "step": 1494 + }, + { + "ce_ib": 2.647188186645508, + "ce_orig": 0.4178062677383423, + "epoch": 0.4296498669925947, + "kl_loss": 0.09912779927253723, + "loss_ib": 0.0012559967581182718, + "step": 1494 + }, + { + "ce_ib": 3.5450642108917236, + "ce_orig": 0.34603291749954224, + "epoch": 0.4296498669925947, + "kl_loss": 0.07269264757633209, + "loss_ib": 0.0010814327979460359, + "step": 1494 + }, + { + "ce_ib": 2.828864336013794, + "ce_orig": 0.5859595537185669, + "epoch": 0.4296498669925947, + "kl_loss": 0.05315869301557541, + "loss_ib": 0.0008144733146764338, + "step": 1494 + }, + { + "epoch": 0.42993745057157234, + "grad_norm": 0.10054890066385269, + "learning_rate": 4.835523490427425e-05, + "loss": 0.8296, + "step": 1495 + }, + { + "ce_ib": 4.736669063568115, + "ce_orig": 0.564308226108551, + "epoch": 0.42993745057157234, + "kl_loss": 0.07203033566474915, + "loss_ib": 0.0011939702089875937, + "step": 1495 + }, + { + "ce_ib": 3.487304925918579, + "ce_orig": 0.5112748742103577, + "epoch": 0.42993745057157234, + "kl_loss": 0.052416346967220306, + "loss_ib": 0.0008728938992135227, + "step": 1495 + }, + { + "ce_ib": 6.845876216888428, + "ce_orig": 0.859950065612793, + "epoch": 0.42993745057157234, + "kl_loss": 0.12004132568836212, + "loss_ib": 0.0018850007327273488, + "step": 1495 + }, + { + "ce_ib": 6.047791481018066, + "ce_orig": 1.0694940090179443, + "epoch": 0.42993745057157234, + "kl_loss": 0.06923744082450867, + "loss_ib": 0.0012971535325050354, + "step": 1495 + }, + { + "ce_ib": 7.036633491516113, + "ce_orig": 1.3809123039245605, + "epoch": 0.43022503415055, + "kl_loss": 0.12386683374643326, + "loss_ib": 0.0019423315534368157, + "step": 1496 + }, + { + "ce_ib": 9.137833595275879, + "ce_orig": 1.7173395156860352, + "epoch": 0.43022503415055, + "kl_loss": 0.11944153904914856, + "loss_ib": 0.0021081985905766487, + "step": 1496 + }, + { + "ce_ib": 5.539783477783203, + "ce_orig": 0.6790186762809753, + "epoch": 0.43022503415055, + "kl_loss": 0.16711831092834473, + "loss_ib": 0.0022251615300774574, + "step": 1496 + }, + { + "ce_ib": 6.523403167724609, + "ce_orig": 0.668170154094696, + "epoch": 0.43022503415055, + "kl_loss": 0.09907539188861847, + "loss_ib": 0.0016430941177532077, + "step": 1496 + }, + { + "ce_ib": 8.092485427856445, + "ce_orig": 1.5751919746398926, + "epoch": 0.43051261772952765, + "kl_loss": 0.10649112612009048, + "loss_ib": 0.0018741597887128592, + "step": 1497 + }, + { + "ce_ib": 8.320467948913574, + "ce_orig": 1.3753570318222046, + "epoch": 0.43051261772952765, + "kl_loss": 0.11326869577169418, + "loss_ib": 0.0019647337030619383, + "step": 1497 + }, + { + "ce_ib": 7.601989269256592, + "ce_orig": 1.6063616275787354, + "epoch": 0.43051261772952765, + "kl_loss": 0.10248681157827377, + "loss_ib": 0.0017850670265033841, + "step": 1497 + }, + { + "ce_ib": 5.250172138214111, + "ce_orig": 1.012228012084961, + "epoch": 0.43051261772952765, + "kl_loss": 0.08977590501308441, + "loss_ib": 0.0014227762585505843, + "step": 1497 + }, + { + "ce_ib": 7.121866703033447, + "ce_orig": 1.2347811460494995, + "epoch": 0.43080020130850527, + "kl_loss": 0.14125066995620728, + "loss_ib": 0.0021246932446956635, + "step": 1498 + }, + { + "ce_ib": 3.8643674850463867, + "ce_orig": 0.6364955902099609, + "epoch": 0.43080020130850527, + "kl_loss": 0.08937310427427292, + "loss_ib": 0.0012801677221432328, + "step": 1498 + }, + { + "ce_ib": 5.398726940155029, + "ce_orig": 1.1411463022232056, + "epoch": 0.43080020130850527, + "kl_loss": 0.08773718774318695, + "loss_ib": 0.001417244435288012, + "step": 1498 + }, + { + "ce_ib": 5.65582799911499, + "ce_orig": 1.0903723239898682, + "epoch": 0.43080020130850527, + "kl_loss": 0.08431953191757202, + "loss_ib": 0.0014087781310081482, + "step": 1498 + }, + { + "ce_ib": 8.064273834228516, + "ce_orig": 1.591813564300537, + "epoch": 0.43108778488748295, + "kl_loss": 0.12314474582672119, + "loss_ib": 0.0020378746557980776, + "step": 1499 + }, + { + "ce_ib": 7.066220760345459, + "ce_orig": 1.3731032609939575, + "epoch": 0.43108778488748295, + "kl_loss": 0.09561625123023987, + "loss_ib": 0.0016627844888716936, + "step": 1499 + }, + { + "ce_ib": 6.4272074699401855, + "ce_orig": 0.6825188994407654, + "epoch": 0.43108778488748295, + "kl_loss": 0.18424586951732635, + "loss_ib": 0.002485179342329502, + "step": 1499 + }, + { + "ce_ib": 7.436380863189697, + "ce_orig": 1.1504343748092651, + "epoch": 0.43108778488748295, + "kl_loss": 0.13248597085475922, + "loss_ib": 0.002068497706204653, + "step": 1499 + }, + { + "epoch": 0.4313753684664606, + "grad_norm": 0.08262129127979279, + "learning_rate": 4.8341364322864523e-05, + "loss": 0.9576, + "step": 1500 + }, + { + "ce_ib": 6.839731693267822, + "ce_orig": 1.3139455318450928, + "epoch": 0.4313753684664606, + "kl_loss": 0.08096840232610703, + "loss_ib": 0.0014936572406440973, + "step": 1500 + }, + { + "ce_ib": 2.4358808994293213, + "ce_orig": 0.20422646403312683, + "epoch": 0.4313753684664606, + "kl_loss": 0.12524980306625366, + "loss_ib": 0.0014960861299186945, + "step": 1500 + }, + { + "ce_ib": 4.247408390045166, + "ce_orig": 0.6773697137832642, + "epoch": 0.4313753684664606, + "kl_loss": 0.09362715482711792, + "loss_ib": 0.0013610124588012695, + "step": 1500 + }, + { + "ce_ib": 5.133529186248779, + "ce_orig": 0.8000495433807373, + "epoch": 0.4313753684664606, + "kl_loss": 0.1080671176314354, + "loss_ib": 0.0015940240118652582, + "step": 1500 + }, + { + "ce_ib": 5.000636577606201, + "ce_orig": 0.8373590707778931, + "epoch": 0.4316629520454382, + "kl_loss": 0.07154040783643723, + "loss_ib": 0.001215467695146799, + "step": 1501 + }, + { + "ce_ib": 6.954758167266846, + "ce_orig": 0.7926986217498779, + "epoch": 0.4316629520454382, + "kl_loss": 0.08943982422351837, + "loss_ib": 0.0015898740384727716, + "step": 1501 + }, + { + "ce_ib": 6.744113922119141, + "ce_orig": 1.4782406091690063, + "epoch": 0.4316629520454382, + "kl_loss": 0.08303745836019516, + "loss_ib": 0.0015047859633341432, + "step": 1501 + }, + { + "ce_ib": 3.2549874782562256, + "ce_orig": 0.5299631953239441, + "epoch": 0.4316629520454382, + "kl_loss": 0.07108180224895477, + "loss_ib": 0.0010363167384639382, + "step": 1501 + }, + { + "ce_ib": 7.279045104980469, + "ce_orig": 1.350122094154358, + "epoch": 0.4319505356244158, + "kl_loss": 0.21054333448410034, + "loss_ib": 0.0028333377558737993, + "step": 1502 + }, + { + "ce_ib": 4.620668411254883, + "ce_orig": 0.7452244758605957, + "epoch": 0.4319505356244158, + "kl_loss": 0.11692861467599869, + "loss_ib": 0.0016313528176397085, + "step": 1502 + }, + { + "ce_ib": 5.142733097076416, + "ce_orig": 0.5278648138046265, + "epoch": 0.4319505356244158, + "kl_loss": 0.11434760689735413, + "loss_ib": 0.0016577494097873569, + "step": 1502 + }, + { + "ce_ib": 8.882280349731445, + "ce_orig": 1.2213600873947144, + "epoch": 0.4319505356244158, + "kl_loss": 0.12895908951759338, + "loss_ib": 0.002177818911150098, + "step": 1502 + }, + { + "ce_ib": 5.134566783905029, + "ce_orig": 1.0764600038528442, + "epoch": 0.4322381192033935, + "kl_loss": 0.07653919607400894, + "loss_ib": 0.0012788486201316118, + "step": 1503 + }, + { + "ce_ib": 4.221365928649902, + "ce_orig": 0.6740595102310181, + "epoch": 0.4322381192033935, + "kl_loss": 0.10123997926712036, + "loss_ib": 0.0014345364179462194, + "step": 1503 + }, + { + "ce_ib": 5.515237808227539, + "ce_orig": 0.7556322813034058, + "epoch": 0.4322381192033935, + "kl_loss": 0.16605684161186218, + "loss_ib": 0.0022120920475572348, + "step": 1503 + }, + { + "ce_ib": 5.79183292388916, + "ce_orig": 1.3080008029937744, + "epoch": 0.4322381192033935, + "kl_loss": 0.10372674465179443, + "loss_ib": 0.0016164506087079644, + "step": 1503 + }, + { + "ce_ib": 4.8557963371276855, + "ce_orig": 0.6234728097915649, + "epoch": 0.4325257027823711, + "kl_loss": 0.0935010313987732, + "loss_ib": 0.0014205899788066745, + "step": 1504 + }, + { + "ce_ib": 7.1131086349487305, + "ce_orig": 1.0130103826522827, + "epoch": 0.4325257027823711, + "kl_loss": 0.08929497003555298, + "loss_ib": 0.0016042604111135006, + "step": 1504 + }, + { + "ce_ib": 6.16212272644043, + "ce_orig": 0.9215428829193115, + "epoch": 0.4325257027823711, + "kl_loss": 0.1784028857946396, + "loss_ib": 0.002400241093710065, + "step": 1504 + }, + { + "ce_ib": 4.01924467086792, + "ce_orig": 0.4782436192035675, + "epoch": 0.4325257027823711, + "kl_loss": 0.08804503083229065, + "loss_ib": 0.0012823748402297497, + "step": 1504 + }, + { + "epoch": 0.43281328636134875, + "grad_norm": 0.08887265622615814, + "learning_rate": 4.832743750669049e-05, + "loss": 0.8339, + "step": 1505 + }, + { + "ce_ib": 7.110744476318359, + "ce_orig": 1.2146950960159302, + "epoch": 0.43281328636134875, + "kl_loss": 0.07188645750284195, + "loss_ib": 0.0014299389440566301, + "step": 1505 + }, + { + "ce_ib": 6.700220584869385, + "ce_orig": 1.2089580297470093, + "epoch": 0.43281328636134875, + "kl_loss": 0.10347436368465424, + "loss_ib": 0.0017047657165676355, + "step": 1505 + }, + { + "ce_ib": 5.517516613006592, + "ce_orig": 0.904000997543335, + "epoch": 0.43281328636134875, + "kl_loss": 0.08528436720371246, + "loss_ib": 0.0014045953284949064, + "step": 1505 + }, + { + "ce_ib": 3.9207072257995605, + "ce_orig": 0.6445602178573608, + "epoch": 0.43281328636134875, + "kl_loss": 0.10317622870206833, + "loss_ib": 0.0014238330768421292, + "step": 1505 + }, + { + "ce_ib": 3.850243091583252, + "ce_orig": 0.6252925395965576, + "epoch": 0.4331008699403264, + "kl_loss": 0.08628550171852112, + "loss_ib": 0.0012478792341426015, + "step": 1506 + }, + { + "ce_ib": 7.749866008758545, + "ce_orig": 1.6045938730239868, + "epoch": 0.4331008699403264, + "kl_loss": 0.13435699045658112, + "loss_ib": 0.0021185565274208784, + "step": 1506 + }, + { + "ce_ib": 3.420017719268799, + "ce_orig": 0.5475412607192993, + "epoch": 0.4331008699403264, + "kl_loss": 0.08867872506380081, + "loss_ib": 0.0012287889840081334, + "step": 1506 + }, + { + "ce_ib": 3.9808108806610107, + "ce_orig": 0.7376654744148254, + "epoch": 0.4331008699403264, + "kl_loss": 0.15282005071640015, + "loss_ib": 0.0019262816058471799, + "step": 1506 + }, + { + "ce_ib": 4.581400394439697, + "ce_orig": 0.8846971988677979, + "epoch": 0.43338845351930405, + "kl_loss": 0.08067423850297928, + "loss_ib": 0.0012648823903873563, + "step": 1507 + }, + { + "ce_ib": 4.8960723876953125, + "ce_orig": 0.5462583303451538, + "epoch": 0.43338845351930405, + "kl_loss": 0.1471872627735138, + "loss_ib": 0.001961479661986232, + "step": 1507 + }, + { + "ce_ib": 4.630989074707031, + "ce_orig": 0.6840903759002686, + "epoch": 0.43338845351930405, + "kl_loss": 0.09103305637836456, + "loss_ib": 0.0013734294334426522, + "step": 1507 + }, + { + "ce_ib": 4.074948787689209, + "ce_orig": 0.6607051491737366, + "epoch": 0.43338845351930405, + "kl_loss": 0.18522922694683075, + "loss_ib": 0.0022597871720790863, + "step": 1507 + }, + { + "ce_ib": 7.040496826171875, + "ce_orig": 0.8136718273162842, + "epoch": 0.4336760370982817, + "kl_loss": 0.10526634752750397, + "loss_ib": 0.0017567130271345377, + "step": 1508 + }, + { + "ce_ib": 6.667243480682373, + "ce_orig": 1.112916350364685, + "epoch": 0.4336760370982817, + "kl_loss": 0.10773064196109772, + "loss_ib": 0.0017440306255593896, + "step": 1508 + }, + { + "ce_ib": 4.702635288238525, + "ce_orig": 0.6297177672386169, + "epoch": 0.4336760370982817, + "kl_loss": 0.08057142049074173, + "loss_ib": 0.0012759778182953596, + "step": 1508 + }, + { + "ce_ib": 5.877447128295898, + "ce_orig": 0.6286649107933044, + "epoch": 0.4336760370982817, + "kl_loss": 0.11513443291187286, + "loss_ib": 0.0017390890279784799, + "step": 1508 + }, + { + "ce_ib": 7.7480645179748535, + "ce_orig": 1.2657722234725952, + "epoch": 0.43396362067725935, + "kl_loss": 0.0821981206536293, + "loss_ib": 0.001596787478774786, + "step": 1509 + }, + { + "ce_ib": 5.088909149169922, + "ce_orig": 0.9932161569595337, + "epoch": 0.43396362067725935, + "kl_loss": 0.10796862840652466, + "loss_ib": 0.0015885771717876196, + "step": 1509 + }, + { + "ce_ib": 5.364145278930664, + "ce_orig": 0.8787831664085388, + "epoch": 0.43396362067725935, + "kl_loss": 0.09939250349998474, + "loss_ib": 0.0015303395921364427, + "step": 1509 + }, + { + "ce_ib": 4.924019813537598, + "ce_orig": 1.0369383096694946, + "epoch": 0.43396362067725935, + "kl_loss": 0.143111452460289, + "loss_ib": 0.0019235165091231465, + "step": 1509 + }, + { + "epoch": 0.434251204256237, + "grad_norm": 0.0973566398024559, + "learning_rate": 4.831345448930509e-05, + "loss": 0.8538, + "step": 1510 + }, + { + "ce_ib": 7.3009352684021, + "ce_orig": 0.5541552305221558, + "epoch": 0.434251204256237, + "kl_loss": 0.10915008187294006, + "loss_ib": 0.0018215941963717341, + "step": 1510 + }, + { + "ce_ib": 3.8974852561950684, + "ce_orig": 0.6998268961906433, + "epoch": 0.434251204256237, + "kl_loss": 0.059542421251535416, + "loss_ib": 0.000985172693617642, + "step": 1510 + }, + { + "ce_ib": 8.728836059570312, + "ce_orig": 1.5403189659118652, + "epoch": 0.434251204256237, + "kl_loss": 0.10407478362321854, + "loss_ib": 0.001913631334900856, + "step": 1510 + }, + { + "ce_ib": 4.761317729949951, + "ce_orig": 0.6266696453094482, + "epoch": 0.434251204256237, + "kl_loss": 0.1282866895198822, + "loss_ib": 0.001758998609147966, + "step": 1510 + }, + { + "ce_ib": 4.101319789886475, + "ce_orig": 0.27982378005981445, + "epoch": 0.4345387878352146, + "kl_loss": 0.19046571850776672, + "loss_ib": 0.0023147889878600836, + "step": 1511 + }, + { + "ce_ib": 5.992220878601074, + "ce_orig": 0.8278435468673706, + "epoch": 0.4345387878352146, + "kl_loss": 0.11560919880867004, + "loss_ib": 0.0017553139477968216, + "step": 1511 + }, + { + "ce_ib": 6.347783088684082, + "ce_orig": 0.7248057126998901, + "epoch": 0.4345387878352146, + "kl_loss": 0.0786624476313591, + "loss_ib": 0.0014214026741683483, + "step": 1511 + }, + { + "ce_ib": 8.597983360290527, + "ce_orig": 1.7392653226852417, + "epoch": 0.4345387878352146, + "kl_loss": 0.12400998175144196, + "loss_ib": 0.0020998981781303883, + "step": 1511 + }, + { + "ce_ib": 9.078392028808594, + "ce_orig": 1.7484946250915527, + "epoch": 0.4348263714141922, + "kl_loss": 0.20181328058242798, + "loss_ib": 0.0029259719885885715, + "step": 1512 + }, + { + "ce_ib": 4.593262672424316, + "ce_orig": 0.46031442284584045, + "epoch": 0.4348263714141922, + "kl_loss": 0.08651666343212128, + "loss_ib": 0.0013244928559288383, + "step": 1512 + }, + { + "ce_ib": 6.87193489074707, + "ce_orig": 0.8846132159233093, + "epoch": 0.4348263714141922, + "kl_loss": 0.128667414188385, + "loss_ib": 0.0019738676492124796, + "step": 1512 + }, + { + "ce_ib": 8.019463539123535, + "ce_orig": 1.1108201742172241, + "epoch": 0.4348263714141922, + "kl_loss": 0.08216021955013275, + "loss_ib": 0.0016235485672950745, + "step": 1512 + }, + { + "ce_ib": 4.452235698699951, + "ce_orig": 0.7211059331893921, + "epoch": 0.4351139549931699, + "kl_loss": 0.0791214108467102, + "loss_ib": 0.0012364377034828067, + "step": 1513 + }, + { + "ce_ib": 3.3802762031555176, + "ce_orig": 0.6794382333755493, + "epoch": 0.4351139549931699, + "kl_loss": 0.0506846085190773, + "loss_ib": 0.000844873720780015, + "step": 1513 + }, + { + "ce_ib": 5.593039512634277, + "ce_orig": 1.2539618015289307, + "epoch": 0.4351139549931699, + "kl_loss": 0.10623276233673096, + "loss_ib": 0.0016216314397752285, + "step": 1513 + }, + { + "ce_ib": 11.618020057678223, + "ce_orig": 2.0641403198242188, + "epoch": 0.4351139549931699, + "kl_loss": 0.3602520823478699, + "loss_ib": 0.004764322657138109, + "step": 1513 + }, + { + "ce_ib": 7.659701824188232, + "ce_orig": 1.406266212463379, + "epoch": 0.43540153857214753, + "kl_loss": 0.08894098550081253, + "loss_ib": 0.001655380125157535, + "step": 1514 + }, + { + "ce_ib": 5.817800045013428, + "ce_orig": 0.8796743750572205, + "epoch": 0.43540153857214753, + "kl_loss": 0.13410520553588867, + "loss_ib": 0.001922832103446126, + "step": 1514 + }, + { + "ce_ib": 7.372805118560791, + "ce_orig": 1.2872525453567505, + "epoch": 0.43540153857214753, + "kl_loss": 0.13149800896644592, + "loss_ib": 0.0020522605627775192, + "step": 1514 + }, + { + "ce_ib": 5.705288887023926, + "ce_orig": 0.8373372554779053, + "epoch": 0.43540153857214753, + "kl_loss": 0.1164829209446907, + "loss_ib": 0.0017353580333292484, + "step": 1514 + }, + { + "epoch": 0.43568912215112515, + "grad_norm": 0.10254115611314774, + "learning_rate": 4.829941530439666e-05, + "loss": 0.9174, + "step": 1515 + }, + { + "ce_ib": 3.642223834991455, + "ce_orig": 0.8573952913284302, + "epoch": 0.43568912215112515, + "kl_loss": 0.06534366309642792, + "loss_ib": 0.0010176589712500572, + "step": 1515 + }, + { + "ce_ib": 3.242440938949585, + "ce_orig": 0.38875240087509155, + "epoch": 0.43568912215112515, + "kl_loss": 0.16390663385391235, + "loss_ib": 0.001963310409337282, + "step": 1515 + }, + { + "ce_ib": 6.437833309173584, + "ce_orig": 0.9019474983215332, + "epoch": 0.43568912215112515, + "kl_loss": 0.08185650408267975, + "loss_ib": 0.0014623483875766397, + "step": 1515 + }, + { + "ce_ib": 3.479649782180786, + "ce_orig": 0.595124363899231, + "epoch": 0.43568912215112515, + "kl_loss": 0.06679306924343109, + "loss_ib": 0.0010158956283703446, + "step": 1515 + }, + { + "ce_ib": 4.77842378616333, + "ce_orig": 0.8117563724517822, + "epoch": 0.43597670573010283, + "kl_loss": 0.11193256080150604, + "loss_ib": 0.0015971679240465164, + "step": 1516 + }, + { + "ce_ib": 8.796906471252441, + "ce_orig": 1.1883763074874878, + "epoch": 0.43597670573010283, + "kl_loss": 0.11216680705547333, + "loss_ib": 0.002001358661800623, + "step": 1516 + }, + { + "ce_ib": 5.718897342681885, + "ce_orig": 0.7612197995185852, + "epoch": 0.43597670573010283, + "kl_loss": 0.15641018748283386, + "loss_ib": 0.002135991584509611, + "step": 1516 + }, + { + "ce_ib": 3.859581232070923, + "ce_orig": 1.0379844903945923, + "epoch": 0.43597670573010283, + "kl_loss": 0.2832275629043579, + "loss_ib": 0.003218233585357666, + "step": 1516 + }, + { + "ce_ib": 3.9302423000335693, + "ce_orig": 0.5944304466247559, + "epoch": 0.43626428930908046, + "kl_loss": 0.10014890879392624, + "loss_ib": 0.0013945131795480847, + "step": 1517 + }, + { + "ce_ib": 5.893682479858398, + "ce_orig": 0.9609090685844421, + "epoch": 0.43626428930908046, + "kl_loss": 0.1125008761882782, + "loss_ib": 0.0017143769655376673, + "step": 1517 + }, + { + "ce_ib": 7.110599040985107, + "ce_orig": 1.0786327123641968, + "epoch": 0.43626428930908046, + "kl_loss": 0.14500223100185394, + "loss_ib": 0.002161082113161683, + "step": 1517 + }, + { + "ce_ib": 6.0750298500061035, + "ce_orig": 1.0018584728240967, + "epoch": 0.43626428930908046, + "kl_loss": 0.11381145566701889, + "loss_ib": 0.0017456174828112125, + "step": 1517 + }, + { + "ce_ib": 4.3132524490356445, + "ce_orig": 0.5560404062271118, + "epoch": 0.4365518728880581, + "kl_loss": 0.13735431432724, + "loss_ib": 0.0018048683414235711, + "step": 1518 + }, + { + "ce_ib": 4.659675598144531, + "ce_orig": 0.6699545979499817, + "epoch": 0.4365518728880581, + "kl_loss": 0.11489828675985336, + "loss_ib": 0.0016149503644555807, + "step": 1518 + }, + { + "ce_ib": 9.484546661376953, + "ce_orig": 1.8520828485488892, + "epoch": 0.4365518728880581, + "kl_loss": 0.1452958881855011, + "loss_ib": 0.0024014136288315058, + "step": 1518 + }, + { + "ce_ib": 3.3063762187957764, + "ce_orig": 0.4528732895851135, + "epoch": 0.4365518728880581, + "kl_loss": 0.23213878273963928, + "loss_ib": 0.0026520255487412214, + "step": 1518 + }, + { + "ce_ib": 10.56447982788086, + "ce_orig": 2.0902206897735596, + "epoch": 0.43683945646703576, + "kl_loss": 0.1520857810974121, + "loss_ib": 0.0025773057714104652, + "step": 1519 + }, + { + "ce_ib": 5.2714524269104, + "ce_orig": 0.556014895439148, + "epoch": 0.43683945646703576, + "kl_loss": 0.08516090363264084, + "loss_ib": 0.0013787541538476944, + "step": 1519 + }, + { + "ce_ib": 4.367508411407471, + "ce_orig": 0.3937459886074066, + "epoch": 0.43683945646703576, + "kl_loss": 0.09350946545600891, + "loss_ib": 0.0013718453701585531, + "step": 1519 + }, + { + "ce_ib": 5.010787487030029, + "ce_orig": 0.7077054977416992, + "epoch": 0.43683945646703576, + "kl_loss": 0.1494310051202774, + "loss_ib": 0.0019953888840973377, + "step": 1519 + }, + { + "epoch": 0.4371270400460134, + "grad_norm": 0.08211319893598557, + "learning_rate": 4.828531998578885e-05, + "loss": 0.8708, + "step": 1520 + }, + { + "ce_ib": 6.620185852050781, + "ce_orig": 1.2019859552383423, + "epoch": 0.4371270400460134, + "kl_loss": 0.09790713340044022, + "loss_ib": 0.0016410899115726352, + "step": 1520 + }, + { + "ce_ib": 8.191238403320312, + "ce_orig": 1.472527265548706, + "epoch": 0.4371270400460134, + "kl_loss": 0.11276376247406006, + "loss_ib": 0.0019467613892629743, + "step": 1520 + }, + { + "ce_ib": 4.1960344314575195, + "ce_orig": 0.5593256950378418, + "epoch": 0.4371270400460134, + "kl_loss": 0.11115299165248871, + "loss_ib": 0.0015311333118006587, + "step": 1520 + }, + { + "ce_ib": 5.663719654083252, + "ce_orig": 0.8737356066703796, + "epoch": 0.4371270400460134, + "kl_loss": 0.10546106100082397, + "loss_ib": 0.0016209825407713652, + "step": 1520 + }, + { + "ce_ib": 4.696024417877197, + "ce_orig": 0.7118900418281555, + "epoch": 0.437414623624991, + "kl_loss": 0.0702265053987503, + "loss_ib": 0.00117186747957021, + "step": 1521 + }, + { + "ce_ib": 6.95864725112915, + "ce_orig": 1.5606194734573364, + "epoch": 0.437414623624991, + "kl_loss": 0.07407048344612122, + "loss_ib": 0.001436569495126605, + "step": 1521 + }, + { + "ce_ib": 9.09097671508789, + "ce_orig": 0.904660165309906, + "epoch": 0.437414623624991, + "kl_loss": 0.11046326160430908, + "loss_ib": 0.0020137301180511713, + "step": 1521 + }, + { + "ce_ib": 6.3199005126953125, + "ce_orig": 1.4025592803955078, + "epoch": 0.437414623624991, + "kl_loss": 0.08260238170623779, + "loss_ib": 0.0014580138958990574, + "step": 1521 + }, + { + "ce_ib": 8.26027774810791, + "ce_orig": 1.5451672077178955, + "epoch": 0.43770220720396863, + "kl_loss": 0.1031593605875969, + "loss_ib": 0.0018576213624328375, + "step": 1522 + }, + { + "ce_ib": 6.091907024383545, + "ce_orig": 0.811460018157959, + "epoch": 0.43770220720396863, + "kl_loss": 0.09542243927717209, + "loss_ib": 0.001563415047712624, + "step": 1522 + }, + { + "ce_ib": 4.2515950202941895, + "ce_orig": 0.7695032358169556, + "epoch": 0.43770220720396863, + "kl_loss": 0.11401493102312088, + "loss_ib": 0.0015653087757527828, + "step": 1522 + }, + { + "ce_ib": 3.9493980407714844, + "ce_orig": 0.4227944314479828, + "epoch": 0.43770220720396863, + "kl_loss": 0.11284856498241425, + "loss_ib": 0.0015234254533424973, + "step": 1522 + }, + { + "ce_ib": 7.753062725067139, + "ce_orig": 1.3116647005081177, + "epoch": 0.4379897907829463, + "kl_loss": 0.056807294487953186, + "loss_ib": 0.0013433791464194655, + "step": 1523 + }, + { + "ce_ib": 6.207674980163574, + "ce_orig": 1.0983881950378418, + "epoch": 0.4379897907829463, + "kl_loss": 0.10219351202249527, + "loss_ib": 0.0016427024966105819, + "step": 1523 + }, + { + "ce_ib": 9.094034194946289, + "ce_orig": 1.892133116722107, + "epoch": 0.4379897907829463, + "kl_loss": 0.38508155941963196, + "loss_ib": 0.004760218784213066, + "step": 1523 + }, + { + "ce_ib": 4.862968921661377, + "ce_orig": 0.8539610505104065, + "epoch": 0.4379897907829463, + "kl_loss": 0.09192570298910141, + "loss_ib": 0.0014055538922548294, + "step": 1523 + }, + { + "ce_ib": 6.706202983856201, + "ce_orig": 0.5762606263160706, + "epoch": 0.43827737436192393, + "kl_loss": 0.1387232095003128, + "loss_ib": 0.002057852456346154, + "step": 1524 + }, + { + "ce_ib": 3.491745948791504, + "ce_orig": 0.5919303894042969, + "epoch": 0.43827737436192393, + "kl_loss": 0.06342217326164246, + "loss_ib": 0.0009833963122218847, + "step": 1524 + }, + { + "ce_ib": 6.9889326095581055, + "ce_orig": 0.8394938707351685, + "epoch": 0.43827737436192393, + "kl_loss": 0.1032838299870491, + "loss_ib": 0.0017317315796390176, + "step": 1524 + }, + { + "ce_ib": 5.206730842590332, + "ce_orig": 1.1615697145462036, + "epoch": 0.43827737436192393, + "kl_loss": 0.08637557923793793, + "loss_ib": 0.0013844288187101483, + "step": 1524 + }, + { + "epoch": 0.43856495794090156, + "grad_norm": 0.08458583056926727, + "learning_rate": 4.827116856744056e-05, + "loss": 0.8713, + "step": 1525 + }, + { + "ce_ib": 5.4870171546936035, + "ce_orig": 1.080605149269104, + "epoch": 0.43856495794090156, + "kl_loss": 0.09647741913795471, + "loss_ib": 0.0015134759014472365, + "step": 1525 + }, + { + "ce_ib": 3.873538017272949, + "ce_orig": 0.5353042483329773, + "epoch": 0.43856495794090156, + "kl_loss": 0.0740727111697197, + "loss_ib": 0.001128080883063376, + "step": 1525 + }, + { + "ce_ib": 5.470432758331299, + "ce_orig": 0.9721705317497253, + "epoch": 0.43856495794090156, + "kl_loss": 0.10650671273469925, + "loss_ib": 0.0016121104126796126, + "step": 1525 + }, + { + "ce_ib": 6.264527320861816, + "ce_orig": 0.6104924082756042, + "epoch": 0.43856495794090156, + "kl_loss": 0.11099560558795929, + "loss_ib": 0.0017364086816087365, + "step": 1525 + }, + { + "ce_ib": 5.040921688079834, + "ce_orig": 0.6546604037284851, + "epoch": 0.43885254151987924, + "kl_loss": 0.07789278030395508, + "loss_ib": 0.0012830198975279927, + "step": 1526 + }, + { + "ce_ib": 6.6270670890808105, + "ce_orig": 1.3208789825439453, + "epoch": 0.43885254151987924, + "kl_loss": 0.10996094346046448, + "loss_ib": 0.00176231621298939, + "step": 1526 + }, + { + "ce_ib": 6.790947914123535, + "ce_orig": 1.2274993658065796, + "epoch": 0.43885254151987924, + "kl_loss": 0.09760048985481262, + "loss_ib": 0.001655099680647254, + "step": 1526 + }, + { + "ce_ib": 3.3892030715942383, + "ce_orig": 0.5881321430206299, + "epoch": 0.43885254151987924, + "kl_loss": 0.10825535655021667, + "loss_ib": 0.0014214739203453064, + "step": 1526 + }, + { + "ce_ib": 3.4919042587280273, + "ce_orig": 0.3422958552837372, + "epoch": 0.43914012509885686, + "kl_loss": 0.07836361229419708, + "loss_ib": 0.0011328264372423291, + "step": 1527 + }, + { + "ce_ib": 10.457748413085938, + "ce_orig": 1.8918616771697998, + "epoch": 0.43914012509885686, + "kl_loss": 0.1368015855550766, + "loss_ib": 0.002413790673017502, + "step": 1527 + }, + { + "ce_ib": 5.499739170074463, + "ce_orig": 0.7510017156600952, + "epoch": 0.43914012509885686, + "kl_loss": 0.11129018664360046, + "loss_ib": 0.001662875642068684, + "step": 1527 + }, + { + "ce_ib": 6.083384037017822, + "ce_orig": 1.1433619260787964, + "epoch": 0.43914012509885686, + "kl_loss": 0.11688944697380066, + "loss_ib": 0.0017772328574210405, + "step": 1527 + }, + { + "ce_ib": 6.293501377105713, + "ce_orig": 0.8512306809425354, + "epoch": 0.4394277086778345, + "kl_loss": 0.1311453878879547, + "loss_ib": 0.0019408039515838027, + "step": 1528 + }, + { + "ce_ib": 4.313621520996094, + "ce_orig": 0.40650105476379395, + "epoch": 0.4394277086778345, + "kl_loss": 0.09657540917396545, + "loss_ib": 0.0013971161097288132, + "step": 1528 + }, + { + "ce_ib": 6.521475315093994, + "ce_orig": 0.8618197441101074, + "epoch": 0.4394277086778345, + "kl_loss": 0.08713328093290329, + "loss_ib": 0.0015234804013743997, + "step": 1528 + }, + { + "ce_ib": 7.0165205001831055, + "ce_orig": 1.0275843143463135, + "epoch": 0.4394277086778345, + "kl_loss": 0.12216755747795105, + "loss_ib": 0.0019233275670558214, + "step": 1528 + }, + { + "ce_ib": 6.653157711029053, + "ce_orig": 1.3916330337524414, + "epoch": 0.43971529225681216, + "kl_loss": 0.12980209290981293, + "loss_ib": 0.001963336719200015, + "step": 1529 + }, + { + "ce_ib": 3.9800479412078857, + "ce_orig": 0.6354339122772217, + "epoch": 0.43971529225681216, + "kl_loss": 0.12970568239688873, + "loss_ib": 0.001695061568170786, + "step": 1529 + }, + { + "ce_ib": 5.695535182952881, + "ce_orig": 0.5129473805427551, + "epoch": 0.43971529225681216, + "kl_loss": 0.160762757062912, + "loss_ib": 0.0021771809551864862, + "step": 1529 + }, + { + "ce_ib": 4.398947238922119, + "ce_orig": 0.530144989490509, + "epoch": 0.43971529225681216, + "kl_loss": 0.08863487839698792, + "loss_ib": 0.0013262435095384717, + "step": 1529 + }, + { + "epoch": 0.4400028758357898, + "grad_norm": 0.08011375367641449, + "learning_rate": 4.8256961083445826e-05, + "loss": 0.8645, + "step": 1530 + }, + { + "ce_ib": 5.033506393432617, + "ce_orig": 0.5587021708488464, + "epoch": 0.4400028758357898, + "kl_loss": 0.11336100101470947, + "loss_ib": 0.0016369606601074338, + "step": 1530 + }, + { + "ce_ib": 6.276176452636719, + "ce_orig": 0.7141119837760925, + "epoch": 0.4400028758357898, + "kl_loss": 0.07518140971660614, + "loss_ib": 0.001379431807436049, + "step": 1530 + }, + { + "ce_ib": 3.5252747535705566, + "ce_orig": 0.5996282696723938, + "epoch": 0.4400028758357898, + "kl_loss": 0.08940495550632477, + "loss_ib": 0.0012465770123526454, + "step": 1530 + }, + { + "ce_ib": 4.548018932342529, + "ce_orig": 0.6446152925491333, + "epoch": 0.4400028758357898, + "kl_loss": 0.08440694212913513, + "loss_ib": 0.0012988713569939137, + "step": 1530 + }, + { + "ce_ib": 5.7587761878967285, + "ce_orig": 1.0859013795852661, + "epoch": 0.4402904594147674, + "kl_loss": 0.10964290052652359, + "loss_ib": 0.001672306563705206, + "step": 1531 + }, + { + "ce_ib": 6.364021301269531, + "ce_orig": 1.2268168926239014, + "epoch": 0.4402904594147674, + "kl_loss": 0.22489169239997864, + "loss_ib": 0.0028853188268840313, + "step": 1531 + }, + { + "ce_ib": 2.784122943878174, + "ce_orig": 0.42283838987350464, + "epoch": 0.4402904594147674, + "kl_loss": 0.10644792765378952, + "loss_ib": 0.0013428915990516543, + "step": 1531 + }, + { + "ce_ib": 3.548736572265625, + "ce_orig": 0.5701936483383179, + "epoch": 0.4402904594147674, + "kl_loss": 0.11180096864700317, + "loss_ib": 0.0014728833921253681, + "step": 1531 + }, + { + "ce_ib": 5.707919597625732, + "ce_orig": 1.0711405277252197, + "epoch": 0.44057804299374503, + "kl_loss": 0.06377618759870529, + "loss_ib": 0.0012085537891834974, + "step": 1532 + }, + { + "ce_ib": 5.556687355041504, + "ce_orig": 1.006879210472107, + "epoch": 0.44057804299374503, + "kl_loss": 0.11333635449409485, + "loss_ib": 0.0016890323022380471, + "step": 1532 + }, + { + "ce_ib": 5.794361591339111, + "ce_orig": 1.0382825136184692, + "epoch": 0.44057804299374503, + "kl_loss": 0.09803476929664612, + "loss_ib": 0.0015597838209941983, + "step": 1532 + }, + { + "ce_ib": 4.117677211761475, + "ce_orig": 0.42002275586128235, + "epoch": 0.44057804299374503, + "kl_loss": 0.12200498580932617, + "loss_ib": 0.0016318174311891198, + "step": 1532 + }, + { + "ce_ib": 4.202846527099609, + "ce_orig": 0.7035662531852722, + "epoch": 0.4408656265727227, + "kl_loss": 0.08787843585014343, + "loss_ib": 0.001299068913795054, + "step": 1533 + }, + { + "ce_ib": 4.010382175445557, + "ce_orig": 0.5868136286735535, + "epoch": 0.4408656265727227, + "kl_loss": 0.20491845905780792, + "loss_ib": 0.002450222847983241, + "step": 1533 + }, + { + "ce_ib": 4.14361047744751, + "ce_orig": 0.3988499939441681, + "epoch": 0.4408656265727227, + "kl_loss": 0.06145786866545677, + "loss_ib": 0.001028939732350409, + "step": 1533 + }, + { + "ce_ib": 6.855318069458008, + "ce_orig": 0.6845290660858154, + "epoch": 0.4408656265727227, + "kl_loss": 0.1065141037106514, + "loss_ib": 0.0017506727017462254, + "step": 1533 + }, + { + "ce_ib": 6.855345726013184, + "ce_orig": 1.4051142930984497, + "epoch": 0.44115321015170034, + "kl_loss": 0.1035260409116745, + "loss_ib": 0.0017207949422299862, + "step": 1534 + }, + { + "ce_ib": 3.4818737506866455, + "ce_orig": 0.6166447997093201, + "epoch": 0.44115321015170034, + "kl_loss": 0.06919325143098831, + "loss_ib": 0.0010401197941973805, + "step": 1534 + }, + { + "ce_ib": 5.097529888153076, + "ce_orig": 0.5195519924163818, + "epoch": 0.44115321015170034, + "kl_loss": 0.14445962011814117, + "loss_ib": 0.0019543489906936884, + "step": 1534 + }, + { + "ce_ib": 4.1158905029296875, + "ce_orig": 0.9049065709114075, + "epoch": 0.44115321015170034, + "kl_loss": 0.053823892027139664, + "loss_ib": 0.0009498279541730881, + "step": 1534 + }, + { + "epoch": 0.44144079373067796, + "grad_norm": 0.1036946177482605, + "learning_rate": 4.82426975680338e-05, + "loss": 0.8344, + "step": 1535 + }, + { + "ce_ib": 6.202638149261475, + "ce_orig": 1.119011640548706, + "epoch": 0.44144079373067796, + "kl_loss": 0.08892805129289627, + "loss_ib": 0.0015095442067831755, + "step": 1535 + }, + { + "ce_ib": 3.6364388465881348, + "ce_orig": 0.36542972922325134, + "epoch": 0.44144079373067796, + "kl_loss": 0.10305650532245636, + "loss_ib": 0.001394208986312151, + "step": 1535 + }, + { + "ce_ib": 4.990511894226074, + "ce_orig": 0.48364007472991943, + "epoch": 0.44144079373067796, + "kl_loss": 0.09347809851169586, + "loss_ib": 0.0014338322216644883, + "step": 1535 + }, + { + "ce_ib": 7.490294933319092, + "ce_orig": 1.297876000404358, + "epoch": 0.44144079373067796, + "kl_loss": 0.13190129399299622, + "loss_ib": 0.0020680425222963095, + "step": 1535 + }, + { + "ce_ib": 5.997652053833008, + "ce_orig": 0.8257885575294495, + "epoch": 0.44172837730965564, + "kl_loss": 0.11331549286842346, + "loss_ib": 0.0017329200636595488, + "step": 1536 + }, + { + "ce_ib": 3.240860939025879, + "ce_orig": 0.6098071336746216, + "epoch": 0.44172837730965564, + "kl_loss": 0.08399704098701477, + "loss_ib": 0.00116405647713691, + "step": 1536 + }, + { + "ce_ib": 4.847299098968506, + "ce_orig": 0.761056661605835, + "epoch": 0.44172837730965564, + "kl_loss": 0.0649537444114685, + "loss_ib": 0.001134267309680581, + "step": 1536 + }, + { + "ce_ib": 8.361990928649902, + "ce_orig": 1.4921995401382446, + "epoch": 0.44172837730965564, + "kl_loss": 0.1064828634262085, + "loss_ib": 0.001901027630083263, + "step": 1536 + }, + { + "ce_ib": 6.3969807624816895, + "ce_orig": 1.1810818910598755, + "epoch": 0.44201596088863326, + "kl_loss": 0.10175129771232605, + "loss_ib": 0.0016572109889239073, + "step": 1537 + }, + { + "ce_ib": 7.291069030761719, + "ce_orig": 1.1813929080963135, + "epoch": 0.44201596088863326, + "kl_loss": 0.10211050510406494, + "loss_ib": 0.0017502119299024343, + "step": 1537 + }, + { + "ce_ib": 5.969668388366699, + "ce_orig": 1.0291751623153687, + "epoch": 0.44201596088863326, + "kl_loss": 0.12677182257175446, + "loss_ib": 0.0018646850949153304, + "step": 1537 + }, + { + "ce_ib": 4.366037368774414, + "ce_orig": 0.5254493951797485, + "epoch": 0.44201596088863326, + "kl_loss": 0.0740274116396904, + "loss_ib": 0.0011768777621909976, + "step": 1537 + }, + { + "ce_ib": 9.962870597839355, + "ce_orig": 1.9265053272247314, + "epoch": 0.4423035444676109, + "kl_loss": 0.102281354367733, + "loss_ib": 0.002019100356847048, + "step": 1538 + }, + { + "ce_ib": 4.633265495300293, + "ce_orig": 0.6895825862884521, + "epoch": 0.4423035444676109, + "kl_loss": 0.062097519636154175, + "loss_ib": 0.0010843017371371388, + "step": 1538 + }, + { + "ce_ib": 4.876396656036377, + "ce_orig": 0.5468969941139221, + "epoch": 0.4423035444676109, + "kl_loss": 0.11198802292346954, + "loss_ib": 0.0016075198072940111, + "step": 1538 + }, + { + "ce_ib": 7.565672397613525, + "ce_orig": 0.9430594444274902, + "epoch": 0.4423035444676109, + "kl_loss": 0.14065617322921753, + "loss_ib": 0.0021631286945194006, + "step": 1538 + }, + { + "ce_ib": 3.9733831882476807, + "ce_orig": 0.8293970823287964, + "epoch": 0.44259112804658857, + "kl_loss": 0.09064613282680511, + "loss_ib": 0.0013037995668128133, + "step": 1539 + }, + { + "ce_ib": 8.867864608764648, + "ce_orig": 1.551695704460144, + "epoch": 0.44259112804658857, + "kl_loss": 0.08177647739648819, + "loss_ib": 0.0017045512795448303, + "step": 1539 + }, + { + "ce_ib": 4.037960052490234, + "ce_orig": 0.8340386152267456, + "epoch": 0.44259112804658857, + "kl_loss": 0.06397221237421036, + "loss_ib": 0.0010435180738568306, + "step": 1539 + }, + { + "ce_ib": 6.104748725891113, + "ce_orig": 0.9025256037712097, + "epoch": 0.44259112804658857, + "kl_loss": 0.06255464255809784, + "loss_ib": 0.0012360212858766317, + "step": 1539 + }, + { + "epoch": 0.4428787116255662, + "grad_norm": 0.0994582325220108, + "learning_rate": 4.822837805556858e-05, + "loss": 0.8381, + "step": 1540 + }, + { + "ce_ib": 5.178679466247559, + "ce_orig": 0.9453837275505066, + "epoch": 0.4428787116255662, + "kl_loss": 0.07855330407619476, + "loss_ib": 0.0013034009607508779, + "step": 1540 + }, + { + "ce_ib": 6.759856700897217, + "ce_orig": 1.2518740892410278, + "epoch": 0.4428787116255662, + "kl_loss": 0.06708598881959915, + "loss_ib": 0.0013468456454575062, + "step": 1540 + }, + { + "ce_ib": 5.5379180908203125, + "ce_orig": 0.8208099007606506, + "epoch": 0.4428787116255662, + "kl_loss": 0.08558105677366257, + "loss_ib": 0.001409602235071361, + "step": 1540 + }, + { + "ce_ib": 5.232536315917969, + "ce_orig": 0.6917774081230164, + "epoch": 0.4428787116255662, + "kl_loss": 0.15295881032943726, + "loss_ib": 0.0020528417080640793, + "step": 1540 + }, + { + "ce_ib": 7.505897521972656, + "ce_orig": 1.2370223999023438, + "epoch": 0.4431662952045438, + "kl_loss": 0.09297474473714828, + "loss_ib": 0.0016803371254354715, + "step": 1541 + }, + { + "ce_ib": 4.215508937835693, + "ce_orig": 0.3799034655094147, + "epoch": 0.4431662952045438, + "kl_loss": 0.10509985685348511, + "loss_ib": 0.0014725493965670466, + "step": 1541 + }, + { + "ce_ib": 6.20818567276001, + "ce_orig": 0.5150026679039001, + "epoch": 0.4431662952045438, + "kl_loss": 0.14453473687171936, + "loss_ib": 0.0020661659073084593, + "step": 1541 + }, + { + "ce_ib": 3.7776992321014404, + "ce_orig": 0.8367966413497925, + "epoch": 0.4431662952045438, + "kl_loss": 0.06234690546989441, + "loss_ib": 0.0010012389393523335, + "step": 1541 + }, + { + "ce_ib": 3.2646570205688477, + "ce_orig": 0.5974166393280029, + "epoch": 0.44345387878352144, + "kl_loss": 0.12072610855102539, + "loss_ib": 0.0015337266959249973, + "step": 1542 + }, + { + "ce_ib": 5.547255039215088, + "ce_orig": 1.019136905670166, + "epoch": 0.44345387878352144, + "kl_loss": 0.10777421295642853, + "loss_ib": 0.0016324676107615232, + "step": 1542 + }, + { + "ce_ib": 5.112756729125977, + "ce_orig": 0.7993668913841248, + "epoch": 0.44345387878352144, + "kl_loss": 0.1229899674654007, + "loss_ib": 0.0017411753069609404, + "step": 1542 + }, + { + "ce_ib": 7.813977241516113, + "ce_orig": 1.5835356712341309, + "epoch": 0.44345387878352144, + "kl_loss": 0.1065678596496582, + "loss_ib": 0.00184707622975111, + "step": 1542 + }, + { + "ce_ib": 6.219207763671875, + "ce_orig": 0.6118988394737244, + "epoch": 0.4437414623624991, + "kl_loss": 0.12151956558227539, + "loss_ib": 0.0018371164333075285, + "step": 1543 + }, + { + "ce_ib": 3.2174196243286133, + "ce_orig": 0.7604685425758362, + "epoch": 0.4437414623624991, + "kl_loss": 0.06420627981424332, + "loss_ib": 0.000963804719503969, + "step": 1543 + }, + { + "ce_ib": 3.882002353668213, + "ce_orig": 0.7521369457244873, + "epoch": 0.4437414623624991, + "kl_loss": 0.07849448919296265, + "loss_ib": 0.0011731450213119388, + "step": 1543 + }, + { + "ce_ib": 2.82770037651062, + "ce_orig": 0.38923606276512146, + "epoch": 0.4437414623624991, + "kl_loss": 0.11281279474496841, + "loss_ib": 0.0014108979376032948, + "step": 1543 + }, + { + "ce_ib": 5.287046432495117, + "ce_orig": 0.7627535462379456, + "epoch": 0.44402904594147674, + "kl_loss": 0.10497435927391052, + "loss_ib": 0.0015784482238814235, + "step": 1544 + }, + { + "ce_ib": 4.741152286529541, + "ce_orig": 0.7019892930984497, + "epoch": 0.44402904594147674, + "kl_loss": 0.0974903553724289, + "loss_ib": 0.0014490187168121338, + "step": 1544 + }, + { + "ce_ib": 6.222668170928955, + "ce_orig": 1.2530567646026611, + "epoch": 0.44402904594147674, + "kl_loss": 0.051129672676324844, + "loss_ib": 0.0011335634626448154, + "step": 1544 + }, + { + "ce_ib": 6.132937908172607, + "ce_orig": 1.0809364318847656, + "epoch": 0.44402904594147674, + "kl_loss": 0.15840841829776764, + "loss_ib": 0.002197377849370241, + "step": 1544 + }, + { + "epoch": 0.44431662952045436, + "grad_norm": 0.09242033213376999, + "learning_rate": 4.821400258054921e-05, + "loss": 0.8975, + "step": 1545 + }, + { + "ce_ib": 6.32316255569458, + "ce_orig": 1.173945665359497, + "epoch": 0.44431662952045436, + "kl_loss": 0.09522680938243866, + "loss_ib": 0.0015845843590795994, + "step": 1545 + }, + { + "ce_ib": 7.601150989532471, + "ce_orig": 1.0448795557022095, + "epoch": 0.44431662952045436, + "kl_loss": 0.10376627743244171, + "loss_ib": 0.001797777833417058, + "step": 1545 + }, + { + "ce_ib": 7.1550374031066895, + "ce_orig": 0.8920188546180725, + "epoch": 0.44431662952045436, + "kl_loss": 0.08144126832485199, + "loss_ib": 0.00152991630602628, + "step": 1545 + }, + { + "ce_ib": 3.975231170654297, + "ce_orig": 0.8033395409584045, + "epoch": 0.44431662952045436, + "kl_loss": 0.08068946748971939, + "loss_ib": 0.00120441778562963, + "step": 1545 + }, + { + "ce_ib": 7.915865898132324, + "ce_orig": 1.3254978656768799, + "epoch": 0.44460421309943204, + "kl_loss": 0.3935348093509674, + "loss_ib": 0.004726934712380171, + "step": 1546 + }, + { + "ce_ib": 6.2658610343933105, + "ce_orig": 1.0133064985275269, + "epoch": 0.44460421309943204, + "kl_loss": 0.09828180074691772, + "loss_ib": 0.0016094041056931019, + "step": 1546 + }, + { + "ce_ib": 6.209357261657715, + "ce_orig": 0.7786794900894165, + "epoch": 0.44460421309943204, + "kl_loss": 0.13253304362297058, + "loss_ib": 0.0019462661584839225, + "step": 1546 + }, + { + "ce_ib": 8.257509231567383, + "ce_orig": 1.4918216466903687, + "epoch": 0.44460421309943204, + "kl_loss": 0.11453811824321747, + "loss_ib": 0.0019711321219801903, + "step": 1546 + }, + { + "ce_ib": 4.934767723083496, + "ce_orig": 0.6198181509971619, + "epoch": 0.44489179667840967, + "kl_loss": 0.11633811891078949, + "loss_ib": 0.001656858017668128, + "step": 1547 + }, + { + "ce_ib": 3.6765692234039307, + "ce_orig": 0.5575041174888611, + "epoch": 0.44489179667840967, + "kl_loss": 0.06821808218955994, + "loss_ib": 0.0010498377960175276, + "step": 1547 + }, + { + "ce_ib": 7.560027122497559, + "ce_orig": 0.902812123298645, + "epoch": 0.44489179667840967, + "kl_loss": 0.082050621509552, + "loss_ib": 0.0015765088610351086, + "step": 1547 + }, + { + "ce_ib": 3.544227123260498, + "ce_orig": 0.4315322935581207, + "epoch": 0.44489179667840967, + "kl_loss": 0.06681840121746063, + "loss_ib": 0.0010226067388430238, + "step": 1547 + }, + { + "ce_ib": 4.447340488433838, + "ce_orig": 0.6964235305786133, + "epoch": 0.4451793802573873, + "kl_loss": 0.04815136641263962, + "loss_ib": 0.000926247681491077, + "step": 1548 + }, + { + "ce_ib": 4.657052040100098, + "ce_orig": 0.8825444579124451, + "epoch": 0.4451793802573873, + "kl_loss": 0.08796612173318863, + "loss_ib": 0.0013453663559630513, + "step": 1548 + }, + { + "ce_ib": 4.27765417098999, + "ce_orig": 0.771565854549408, + "epoch": 0.4451793802573873, + "kl_loss": 0.18008503317832947, + "loss_ib": 0.0022286155726760626, + "step": 1548 + }, + { + "ce_ib": 6.212048530578613, + "ce_orig": 1.1214803457260132, + "epoch": 0.4451793802573873, + "kl_loss": 0.10812464356422424, + "loss_ib": 0.0017024512635543942, + "step": 1548 + }, + { + "ce_ib": 6.587137699127197, + "ce_orig": 1.284794569015503, + "epoch": 0.44546696383636497, + "kl_loss": 0.07793333381414413, + "loss_ib": 0.0014380469219759107, + "step": 1549 + }, + { + "ce_ib": 8.378759384155273, + "ce_orig": 1.5839415788650513, + "epoch": 0.44546696383636497, + "kl_loss": 0.09636001288890839, + "loss_ib": 0.0018014759989455342, + "step": 1549 + }, + { + "ce_ib": 4.744248867034912, + "ce_orig": 0.11829902976751328, + "epoch": 0.44546696383636497, + "kl_loss": 0.29739588499069214, + "loss_ib": 0.0034483836498111486, + "step": 1549 + }, + { + "ce_ib": 7.5928754806518555, + "ce_orig": 1.5439260005950928, + "epoch": 0.44546696383636497, + "kl_loss": 0.08357639610767365, + "loss_ib": 0.0015950514934957027, + "step": 1549 + }, + { + "epoch": 0.4457545474153426, + "grad_norm": 0.11329171806573868, + "learning_rate": 4.819957117760953e-05, + "loss": 0.8462, + "step": 1550 + }, + { + "ce_ib": 5.801855087280273, + "ce_orig": 0.9170008301734924, + "epoch": 0.4457545474153426, + "kl_loss": 0.12739719450473785, + "loss_ib": 0.001854157424531877, + "step": 1550 + }, + { + "ce_ib": 4.269982814788818, + "ce_orig": 0.9335940480232239, + "epoch": 0.4457545474153426, + "kl_loss": 0.06512415409088135, + "loss_ib": 0.0010782397584989667, + "step": 1550 + }, + { + "ce_ib": 6.767131805419922, + "ce_orig": 1.002065896987915, + "epoch": 0.4457545474153426, + "kl_loss": 0.11951427161693573, + "loss_ib": 0.0018718558130785823, + "step": 1550 + }, + { + "ce_ib": 5.043998718261719, + "ce_orig": 0.8854005336761475, + "epoch": 0.4457545474153426, + "kl_loss": 0.1073165088891983, + "loss_ib": 0.0015775648644194007, + "step": 1550 + }, + { + "ce_ib": 7.135330677032471, + "ce_orig": 0.9555785655975342, + "epoch": 0.4460421309943202, + "kl_loss": 0.09675995260477066, + "loss_ib": 0.001681132591329515, + "step": 1551 + }, + { + "ce_ib": 5.3807525634765625, + "ce_orig": 0.8842431306838989, + "epoch": 0.4460421309943202, + "kl_loss": 0.11725437641143799, + "loss_ib": 0.001710618962533772, + "step": 1551 + }, + { + "ce_ib": 2.919546604156494, + "ce_orig": 0.5345602035522461, + "epoch": 0.4460421309943202, + "kl_loss": 0.08618354052305222, + "loss_ib": 0.0011537900427356362, + "step": 1551 + }, + { + "ce_ib": 3.4635467529296875, + "ce_orig": 0.6858002543449402, + "epoch": 0.4460421309943202, + "kl_loss": 0.07495497167110443, + "loss_ib": 0.0010959043866023421, + "step": 1551 + }, + { + "ce_ib": 5.275489807128906, + "ce_orig": 0.8966182470321655, + "epoch": 0.44632971457329784, + "kl_loss": 0.1296694129705429, + "loss_ib": 0.0018242429941892624, + "step": 1552 + }, + { + "ce_ib": 5.376269340515137, + "ce_orig": 0.9381533861160278, + "epoch": 0.44632971457329784, + "kl_loss": 0.07124143093824387, + "loss_ib": 0.0012500412994995713, + "step": 1552 + }, + { + "ce_ib": 3.747227907180786, + "ce_orig": 0.5894528031349182, + "epoch": 0.44632971457329784, + "kl_loss": 0.09324764460325241, + "loss_ib": 0.0013071992434561253, + "step": 1552 + }, + { + "ce_ib": 2.5852346420288086, + "ce_orig": 0.2793366312980652, + "epoch": 0.44632971457329784, + "kl_loss": 0.19539450109004974, + "loss_ib": 0.0022124683018773794, + "step": 1552 + }, + { + "ce_ib": 6.028305530548096, + "ce_orig": 1.2803107500076294, + "epoch": 0.4466172981522755, + "kl_loss": 0.11762706935405731, + "loss_ib": 0.001779101206921041, + "step": 1553 + }, + { + "ce_ib": 4.946916580200195, + "ce_orig": 0.7416130900382996, + "epoch": 0.4466172981522755, + "kl_loss": 0.09559804201126099, + "loss_ib": 0.0014506721636280417, + "step": 1553 + }, + { + "ce_ib": 7.909079074859619, + "ce_orig": 1.4085326194763184, + "epoch": 0.4466172981522755, + "kl_loss": 0.10646244883537292, + "loss_ib": 0.0018555322894826531, + "step": 1553 + }, + { + "ce_ib": 6.097696781158447, + "ce_orig": 0.9417270421981812, + "epoch": 0.4466172981522755, + "kl_loss": 0.12232429534196854, + "loss_ib": 0.0018330126767978072, + "step": 1553 + }, + { + "ce_ib": 9.341137886047363, + "ce_orig": 1.5854380130767822, + "epoch": 0.44690488173125315, + "kl_loss": 0.08973613381385803, + "loss_ib": 0.0018314751796424389, + "step": 1554 + }, + { + "ce_ib": 3.5456573963165283, + "ce_orig": 0.7813953161239624, + "epoch": 0.44690488173125315, + "kl_loss": 0.04397790506482124, + "loss_ib": 0.0007943447562865913, + "step": 1554 + }, + { + "ce_ib": 5.340150833129883, + "ce_orig": 0.6634575128555298, + "epoch": 0.44690488173125315, + "kl_loss": 0.10952778160572052, + "loss_ib": 0.0016292929649353027, + "step": 1554 + }, + { + "ce_ib": 3.3285343647003174, + "ce_orig": 0.4597083032131195, + "epoch": 0.44690488173125315, + "kl_loss": 0.07594092190265656, + "loss_ib": 0.0010922625660896301, + "step": 1554 + }, + { + "epoch": 0.44719246531023077, + "grad_norm": 0.09650268405675888, + "learning_rate": 4.818508388151815e-05, + "loss": 0.9027, + "step": 1555 + }, + { + "ce_ib": 3.450906753540039, + "ce_orig": 0.6248173117637634, + "epoch": 0.44719246531023077, + "kl_loss": 0.06737876683473587, + "loss_ib": 0.0010188783053308725, + "step": 1555 + }, + { + "ce_ib": 3.756511688232422, + "ce_orig": 0.7110925912857056, + "epoch": 0.44719246531023077, + "kl_loss": 0.11043556779623032, + "loss_ib": 0.0014800068456679583, + "step": 1555 + }, + { + "ce_ib": 6.647064685821533, + "ce_orig": 0.37961336970329285, + "epoch": 0.44719246531023077, + "kl_loss": 0.36079105734825134, + "loss_ib": 0.004272616934031248, + "step": 1555 + }, + { + "ce_ib": 5.7317609786987305, + "ce_orig": 0.9695166945457458, + "epoch": 0.44719246531023077, + "kl_loss": 0.0494835190474987, + "loss_ib": 0.0010680111590772867, + "step": 1555 + }, + { + "ce_ib": 5.119594097137451, + "ce_orig": 0.689030110836029, + "epoch": 0.44748004888920845, + "kl_loss": 0.14553231000900269, + "loss_ib": 0.001967282500118017, + "step": 1556 + }, + { + "ce_ib": 7.512217044830322, + "ce_orig": 1.2541621923446655, + "epoch": 0.44748004888920845, + "kl_loss": 0.0803542286157608, + "loss_ib": 0.001554763875901699, + "step": 1556 + }, + { + "ce_ib": 4.45106315612793, + "ce_orig": 0.573043167591095, + "epoch": 0.44748004888920845, + "kl_loss": 0.13128761947155, + "loss_ib": 0.0017579825362190604, + "step": 1556 + }, + { + "ce_ib": 4.492537021636963, + "ce_orig": 0.9524523019790649, + "epoch": 0.44748004888920845, + "kl_loss": 0.09307602792978287, + "loss_ib": 0.0013800138840451837, + "step": 1556 + }, + { + "ce_ib": 6.213217258453369, + "ce_orig": 0.8280027508735657, + "epoch": 0.44776763246818607, + "kl_loss": 0.17396774888038635, + "loss_ib": 0.0023609991185367107, + "step": 1557 + }, + { + "ce_ib": 5.024810791015625, + "ce_orig": 0.6813585758209229, + "epoch": 0.44776763246818607, + "kl_loss": 0.09068770706653595, + "loss_ib": 0.0014093579957261682, + "step": 1557 + }, + { + "ce_ib": 3.584723711013794, + "ce_orig": 0.5439204573631287, + "epoch": 0.44776763246818607, + "kl_loss": 0.11746008694171906, + "loss_ib": 0.001533073140308261, + "step": 1557 + }, + { + "ce_ib": 3.1678943634033203, + "ce_orig": 0.5593134164810181, + "epoch": 0.44776763246818607, + "kl_loss": 0.047958821058273315, + "loss_ib": 0.0007963776588439941, + "step": 1557 + }, + { + "ce_ib": 5.524857044219971, + "ce_orig": 1.0411049127578735, + "epoch": 0.4480552160471637, + "kl_loss": 0.10942517220973969, + "loss_ib": 0.0016467374516651034, + "step": 1558 + }, + { + "ce_ib": 7.360988616943359, + "ce_orig": 1.3304498195648193, + "epoch": 0.4480552160471637, + "kl_loss": 0.15740439295768738, + "loss_ib": 0.0023101428523659706, + "step": 1558 + }, + { + "ce_ib": 3.525597095489502, + "ce_orig": 0.37257295846939087, + "epoch": 0.4480552160471637, + "kl_loss": 0.2267375886440277, + "loss_ib": 0.002619935432448983, + "step": 1558 + }, + { + "ce_ib": 7.34909200668335, + "ce_orig": 1.4395421743392944, + "epoch": 0.4480552160471637, + "kl_loss": 0.08959254622459412, + "loss_ib": 0.0016308346530422568, + "step": 1558 + }, + { + "ce_ib": 4.741412162780762, + "ce_orig": 0.7893756031990051, + "epoch": 0.4483427996261414, + "kl_loss": 0.07341791689395905, + "loss_ib": 0.0012083203764632344, + "step": 1559 + }, + { + "ce_ib": 5.838155746459961, + "ce_orig": 0.8812609910964966, + "epoch": 0.4483427996261414, + "kl_loss": 0.09105563163757324, + "loss_ib": 0.0014943719143047929, + "step": 1559 + }, + { + "ce_ib": 6.531771659851074, + "ce_orig": 0.6093499064445496, + "epoch": 0.4483427996261414, + "kl_loss": 0.12991894781589508, + "loss_ib": 0.0019523664377629757, + "step": 1559 + }, + { + "ce_ib": 4.4940972328186035, + "ce_orig": 0.6442502737045288, + "epoch": 0.4483427996261414, + "kl_loss": 0.16736455261707306, + "loss_ib": 0.002123055048286915, + "step": 1559 + }, + { + "epoch": 0.448630383205119, + "grad_norm": 0.09075544774532318, + "learning_rate": 4.8170540727178326e-05, + "loss": 0.8255, + "step": 1560 + }, + { + "ce_ib": 6.793491840362549, + "ce_orig": 0.9849036931991577, + "epoch": 0.448630383205119, + "kl_loss": 0.09128949046134949, + "loss_ib": 0.0015922440215945244, + "step": 1560 + }, + { + "ce_ib": 6.840089797973633, + "ce_orig": 1.0358442068099976, + "epoch": 0.448630383205119, + "kl_loss": 0.07448925822973251, + "loss_ib": 0.0014289015671238303, + "step": 1560 + }, + { + "ce_ib": 5.9160895347595215, + "ce_orig": 1.152105450630188, + "epoch": 0.448630383205119, + "kl_loss": 0.1506045162677765, + "loss_ib": 0.0020976539235562086, + "step": 1560 + }, + { + "ce_ib": 6.09316349029541, + "ce_orig": 1.0662212371826172, + "epoch": 0.448630383205119, + "kl_loss": 0.08617434650659561, + "loss_ib": 0.0014710597461089492, + "step": 1560 + }, + { + "ce_ib": 5.327860355377197, + "ce_orig": 0.9268292784690857, + "epoch": 0.4489179667840966, + "kl_loss": 0.12022953480482101, + "loss_ib": 0.0017350813141092658, + "step": 1561 + }, + { + "ce_ib": 6.889469623565674, + "ce_orig": 1.4711467027664185, + "epoch": 0.4489179667840966, + "kl_loss": 0.07958342134952545, + "loss_ib": 0.0014847811544314027, + "step": 1561 + }, + { + "ce_ib": 4.875145435333252, + "ce_orig": 0.6364299058914185, + "epoch": 0.4489179667840966, + "kl_loss": 0.1256185919046402, + "loss_ib": 0.0017437004717066884, + "step": 1561 + }, + { + "ce_ib": 3.4591472148895264, + "ce_orig": 0.5252366662025452, + "epoch": 0.4489179667840966, + "kl_loss": 0.0922766625881195, + "loss_ib": 0.0012686812551692128, + "step": 1561 + }, + { + "ce_ib": 3.829379081726074, + "ce_orig": 0.5891574621200562, + "epoch": 0.44920555036307425, + "kl_loss": 0.08213187754154205, + "loss_ib": 0.0012042566668242216, + "step": 1562 + }, + { + "ce_ib": 7.296290874481201, + "ce_orig": 0.5090686082839966, + "epoch": 0.44920555036307425, + "kl_loss": 0.08414573967456818, + "loss_ib": 0.0015710864681750536, + "step": 1562 + }, + { + "ce_ib": 6.499896049499512, + "ce_orig": 0.7776138186454773, + "epoch": 0.44920555036307425, + "kl_loss": 0.09851748496294022, + "loss_ib": 0.001635164488106966, + "step": 1562 + }, + { + "ce_ib": 5.755539417266846, + "ce_orig": 1.098066806793213, + "epoch": 0.44920555036307425, + "kl_loss": 0.10267291963100433, + "loss_ib": 0.00160228309687227, + "step": 1562 + }, + { + "ce_ib": 7.139002323150635, + "ce_orig": 1.0056488513946533, + "epoch": 0.4494931339420519, + "kl_loss": 0.09915171563625336, + "loss_ib": 0.0017054172931239009, + "step": 1563 + }, + { + "ce_ib": 6.788796901702881, + "ce_orig": 1.124604344367981, + "epoch": 0.4494931339420519, + "kl_loss": 0.09610615670681, + "loss_ib": 0.0016399412415921688, + "step": 1563 + }, + { + "ce_ib": 6.790539741516113, + "ce_orig": 1.0976682901382446, + "epoch": 0.4494931339420519, + "kl_loss": 0.09875194728374481, + "loss_ib": 0.001666573341935873, + "step": 1563 + }, + { + "ce_ib": 7.323736190795898, + "ce_orig": 1.4758509397506714, + "epoch": 0.4494931339420519, + "kl_loss": 0.11641738563776016, + "loss_ib": 0.0018965473864227533, + "step": 1563 + }, + { + "ce_ib": 4.342124938964844, + "ce_orig": 0.6897947192192078, + "epoch": 0.44978071752102955, + "kl_loss": 0.05266604200005531, + "loss_ib": 0.0009608729160390794, + "step": 1564 + }, + { + "ce_ib": 6.504452228546143, + "ce_orig": 1.0442248582839966, + "epoch": 0.44978071752102955, + "kl_loss": 0.11225426197052002, + "loss_ib": 0.0017729877727106214, + "step": 1564 + }, + { + "ce_ib": 6.941709041595459, + "ce_orig": 0.4760146141052246, + "epoch": 0.44978071752102955, + "kl_loss": 0.09063249826431274, + "loss_ib": 0.001600495888851583, + "step": 1564 + }, + { + "ce_ib": 4.408914089202881, + "ce_orig": 0.36864158511161804, + "epoch": 0.44978071752102955, + "kl_loss": 0.10829253494739532, + "loss_ib": 0.0015238167252391577, + "step": 1564 + }, + { + "epoch": 0.4500683011000072, + "grad_norm": 0.09151628613471985, + "learning_rate": 4.8155941749627895e-05, + "loss": 0.8711, + "step": 1565 + }, + { + "ce_ib": 7.359333038330078, + "ce_orig": 1.3286747932434082, + "epoch": 0.4500683011000072, + "kl_loss": 0.11625319719314575, + "loss_ib": 0.0018984650960192084, + "step": 1565 + }, + { + "ce_ib": 6.011932373046875, + "ce_orig": 0.8918446898460388, + "epoch": 0.4500683011000072, + "kl_loss": 0.0708668902516365, + "loss_ib": 0.0013098621275275946, + "step": 1565 + }, + { + "ce_ib": 6.790759086608887, + "ce_orig": 0.9543012976646423, + "epoch": 0.4500683011000072, + "kl_loss": 0.09177093952894211, + "loss_ib": 0.0015967851504683495, + "step": 1565 + }, + { + "ce_ib": 4.474704265594482, + "ce_orig": 0.6601502299308777, + "epoch": 0.4500683011000072, + "kl_loss": 0.11329531669616699, + "loss_ib": 0.001580423559062183, + "step": 1565 + }, + { + "ce_ib": 3.9997353553771973, + "ce_orig": 0.6208578944206238, + "epoch": 0.45035588467898485, + "kl_loss": 0.06169162690639496, + "loss_ib": 0.0010168898152187467, + "step": 1566 + }, + { + "ce_ib": 3.712808609008789, + "ce_orig": 0.6365771889686584, + "epoch": 0.45035588467898485, + "kl_loss": 0.060917917639017105, + "loss_ib": 0.000980459968559444, + "step": 1566 + }, + { + "ce_ib": 6.2431111335754395, + "ce_orig": 1.1769055128097534, + "epoch": 0.45035588467898485, + "kl_loss": 0.1119333803653717, + "loss_ib": 0.001743644941598177, + "step": 1566 + }, + { + "ce_ib": 4.490558624267578, + "ce_orig": 0.5369220972061157, + "epoch": 0.45035588467898485, + "kl_loss": 0.16194584965705872, + "loss_ib": 0.002068514237180352, + "step": 1566 + }, + { + "ce_ib": 6.51278829574585, + "ce_orig": 1.0006382465362549, + "epoch": 0.4506434682579625, + "kl_loss": 0.1048964262008667, + "loss_ib": 0.0017002429813146591, + "step": 1567 + }, + { + "ce_ib": 4.534228324890137, + "ce_orig": 0.5963374376296997, + "epoch": 0.4506434682579625, + "kl_loss": 0.056120846420526505, + "loss_ib": 0.0010146312415599823, + "step": 1567 + }, + { + "ce_ib": 9.053264617919922, + "ce_orig": 1.6949427127838135, + "epoch": 0.4506434682579625, + "kl_loss": 0.10480242222547531, + "loss_ib": 0.0019533506128937006, + "step": 1567 + }, + { + "ce_ib": 5.364234447479248, + "ce_orig": 0.6692973375320435, + "epoch": 0.4506434682579625, + "kl_loss": 0.0869922786951065, + "loss_ib": 0.0014063462149351835, + "step": 1567 + }, + { + "ce_ib": 4.34559440612793, + "ce_orig": 0.9098697304725647, + "epoch": 0.4509310518369401, + "kl_loss": 0.12230445444583893, + "loss_ib": 0.0016576038906350732, + "step": 1568 + }, + { + "ce_ib": 6.514309883117676, + "ce_orig": 0.8986880779266357, + "epoch": 0.4509310518369401, + "kl_loss": 0.08929626643657684, + "loss_ib": 0.0015443935990333557, + "step": 1568 + }, + { + "ce_ib": 5.638384819030762, + "ce_orig": 0.745276153087616, + "epoch": 0.4509310518369401, + "kl_loss": 0.18910188972949982, + "loss_ib": 0.0024548573419451714, + "step": 1568 + }, + { + "ce_ib": 8.105575561523438, + "ce_orig": 0.8824712038040161, + "epoch": 0.4509310518369401, + "kl_loss": 0.10085771977901459, + "loss_ib": 0.0018191345734521747, + "step": 1568 + }, + { + "ce_ib": 8.253012657165527, + "ce_orig": 1.7500132322311401, + "epoch": 0.4512186354159177, + "kl_loss": 0.10303084552288055, + "loss_ib": 0.001855609705671668, + "step": 1569 + }, + { + "ce_ib": 4.842573165893555, + "ce_orig": 0.8658014535903931, + "epoch": 0.4512186354159177, + "kl_loss": 0.09123002737760544, + "loss_ib": 0.0013965575490146875, + "step": 1569 + }, + { + "ce_ib": 3.9768102169036865, + "ce_orig": 0.45405060052871704, + "epoch": 0.4512186354159177, + "kl_loss": 0.16116942465305328, + "loss_ib": 0.0020093752536922693, + "step": 1569 + }, + { + "ce_ib": 5.280497074127197, + "ce_orig": 0.9380026459693909, + "epoch": 0.4512186354159177, + "kl_loss": 0.11388112604618073, + "loss_ib": 0.0016668608877807856, + "step": 1569 + }, + { + "epoch": 0.4515062189948954, + "grad_norm": 0.1013677716255188, + "learning_rate": 4.814128698403918e-05, + "loss": 0.8998, + "step": 1570 + }, + { + "ce_ib": 6.224296569824219, + "ce_orig": 0.9939948916435242, + "epoch": 0.4515062189948954, + "kl_loss": 0.16565169394016266, + "loss_ib": 0.0022789465729147196, + "step": 1570 + }, + { + "ce_ib": 5.0194926261901855, + "ce_orig": 0.8029994368553162, + "epoch": 0.4515062189948954, + "kl_loss": 0.1701691448688507, + "loss_ib": 0.002203640528023243, + "step": 1570 + }, + { + "ce_ib": 6.144610404968262, + "ce_orig": 0.9827307462692261, + "epoch": 0.4515062189948954, + "kl_loss": 0.07304276525974274, + "loss_ib": 0.0013448885874822736, + "step": 1570 + }, + { + "ce_ib": 6.49107551574707, + "ce_orig": 1.031981110572815, + "epoch": 0.4515062189948954, + "kl_loss": 0.08809483796358109, + "loss_ib": 0.0015300560044124722, + "step": 1570 + }, + { + "ce_ib": 4.440684795379639, + "ce_orig": 0.5551092028617859, + "epoch": 0.451793802573873, + "kl_loss": 0.15694493055343628, + "loss_ib": 0.002013517776504159, + "step": 1571 + }, + { + "ce_ib": 6.9785895347595215, + "ce_orig": 0.8601738810539246, + "epoch": 0.451793802573873, + "kl_loss": 0.09427627176046371, + "loss_ib": 0.0016406216891482472, + "step": 1571 + }, + { + "ce_ib": 4.985313892364502, + "ce_orig": 0.8955661654472351, + "epoch": 0.451793802573873, + "kl_loss": 0.06613049656152725, + "loss_ib": 0.0011598363053053617, + "step": 1571 + }, + { + "ce_ib": 4.1067795753479, + "ce_orig": 0.4033355116844177, + "epoch": 0.451793802573873, + "kl_loss": 0.1197819709777832, + "loss_ib": 0.0016084975795820355, + "step": 1571 + }, + { + "ce_ib": 3.784276008605957, + "ce_orig": 0.3235808312892914, + "epoch": 0.45208138615285065, + "kl_loss": 0.061977874487638474, + "loss_ib": 0.000998206320218742, + "step": 1572 + }, + { + "ce_ib": 5.586164474487305, + "ce_orig": 0.9607438445091248, + "epoch": 0.45208138615285065, + "kl_loss": 0.11022624373435974, + "loss_ib": 0.0016608788864687085, + "step": 1572 + }, + { + "ce_ib": 4.443259239196777, + "ce_orig": 0.759601354598999, + "epoch": 0.45208138615285065, + "kl_loss": 0.050874728709459305, + "loss_ib": 0.0009530732058919966, + "step": 1572 + }, + { + "ce_ib": 6.079825401306152, + "ce_orig": 1.0342555046081543, + "epoch": 0.45208138615285065, + "kl_loss": 0.07085070759057999, + "loss_ib": 0.0013164895353838801, + "step": 1572 + }, + { + "ce_ib": 3.252683401107788, + "ce_orig": 0.5536786317825317, + "epoch": 0.45236896973182833, + "kl_loss": 0.11971843987703323, + "loss_ib": 0.0015224526869133115, + "step": 1573 + }, + { + "ce_ib": 6.713737487792969, + "ce_orig": 1.3118517398834229, + "epoch": 0.45236896973182833, + "kl_loss": 0.12153877317905426, + "loss_ib": 0.0018867613980546594, + "step": 1573 + }, + { + "ce_ib": 8.532139778137207, + "ce_orig": 1.2532261610031128, + "epoch": 0.45236896973182833, + "kl_loss": 0.35834378004074097, + "loss_ib": 0.004436651710420847, + "step": 1573 + }, + { + "ce_ib": 5.0024919509887695, + "ce_orig": 0.6499499678611755, + "epoch": 0.45236896973182833, + "kl_loss": 0.12933135032653809, + "loss_ib": 0.0017935627838596702, + "step": 1573 + }, + { + "ce_ib": 4.394222259521484, + "ce_orig": 0.5625379085540771, + "epoch": 0.45265655331080595, + "kl_loss": 0.165242999792099, + "loss_ib": 0.002091852016746998, + "step": 1574 + }, + { + "ce_ib": 4.604562759399414, + "ce_orig": 0.7034170031547546, + "epoch": 0.45265655331080595, + "kl_loss": 0.10290051251649857, + "loss_ib": 0.0014894612831994891, + "step": 1574 + }, + { + "ce_ib": 6.677795886993408, + "ce_orig": 0.6365276575088501, + "epoch": 0.45265655331080595, + "kl_loss": 0.13321809470653534, + "loss_ib": 0.0019999605137854815, + "step": 1574 + }, + { + "ce_ib": 6.29049825668335, + "ce_orig": 1.2175753116607666, + "epoch": 0.45265655331080595, + "kl_loss": 0.13825319707393646, + "loss_ib": 0.002011581789702177, + "step": 1574 + }, + { + "epoch": 0.4529441368897836, + "grad_norm": 0.08724623918533325, + "learning_rate": 4.812657646571891e-05, + "loss": 0.8844, + "step": 1575 + }, + { + "ce_ib": 3.8909454345703125, + "ce_orig": 0.4451230764389038, + "epoch": 0.4529441368897836, + "kl_loss": 0.12437300384044647, + "loss_ib": 0.0016328245401382446, + "step": 1575 + }, + { + "ce_ib": 3.7206075191497803, + "ce_orig": 0.8448427319526672, + "epoch": 0.4529441368897836, + "kl_loss": 0.07080793380737305, + "loss_ib": 0.0010801401222124696, + "step": 1575 + }, + { + "ce_ib": 7.175804615020752, + "ce_orig": 1.2047712802886963, + "epoch": 0.4529441368897836, + "kl_loss": 0.07131451368331909, + "loss_ib": 0.0014307255623862147, + "step": 1575 + }, + { + "ce_ib": 6.231307029724121, + "ce_orig": 0.7221335172653198, + "epoch": 0.4529441368897836, + "kl_loss": 0.10847526788711548, + "loss_ib": 0.0017078833188861609, + "step": 1575 + }, + { + "ce_ib": 5.236674785614014, + "ce_orig": 0.9069425463676453, + "epoch": 0.45323172046876126, + "kl_loss": 0.11083640903234482, + "loss_ib": 0.0016320315189659595, + "step": 1576 + }, + { + "ce_ib": 4.1772613525390625, + "ce_orig": 0.5958235859870911, + "epoch": 0.45323172046876126, + "kl_loss": 0.12903600931167603, + "loss_ib": 0.0017080861143767834, + "step": 1576 + }, + { + "ce_ib": 2.601656913757324, + "ce_orig": 0.3436908423900604, + "epoch": 0.45323172046876126, + "kl_loss": 0.19999085366725922, + "loss_ib": 0.002260074019432068, + "step": 1576 + }, + { + "ce_ib": 6.059291839599609, + "ce_orig": 0.7580294013023376, + "epoch": 0.45323172046876126, + "kl_loss": 0.08745376020669937, + "loss_ib": 0.0014804666861891747, + "step": 1576 + }, + { + "ce_ib": 4.14776086807251, + "ce_orig": 0.5458824038505554, + "epoch": 0.4535193040477389, + "kl_loss": 0.11837026476860046, + "loss_ib": 0.0015984786441549659, + "step": 1577 + }, + { + "ce_ib": 3.716325044631958, + "ce_orig": 0.691834568977356, + "epoch": 0.4535193040477389, + "kl_loss": 0.0667094886302948, + "loss_ib": 0.0010387273505330086, + "step": 1577 + }, + { + "ce_ib": 3.6579203605651855, + "ce_orig": 0.5806090235710144, + "epoch": 0.4535193040477389, + "kl_loss": 0.2247331440448761, + "loss_ib": 0.0026131232734769583, + "step": 1577 + }, + { + "ce_ib": 4.466121673583984, + "ce_orig": 0.42851439118385315, + "epoch": 0.4535193040477389, + "kl_loss": 0.14565202593803406, + "loss_ib": 0.001903132419101894, + "step": 1577 + }, + { + "ce_ib": 3.899508237838745, + "ce_orig": 0.6616722345352173, + "epoch": 0.4538068876267165, + "kl_loss": 0.06799940764904022, + "loss_ib": 0.001069944817572832, + "step": 1578 + }, + { + "ce_ib": 4.955551624298096, + "ce_orig": 0.7739723920822144, + "epoch": 0.4538068876267165, + "kl_loss": 0.054706305265426636, + "loss_ib": 0.0010426181834191084, + "step": 1578 + }, + { + "ce_ib": 6.20005464553833, + "ce_orig": 1.009901762008667, + "epoch": 0.4538068876267165, + "kl_loss": 0.08856213092803955, + "loss_ib": 0.0015056267147883773, + "step": 1578 + }, + { + "ce_ib": 6.514723777770996, + "ce_orig": 0.7740148305892944, + "epoch": 0.4538068876267165, + "kl_loss": 0.09899093210697174, + "loss_ib": 0.0016413816483691335, + "step": 1578 + }, + { + "ce_ib": 3.8490943908691406, + "ce_orig": 0.704555332660675, + "epoch": 0.45409447120569413, + "kl_loss": 0.08974114060401917, + "loss_ib": 0.0012823209399357438, + "step": 1579 + }, + { + "ce_ib": 3.815824508666992, + "ce_orig": 0.6228117942810059, + "epoch": 0.45409447120569413, + "kl_loss": 0.07745486497879028, + "loss_ib": 0.001156131038442254, + "step": 1579 + }, + { + "ce_ib": 5.286880970001221, + "ce_orig": 0.7760758399963379, + "epoch": 0.45409447120569413, + "kl_loss": 0.08921387791633606, + "loss_ib": 0.0014208268839865923, + "step": 1579 + }, + { + "ce_ib": 6.49752950668335, + "ce_orig": 0.9260256290435791, + "epoch": 0.45409447120569413, + "kl_loss": 0.11488643288612366, + "loss_ib": 0.0017986171878874302, + "step": 1579 + }, + { + "epoch": 0.4543820547846718, + "grad_norm": 0.0983215719461441, + "learning_rate": 4.8111810230108145e-05, + "loss": 0.859, + "step": 1580 + }, + { + "ce_ib": 6.966205596923828, + "ce_orig": 0.9874230623245239, + "epoch": 0.4543820547846718, + "kl_loss": 0.12363585829734802, + "loss_ib": 0.001932979212142527, + "step": 1580 + }, + { + "ce_ib": 8.067428588867188, + "ce_orig": 1.4266571998596191, + "epoch": 0.4543820547846718, + "kl_loss": 0.1337740570306778, + "loss_ib": 0.002144483383744955, + "step": 1580 + }, + { + "ce_ib": 5.044072151184082, + "ce_orig": 0.8083384037017822, + "epoch": 0.4543820547846718, + "kl_loss": 0.12358110398054123, + "loss_ib": 0.0017402182566002011, + "step": 1580 + }, + { + "ce_ib": 4.379663944244385, + "ce_orig": 0.5556591749191284, + "epoch": 0.4543820547846718, + "kl_loss": 0.11567586660385132, + "loss_ib": 0.0015947250649333, + "step": 1580 + }, + { + "ce_ib": 6.047522068023682, + "ce_orig": 0.8394386172294617, + "epoch": 0.45466963836364943, + "kl_loss": 0.09805427491664886, + "loss_ib": 0.0015852948417887092, + "step": 1581 + }, + { + "ce_ib": 4.410378932952881, + "ce_orig": 0.8269963264465332, + "epoch": 0.45466963836364943, + "kl_loss": 0.08171489089727402, + "loss_ib": 0.0012581867631524801, + "step": 1581 + }, + { + "ce_ib": 6.811370849609375, + "ce_orig": 1.1788527965545654, + "epoch": 0.45466963836364943, + "kl_loss": 0.12656870484352112, + "loss_ib": 0.001946824137121439, + "step": 1581 + }, + { + "ce_ib": 4.595601558685303, + "ce_orig": 0.6713652014732361, + "epoch": 0.45466963836364943, + "kl_loss": 0.0824875682592392, + "loss_ib": 0.0012844358570873737, + "step": 1581 + }, + { + "ce_ib": 3.8033931255340576, + "ce_orig": 0.625810980796814, + "epoch": 0.45495722194262705, + "kl_loss": 0.07849664986133575, + "loss_ib": 0.0011653058463707566, + "step": 1582 + }, + { + "ce_ib": 5.467626094818115, + "ce_orig": 0.7394246459007263, + "epoch": 0.45495722194262705, + "kl_loss": 0.07132050395011902, + "loss_ib": 0.0012599676847457886, + "step": 1582 + }, + { + "ce_ib": 5.7691826820373535, + "ce_orig": 1.0427066087722778, + "epoch": 0.45495722194262705, + "kl_loss": 0.335252046585083, + "loss_ib": 0.0039294385351240635, + "step": 1582 + }, + { + "ce_ib": 3.797595500946045, + "ce_orig": 0.6962493658065796, + "epoch": 0.45495722194262705, + "kl_loss": 0.0654153823852539, + "loss_ib": 0.0010339133441448212, + "step": 1582 + }, + { + "ce_ib": 3.6037580966949463, + "ce_orig": 0.5999656915664673, + "epoch": 0.45524480552160473, + "kl_loss": 0.06097700446844101, + "loss_ib": 0.0009701458038762212, + "step": 1583 + }, + { + "ce_ib": 3.4936084747314453, + "ce_orig": 0.5981646180152893, + "epoch": 0.45524480552160473, + "kl_loss": 0.08388510346412659, + "loss_ib": 0.0011882118415087461, + "step": 1583 + }, + { + "ce_ib": 6.085269927978516, + "ce_orig": 0.9119758605957031, + "epoch": 0.45524480552160473, + "kl_loss": 0.08455945551395416, + "loss_ib": 0.001454121433198452, + "step": 1583 + }, + { + "ce_ib": 5.985597610473633, + "ce_orig": 0.9023646712303162, + "epoch": 0.45524480552160473, + "kl_loss": 0.12020603567361832, + "loss_ib": 0.0018006201134994626, + "step": 1583 + }, + { + "ce_ib": 4.178989410400391, + "ce_orig": 0.7633252143859863, + "epoch": 0.45553238910058236, + "kl_loss": 0.0653102695941925, + "loss_ib": 0.0010710015194490552, + "step": 1584 + }, + { + "ce_ib": 6.6875481605529785, + "ce_orig": 0.9403988718986511, + "epoch": 0.45553238910058236, + "kl_loss": 0.10325045138597488, + "loss_ib": 0.0017012592870742083, + "step": 1584 + }, + { + "ce_ib": 3.4798643589019775, + "ce_orig": 0.6613839864730835, + "epoch": 0.45553238910058236, + "kl_loss": 0.09522046148777008, + "loss_ib": 0.001300190924666822, + "step": 1584 + }, + { + "ce_ib": 4.60916805267334, + "ce_orig": 0.8537339568138123, + "epoch": 0.45553238910058236, + "kl_loss": 0.17622928321361542, + "loss_ib": 0.0022232094779610634, + "step": 1584 + }, + { + "epoch": 0.45581997267956, + "grad_norm": 0.0987664982676506, + "learning_rate": 4.8096988312782174e-05, + "loss": 0.8739, + "step": 1585 + }, + { + "ce_ib": 4.234344959259033, + "ce_orig": 0.6707295775413513, + "epoch": 0.45581997267956, + "kl_loss": 0.09574142843484879, + "loss_ib": 0.0013808486983180046, + "step": 1585 + }, + { + "ce_ib": 5.656366348266602, + "ce_orig": 1.0704694986343384, + "epoch": 0.45581997267956, + "kl_loss": 0.09176561236381531, + "loss_ib": 0.0014832926681265235, + "step": 1585 + }, + { + "ce_ib": 4.543145179748535, + "ce_orig": 0.49647119641304016, + "epoch": 0.45581997267956, + "kl_loss": 0.17786526679992676, + "loss_ib": 0.0022329671774059534, + "step": 1585 + }, + { + "ce_ib": 3.9295666217803955, + "ce_orig": 0.5952968001365662, + "epoch": 0.45581997267956, + "kl_loss": 0.07553210854530334, + "loss_ib": 0.001148277660831809, + "step": 1585 + }, + { + "ce_ib": 4.90818977355957, + "ce_orig": 0.8451528549194336, + "epoch": 0.45610755625853766, + "kl_loss": 0.07999219000339508, + "loss_ib": 0.0012907407945021987, + "step": 1586 + }, + { + "ce_ib": 3.650014877319336, + "ce_orig": 0.4921742379665375, + "epoch": 0.45610755625853766, + "kl_loss": 0.10422439873218536, + "loss_ib": 0.0014072454068809748, + "step": 1586 + }, + { + "ce_ib": 6.317634105682373, + "ce_orig": 1.0604970455169678, + "epoch": 0.45610755625853766, + "kl_loss": 0.11050582677125931, + "loss_ib": 0.0017368216067552567, + "step": 1586 + }, + { + "ce_ib": 6.8194732666015625, + "ce_orig": 1.1158506870269775, + "epoch": 0.45610755625853766, + "kl_loss": 0.12400923669338226, + "loss_ib": 0.00192203966435045, + "step": 1586 + }, + { + "ce_ib": 5.110295295715332, + "ce_orig": 0.7998859882354736, + "epoch": 0.4563951398375153, + "kl_loss": 0.11907786875963211, + "loss_ib": 0.0017018081853166223, + "step": 1587 + }, + { + "ce_ib": 3.83758807182312, + "ce_orig": 0.7441935539245605, + "epoch": 0.4563951398375153, + "kl_loss": 0.05651930719614029, + "loss_ib": 0.0009489518124610186, + "step": 1587 + }, + { + "ce_ib": 4.983565807342529, + "ce_orig": 0.9777162075042725, + "epoch": 0.4563951398375153, + "kl_loss": 0.08305898308753967, + "loss_ib": 0.0013289463240653276, + "step": 1587 + }, + { + "ce_ib": 4.548033714294434, + "ce_orig": 0.7015355825424194, + "epoch": 0.4563951398375153, + "kl_loss": 0.0653744488954544, + "loss_ib": 0.00110854790546, + "step": 1587 + }, + { + "ce_ib": 3.2286124229431152, + "ce_orig": 0.46955880522727966, + "epoch": 0.4566827234164929, + "kl_loss": 0.07160650193691254, + "loss_ib": 0.001038926187902689, + "step": 1588 + }, + { + "ce_ib": 5.882933139801025, + "ce_orig": 0.8384146094322205, + "epoch": 0.4566827234164929, + "kl_loss": 0.09918653219938278, + "loss_ib": 0.0015801585977897048, + "step": 1588 + }, + { + "ce_ib": 4.907978534698486, + "ce_orig": 0.5879037380218506, + "epoch": 0.4566827234164929, + "kl_loss": 0.06252609193325043, + "loss_ib": 0.0011160586727783084, + "step": 1588 + }, + { + "ce_ib": 4.269338607788086, + "ce_orig": 0.8758838772773743, + "epoch": 0.4566827234164929, + "kl_loss": 0.06765419989824295, + "loss_ib": 0.0011034758063033223, + "step": 1588 + }, + { + "ce_ib": 3.73880934715271, + "ce_orig": 0.4564005434513092, + "epoch": 0.45697030699547053, + "kl_loss": 0.1462571620941162, + "loss_ib": 0.0018364524003118277, + "step": 1589 + }, + { + "ce_ib": 9.295635223388672, + "ce_orig": 1.0418143272399902, + "epoch": 0.45697030699547053, + "kl_loss": 0.07211525738239288, + "loss_ib": 0.0016507160617038608, + "step": 1589 + }, + { + "ce_ib": 6.566661357879639, + "ce_orig": 0.8882603049278259, + "epoch": 0.45697030699547053, + "kl_loss": 0.1281050741672516, + "loss_ib": 0.001937716850079596, + "step": 1589 + }, + { + "ce_ib": 3.567533016204834, + "ce_orig": 0.5441127419471741, + "epoch": 0.45697030699547053, + "kl_loss": 0.07620217651128769, + "loss_ib": 0.0011187749914824963, + "step": 1589 + }, + { + "epoch": 0.4572578905744482, + "grad_norm": 0.10205253958702087, + "learning_rate": 4.808211074945043e-05, + "loss": 0.8768, + "step": 1590 + }, + { + "ce_ib": 4.352819442749023, + "ce_orig": 0.5268945097923279, + "epoch": 0.4572578905744482, + "kl_loss": 0.08796261250972748, + "loss_ib": 0.0013149079168215394, + "step": 1590 + }, + { + "ce_ib": 8.76469612121582, + "ce_orig": 1.4142521619796753, + "epoch": 0.4572578905744482, + "kl_loss": 0.11360123753547668, + "loss_ib": 0.002012481912970543, + "step": 1590 + }, + { + "ce_ib": 7.0795578956604, + "ce_orig": 0.5542007088661194, + "epoch": 0.4572578905744482, + "kl_loss": 0.1741562783718109, + "loss_ib": 0.002449518535286188, + "step": 1590 + }, + { + "ce_ib": 5.997165679931641, + "ce_orig": 0.711487352848053, + "epoch": 0.4572578905744482, + "kl_loss": 0.13430465757846832, + "loss_ib": 0.0019427631050348282, + "step": 1590 + }, + { + "ce_ib": 10.568790435791016, + "ce_orig": 1.812776803970337, + "epoch": 0.45754547415342584, + "kl_loss": 0.10568447411060333, + "loss_ib": 0.002113723661750555, + "step": 1591 + }, + { + "ce_ib": 4.506981372833252, + "ce_orig": 0.6098354458808899, + "epoch": 0.45754547415342584, + "kl_loss": 0.07062321901321411, + "loss_ib": 0.0011569303460419178, + "step": 1591 + }, + { + "ce_ib": 3.818392753601074, + "ce_orig": 0.5639158487319946, + "epoch": 0.45754547415342584, + "kl_loss": 0.09274062514305115, + "loss_ib": 0.0013092454755678773, + "step": 1591 + }, + { + "ce_ib": 5.184255123138428, + "ce_orig": 0.8862494230270386, + "epoch": 0.45754547415342584, + "kl_loss": 0.10891106724739075, + "loss_ib": 0.0016075362218543887, + "step": 1591 + }, + { + "ce_ib": 7.324061393737793, + "ce_orig": 1.2558680772781372, + "epoch": 0.45783305773240346, + "kl_loss": 0.13186070322990417, + "loss_ib": 0.002051013056188822, + "step": 1592 + }, + { + "ce_ib": 4.0423150062561035, + "ce_orig": 0.7875872254371643, + "epoch": 0.45783305773240346, + "kl_loss": 0.06912372261285782, + "loss_ib": 0.001095468644052744, + "step": 1592 + }, + { + "ce_ib": 5.6877121925354, + "ce_orig": 0.8436124324798584, + "epoch": 0.45783305773240346, + "kl_loss": 0.055687397718429565, + "loss_ib": 0.001125645125284791, + "step": 1592 + }, + { + "ce_ib": 6.3269476890563965, + "ce_orig": 0.9939271211624146, + "epoch": 0.45783305773240346, + "kl_loss": 0.09441665560007095, + "loss_ib": 0.0015768612502142787, + "step": 1592 + }, + { + "ce_ib": 5.305604457855225, + "ce_orig": 0.9422214031219482, + "epoch": 0.45812064131138114, + "kl_loss": 0.09084972739219666, + "loss_ib": 0.001439057756215334, + "step": 1593 + }, + { + "ce_ib": 5.688448429107666, + "ce_orig": 1.2723308801651, + "epoch": 0.45812064131138114, + "kl_loss": 0.06870556622743607, + "loss_ib": 0.001255900482647121, + "step": 1593 + }, + { + "ce_ib": 3.266572952270508, + "ce_orig": 0.7220095992088318, + "epoch": 0.45812064131138114, + "kl_loss": 0.08539354801177979, + "loss_ib": 0.001180592691525817, + "step": 1593 + }, + { + "ce_ib": 5.578038215637207, + "ce_orig": 1.0628691911697388, + "epoch": 0.45812064131138114, + "kl_loss": 0.16160991787910461, + "loss_ib": 0.0021739029325544834, + "step": 1593 + }, + { + "ce_ib": 4.618196964263916, + "ce_orig": 0.7974779605865479, + "epoch": 0.45840822489035876, + "kl_loss": 0.09742704033851624, + "loss_ib": 0.0014360900968313217, + "step": 1594 + }, + { + "ce_ib": 2.8123772144317627, + "ce_orig": 0.42348209023475647, + "epoch": 0.45840822489035876, + "kl_loss": 0.07272525876760483, + "loss_ib": 0.0010084903333336115, + "step": 1594 + }, + { + "ce_ib": 5.134484767913818, + "ce_orig": 0.8697676062583923, + "epoch": 0.45840822489035876, + "kl_loss": 0.07440787553787231, + "loss_ib": 0.0012575271539390087, + "step": 1594 + }, + { + "ce_ib": 7.12611722946167, + "ce_orig": 0.7876502275466919, + "epoch": 0.45840822489035876, + "kl_loss": 0.1192026361823082, + "loss_ib": 0.0019046380184590816, + "step": 1594 + }, + { + "epoch": 0.4586958084693364, + "grad_norm": 0.09854661673307419, + "learning_rate": 4.8067177575956414e-05, + "loss": 0.8765, + "step": 1595 + }, + { + "ce_ib": 5.294865608215332, + "ce_orig": 0.8717991709709167, + "epoch": 0.4586958084693364, + "kl_loss": 0.09843865036964417, + "loss_ib": 0.0015138729941099882, + "step": 1595 + }, + { + "ce_ib": 5.972879409790039, + "ce_orig": 1.0318090915679932, + "epoch": 0.4586958084693364, + "kl_loss": 0.08808019012212753, + "loss_ib": 0.0014780898345634341, + "step": 1595 + }, + { + "ce_ib": 3.513155937194824, + "ce_orig": 0.4928942918777466, + "epoch": 0.4586958084693364, + "kl_loss": 0.10873472690582275, + "loss_ib": 0.001438662875443697, + "step": 1595 + }, + { + "ce_ib": 6.674983501434326, + "ce_orig": 0.9159876704216003, + "epoch": 0.4586958084693364, + "kl_loss": 0.06071559339761734, + "loss_ib": 0.0012746542925015092, + "step": 1595 + }, + { + "ce_ib": 3.2801144123077393, + "ce_orig": 0.5153396725654602, + "epoch": 0.45898339204831407, + "kl_loss": 0.059506505727767944, + "loss_ib": 0.0009230764699168503, + "step": 1596 + }, + { + "ce_ib": 5.627926349639893, + "ce_orig": 1.0559040307998657, + "epoch": 0.45898339204831407, + "kl_loss": 0.12333964556455612, + "loss_ib": 0.0017961891135200858, + "step": 1596 + }, + { + "ce_ib": 6.170040130615234, + "ce_orig": 0.9097152352333069, + "epoch": 0.45898339204831407, + "kl_loss": 0.12116003036499023, + "loss_ib": 0.0018286042613908648, + "step": 1596 + }, + { + "ce_ib": 2.9290711879730225, + "ce_orig": 0.6307063698768616, + "epoch": 0.45898339204831407, + "kl_loss": 0.07005194574594498, + "loss_ib": 0.0009934265399351716, + "step": 1596 + }, + { + "ce_ib": 6.7575225830078125, + "ce_orig": 1.235298991203308, + "epoch": 0.4592709756272917, + "kl_loss": 0.07809670269489288, + "loss_ib": 0.0014567193575203419, + "step": 1597 + }, + { + "ce_ib": 4.255527973175049, + "ce_orig": 0.6438266038894653, + "epoch": 0.4592709756272917, + "kl_loss": 0.05946275591850281, + "loss_ib": 0.001020180294290185, + "step": 1597 + }, + { + "ce_ib": 5.109703063964844, + "ce_orig": 0.8424913883209229, + "epoch": 0.4592709756272917, + "kl_loss": 0.06801290810108185, + "loss_ib": 0.0011910992907360196, + "step": 1597 + }, + { + "ce_ib": 4.574324607849121, + "ce_orig": 0.5026845335960388, + "epoch": 0.4592709756272917, + "kl_loss": 0.0922662615776062, + "loss_ib": 0.001380095025524497, + "step": 1597 + }, + { + "ce_ib": 10.170074462890625, + "ce_orig": 1.6492831707000732, + "epoch": 0.4595585592062693, + "kl_loss": 0.06756480038166046, + "loss_ib": 0.001692655379883945, + "step": 1598 + }, + { + "ce_ib": 3.6748340129852295, + "ce_orig": 0.3913680911064148, + "epoch": 0.4595585592062693, + "kl_loss": 0.1260717511177063, + "loss_ib": 0.0016282008728012443, + "step": 1598 + }, + { + "ce_ib": 6.4218316078186035, + "ce_orig": 1.2846662998199463, + "epoch": 0.4595585592062693, + "kl_loss": 0.08358040452003479, + "loss_ib": 0.0014779871562495828, + "step": 1598 + }, + { + "ce_ib": 6.0570478439331055, + "ce_orig": 1.132595181465149, + "epoch": 0.4595585592062693, + "kl_loss": 0.09301123023033142, + "loss_ib": 0.0015358170494437218, + "step": 1598 + }, + { + "ce_ib": 5.923004150390625, + "ce_orig": 1.108635425567627, + "epoch": 0.45984614278524694, + "kl_loss": 0.0924454778432846, + "loss_ib": 0.0015167552046477795, + "step": 1599 + }, + { + "ce_ib": 5.29793643951416, + "ce_orig": 0.5977010726928711, + "epoch": 0.45984614278524694, + "kl_loss": 0.08304814249277115, + "loss_ib": 0.0013602750841528177, + "step": 1599 + }, + { + "ce_ib": 4.059332370758057, + "ce_orig": 0.8152369856834412, + "epoch": 0.45984614278524694, + "kl_loss": 0.10456560552120209, + "loss_ib": 0.0014515892835333943, + "step": 1599 + }, + { + "ce_ib": 4.594156265258789, + "ce_orig": 0.7348973751068115, + "epoch": 0.45984614278524694, + "kl_loss": 0.08424752205610275, + "loss_ib": 0.0013018909376114607, + "step": 1599 + }, + { + "epoch": 0.4601337263642246, + "grad_norm": 0.08759678900241852, + "learning_rate": 4.805218882827761e-05, + "loss": 0.8529, + "step": 1600 + }, + { + "ce_ib": 9.236629486083984, + "ce_orig": 1.6263139247894287, + "epoch": 0.4601337263642246, + "kl_loss": 0.08611010015010834, + "loss_ib": 0.0017847638810053468, + "step": 1600 + }, + { + "ce_ib": 3.228433609008789, + "ce_orig": 0.6260079145431519, + "epoch": 0.4601337263642246, + "kl_loss": 0.0828123539686203, + "loss_ib": 0.0011509668547660112, + "step": 1600 + }, + { + "ce_ib": 4.025729656219482, + "ce_orig": 0.43485715985298157, + "epoch": 0.4601337263642246, + "kl_loss": 0.12939047813415527, + "loss_ib": 0.0016964777605608106, + "step": 1600 + }, + { + "ce_ib": 6.7840776443481445, + "ce_orig": 0.8517627716064453, + "epoch": 0.4601337263642246, + "kl_loss": 0.11920034140348434, + "loss_ib": 0.00187041109893471, + "step": 1600 + }, + { + "ce_ib": 5.803313732147217, + "ce_orig": 1.0648530721664429, + "epoch": 0.46042130994320224, + "kl_loss": 0.10331310331821442, + "loss_ib": 0.0016134623438119888, + "step": 1601 + }, + { + "ce_ib": 6.364394664764404, + "ce_orig": 0.6606990098953247, + "epoch": 0.46042130994320224, + "kl_loss": 0.12442044913768768, + "loss_ib": 0.0018806438893079758, + "step": 1601 + }, + { + "ce_ib": 7.067164421081543, + "ce_orig": 1.1523054838180542, + "epoch": 0.46042130994320224, + "kl_loss": 0.08956655859947205, + "loss_ib": 0.001602382049895823, + "step": 1601 + }, + { + "ce_ib": 4.602412700653076, + "ce_orig": 0.6192760467529297, + "epoch": 0.46042130994320224, + "kl_loss": 0.07083219289779663, + "loss_ib": 0.0011685631470754743, + "step": 1601 + }, + { + "ce_ib": 6.54266881942749, + "ce_orig": 1.3287016153335571, + "epoch": 0.46070889352217986, + "kl_loss": 0.09697651863098145, + "loss_ib": 0.0016240319237113, + "step": 1602 + }, + { + "ce_ib": 7.9192914962768555, + "ce_orig": 1.5035167932510376, + "epoch": 0.46070889352217986, + "kl_loss": 0.1253090351819992, + "loss_ib": 0.002045019529759884, + "step": 1602 + }, + { + "ce_ib": 4.484292984008789, + "ce_orig": 0.9421902298927307, + "epoch": 0.46070889352217986, + "kl_loss": 0.07049290835857391, + "loss_ib": 0.001153358374722302, + "step": 1602 + }, + { + "ce_ib": 0.9982086420059204, + "ce_orig": 0.09433921426534653, + "epoch": 0.46070889352217986, + "kl_loss": 0.20659123361110687, + "loss_ib": 0.0021657331380993128, + "step": 1602 + }, + { + "ce_ib": 4.69843053817749, + "ce_orig": 0.6122257709503174, + "epoch": 0.46099647710115754, + "kl_loss": 0.14420655369758606, + "loss_ib": 0.0019119085045531392, + "step": 1603 + }, + { + "ce_ib": 4.817044258117676, + "ce_orig": 0.8395156860351562, + "epoch": 0.46099647710115754, + "kl_loss": 0.08112649619579315, + "loss_ib": 0.0012929692165926099, + "step": 1603 + }, + { + "ce_ib": 6.897902488708496, + "ce_orig": 0.48970434069633484, + "epoch": 0.46099647710115754, + "kl_loss": 0.10345920920372009, + "loss_ib": 0.0017243822803720832, + "step": 1603 + }, + { + "ce_ib": 5.222255706787109, + "ce_orig": 0.8666639924049377, + "epoch": 0.46099647710115754, + "kl_loss": 0.13412630558013916, + "loss_ib": 0.0018634884618222713, + "step": 1603 + }, + { + "ce_ib": 5.094054698944092, + "ce_orig": 0.810595691204071, + "epoch": 0.46128406068013517, + "kl_loss": 0.09368740767240524, + "loss_ib": 0.0014462795807048678, + "step": 1604 + }, + { + "ce_ib": 7.022961139678955, + "ce_orig": 1.11335289478302, + "epoch": 0.46128406068013517, + "kl_loss": 0.14231383800506592, + "loss_ib": 0.0021254345774650574, + "step": 1604 + }, + { + "ce_ib": 6.603672504425049, + "ce_orig": 0.5987228155136108, + "epoch": 0.46128406068013517, + "kl_loss": 0.32867318391799927, + "loss_ib": 0.003947099205106497, + "step": 1604 + }, + { + "ce_ib": 9.110772132873535, + "ce_orig": 1.105272650718689, + "epoch": 0.46128406068013517, + "kl_loss": 0.14440955221652985, + "loss_ib": 0.0023551725316792727, + "step": 1604 + }, + { + "epoch": 0.4615716442591128, + "grad_norm": 0.09383225440979004, + "learning_rate": 4.803714454252539e-05, + "loss": 0.853, + "step": 1605 + }, + { + "ce_ib": 6.94500732421875, + "ce_orig": 1.1379133462905884, + "epoch": 0.4615716442591128, + "kl_loss": 0.10250817984342575, + "loss_ib": 0.0017195824766531587, + "step": 1605 + }, + { + "ce_ib": 8.346969604492188, + "ce_orig": 0.9180467128753662, + "epoch": 0.4615716442591128, + "kl_loss": 0.08988431096076965, + "loss_ib": 0.0017335399752482772, + "step": 1605 + }, + { + "ce_ib": 3.6900126934051514, + "ce_orig": 0.49603766202926636, + "epoch": 0.4615716442591128, + "kl_loss": 0.08213101327419281, + "loss_ib": 0.001190311391837895, + "step": 1605 + }, + { + "ce_ib": 3.4203782081604004, + "ce_orig": 0.2425163835287094, + "epoch": 0.4615716442591128, + "kl_loss": 0.05531973019242287, + "loss_ib": 0.0008952350472100079, + "step": 1605 + }, + { + "ce_ib": 7.237185955047607, + "ce_orig": 0.9662517309188843, + "epoch": 0.46185922783809047, + "kl_loss": 0.07357059419155121, + "loss_ib": 0.0014594245003536344, + "step": 1606 + }, + { + "ce_ib": 2.47430682182312, + "ce_orig": 0.5705037713050842, + "epoch": 0.46185922783809047, + "kl_loss": 0.08378972858190536, + "loss_ib": 0.0010853279381990433, + "step": 1606 + }, + { + "ce_ib": 6.324599742889404, + "ce_orig": 0.878682017326355, + "epoch": 0.46185922783809047, + "kl_loss": 0.11069711297750473, + "loss_ib": 0.0017394309397786856, + "step": 1606 + }, + { + "ce_ib": 5.746100425720215, + "ce_orig": 0.8298347592353821, + "epoch": 0.46185922783809047, + "kl_loss": 0.12067721039056778, + "loss_ib": 0.0017813820159062743, + "step": 1606 + }, + { + "ce_ib": 5.822505950927734, + "ce_orig": 0.9922164678573608, + "epoch": 0.4621468114170681, + "kl_loss": 0.06500387191772461, + "loss_ib": 0.0012322892434895039, + "step": 1607 + }, + { + "ce_ib": 5.523167610168457, + "ce_orig": 1.0763579607009888, + "epoch": 0.4621468114170681, + "kl_loss": 0.07131494581699371, + "loss_ib": 0.0012654662132263184, + "step": 1607 + }, + { + "ce_ib": 3.037790298461914, + "ce_orig": 0.49163636565208435, + "epoch": 0.4621468114170681, + "kl_loss": 0.06566369533538818, + "loss_ib": 0.0009604159276932478, + "step": 1607 + }, + { + "ce_ib": 5.943887710571289, + "ce_orig": 1.0916857719421387, + "epoch": 0.4621468114170681, + "kl_loss": 0.07678355276584625, + "loss_ib": 0.001362224225886166, + "step": 1607 + }, + { + "ce_ib": 4.539219379425049, + "ce_orig": 0.40198588371276855, + "epoch": 0.4624343949960457, + "kl_loss": 0.17819423973560333, + "loss_ib": 0.0022358642891049385, + "step": 1608 + }, + { + "ce_ib": 5.70170783996582, + "ce_orig": 0.7915127873420715, + "epoch": 0.4624343949960457, + "kl_loss": 0.11548036336898804, + "loss_ib": 0.001724974368698895, + "step": 1608 + }, + { + "ce_ib": 4.3653998374938965, + "ce_orig": 0.7686159610748291, + "epoch": 0.4624343949960457, + "kl_loss": 0.06622038781642914, + "loss_ib": 0.001098743756301701, + "step": 1608 + }, + { + "ce_ib": 5.018896579742432, + "ce_orig": 0.8883548378944397, + "epoch": 0.4624343949960457, + "kl_loss": 0.12289467453956604, + "loss_ib": 0.0017308363458141685, + "step": 1608 + }, + { + "ce_ib": 4.712170600891113, + "ce_orig": 0.7292823195457458, + "epoch": 0.46272197857502334, + "kl_loss": 0.10712899267673492, + "loss_ib": 0.0015425069723278284, + "step": 1609 + }, + { + "ce_ib": 2.7452738285064697, + "ce_orig": 0.49776899814605713, + "epoch": 0.46272197857502334, + "kl_loss": 0.05747693032026291, + "loss_ib": 0.0008492966298945248, + "step": 1609 + }, + { + "ce_ib": 4.900729656219482, + "ce_orig": 0.876089334487915, + "epoch": 0.46272197857502334, + "kl_loss": 0.08828122913837433, + "loss_ib": 0.0013728851918131113, + "step": 1609 + }, + { + "ce_ib": 5.637519359588623, + "ce_orig": 0.944961667060852, + "epoch": 0.46272197857502334, + "kl_loss": 0.08231452107429504, + "loss_ib": 0.001386897056363523, + "step": 1609 + }, + { + "epoch": 0.463009562154001, + "grad_norm": 0.08620814234018326, + "learning_rate": 4.802204475494494e-05, + "loss": 0.8515, + "step": 1610 + }, + { + "ce_ib": 5.9752302169799805, + "ce_orig": 0.6863085031509399, + "epoch": 0.463009562154001, + "kl_loss": 0.13621509075164795, + "loss_ib": 0.0019596738275140524, + "step": 1610 + }, + { + "ce_ib": 6.934019088745117, + "ce_orig": 1.356187105178833, + "epoch": 0.463009562154001, + "kl_loss": 0.11160522699356079, + "loss_ib": 0.0018094541737809777, + "step": 1610 + }, + { + "ce_ib": 5.458354473114014, + "ce_orig": 0.9029397368431091, + "epoch": 0.463009562154001, + "kl_loss": 0.12356863170862198, + "loss_ib": 0.0017815217142924666, + "step": 1610 + }, + { + "ce_ib": 8.72394847869873, + "ce_orig": 1.678891897201538, + "epoch": 0.463009562154001, + "kl_loss": 0.11209751665592194, + "loss_ib": 0.001993370009586215, + "step": 1610 + }, + { + "ce_ib": 1.7895445823669434, + "ce_orig": 0.24826738238334656, + "epoch": 0.46329714573297864, + "kl_loss": 0.2264328896999359, + "loss_ib": 0.0024432833306491375, + "step": 1611 + }, + { + "ce_ib": 6.446073532104492, + "ce_orig": 1.0942883491516113, + "epoch": 0.46329714573297864, + "kl_loss": 0.10512395203113556, + "loss_ib": 0.0016958469059318304, + "step": 1611 + }, + { + "ce_ib": 6.265317916870117, + "ce_orig": 1.0460630655288696, + "epoch": 0.46329714573297864, + "kl_loss": 0.1364673376083374, + "loss_ib": 0.0019912051502615213, + "step": 1611 + }, + { + "ce_ib": 6.660171985626221, + "ce_orig": 1.116076946258545, + "epoch": 0.46329714573297864, + "kl_loss": 0.1512731909751892, + "loss_ib": 0.002178749069571495, + "step": 1611 + }, + { + "ce_ib": 4.695237636566162, + "ce_orig": 0.7803314328193665, + "epoch": 0.46358472931195627, + "kl_loss": 0.07630561292171478, + "loss_ib": 0.0012325798161327839, + "step": 1612 + }, + { + "ce_ib": 6.452287673950195, + "ce_orig": 0.7132724523544312, + "epoch": 0.46358472931195627, + "kl_loss": 0.1322929561138153, + "loss_ib": 0.0019681581761687994, + "step": 1612 + }, + { + "ce_ib": 6.530320644378662, + "ce_orig": 1.08219313621521, + "epoch": 0.46358472931195627, + "kl_loss": 0.102836012840271, + "loss_ib": 0.001681392197497189, + "step": 1612 + }, + { + "ce_ib": 4.676810264587402, + "ce_orig": 0.6925322413444519, + "epoch": 0.46358472931195627, + "kl_loss": 0.07854016125202179, + "loss_ib": 0.0012530825333669782, + "step": 1612 + }, + { + "ce_ib": 3.5001449584960938, + "ce_orig": 0.6155581474304199, + "epoch": 0.46387231289093395, + "kl_loss": 0.056357331573963165, + "loss_ib": 0.0009135878062807024, + "step": 1613 + }, + { + "ce_ib": 5.054927825927734, + "ce_orig": 0.6531220078468323, + "epoch": 0.46387231289093395, + "kl_loss": 0.12473924458026886, + "loss_ib": 0.001752885291352868, + "step": 1613 + }, + { + "ce_ib": 6.452062129974365, + "ce_orig": 1.0626121759414673, + "epoch": 0.46387231289093395, + "kl_loss": 0.10180076956748962, + "loss_ib": 0.0016632139449939132, + "step": 1613 + }, + { + "ce_ib": 4.114443778991699, + "ce_orig": 0.6199149489402771, + "epoch": 0.46387231289093395, + "kl_loss": 0.08772272616624832, + "loss_ib": 0.0012886716285720468, + "step": 1613 + }, + { + "ce_ib": 4.465858459472656, + "ce_orig": 0.6490845084190369, + "epoch": 0.46415989646991157, + "kl_loss": 0.10039170831441879, + "loss_ib": 0.0014505028957501054, + "step": 1614 + }, + { + "ce_ib": 4.068137168884277, + "ce_orig": 0.22883208096027374, + "epoch": 0.46415989646991157, + "kl_loss": 0.09778591990470886, + "loss_ib": 0.0013846728252246976, + "step": 1614 + }, + { + "ce_ib": 4.638211727142334, + "ce_orig": 0.5791996121406555, + "epoch": 0.46415989646991157, + "kl_loss": 0.10834634304046631, + "loss_ib": 0.001547284540720284, + "step": 1614 + }, + { + "ce_ib": 8.58838939666748, + "ce_orig": 1.750071406364441, + "epoch": 0.46415989646991157, + "kl_loss": 0.10412950813770294, + "loss_ib": 0.0019001340260729194, + "step": 1614 + }, + { + "epoch": 0.4644474800488892, + "grad_norm": 0.08307844400405884, + "learning_rate": 4.800688950191514e-05, + "loss": 0.818, + "step": 1615 + }, + { + "ce_ib": 6.64310359954834, + "ce_orig": 0.7114260196685791, + "epoch": 0.4644474800488892, + "kl_loss": 0.15462788939476013, + "loss_ib": 0.0022105893585830927, + "step": 1615 + }, + { + "ce_ib": 8.967602729797363, + "ce_orig": 1.147449254989624, + "epoch": 0.4644474800488892, + "kl_loss": 0.10776953399181366, + "loss_ib": 0.0019744555465877056, + "step": 1615 + }, + { + "ce_ib": 8.45907211303711, + "ce_orig": 1.7430347204208374, + "epoch": 0.4644474800488892, + "kl_loss": 0.11088278889656067, + "loss_ib": 0.0019547350239008665, + "step": 1615 + }, + { + "ce_ib": 6.470301628112793, + "ce_orig": 0.8734087347984314, + "epoch": 0.4644474800488892, + "kl_loss": 0.23173360526561737, + "loss_ib": 0.0029643659945577383, + "step": 1615 + }, + { + "ce_ib": 7.529819011688232, + "ce_orig": 1.5193835496902466, + "epoch": 0.4647350636278669, + "kl_loss": 0.11583857238292694, + "loss_ib": 0.0019113676389679313, + "step": 1616 + }, + { + "ce_ib": 4.394379615783691, + "ce_orig": 0.6914932727813721, + "epoch": 0.4647350636278669, + "kl_loss": 0.08587324619293213, + "loss_ib": 0.0012981703039258718, + "step": 1616 + }, + { + "ce_ib": 2.6670334339141846, + "ce_orig": 0.2629625201225281, + "epoch": 0.4647350636278669, + "kl_loss": 0.04519543796777725, + "loss_ib": 0.0007186576840467751, + "step": 1616 + }, + { + "ce_ib": 8.432150840759277, + "ce_orig": 1.5133119821548462, + "epoch": 0.4647350636278669, + "kl_loss": 0.09762774407863617, + "loss_ib": 0.0018194925505667925, + "step": 1616 + }, + { + "ce_ib": 5.002896308898926, + "ce_orig": 0.7370746731758118, + "epoch": 0.4650226472068445, + "kl_loss": 0.09471193701028824, + "loss_ib": 0.001447408925741911, + "step": 1617 + }, + { + "ce_ib": 6.903255462646484, + "ce_orig": 1.2944138050079346, + "epoch": 0.4650226472068445, + "kl_loss": 0.07799950242042542, + "loss_ib": 0.0014703205088153481, + "step": 1617 + }, + { + "ce_ib": 5.449745178222656, + "ce_orig": 0.9606087803840637, + "epoch": 0.4650226472068445, + "kl_loss": 0.11117492616176605, + "loss_ib": 0.0016567236743867397, + "step": 1617 + }, + { + "ce_ib": 4.104331970214844, + "ce_orig": 0.6403983235359192, + "epoch": 0.4650226472068445, + "kl_loss": 0.04955866187810898, + "loss_ib": 0.0009060197626240551, + "step": 1617 + }, + { + "ce_ib": 5.582988262176514, + "ce_orig": 0.925099790096283, + "epoch": 0.4653102307858221, + "kl_loss": 0.10564757883548737, + "loss_ib": 0.001614774577319622, + "step": 1618 + }, + { + "ce_ib": 5.2787580490112305, + "ce_orig": 0.6050167083740234, + "epoch": 0.4653102307858221, + "kl_loss": 0.09543651342391968, + "loss_ib": 0.0014822408556938171, + "step": 1618 + }, + { + "ce_ib": 4.415472507476807, + "ce_orig": 0.8990333080291748, + "epoch": 0.4653102307858221, + "kl_loss": 0.08279430121183395, + "loss_ib": 0.0012694902252405882, + "step": 1618 + }, + { + "ce_ib": 3.6856658458709717, + "ce_orig": 0.47574740648269653, + "epoch": 0.4653102307858221, + "kl_loss": 0.08100990951061249, + "loss_ib": 0.0011786656687036157, + "step": 1618 + }, + { + "ce_ib": 5.895461082458496, + "ce_orig": 1.0559676885604858, + "epoch": 0.46559781436479974, + "kl_loss": 0.1050267368555069, + "loss_ib": 0.0016398134175688028, + "step": 1619 + }, + { + "ce_ib": 2.2124409675598145, + "ce_orig": 0.17563967406749725, + "epoch": 0.46559781436479974, + "kl_loss": 0.11302216351032257, + "loss_ib": 0.00135146570391953, + "step": 1619 + }, + { + "ce_ib": 3.3515477180480957, + "ce_orig": 0.682487428188324, + "epoch": 0.46559781436479974, + "kl_loss": 0.08447563648223877, + "loss_ib": 0.0011799110798165202, + "step": 1619 + }, + { + "ce_ib": 3.6746199131011963, + "ce_orig": 0.7134358882904053, + "epoch": 0.46559781436479974, + "kl_loss": 0.09489287436008453, + "loss_ib": 0.0013163905823603272, + "step": 1619 + }, + { + "epoch": 0.4658853979437774, + "grad_norm": 0.09470746666193008, + "learning_rate": 4.7991678819948516e-05, + "loss": 0.8278, + "step": 1620 + }, + { + "ce_ib": 2.319380283355713, + "ce_orig": 0.5591430068016052, + "epoch": 0.4658853979437774, + "kl_loss": 0.05833408236503601, + "loss_ib": 0.0008152788504958153, + "step": 1620 + }, + { + "ce_ib": 6.1116204261779785, + "ce_orig": 1.0034416913986206, + "epoch": 0.4658853979437774, + "kl_loss": 0.11369115114212036, + "loss_ib": 0.0017480734968557954, + "step": 1620 + }, + { + "ce_ib": 8.599710464477539, + "ce_orig": 1.6411137580871582, + "epoch": 0.4658853979437774, + "kl_loss": 0.09795013815164566, + "loss_ib": 0.0018394723301753402, + "step": 1620 + }, + { + "ce_ib": 3.660987615585327, + "ce_orig": 0.43610480427742004, + "epoch": 0.4658853979437774, + "kl_loss": 0.061731308698654175, + "loss_ib": 0.0009834117954596877, + "step": 1620 + }, + { + "ce_ib": 8.022945404052734, + "ce_orig": 1.366593360900879, + "epoch": 0.46617298152275505, + "kl_loss": 0.14544644951820374, + "loss_ib": 0.002256758976727724, + "step": 1621 + }, + { + "ce_ib": 3.496922016143799, + "ce_orig": 0.5939387679100037, + "epoch": 0.46617298152275505, + "kl_loss": 0.11899632215499878, + "loss_ib": 0.0015396552626043558, + "step": 1621 + }, + { + "ce_ib": 4.745185852050781, + "ce_orig": 0.656234085559845, + "epoch": 0.46617298152275505, + "kl_loss": 0.0911838710308075, + "loss_ib": 0.0013863572385162115, + "step": 1621 + }, + { + "ce_ib": 4.084486961364746, + "ce_orig": 0.4449410140514374, + "epoch": 0.46617298152275505, + "kl_loss": 0.08196362107992172, + "loss_ib": 0.0012280847877264023, + "step": 1621 + }, + { + "ce_ib": 3.407409191131592, + "ce_orig": 0.3567643463611603, + "epoch": 0.46646056510173267, + "kl_loss": 0.07426545023918152, + "loss_ib": 0.0010833953274413943, + "step": 1622 + }, + { + "ce_ib": 4.158196449279785, + "ce_orig": 0.4661784768104553, + "epoch": 0.46646056510173267, + "kl_loss": 0.09505394846200943, + "loss_ib": 0.0013663590652868152, + "step": 1622 + }, + { + "ce_ib": 5.108338832855225, + "ce_orig": 0.6385115385055542, + "epoch": 0.46646056510173267, + "kl_loss": 0.10248829424381256, + "loss_ib": 0.0015357168158516288, + "step": 1622 + }, + { + "ce_ib": 6.897539138793945, + "ce_orig": 1.223198652267456, + "epoch": 0.46646056510173267, + "kl_loss": 0.15649108588695526, + "loss_ib": 0.0022546646650880575, + "step": 1622 + }, + { + "ce_ib": 4.51570463180542, + "ce_orig": 0.7653912305831909, + "epoch": 0.46674814868071035, + "kl_loss": 0.07304967939853668, + "loss_ib": 0.0011820672079920769, + "step": 1623 + }, + { + "ce_ib": 3.990912437438965, + "ce_orig": 0.8196806907653809, + "epoch": 0.46674814868071035, + "kl_loss": 0.07913756370544434, + "loss_ib": 0.0011904668062925339, + "step": 1623 + }, + { + "ce_ib": 6.494147300720215, + "ce_orig": 1.1667721271514893, + "epoch": 0.46674814868071035, + "kl_loss": 0.08262184262275696, + "loss_ib": 0.0014756330056115985, + "step": 1623 + }, + { + "ce_ib": 4.262913703918457, + "ce_orig": 0.9009286165237427, + "epoch": 0.46674814868071035, + "kl_loss": 0.04920269176363945, + "loss_ib": 0.0009183182846754789, + "step": 1623 + }, + { + "ce_ib": 5.713997840881348, + "ce_orig": 1.3532772064208984, + "epoch": 0.467035732259688, + "kl_loss": 0.14569884538650513, + "loss_ib": 0.0020283882040530443, + "step": 1624 + }, + { + "ce_ib": 6.004459381103516, + "ce_orig": 0.6772447228431702, + "epoch": 0.467035732259688, + "kl_loss": 0.13476631045341492, + "loss_ib": 0.001948108896613121, + "step": 1624 + }, + { + "ce_ib": 4.951720237731934, + "ce_orig": 0.9936774969100952, + "epoch": 0.467035732259688, + "kl_loss": 0.12742146849632263, + "loss_ib": 0.0017693866975605488, + "step": 1624 + }, + { + "ce_ib": 3.107513904571533, + "ce_orig": 0.3071587085723877, + "epoch": 0.467035732259688, + "kl_loss": 0.12146449834108353, + "loss_ib": 0.0015253963647410274, + "step": 1624 + }, + { + "epoch": 0.4673233158386656, + "grad_norm": 0.10182998329401016, + "learning_rate": 4.797641274569114e-05, + "loss": 0.9277, + "step": 1625 + }, + { + "ce_ib": 7.216080188751221, + "ce_orig": 1.0755058526992798, + "epoch": 0.4673233158386656, + "kl_loss": 0.12118510901927948, + "loss_ib": 0.001933458959683776, + "step": 1625 + }, + { + "ce_ib": 4.99370002746582, + "ce_orig": 0.8926939368247986, + "epoch": 0.4673233158386656, + "kl_loss": 0.21203802525997162, + "loss_ib": 0.0026197500992566347, + "step": 1625 + }, + { + "ce_ib": 7.363295555114746, + "ce_orig": 0.7989278435707092, + "epoch": 0.4673233158386656, + "kl_loss": 0.09319159388542175, + "loss_ib": 0.0016682454152032733, + "step": 1625 + }, + { + "ce_ib": 5.072775363922119, + "ce_orig": 0.9861810207366943, + "epoch": 0.4673233158386656, + "kl_loss": 0.04820244759321213, + "loss_ib": 0.0009893019450828433, + "step": 1625 + }, + { + "ce_ib": 4.191864013671875, + "ce_orig": 0.5402066111564636, + "epoch": 0.4676108994176433, + "kl_loss": 0.09365972876548767, + "loss_ib": 0.0013557836646214128, + "step": 1626 + }, + { + "ce_ib": 5.7118821144104, + "ce_orig": 0.7033581137657166, + "epoch": 0.4676108994176433, + "kl_loss": 0.1047724187374115, + "loss_ib": 0.0016189123271033168, + "step": 1626 + }, + { + "ce_ib": 3.9012491703033447, + "ce_orig": 0.6061784625053406, + "epoch": 0.4676108994176433, + "kl_loss": 0.0717439353466034, + "loss_ib": 0.0011075641959905624, + "step": 1626 + }, + { + "ce_ib": 2.0894625186920166, + "ce_orig": 0.42235544323921204, + "epoch": 0.4676108994176433, + "kl_loss": 0.056995391845703125, + "loss_ib": 0.0007789001683704555, + "step": 1626 + }, + { + "ce_ib": 8.207594871520996, + "ce_orig": 1.0996456146240234, + "epoch": 0.4678984829966209, + "kl_loss": 0.11258533596992493, + "loss_ib": 0.001946612843312323, + "step": 1627 + }, + { + "ce_ib": 5.346253871917725, + "ce_orig": 0.6724973917007446, + "epoch": 0.4678984829966209, + "kl_loss": 0.15539121627807617, + "loss_ib": 0.0020885374397039413, + "step": 1627 + }, + { + "ce_ib": 5.198059558868408, + "ce_orig": 0.7222253084182739, + "epoch": 0.4678984829966209, + "kl_loss": 0.08410137891769409, + "loss_ib": 0.001360819791443646, + "step": 1627 + }, + { + "ce_ib": 6.272176742553711, + "ce_orig": 1.3587089776992798, + "epoch": 0.4678984829966209, + "kl_loss": 0.07652045786380768, + "loss_ib": 0.0013924222439527512, + "step": 1627 + }, + { + "ce_ib": 3.9057886600494385, + "ce_orig": 0.744353175163269, + "epoch": 0.4681860665755985, + "kl_loss": 0.0938820019364357, + "loss_ib": 0.001329398830421269, + "step": 1628 + }, + { + "ce_ib": 4.421374320983887, + "ce_orig": 0.9611724615097046, + "epoch": 0.4681860665755985, + "kl_loss": 0.093656025826931, + "loss_ib": 0.0013786976924166083, + "step": 1628 + }, + { + "ce_ib": 4.582045078277588, + "ce_orig": 0.8428356647491455, + "epoch": 0.4681860665755985, + "kl_loss": 0.049522776156663895, + "loss_ib": 0.0009534322307445109, + "step": 1628 + }, + { + "ce_ib": 6.360233783721924, + "ce_orig": 0.7545627951622009, + "epoch": 0.4681860665755985, + "kl_loss": 0.08374233543872833, + "loss_ib": 0.0014734467258676887, + "step": 1628 + }, + { + "ce_ib": 3.5460033416748047, + "ce_orig": 0.6144778728485107, + "epoch": 0.46847365015457615, + "kl_loss": 0.0757514238357544, + "loss_ib": 0.0011121145216748118, + "step": 1629 + }, + { + "ce_ib": 8.494545936584473, + "ce_orig": 0.8958108425140381, + "epoch": 0.46847365015457615, + "kl_loss": 0.12561029195785522, + "loss_ib": 0.002105557359755039, + "step": 1629 + }, + { + "ce_ib": 5.489189624786377, + "ce_orig": 0.6908852458000183, + "epoch": 0.46847365015457615, + "kl_loss": 0.12143585830926895, + "loss_ib": 0.001763277454301715, + "step": 1629 + }, + { + "ce_ib": 5.982507228851318, + "ce_orig": 0.38469386100769043, + "epoch": 0.46847365015457615, + "kl_loss": 0.11548519879579544, + "loss_ib": 0.001753102638758719, + "step": 1629 + }, + { + "epoch": 0.46876123373355383, + "grad_norm": 0.08811596781015396, + "learning_rate": 4.796109131592251e-05, + "loss": 0.8597, + "step": 1630 + }, + { + "ce_ib": 4.264716148376465, + "ce_orig": 0.5796000957489014, + "epoch": 0.46876123373355383, + "kl_loss": 0.10034304112195969, + "loss_ib": 0.001429902040399611, + "step": 1630 + }, + { + "ce_ib": 4.2115583419799805, + "ce_orig": 0.8887648582458496, + "epoch": 0.46876123373355383, + "kl_loss": 0.1510642170906067, + "loss_ib": 0.0019317979458719492, + "step": 1630 + }, + { + "ce_ib": 6.578847408294678, + "ce_orig": 0.9982287287712097, + "epoch": 0.46876123373355383, + "kl_loss": 0.15291307866573334, + "loss_ib": 0.0021870152559131384, + "step": 1630 + }, + { + "ce_ib": 2.334812879562378, + "ce_orig": 0.46511587500572205, + "epoch": 0.46876123373355383, + "kl_loss": 0.05477580800652504, + "loss_ib": 0.0007812393014319241, + "step": 1630 + }, + { + "ce_ib": 8.5779390335083, + "ce_orig": 1.4282007217407227, + "epoch": 0.46904881731253145, + "kl_loss": 0.08074074983596802, + "loss_ib": 0.0016652015037834644, + "step": 1631 + }, + { + "ce_ib": 7.128454685211182, + "ce_orig": 1.3503929376602173, + "epoch": 0.46904881731253145, + "kl_loss": 0.06944238394498825, + "loss_ib": 0.0014072691556066275, + "step": 1631 + }, + { + "ce_ib": 8.130393028259277, + "ce_orig": 1.3000606298446655, + "epoch": 0.46904881731253145, + "kl_loss": 0.10952828079462051, + "loss_ib": 0.0019083220977336168, + "step": 1631 + }, + { + "ce_ib": 5.533591270446777, + "ce_orig": 0.7740480899810791, + "epoch": 0.46904881731253145, + "kl_loss": 0.12924638390541077, + "loss_ib": 0.0018458229023963213, + "step": 1631 + }, + { + "ce_ib": 4.681056499481201, + "ce_orig": 0.8515880107879639, + "epoch": 0.4693364008915091, + "kl_loss": 0.09135465323925018, + "loss_ib": 0.0013816521968692541, + "step": 1632 + }, + { + "ce_ib": 5.774306774139404, + "ce_orig": 0.45689377188682556, + "epoch": 0.4693364008915091, + "kl_loss": 0.13186566531658173, + "loss_ib": 0.0018960871966555715, + "step": 1632 + }, + { + "ce_ib": 4.2594475746154785, + "ce_orig": 0.5677432417869568, + "epoch": 0.4693364008915091, + "kl_loss": 0.11653105169534683, + "loss_ib": 0.0015912551898509264, + "step": 1632 + }, + { + "ce_ib": 5.484095096588135, + "ce_orig": 0.9039006233215332, + "epoch": 0.4693364008915091, + "kl_loss": 0.12893222272396088, + "loss_ib": 0.0018377316882833838, + "step": 1632 + }, + { + "ce_ib": 6.990882873535156, + "ce_orig": 1.2038713693618774, + "epoch": 0.46962398447048675, + "kl_loss": 0.11466965824365616, + "loss_ib": 0.001845784718170762, + "step": 1633 + }, + { + "ce_ib": 6.602409839630127, + "ce_orig": 1.1719971895217896, + "epoch": 0.46962398447048675, + "kl_loss": 0.0945901870727539, + "loss_ib": 0.0016061427304521203, + "step": 1633 + }, + { + "ce_ib": 7.474662780761719, + "ce_orig": 0.7000597715377808, + "epoch": 0.46962398447048675, + "kl_loss": 0.06572352349758148, + "loss_ib": 0.0014047013828530908, + "step": 1633 + }, + { + "ce_ib": 7.451690673828125, + "ce_orig": 1.0486136674880981, + "epoch": 0.46962398447048675, + "kl_loss": 0.07629195600748062, + "loss_ib": 0.0015080886660143733, + "step": 1633 + }, + { + "ce_ib": 4.54631233215332, + "ce_orig": 0.8728470206260681, + "epoch": 0.4699115680494644, + "kl_loss": 0.10248885303735733, + "loss_ib": 0.0014795197639614344, + "step": 1634 + }, + { + "ce_ib": 6.613746166229248, + "ce_orig": 0.7538928985595703, + "epoch": 0.4699115680494644, + "kl_loss": 0.0750356912612915, + "loss_ib": 0.0014117314713075757, + "step": 1634 + }, + { + "ce_ib": 6.792486190795898, + "ce_orig": 1.25835120677948, + "epoch": 0.4699115680494644, + "kl_loss": 0.1144326701760292, + "loss_ib": 0.0018235751194879413, + "step": 1634 + }, + { + "ce_ib": 6.543907642364502, + "ce_orig": 0.936229944229126, + "epoch": 0.4699115680494644, + "kl_loss": 0.10016370564699173, + "loss_ib": 0.0016560277435928583, + "step": 1634 + }, + { + "epoch": 0.470199151628442, + "grad_norm": 0.08764450997114182, + "learning_rate": 4.794571456755552e-05, + "loss": 0.82, + "step": 1635 + }, + { + "ce_ib": 9.22854232788086, + "ce_orig": 1.3820220232009888, + "epoch": 0.470199151628442, + "kl_loss": 0.11801333725452423, + "loss_ib": 0.0021029876079410315, + "step": 1635 + }, + { + "ce_ib": 6.925240516662598, + "ce_orig": 1.0383059978485107, + "epoch": 0.470199151628442, + "kl_loss": 0.11120613664388657, + "loss_ib": 0.0018045854521915317, + "step": 1635 + }, + { + "ce_ib": 4.760732173919678, + "ce_orig": 0.8001464605331421, + "epoch": 0.470199151628442, + "kl_loss": 0.08531699329614639, + "loss_ib": 0.0013292431831359863, + "step": 1635 + }, + { + "ce_ib": 4.772797584533691, + "ce_orig": 0.9150943756103516, + "epoch": 0.470199151628442, + "kl_loss": 0.10309725254774094, + "loss_ib": 0.001508252345956862, + "step": 1635 + }, + { + "ce_ib": 4.658623218536377, + "ce_orig": 0.5300869941711426, + "epoch": 0.4704867352074197, + "kl_loss": 0.14670562744140625, + "loss_ib": 0.0019329185597598553, + "step": 1636 + }, + { + "ce_ib": 4.678239345550537, + "ce_orig": 0.6459416151046753, + "epoch": 0.4704867352074197, + "kl_loss": 0.12120941281318665, + "loss_ib": 0.0016799180302768946, + "step": 1636 + }, + { + "ce_ib": 2.8791491985321045, + "ce_orig": 0.5109738111495972, + "epoch": 0.4704867352074197, + "kl_loss": 0.09026513993740082, + "loss_ib": 0.001190566224977374, + "step": 1636 + }, + { + "ce_ib": 4.467174053192139, + "ce_orig": 0.8216496109962463, + "epoch": 0.4704867352074197, + "kl_loss": 0.06287071853876114, + "loss_ib": 0.001075424486771226, + "step": 1636 + }, + { + "ce_ib": 6.510900497436523, + "ce_orig": 1.0002672672271729, + "epoch": 0.4707743187863973, + "kl_loss": 0.10140696167945862, + "loss_ib": 0.0016651597106829286, + "step": 1637 + }, + { + "ce_ib": 3.403122663497925, + "ce_orig": 0.4351435899734497, + "epoch": 0.4707743187863973, + "kl_loss": 0.0862436294555664, + "loss_ib": 0.0012027485063299537, + "step": 1637 + }, + { + "ce_ib": 4.648889064788818, + "ce_orig": 0.8207218050956726, + "epoch": 0.4707743187863973, + "kl_loss": 0.06439683586359024, + "loss_ib": 0.0011088572209700942, + "step": 1637 + }, + { + "ce_ib": 4.900266647338867, + "ce_orig": 1.042765736579895, + "epoch": 0.4707743187863973, + "kl_loss": 0.08767692744731903, + "loss_ib": 0.0013667958555743098, + "step": 1637 + }, + { + "ce_ib": 3.3974955081939697, + "ce_orig": 0.6899465918540955, + "epoch": 0.47106190236537493, + "kl_loss": 0.042887695133686066, + "loss_ib": 0.0007686264580115676, + "step": 1638 + }, + { + "ce_ib": 7.088229656219482, + "ce_orig": 1.1895263195037842, + "epoch": 0.47106190236537493, + "kl_loss": 0.1299183964729309, + "loss_ib": 0.0020080069079995155, + "step": 1638 + }, + { + "ce_ib": 5.504932403564453, + "ce_orig": 0.6148539185523987, + "epoch": 0.47106190236537493, + "kl_loss": 0.11039682477712631, + "loss_ib": 0.0016544614918529987, + "step": 1638 + }, + { + "ce_ib": 5.1553635597229, + "ce_orig": 0.651860237121582, + "epoch": 0.47106190236537493, + "kl_loss": 0.10196275264024734, + "loss_ib": 0.0015351638430729508, + "step": 1638 + }, + { + "ce_ib": 4.588486194610596, + "ce_orig": 0.8320633172988892, + "epoch": 0.47134948594435255, + "kl_loss": 0.06154872477054596, + "loss_ib": 0.0010743358870968223, + "step": 1639 + }, + { + "ce_ib": 5.462040901184082, + "ce_orig": 0.5526759028434753, + "epoch": 0.47134948594435255, + "kl_loss": 0.2662752568721771, + "loss_ib": 0.0032089566811919212, + "step": 1639 + }, + { + "ce_ib": 5.113626480102539, + "ce_orig": 0.8517841100692749, + "epoch": 0.47134948594435255, + "kl_loss": 0.08039755374193192, + "loss_ib": 0.0013153381878510118, + "step": 1639 + }, + { + "ce_ib": 4.003795146942139, + "ce_orig": 0.6875722408294678, + "epoch": 0.47134948594435255, + "kl_loss": 0.06065041199326515, + "loss_ib": 0.001006883685477078, + "step": 1639 + }, + { + "epoch": 0.47163706952333023, + "grad_norm": 0.10183624178171158, + "learning_rate": 4.793028253763633e-05, + "loss": 0.8509, + "step": 1640 + }, + { + "ce_ib": 3.141430377960205, + "ce_orig": 0.3879804015159607, + "epoch": 0.47163706952333023, + "kl_loss": 0.11613143980503082, + "loss_ib": 0.0014754573348909616, + "step": 1640 + }, + { + "ce_ib": 6.323172092437744, + "ce_orig": 1.1947147846221924, + "epoch": 0.47163706952333023, + "kl_loss": 0.12802723050117493, + "loss_ib": 0.0019125895341858268, + "step": 1640 + }, + { + "ce_ib": 5.756337642669678, + "ce_orig": 0.9178058505058289, + "epoch": 0.47163706952333023, + "kl_loss": 0.10986852645874023, + "loss_ib": 0.0016743190353736281, + "step": 1640 + }, + { + "ce_ib": 5.321605682373047, + "ce_orig": 1.2204010486602783, + "epoch": 0.47163706952333023, + "kl_loss": 0.0846148282289505, + "loss_ib": 0.0013783087488263845, + "step": 1640 + }, + { + "ce_ib": 1.0997956991195679, + "ce_orig": 0.09497205913066864, + "epoch": 0.47192465310230786, + "kl_loss": 0.21477143466472626, + "loss_ib": 0.0022576937917619944, + "step": 1641 + }, + { + "ce_ib": 5.128005027770996, + "ce_orig": 0.5250779986381531, + "epoch": 0.47192465310230786, + "kl_loss": 0.09437090158462524, + "loss_ib": 0.001456509460695088, + "step": 1641 + }, + { + "ce_ib": 6.079080104827881, + "ce_orig": 1.018676519393921, + "epoch": 0.47192465310230786, + "kl_loss": 0.0800662636756897, + "loss_ib": 0.0014085706789046526, + "step": 1641 + }, + { + "ce_ib": 5.389089107513428, + "ce_orig": 0.8359324932098389, + "epoch": 0.47192465310230786, + "kl_loss": 0.07742631435394287, + "loss_ib": 0.0013131719315424562, + "step": 1641 + }, + { + "ce_ib": 4.561464309692383, + "ce_orig": 1.0369048118591309, + "epoch": 0.4722122366812855, + "kl_loss": 0.07179403305053711, + "loss_ib": 0.0011740867048501968, + "step": 1642 + }, + { + "ce_ib": 4.821961402893066, + "ce_orig": 1.0121508836746216, + "epoch": 0.4722122366812855, + "kl_loss": 0.058352645486593246, + "loss_ib": 0.001065722550265491, + "step": 1642 + }, + { + "ce_ib": 6.181014537811279, + "ce_orig": 1.0154244899749756, + "epoch": 0.4722122366812855, + "kl_loss": 0.11048468947410583, + "loss_ib": 0.0017229481600224972, + "step": 1642 + }, + { + "ce_ib": 1.1775178909301758, + "ce_orig": 0.16251279413700104, + "epoch": 0.4722122366812855, + "kl_loss": 0.20156028866767883, + "loss_ib": 0.0021333545446395874, + "step": 1642 + }, + { + "ce_ib": 3.348700761795044, + "ce_orig": 0.34774312376976013, + "epoch": 0.47249982026026316, + "kl_loss": 0.07763587683439255, + "loss_ib": 0.0011112288339063525, + "step": 1643 + }, + { + "ce_ib": 6.343746662139893, + "ce_orig": 0.5521023869514465, + "epoch": 0.47249982026026316, + "kl_loss": 0.14622247219085693, + "loss_ib": 0.0020965992007404566, + "step": 1643 + }, + { + "ce_ib": 4.353401184082031, + "ce_orig": 0.712981641292572, + "epoch": 0.47249982026026316, + "kl_loss": 0.10432441532611847, + "loss_ib": 0.0014785842504352331, + "step": 1643 + }, + { + "ce_ib": 4.6726393699646, + "ce_orig": 0.9165730476379395, + "epoch": 0.47249982026026316, + "kl_loss": 0.10064257681369781, + "loss_ib": 0.0014736896846443415, + "step": 1643 + }, + { + "ce_ib": 5.039916038513184, + "ce_orig": 0.9508856534957886, + "epoch": 0.4727874038392408, + "kl_loss": 0.054173268377780914, + "loss_ib": 0.0010457242606207728, + "step": 1644 + }, + { + "ce_ib": 4.8029046058654785, + "ce_orig": 0.9531353116035461, + "epoch": 0.4727874038392408, + "kl_loss": 0.08584731072187424, + "loss_ib": 0.0013387635117396712, + "step": 1644 + }, + { + "ce_ib": 4.031595230102539, + "ce_orig": 0.9436784982681274, + "epoch": 0.4727874038392408, + "kl_loss": 0.10451234877109528, + "loss_ib": 0.0014482829719781876, + "step": 1644 + }, + { + "ce_ib": 7.845670223236084, + "ce_orig": 1.2929906845092773, + "epoch": 0.4727874038392408, + "kl_loss": 0.10712304711341858, + "loss_ib": 0.001855797483585775, + "step": 1644 + }, + { + "epoch": 0.4730749874182184, + "grad_norm": 0.09625625610351562, + "learning_rate": 4.791479526334427e-05, + "loss": 0.873, + "step": 1645 + }, + { + "ce_ib": 4.172504425048828, + "ce_orig": 0.8137608766555786, + "epoch": 0.4730749874182184, + "kl_loss": 0.062310680747032166, + "loss_ib": 0.0010403571650385857, + "step": 1645 + }, + { + "ce_ib": 5.008554458618164, + "ce_orig": 0.37394434213638306, + "epoch": 0.4730749874182184, + "kl_loss": 0.15815435349941254, + "loss_ib": 0.002082398859784007, + "step": 1645 + }, + { + "ce_ib": 4.493655681610107, + "ce_orig": 0.38105231523513794, + "epoch": 0.4730749874182184, + "kl_loss": 0.09856106340885162, + "loss_ib": 0.0014349761186167598, + "step": 1645 + }, + { + "ce_ib": 6.357497692108154, + "ce_orig": 0.5087502598762512, + "epoch": 0.4730749874182184, + "kl_loss": 0.12408942729234695, + "loss_ib": 0.0018766439752653241, + "step": 1645 + }, + { + "ce_ib": 7.306758880615234, + "ce_orig": 1.0328766107559204, + "epoch": 0.4733625709971961, + "kl_loss": 0.09654441475868225, + "loss_ib": 0.0016961200162768364, + "step": 1646 + }, + { + "ce_ib": 7.362045764923096, + "ce_orig": 1.3497101068496704, + "epoch": 0.4733625709971961, + "kl_loss": 0.12016648054122925, + "loss_ib": 0.0019378693541511893, + "step": 1646 + }, + { + "ce_ib": 8.575414657592773, + "ce_orig": 1.6906617879867554, + "epoch": 0.4733625709971961, + "kl_loss": 0.11888878792524338, + "loss_ib": 0.0020464290864765644, + "step": 1646 + }, + { + "ce_ib": 4.022438049316406, + "ce_orig": 0.4592558741569519, + "epoch": 0.4733625709971961, + "kl_loss": 0.12091752886772156, + "loss_ib": 0.0016114190220832825, + "step": 1646 + }, + { + "ce_ib": 3.1051573753356934, + "ce_orig": 0.5143097639083862, + "epoch": 0.4736501545761737, + "kl_loss": 0.07025837153196335, + "loss_ib": 0.0010130993323400617, + "step": 1647 + }, + { + "ce_ib": 6.405169486999512, + "ce_orig": 0.8632025718688965, + "epoch": 0.4736501545761737, + "kl_loss": 0.06118062138557434, + "loss_ib": 0.0012523230398073792, + "step": 1647 + }, + { + "ce_ib": 7.520890235900879, + "ce_orig": 1.2296860218048096, + "epoch": 0.4736501545761737, + "kl_loss": 0.10565954446792603, + "loss_ib": 0.001808684435673058, + "step": 1647 + }, + { + "ce_ib": 5.978287220001221, + "ce_orig": 1.0228112936019897, + "epoch": 0.4736501545761737, + "kl_loss": 0.19486770033836365, + "loss_ib": 0.0025465055368840694, + "step": 1647 + }, + { + "ce_ib": 6.4952712059021, + "ce_orig": 1.2738813161849976, + "epoch": 0.47393773815515133, + "kl_loss": 0.0883905291557312, + "loss_ib": 0.001533432281576097, + "step": 1648 + }, + { + "ce_ib": 4.549715995788574, + "ce_orig": 0.6895288228988647, + "epoch": 0.47393773815515133, + "kl_loss": 0.07594676315784454, + "loss_ib": 0.00121443928219378, + "step": 1648 + }, + { + "ce_ib": 4.325887680053711, + "ce_orig": 0.7023555636405945, + "epoch": 0.47393773815515133, + "kl_loss": 0.09429537504911423, + "loss_ib": 0.0013755426043644547, + "step": 1648 + }, + { + "ce_ib": 7.116502285003662, + "ce_orig": 0.8668026328086853, + "epoch": 0.47393773815515133, + "kl_loss": 0.1483638435602188, + "loss_ib": 0.0021952884271740913, + "step": 1648 + }, + { + "ce_ib": 4.741905689239502, + "ce_orig": 0.7644832134246826, + "epoch": 0.47422532173412896, + "kl_loss": 0.09129126369953156, + "loss_ib": 0.0013871031114831567, + "step": 1649 + }, + { + "ce_ib": 4.93929386138916, + "ce_orig": 0.6977857351303101, + "epoch": 0.47422532173412896, + "kl_loss": 0.11320854723453522, + "loss_ib": 0.001626014825887978, + "step": 1649 + }, + { + "ce_ib": 7.178192138671875, + "ce_orig": 1.1130963563919067, + "epoch": 0.47422532173412896, + "kl_loss": 0.12693922221660614, + "loss_ib": 0.0019872114062309265, + "step": 1649 + }, + { + "ce_ib": 6.942747116088867, + "ce_orig": 1.2567002773284912, + "epoch": 0.47422532173412896, + "kl_loss": 0.12031295895576477, + "loss_ib": 0.0018974042031913996, + "step": 1649 + }, + { + "epoch": 0.47451290531310664, + "grad_norm": 0.095345638692379, + "learning_rate": 4.789925278199178e-05, + "loss": 0.8476, + "step": 1650 + }, + { + "ce_ib": 4.848964214324951, + "ce_orig": 1.168644905090332, + "epoch": 0.47451290531310664, + "kl_loss": 0.06233830749988556, + "loss_ib": 0.0011082794517278671, + "step": 1650 + }, + { + "ce_ib": 3.9706501960754395, + "ce_orig": 0.6107508540153503, + "epoch": 0.47451290531310664, + "kl_loss": 0.08528577536344528, + "loss_ib": 0.0012499227887019515, + "step": 1650 + }, + { + "ce_ib": 3.900743007659912, + "ce_orig": 0.6841636896133423, + "epoch": 0.47451290531310664, + "kl_loss": 0.10971971601247787, + "loss_ib": 0.0014872715109959245, + "step": 1650 + }, + { + "ce_ib": 5.913122653961182, + "ce_orig": 0.9182349443435669, + "epoch": 0.47451290531310664, + "kl_loss": 0.11489193141460419, + "loss_ib": 0.0017402315279468894, + "step": 1650 + }, + { + "ce_ib": 4.609219551086426, + "ce_orig": 0.7213387489318848, + "epoch": 0.47480048889208426, + "kl_loss": 0.10644984245300293, + "loss_ib": 0.0015254203462973237, + "step": 1651 + }, + { + "ce_ib": 4.235653877258301, + "ce_orig": 0.5725576281547546, + "epoch": 0.47480048889208426, + "kl_loss": 0.1106574535369873, + "loss_ib": 0.0015301398234441876, + "step": 1651 + }, + { + "ce_ib": 7.059186935424805, + "ce_orig": 1.2867361307144165, + "epoch": 0.47480048889208426, + "kl_loss": 0.09026020765304565, + "loss_ib": 0.001608520746231079, + "step": 1651 + }, + { + "ce_ib": 6.8145551681518555, + "ce_orig": 1.1922791004180908, + "epoch": 0.47480048889208426, + "kl_loss": 0.09488432109355927, + "loss_ib": 0.0016302987933158875, + "step": 1651 + }, + { + "ce_ib": 5.427689552307129, + "ce_orig": 0.7169917821884155, + "epoch": 0.4750880724710619, + "kl_loss": 0.12568458914756775, + "loss_ib": 0.001799614867195487, + "step": 1652 + }, + { + "ce_ib": 5.780550956726074, + "ce_orig": 0.873707115650177, + "epoch": 0.4750880724710619, + "kl_loss": 0.07378093898296356, + "loss_ib": 0.0013158645015209913, + "step": 1652 + }, + { + "ce_ib": 2.7009575366973877, + "ce_orig": 0.412929892539978, + "epoch": 0.4750880724710619, + "kl_loss": 0.0539809949696064, + "loss_ib": 0.0008099056431092322, + "step": 1652 + }, + { + "ce_ib": 4.361164569854736, + "ce_orig": 0.7267390489578247, + "epoch": 0.4750880724710619, + "kl_loss": 0.10404205322265625, + "loss_ib": 0.0014765369705855846, + "step": 1652 + }, + { + "ce_ib": 5.099285125732422, + "ce_orig": 1.0923534631729126, + "epoch": 0.47537565605003956, + "kl_loss": 0.08385036885738373, + "loss_ib": 0.0013484321534633636, + "step": 1653 + }, + { + "ce_ib": 3.958472967147827, + "ce_orig": 0.7816349864006042, + "epoch": 0.47537565605003956, + "kl_loss": 0.07238197326660156, + "loss_ib": 0.0011196669656783342, + "step": 1653 + }, + { + "ce_ib": 4.758826732635498, + "ce_orig": 0.7682834267616272, + "epoch": 0.47537565605003956, + "kl_loss": 0.19518451392650604, + "loss_ib": 0.0024277279153466225, + "step": 1653 + }, + { + "ce_ib": 5.606645584106445, + "ce_orig": 0.8202415108680725, + "epoch": 0.47537565605003956, + "kl_loss": 0.07514069974422455, + "loss_ib": 0.0013120714575052261, + "step": 1653 + }, + { + "ce_ib": 4.625110626220703, + "ce_orig": 0.829686164855957, + "epoch": 0.4756632396290172, + "kl_loss": 0.10468250513076782, + "loss_ib": 0.001509336056187749, + "step": 1654 + }, + { + "ce_ib": 4.022656440734863, + "ce_orig": 0.7199187278747559, + "epoch": 0.4756632396290172, + "kl_loss": 0.11242827028036118, + "loss_ib": 0.001526548177935183, + "step": 1654 + }, + { + "ce_ib": 7.125650405883789, + "ce_orig": 1.245455265045166, + "epoch": 0.4756632396290172, + "kl_loss": 0.12931214272975922, + "loss_ib": 0.002005686517804861, + "step": 1654 + }, + { + "ce_ib": 5.290366172790527, + "ce_orig": 0.6249306797981262, + "epoch": 0.4756632396290172, + "kl_loss": 0.08849038183689117, + "loss_ib": 0.0014139404520392418, + "step": 1654 + }, + { + "epoch": 0.4759508232079948, + "grad_norm": 0.09167289733886719, + "learning_rate": 4.788365513102431e-05, + "loss": 0.8607, + "step": 1655 + }, + { + "ce_ib": 5.522833824157715, + "ce_orig": 1.1913273334503174, + "epoch": 0.4759508232079948, + "kl_loss": 0.11503159999847412, + "loss_ib": 0.001702599343843758, + "step": 1655 + }, + { + "ce_ib": 6.553410530090332, + "ce_orig": 1.129356026649475, + "epoch": 0.4759508232079948, + "kl_loss": 0.09408903121948242, + "loss_ib": 0.0015962314791977406, + "step": 1655 + }, + { + "ce_ib": 5.140038013458252, + "ce_orig": 0.7515914440155029, + "epoch": 0.4759508232079948, + "kl_loss": 0.11353213340044022, + "loss_ib": 0.0016493251314386725, + "step": 1655 + }, + { + "ce_ib": 4.804584980010986, + "ce_orig": 0.6128144860267639, + "epoch": 0.4759508232079948, + "kl_loss": 0.09433731436729431, + "loss_ib": 0.0014238315634429455, + "step": 1655 + }, + { + "ce_ib": 3.4557955265045166, + "ce_orig": 0.3736017346382141, + "epoch": 0.4762384067869725, + "kl_loss": 0.1295512318611145, + "loss_ib": 0.0016410917742177844, + "step": 1656 + }, + { + "ce_ib": 6.099903106689453, + "ce_orig": 0.8652370572090149, + "epoch": 0.4762384067869725, + "kl_loss": 0.10870938748121262, + "loss_ib": 0.001697084167972207, + "step": 1656 + }, + { + "ce_ib": 5.205838680267334, + "ce_orig": 0.8259576559066772, + "epoch": 0.4762384067869725, + "kl_loss": 0.09861639887094498, + "loss_ib": 0.0015067479107528925, + "step": 1656 + }, + { + "ce_ib": 7.066460132598877, + "ce_orig": 1.2005068063735962, + "epoch": 0.4762384067869725, + "kl_loss": 0.12398439645767212, + "loss_ib": 0.0019464899087324739, + "step": 1656 + }, + { + "ce_ib": 3.521681785583496, + "ce_orig": 0.6802165508270264, + "epoch": 0.4765259903659501, + "kl_loss": 0.066031314432621, + "loss_ib": 0.0010124812833964825, + "step": 1657 + }, + { + "ce_ib": 5.094246864318848, + "ce_orig": 0.7290297150611877, + "epoch": 0.4765259903659501, + "kl_loss": 0.05646766349673271, + "loss_ib": 0.001074101310223341, + "step": 1657 + }, + { + "ce_ib": 4.200117588043213, + "ce_orig": 0.7326703071594238, + "epoch": 0.4765259903659501, + "kl_loss": 0.11678765714168549, + "loss_ib": 0.0015878882259130478, + "step": 1657 + }, + { + "ce_ib": 4.12151575088501, + "ce_orig": 0.3739613890647888, + "epoch": 0.4765259903659501, + "kl_loss": 0.10518161207437515, + "loss_ib": 0.0014639677247032523, + "step": 1657 + }, + { + "ce_ib": 3.98354434967041, + "ce_orig": 0.7937265634536743, + "epoch": 0.47681357394492774, + "kl_loss": 0.13152624666690826, + "loss_ib": 0.0017136167734861374, + "step": 1658 + }, + { + "ce_ib": 3.7774970531463623, + "ce_orig": 0.6829609870910645, + "epoch": 0.47681357394492774, + "kl_loss": 0.05601814389228821, + "loss_ib": 0.0009379310649819672, + "step": 1658 + }, + { + "ce_ib": 3.846912384033203, + "ce_orig": 0.614590585231781, + "epoch": 0.47681357394492774, + "kl_loss": 0.07806535810232162, + "loss_ib": 0.0011653448455035686, + "step": 1658 + }, + { + "ce_ib": 5.443789482116699, + "ce_orig": 0.9188669919967651, + "epoch": 0.47681357394492774, + "kl_loss": 0.07750996947288513, + "loss_ib": 0.0013194786151871085, + "step": 1658 + }, + { + "ce_ib": 3.84982967376709, + "ce_orig": 0.30002501606941223, + "epoch": 0.47710115752390536, + "kl_loss": 0.11256584525108337, + "loss_ib": 0.0015106414211913943, + "step": 1659 + }, + { + "ce_ib": 6.077197074890137, + "ce_orig": 0.9570844173431396, + "epoch": 0.47710115752390536, + "kl_loss": 0.08428899943828583, + "loss_ib": 0.0014506096486002207, + "step": 1659 + }, + { + "ce_ib": 5.84196138381958, + "ce_orig": 0.8461624979972839, + "epoch": 0.47710115752390536, + "kl_loss": 0.11088182032108307, + "loss_ib": 0.0016930142883211374, + "step": 1659 + }, + { + "ce_ib": 5.181061267852783, + "ce_orig": 0.8407763242721558, + "epoch": 0.47710115752390536, + "kl_loss": 0.142070934176445, + "loss_ib": 0.001938815345056355, + "step": 1659 + }, + { + "epoch": 0.47738874110288304, + "grad_norm": 0.1021294891834259, + "learning_rate": 4.786800234802022e-05, + "loss": 0.8165, + "step": 1660 + }, + { + "ce_ib": 5.705546855926514, + "ce_orig": 0.9660429954528809, + "epoch": 0.47738874110288304, + "kl_loss": 0.2579490542411804, + "loss_ib": 0.003150045173242688, + "step": 1660 + }, + { + "ce_ib": 7.32716703414917, + "ce_orig": 1.3031272888183594, + "epoch": 0.47738874110288304, + "kl_loss": 0.08098369836807251, + "loss_ib": 0.0015425535384565592, + "step": 1660 + }, + { + "ce_ib": 5.782827854156494, + "ce_orig": 0.688116192817688, + "epoch": 0.47738874110288304, + "kl_loss": 0.09562751650810242, + "loss_ib": 0.0015345579013228416, + "step": 1660 + }, + { + "ce_ib": 5.189853668212891, + "ce_orig": 0.6560202240943909, + "epoch": 0.47738874110288304, + "kl_loss": 0.07133083045482635, + "loss_ib": 0.0012322936672717333, + "step": 1660 + }, + { + "ce_ib": 4.019514560699463, + "ce_orig": 0.6960217952728271, + "epoch": 0.47767632468186066, + "kl_loss": 0.06075235456228256, + "loss_ib": 0.0010094749741256237, + "step": 1661 + }, + { + "ce_ib": 3.7688229084014893, + "ce_orig": 0.7833560109138489, + "epoch": 0.47767632468186066, + "kl_loss": 0.06938940286636353, + "loss_ib": 0.00107077625580132, + "step": 1661 + }, + { + "ce_ib": 5.573322772979736, + "ce_orig": 0.861670970916748, + "epoch": 0.47767632468186066, + "kl_loss": 0.18410144746303558, + "loss_ib": 0.0023983465507626534, + "step": 1661 + }, + { + "ce_ib": 3.3330554962158203, + "ce_orig": 0.7142177224159241, + "epoch": 0.47767632468186066, + "kl_loss": 0.08667472004890442, + "loss_ib": 0.0012000527931377292, + "step": 1661 + }, + { + "ce_ib": 5.8003621101379395, + "ce_orig": 0.4947451055049896, + "epoch": 0.4779639082608383, + "kl_loss": 0.15392550826072693, + "loss_ib": 0.00211929134093225, + "step": 1662 + }, + { + "ce_ib": 6.053724765777588, + "ce_orig": 1.1161195039749146, + "epoch": 0.4779639082608383, + "kl_loss": 0.10841212421655655, + "loss_ib": 0.0016894936561584473, + "step": 1662 + }, + { + "ce_ib": 3.0097856521606445, + "ce_orig": 0.5619295239448547, + "epoch": 0.4779639082608383, + "kl_loss": 0.09309350699186325, + "loss_ib": 0.0012319135712459683, + "step": 1662 + }, + { + "ce_ib": 5.506343364715576, + "ce_orig": 0.8677623271942139, + "epoch": 0.4779639082608383, + "kl_loss": 0.12412496656179428, + "loss_ib": 0.001791883958503604, + "step": 1662 + }, + { + "ce_ib": 3.2884507179260254, + "ce_orig": 0.745903730392456, + "epoch": 0.47825149183981597, + "kl_loss": 0.055387601256370544, + "loss_ib": 0.0008827210986055434, + "step": 1663 + }, + { + "ce_ib": 4.071484565734863, + "ce_orig": 0.7880527377128601, + "epoch": 0.47825149183981597, + "kl_loss": 0.06962347775697708, + "loss_ib": 0.00110338325612247, + "step": 1663 + }, + { + "ce_ib": 2.957454204559326, + "ce_orig": 0.6899718642234802, + "epoch": 0.47825149183981597, + "kl_loss": 0.06052111089229584, + "loss_ib": 0.0009009565110318363, + "step": 1663 + }, + { + "ce_ib": 5.684534549713135, + "ce_orig": 1.3355650901794434, + "epoch": 0.47825149183981597, + "kl_loss": 0.07650530338287354, + "loss_ib": 0.0013335064286366105, + "step": 1663 + }, + { + "ce_ib": 5.858797073364258, + "ce_orig": 1.1899547576904297, + "epoch": 0.4785390754187936, + "kl_loss": 0.0902283638715744, + "loss_ib": 0.0014881632523611188, + "step": 1664 + }, + { + "ce_ib": 5.383486747741699, + "ce_orig": 0.7965614795684814, + "epoch": 0.4785390754187936, + "kl_loss": 0.0925852507352829, + "loss_ib": 0.0014642011374235153, + "step": 1664 + }, + { + "ce_ib": 5.01948881149292, + "ce_orig": 0.6902015209197998, + "epoch": 0.4785390754187936, + "kl_loss": 0.11299136281013489, + "loss_ib": 0.0016318624839186668, + "step": 1664 + }, + { + "ce_ib": 2.9922852516174316, + "ce_orig": 0.6283895373344421, + "epoch": 0.4785390754187936, + "kl_loss": 0.06566999852657318, + "loss_ib": 0.0009559284662827849, + "step": 1664 + }, + { + "epoch": 0.4788266589977712, + "grad_norm": 0.10538511723279953, + "learning_rate": 4.785229447069069e-05, + "loss": 0.8265, + "step": 1665 + }, + { + "ce_ib": 4.233684062957764, + "ce_orig": 0.551160991191864, + "epoch": 0.4788266589977712, + "kl_loss": 0.10398277640342712, + "loss_ib": 0.0014631961239501834, + "step": 1665 + }, + { + "ce_ib": 4.18122673034668, + "ce_orig": 0.725835919380188, + "epoch": 0.4788266589977712, + "kl_loss": 0.06634792685508728, + "loss_ib": 0.0010816019494086504, + "step": 1665 + }, + { + "ce_ib": 7.011388778686523, + "ce_orig": 1.0992710590362549, + "epoch": 0.4788266589977712, + "kl_loss": 0.06781954318284988, + "loss_ib": 0.001379334251396358, + "step": 1665 + }, + { + "ce_ib": 4.543511867523193, + "ce_orig": 0.9829037189483643, + "epoch": 0.4788266589977712, + "kl_loss": 0.08306319266557693, + "loss_ib": 0.0012849831255152822, + "step": 1665 + }, + { + "ce_ib": 2.974393367767334, + "ce_orig": 0.5884367227554321, + "epoch": 0.4791142425767489, + "kl_loss": 0.06884880363941193, + "loss_ib": 0.0009859272977337241, + "step": 1666 + }, + { + "ce_ib": 6.110385894775391, + "ce_orig": 0.7310081720352173, + "epoch": 0.4791142425767489, + "kl_loss": 0.15741804242134094, + "loss_ib": 0.002185218967497349, + "step": 1666 + }, + { + "ce_ib": 4.982475280761719, + "ce_orig": 0.6903678178787231, + "epoch": 0.4791142425767489, + "kl_loss": 0.11614435911178589, + "loss_ib": 0.0016596909845247865, + "step": 1666 + }, + { + "ce_ib": 3.332186698913574, + "ce_orig": 0.6124793887138367, + "epoch": 0.4791142425767489, + "kl_loss": 0.08775538206100464, + "loss_ib": 0.0012107724323868752, + "step": 1666 + }, + { + "ce_ib": 5.119314670562744, + "ce_orig": 0.957892894744873, + "epoch": 0.4794018261557265, + "kl_loss": 0.08951407670974731, + "loss_ib": 0.0014070721808820963, + "step": 1667 + }, + { + "ce_ib": 7.730226516723633, + "ce_orig": 1.1394848823547363, + "epoch": 0.4794018261557265, + "kl_loss": 0.0865597277879715, + "loss_ib": 0.0016386198112741113, + "step": 1667 + }, + { + "ce_ib": 3.7020583152770996, + "ce_orig": 0.6443443298339844, + "epoch": 0.4794018261557265, + "kl_loss": 0.10694558173418045, + "loss_ib": 0.0014396616024896502, + "step": 1667 + }, + { + "ce_ib": 3.728663206100464, + "ce_orig": 0.667158305644989, + "epoch": 0.4794018261557265, + "kl_loss": 0.09213714301586151, + "loss_ib": 0.0012942376779392362, + "step": 1667 + }, + { + "ce_ib": 7.817596912384033, + "ce_orig": 1.670931100845337, + "epoch": 0.47968940973470414, + "kl_loss": 0.09541083127260208, + "loss_ib": 0.0017358679324388504, + "step": 1668 + }, + { + "ce_ib": 3.365189552307129, + "ce_orig": 0.444670170545578, + "epoch": 0.47968940973470414, + "kl_loss": 0.09522607922554016, + "loss_ib": 0.0012887796619907022, + "step": 1668 + }, + { + "ce_ib": 4.968541622161865, + "ce_orig": 0.8817481398582458, + "epoch": 0.47968940973470414, + "kl_loss": 0.08904688060283661, + "loss_ib": 0.001387322903610766, + "step": 1668 + }, + { + "ce_ib": 2.270416736602783, + "ce_orig": 0.26581287384033203, + "epoch": 0.47968940973470414, + "kl_loss": 0.18085765838623047, + "loss_ib": 0.002035618294030428, + "step": 1668 + }, + { + "ce_ib": 9.990456581115723, + "ce_orig": 0.47471708059310913, + "epoch": 0.47997699331368177, + "kl_loss": 0.0820496454834938, + "loss_ib": 0.001819542027078569, + "step": 1669 + }, + { + "ce_ib": 8.058823585510254, + "ce_orig": 1.1565622091293335, + "epoch": 0.47997699331368177, + "kl_loss": 0.12369002401828766, + "loss_ib": 0.0020427824929356575, + "step": 1669 + }, + { + "ce_ib": 4.762992858886719, + "ce_orig": 0.9343037009239197, + "epoch": 0.47997699331368177, + "kl_loss": 0.13188621401786804, + "loss_ib": 0.0017951612826436758, + "step": 1669 + }, + { + "ce_ib": 4.859309673309326, + "ce_orig": 0.7769688367843628, + "epoch": 0.47997699331368177, + "kl_loss": 0.07158667594194412, + "loss_ib": 0.001201797742396593, + "step": 1669 + }, + { + "epoch": 0.48026457689265944, + "grad_norm": 0.09808161854743958, + "learning_rate": 4.7836531536879663e-05, + "loss": 0.8038, + "step": 1670 + }, + { + "ce_ib": 6.418116569519043, + "ce_orig": 1.221234679222107, + "epoch": 0.48026457689265944, + "kl_loss": 0.10923849046230316, + "loss_ib": 0.001734196557663381, + "step": 1670 + }, + { + "ce_ib": 5.431075572967529, + "ce_orig": 0.8310892581939697, + "epoch": 0.48026457689265944, + "kl_loss": 0.36797434091567993, + "loss_ib": 0.004222850780934095, + "step": 1670 + }, + { + "ce_ib": 5.254874229431152, + "ce_orig": 0.8385922908782959, + "epoch": 0.48026457689265944, + "kl_loss": 0.10007026791572571, + "loss_ib": 0.0015261899679899216, + "step": 1670 + }, + { + "ce_ib": 8.395333290100098, + "ce_orig": 1.335869550704956, + "epoch": 0.48026457689265944, + "kl_loss": 0.11917714774608612, + "loss_ib": 0.0020313048735260963, + "step": 1670 + }, + { + "ce_ib": 3.179215669631958, + "ce_orig": 0.46057796478271484, + "epoch": 0.48055216047163707, + "kl_loss": 0.12228046357631683, + "loss_ib": 0.00154072605073452, + "step": 1671 + }, + { + "ce_ib": 3.9307398796081543, + "ce_orig": 0.836270272731781, + "epoch": 0.48055216047163707, + "kl_loss": 0.08138067275285721, + "loss_ib": 0.0012068806681782007, + "step": 1671 + }, + { + "ce_ib": 3.9418482780456543, + "ce_orig": 0.8026268482208252, + "epoch": 0.48055216047163707, + "kl_loss": 0.06703633815050125, + "loss_ib": 0.0010645481524989009, + "step": 1671 + }, + { + "ce_ib": 4.330459117889404, + "ce_orig": 0.701189398765564, + "epoch": 0.48055216047163707, + "kl_loss": 0.08540654182434082, + "loss_ib": 0.0012871113140136003, + "step": 1671 + }, + { + "ce_ib": 4.894429683685303, + "ce_orig": 0.9220350384712219, + "epoch": 0.4808397440506147, + "kl_loss": 0.055884700268507004, + "loss_ib": 0.0010482899378985167, + "step": 1672 + }, + { + "ce_ib": 4.995659351348877, + "ce_orig": 0.8572059273719788, + "epoch": 0.4808397440506147, + "kl_loss": 0.1277742087841034, + "loss_ib": 0.001777307945303619, + "step": 1672 + }, + { + "ce_ib": 6.21935510635376, + "ce_orig": 1.418296217918396, + "epoch": 0.4808397440506147, + "kl_loss": 0.07440599799156189, + "loss_ib": 0.0013659953838214278, + "step": 1672 + }, + { + "ce_ib": 3.7277157306671143, + "ce_orig": 0.8182400465011597, + "epoch": 0.4808397440506147, + "kl_loss": 0.08625528216362, + "loss_ib": 0.0012353243073448539, + "step": 1672 + }, + { + "ce_ib": 3.5139079093933105, + "ce_orig": 0.3413704037666321, + "epoch": 0.48112732762959237, + "kl_loss": 0.14054255187511444, + "loss_ib": 0.0017568162875249982, + "step": 1673 + }, + { + "ce_ib": 4.961234092712402, + "ce_orig": 0.7637052536010742, + "epoch": 0.48112732762959237, + "kl_loss": 0.11048056930303574, + "loss_ib": 0.001600929070264101, + "step": 1673 + }, + { + "ce_ib": 4.504831314086914, + "ce_orig": 0.9245123267173767, + "epoch": 0.48112732762959237, + "kl_loss": 0.06040796637535095, + "loss_ib": 0.0010545627446845174, + "step": 1673 + }, + { + "ce_ib": 7.99785852432251, + "ce_orig": 1.2470022439956665, + "epoch": 0.48112732762959237, + "kl_loss": 0.13159608840942383, + "loss_ib": 0.0021157467272132635, + "step": 1673 + }, + { + "ce_ib": 5.762369155883789, + "ce_orig": 0.6469131112098694, + "epoch": 0.48141491120857, + "kl_loss": 0.04331384599208832, + "loss_ib": 0.0010093753226101398, + "step": 1674 + }, + { + "ce_ib": 5.164357662200928, + "ce_orig": 0.7227626442909241, + "epoch": 0.48141491120857, + "kl_loss": 0.1831488311290741, + "loss_ib": 0.0023479240480810404, + "step": 1674 + }, + { + "ce_ib": 4.61436653137207, + "ce_orig": 0.7179257273674011, + "epoch": 0.48141491120857, + "kl_loss": 0.12068000435829163, + "loss_ib": 0.0016682366840541363, + "step": 1674 + }, + { + "ce_ib": 3.61407732963562, + "ce_orig": 0.6223304867744446, + "epoch": 0.48141491120857, + "kl_loss": 0.07009229809045792, + "loss_ib": 0.0010623306734487414, + "step": 1674 + }, + { + "epoch": 0.4817024947875476, + "grad_norm": 0.10047975182533264, + "learning_rate": 4.7820713584563685e-05, + "loss": 0.8566, + "step": 1675 + }, + { + "ce_ib": 5.23590612411499, + "ce_orig": 0.6879154443740845, + "epoch": 0.4817024947875476, + "kl_loss": 0.11426572501659393, + "loss_ib": 0.001666247844696045, + "step": 1675 + }, + { + "ce_ib": 7.617326259613037, + "ce_orig": 1.0653176307678223, + "epoch": 0.4817024947875476, + "kl_loss": 0.11999130249023438, + "loss_ib": 0.001961645670235157, + "step": 1675 + }, + { + "ce_ib": 3.478483200073242, + "ce_orig": 0.6705785393714905, + "epoch": 0.4817024947875476, + "kl_loss": 0.08594256639480591, + "loss_ib": 0.001207273919135332, + "step": 1675 + }, + { + "ce_ib": 4.602646350860596, + "ce_orig": 0.8192442655563354, + "epoch": 0.4817024947875476, + "kl_loss": 0.06830959022045135, + "loss_ib": 0.001143360510468483, + "step": 1675 + }, + { + "ce_ib": 5.377559185028076, + "ce_orig": 0.7111417055130005, + "epoch": 0.4819900783665253, + "kl_loss": 0.09001705050468445, + "loss_ib": 0.0014379264321178198, + "step": 1676 + }, + { + "ce_ib": 6.667327404022217, + "ce_orig": 1.3010003566741943, + "epoch": 0.4819900783665253, + "kl_loss": 0.10800987482070923, + "loss_ib": 0.0017468314617872238, + "step": 1676 + }, + { + "ce_ib": 7.093115329742432, + "ce_orig": 1.463265299797058, + "epoch": 0.4819900783665253, + "kl_loss": 0.12157618254423141, + "loss_ib": 0.0019250732148066163, + "step": 1676 + }, + { + "ce_ib": 3.082455635070801, + "ce_orig": 0.7444308996200562, + "epoch": 0.4819900783665253, + "kl_loss": 0.04221316799521446, + "loss_ib": 0.0007303772144950926, + "step": 1676 + }, + { + "ce_ib": 4.68575382232666, + "ce_orig": 0.8952086567878723, + "epoch": 0.4822776619455029, + "kl_loss": 0.10230021178722382, + "loss_ib": 0.0014915774809196591, + "step": 1677 + }, + { + "ce_ib": 4.940974712371826, + "ce_orig": 0.6075985431671143, + "epoch": 0.4822776619455029, + "kl_loss": 0.061147548258304596, + "loss_ib": 0.0011055729119107127, + "step": 1677 + }, + { + "ce_ib": 6.66244649887085, + "ce_orig": 1.0973036289215088, + "epoch": 0.4822776619455029, + "kl_loss": 0.13392263650894165, + "loss_ib": 0.0020054709166288376, + "step": 1677 + }, + { + "ce_ib": 6.53109884262085, + "ce_orig": 1.0314081907272339, + "epoch": 0.4822776619455029, + "kl_loss": 0.16137847304344177, + "loss_ib": 0.0022668945603072643, + "step": 1677 + }, + { + "ce_ib": 5.713950157165527, + "ce_orig": 0.7363423109054565, + "epoch": 0.48256524552448055, + "kl_loss": 0.13193345069885254, + "loss_ib": 0.0018907295307144523, + "step": 1678 + }, + { + "ce_ib": 3.2133963108062744, + "ce_orig": 0.42694565653800964, + "epoch": 0.48256524552448055, + "kl_loss": 0.07996172457933426, + "loss_ib": 0.0011209568474441767, + "step": 1678 + }, + { + "ce_ib": 7.477939605712891, + "ce_orig": 1.4408272504806519, + "epoch": 0.48256524552448055, + "kl_loss": 0.13289615511894226, + "loss_ib": 0.0020767555106431246, + "step": 1678 + }, + { + "ce_ib": 2.4067041873931885, + "ce_orig": 0.4999692142009735, + "epoch": 0.48256524552448055, + "kl_loss": 0.08751370012760162, + "loss_ib": 0.0011158074485138059, + "step": 1678 + }, + { + "ce_ib": 3.7097055912017822, + "ce_orig": 0.8902637362480164, + "epoch": 0.48285282910345817, + "kl_loss": 0.16290977597236633, + "loss_ib": 0.002000068314373493, + "step": 1679 + }, + { + "ce_ib": 3.931806802749634, + "ce_orig": 0.7523521780967712, + "epoch": 0.48285282910345817, + "kl_loss": 0.07110314816236496, + "loss_ib": 0.0011042121332138777, + "step": 1679 + }, + { + "ce_ib": 5.94912052154541, + "ce_orig": 1.1134450435638428, + "epoch": 0.48285282910345817, + "kl_loss": 0.09809355437755585, + "loss_ib": 0.0015758475055918097, + "step": 1679 + }, + { + "ce_ib": 7.01630163192749, + "ce_orig": 1.218076229095459, + "epoch": 0.48285282910345817, + "kl_loss": 0.10411044210195541, + "loss_ib": 0.0017427345737814903, + "step": 1679 + }, + { + "epoch": 0.48314041268243585, + "grad_norm": 0.08809898793697357, + "learning_rate": 4.780484065185188e-05, + "loss": 0.8953, + "step": 1680 + }, + { + "ce_ib": 6.792523384094238, + "ce_orig": 1.1839405298233032, + "epoch": 0.48314041268243585, + "kl_loss": 0.08424146473407745, + "loss_ib": 0.0015216668834909797, + "step": 1680 + }, + { + "ce_ib": 4.632278919219971, + "ce_orig": 0.9894521832466125, + "epoch": 0.48314041268243585, + "kl_loss": 0.08195150643587112, + "loss_ib": 0.0012827429454773664, + "step": 1680 + }, + { + "ce_ib": 8.20803165435791, + "ce_orig": 1.6578588485717773, + "epoch": 0.48314041268243585, + "kl_loss": 0.13085366785526276, + "loss_ib": 0.0021293398458510637, + "step": 1680 + }, + { + "ce_ib": 4.366178035736084, + "ce_orig": 0.6481921672821045, + "epoch": 0.48314041268243585, + "kl_loss": 0.11050732433795929, + "loss_ib": 0.0015416911337524652, + "step": 1680 + }, + { + "ce_ib": 9.377159118652344, + "ce_orig": 1.5477819442749023, + "epoch": 0.4834279962614135, + "kl_loss": 0.08732521533966064, + "loss_ib": 0.0018109680386260152, + "step": 1681 + }, + { + "ce_ib": 3.157066822052002, + "ce_orig": 0.32863619923591614, + "epoch": 0.4834279962614135, + "kl_loss": 0.08969905972480774, + "loss_ib": 0.0012126972433179617, + "step": 1681 + }, + { + "ce_ib": 6.606411933898926, + "ce_orig": 1.2873634099960327, + "epoch": 0.4834279962614135, + "kl_loss": 0.10148615390062332, + "loss_ib": 0.0016755026299506426, + "step": 1681 + }, + { + "ce_ib": 3.963296890258789, + "ce_orig": 0.4745732843875885, + "epoch": 0.4834279962614135, + "kl_loss": 0.15838183462619781, + "loss_ib": 0.001980148022994399, + "step": 1681 + }, + { + "ce_ib": 7.234645366668701, + "ce_orig": 1.3865464925765991, + "epoch": 0.4837155798403911, + "kl_loss": 0.08130612224340439, + "loss_ib": 0.0015365257859230042, + "step": 1682 + }, + { + "ce_ib": 3.5873725414276123, + "ce_orig": 0.3775164783000946, + "epoch": 0.4837155798403911, + "kl_loss": 0.21098384261131287, + "loss_ib": 0.0024685757234692574, + "step": 1682 + }, + { + "ce_ib": 4.682332515716553, + "ce_orig": 0.814209520816803, + "epoch": 0.4837155798403911, + "kl_loss": 0.09033824503421783, + "loss_ib": 0.0013716156827285886, + "step": 1682 + }, + { + "ce_ib": 5.839213848114014, + "ce_orig": 1.0977301597595215, + "epoch": 0.4837155798403911, + "kl_loss": 0.06113816797733307, + "loss_ib": 0.0011953030480071902, + "step": 1682 + }, + { + "ce_ib": 3.273890256881714, + "ce_orig": 0.547998309135437, + "epoch": 0.4840031634193688, + "kl_loss": 0.1411946415901184, + "loss_ib": 0.0017393353627994657, + "step": 1683 + }, + { + "ce_ib": 6.92445707321167, + "ce_orig": 1.0394126176834106, + "epoch": 0.4840031634193688, + "kl_loss": 0.10047905147075653, + "loss_ib": 0.001697236206382513, + "step": 1683 + }, + { + "ce_ib": 6.637228965759277, + "ce_orig": 1.302418828010559, + "epoch": 0.4840031634193688, + "kl_loss": 0.10971780121326447, + "loss_ib": 0.001760900835506618, + "step": 1683 + }, + { + "ce_ib": 5.007181644439697, + "ce_orig": 0.454929381608963, + "epoch": 0.4840031634193688, + "kl_loss": 0.17686998844146729, + "loss_ib": 0.002269417978823185, + "step": 1683 + }, + { + "ce_ib": 3.6803224086761475, + "ce_orig": 0.616387128829956, + "epoch": 0.4842907469983464, + "kl_loss": 0.08731701225042343, + "loss_ib": 0.0012412023497745395, + "step": 1684 + }, + { + "ce_ib": 3.859945058822632, + "ce_orig": 0.5916260480880737, + "epoch": 0.4842907469983464, + "kl_loss": 0.12433341145515442, + "loss_ib": 0.0016293285880237818, + "step": 1684 + }, + { + "ce_ib": 4.35489559173584, + "ce_orig": 1.012393593788147, + "epoch": 0.4842907469983464, + "kl_loss": 0.06735357642173767, + "loss_ib": 0.0011090253246948123, + "step": 1684 + }, + { + "ce_ib": 4.276301860809326, + "ce_orig": 0.7142661213874817, + "epoch": 0.4842907469983464, + "kl_loss": 0.07077422738075256, + "loss_ib": 0.0011353724403306842, + "step": 1684 + }, + { + "epoch": 0.484578330577324, + "grad_norm": 0.0964302197098732, + "learning_rate": 4.778891277698583e-05, + "loss": 0.8664, + "step": 1685 + }, + { + "ce_ib": 5.886646270751953, + "ce_orig": 1.1425445079803467, + "epoch": 0.484578330577324, + "kl_loss": 0.09462843090295792, + "loss_ib": 0.0015349489403888583, + "step": 1685 + }, + { + "ce_ib": 7.245325565338135, + "ce_orig": 1.016262412071228, + "epoch": 0.484578330577324, + "kl_loss": 0.058838725090026855, + "loss_ib": 0.001312919775955379, + "step": 1685 + }, + { + "ce_ib": 5.395687103271484, + "ce_orig": 0.910489022731781, + "epoch": 0.484578330577324, + "kl_loss": 0.12717774510383606, + "loss_ib": 0.0018113460391759872, + "step": 1685 + }, + { + "ce_ib": 5.621767520904541, + "ce_orig": 1.019993543624878, + "epoch": 0.484578330577324, + "kl_loss": 0.06101024150848389, + "loss_ib": 0.00117227912414819, + "step": 1685 + }, + { + "ce_ib": 6.252231597900391, + "ce_orig": 0.8595346808433533, + "epoch": 0.4848659141563017, + "kl_loss": 0.09245588630437851, + "loss_ib": 0.0015497819986194372, + "step": 1686 + }, + { + "ce_ib": 5.69130277633667, + "ce_orig": 1.0237202644348145, + "epoch": 0.4848659141563017, + "kl_loss": 0.12300124019384384, + "loss_ib": 0.0017991425702348351, + "step": 1686 + }, + { + "ce_ib": 6.671542644500732, + "ce_orig": 1.2952083349227905, + "epoch": 0.4848659141563017, + "kl_loss": 0.12281521409749985, + "loss_ib": 0.0018953063990920782, + "step": 1686 + }, + { + "ce_ib": 2.551654577255249, + "ce_orig": 0.4302632808685303, + "epoch": 0.4848659141563017, + "kl_loss": 0.05312460660934448, + "loss_ib": 0.000786411517765373, + "step": 1686 + }, + { + "ce_ib": 3.9692983627319336, + "ce_orig": 0.6493315100669861, + "epoch": 0.4851534977352793, + "kl_loss": 0.10558779537677765, + "loss_ib": 0.0014528078027069569, + "step": 1687 + }, + { + "ce_ib": 6.114811897277832, + "ce_orig": 0.6241535544395447, + "epoch": 0.4851534977352793, + "kl_loss": 0.09929528832435608, + "loss_ib": 0.0016044341027736664, + "step": 1687 + }, + { + "ce_ib": 7.906235218048096, + "ce_orig": 1.4687296152114868, + "epoch": 0.4851534977352793, + "kl_loss": 0.12695245444774628, + "loss_ib": 0.0020601481664925814, + "step": 1687 + }, + { + "ce_ib": 6.416369915008545, + "ce_orig": 1.1243771314620972, + "epoch": 0.4851534977352793, + "kl_loss": 0.07660981267690659, + "loss_ib": 0.0014077350497245789, + "step": 1687 + }, + { + "ce_ib": 3.6262400150299072, + "ce_orig": 0.6174125671386719, + "epoch": 0.48544108131425695, + "kl_loss": 0.05921046435832977, + "loss_ib": 0.0009547286317683756, + "step": 1688 + }, + { + "ce_ib": 6.6766510009765625, + "ce_orig": 0.8889259696006775, + "epoch": 0.48544108131425695, + "kl_loss": 0.14036715030670166, + "loss_ib": 0.0020713363774120808, + "step": 1688 + }, + { + "ce_ib": 6.474982738494873, + "ce_orig": 1.1360936164855957, + "epoch": 0.48544108131425695, + "kl_loss": 0.09956884384155273, + "loss_ib": 0.001643186784349382, + "step": 1688 + }, + { + "ce_ib": 5.548295021057129, + "ce_orig": 0.9558570981025696, + "epoch": 0.48544108131425695, + "kl_loss": 0.07600397616624832, + "loss_ib": 0.001314869150519371, + "step": 1688 + }, + { + "ce_ib": 5.768527507781982, + "ce_orig": 1.1330024003982544, + "epoch": 0.4857286648932346, + "kl_loss": 0.09641216695308685, + "loss_ib": 0.0015409743646159768, + "step": 1689 + }, + { + "ce_ib": 4.497453689575195, + "ce_orig": 0.6841451525688171, + "epoch": 0.4857286648932346, + "kl_loss": 0.12207835912704468, + "loss_ib": 0.0016705289017409086, + "step": 1689 + }, + { + "ce_ib": 6.134047031402588, + "ce_orig": 1.2061781883239746, + "epoch": 0.4857286648932346, + "kl_loss": 0.09529723227024078, + "loss_ib": 0.0015663770027458668, + "step": 1689 + }, + { + "ce_ib": 5.650447368621826, + "ce_orig": 1.1508811712265015, + "epoch": 0.4857286648932346, + "kl_loss": 0.10379251092672348, + "loss_ib": 0.0016029697144404054, + "step": 1689 + }, + { + "epoch": 0.48601624847221225, + "grad_norm": 0.09206751734018326, + "learning_rate": 4.7772929998339483e-05, + "loss": 0.8966, + "step": 1690 + }, + { + "ce_ib": 4.8727707862854, + "ce_orig": 0.7993532419204712, + "epoch": 0.48601624847221225, + "kl_loss": 0.1089472621679306, + "loss_ib": 0.0015767496079206467, + "step": 1690 + }, + { + "ce_ib": 2.993665933609009, + "ce_orig": 0.49995508790016174, + "epoch": 0.48601624847221225, + "kl_loss": 0.07151557505130768, + "loss_ib": 0.0010145222768187523, + "step": 1690 + }, + { + "ce_ib": 3.8113980293273926, + "ce_orig": 0.6894672513008118, + "epoch": 0.48601624847221225, + "kl_loss": 0.046646662056446075, + "loss_ib": 0.0008476063376292586, + "step": 1690 + }, + { + "ce_ib": 4.50052547454834, + "ce_orig": 1.1468896865844727, + "epoch": 0.48601624847221225, + "kl_loss": 0.05880790948867798, + "loss_ib": 0.0010381316533312201, + "step": 1690 + }, + { + "ce_ib": 5.624209880828857, + "ce_orig": 0.8921494483947754, + "epoch": 0.4863038320511899, + "kl_loss": 0.11992935836315155, + "loss_ib": 0.0017617144621908665, + "step": 1691 + }, + { + "ce_ib": 4.553849697113037, + "ce_orig": 0.7892396450042725, + "epoch": 0.4863038320511899, + "kl_loss": 0.09987618774175644, + "loss_ib": 0.0014541468117386103, + "step": 1691 + }, + { + "ce_ib": 4.985031604766846, + "ce_orig": 0.4969519376754761, + "epoch": 0.4863038320511899, + "kl_loss": 0.13247787952423096, + "loss_ib": 0.0018232818692922592, + "step": 1691 + }, + { + "ce_ib": 5.486037731170654, + "ce_orig": 0.6580516695976257, + "epoch": 0.4863038320511899, + "kl_loss": 0.11913005262613297, + "loss_ib": 0.0017399042844772339, + "step": 1691 + }, + { + "ce_ib": 1.7882920503616333, + "ce_orig": 0.24214190244674683, + "epoch": 0.4865914156301675, + "kl_loss": 0.23442135751247406, + "loss_ib": 0.0025230427272617817, + "step": 1692 + }, + { + "ce_ib": 5.8490095138549805, + "ce_orig": 0.8928971886634827, + "epoch": 0.4865914156301675, + "kl_loss": 0.10545745491981506, + "loss_ib": 0.0016394754638895392, + "step": 1692 + }, + { + "ce_ib": 3.8175415992736816, + "ce_orig": 0.5843856334686279, + "epoch": 0.4865914156301675, + "kl_loss": 0.09924076497554779, + "loss_ib": 0.0013741618022322655, + "step": 1692 + }, + { + "ce_ib": 7.871041774749756, + "ce_orig": 1.6187584400177002, + "epoch": 0.4865914156301675, + "kl_loss": 0.14639456570148468, + "loss_ib": 0.0022510497365146875, + "step": 1692 + }, + { + "ce_ib": 3.621203660964966, + "ce_orig": 0.8420932292938232, + "epoch": 0.4868789992091452, + "kl_loss": 0.06346692144870758, + "loss_ib": 0.000996789545752108, + "step": 1693 + }, + { + "ce_ib": 4.4356889724731445, + "ce_orig": 0.7879520058631897, + "epoch": 0.4868789992091452, + "kl_loss": 0.08009280264377594, + "loss_ib": 0.0012444969033822417, + "step": 1693 + }, + { + "ce_ib": 2.2315030097961426, + "ce_orig": 0.1959254890680313, + "epoch": 0.4868789992091452, + "kl_loss": 0.15773946046829224, + "loss_ib": 0.0018005447927862406, + "step": 1693 + }, + { + "ce_ib": 4.410289764404297, + "ce_orig": 0.668495237827301, + "epoch": 0.4868789992091452, + "kl_loss": 0.07908743619918823, + "loss_ib": 0.0012319032102823257, + "step": 1693 + }, + { + "ce_ib": 3.5901150703430176, + "ce_orig": 0.5433985590934753, + "epoch": 0.4871665827881228, + "kl_loss": 0.06901711970567703, + "loss_ib": 0.0010491827270016074, + "step": 1694 + }, + { + "ce_ib": 3.7071518898010254, + "ce_orig": 0.7718559503555298, + "epoch": 0.4871665827881228, + "kl_loss": 0.05477457493543625, + "loss_ib": 0.0009184608934447169, + "step": 1694 + }, + { + "ce_ib": 5.039360046386719, + "ce_orig": 0.8147886395454407, + "epoch": 0.4871665827881228, + "kl_loss": 0.11746887862682343, + "loss_ib": 0.0016786246560513973, + "step": 1694 + }, + { + "ce_ib": 3.584214210510254, + "ce_orig": 0.7416908740997314, + "epoch": 0.4871665827881228, + "kl_loss": 0.13640275597572327, + "loss_ib": 0.0017224489711225033, + "step": 1694 + }, + { + "epoch": 0.4874541663671004, + "grad_norm": 0.08452406525611877, + "learning_rate": 4.775689235441906e-05, + "loss": 0.7755, + "step": 1695 + }, + { + "ce_ib": 6.160760879516602, + "ce_orig": 1.0838754177093506, + "epoch": 0.4874541663671004, + "kl_loss": 0.09724076092243195, + "loss_ib": 0.0015884836902841926, + "step": 1695 + }, + { + "ce_ib": 7.721263408660889, + "ce_orig": 1.2226389646530151, + "epoch": 0.4874541663671004, + "kl_loss": 0.1127297505736351, + "loss_ib": 0.001899423892609775, + "step": 1695 + }, + { + "ce_ib": 4.271576404571533, + "ce_orig": 0.7681863307952881, + "epoch": 0.4874541663671004, + "kl_loss": 0.11804500222206116, + "loss_ib": 0.0016076075844466686, + "step": 1695 + }, + { + "ce_ib": 3.5640509128570557, + "ce_orig": 0.5958571434020996, + "epoch": 0.4874541663671004, + "kl_loss": 0.1069008857011795, + "loss_ib": 0.0014254138804972172, + "step": 1695 + }, + { + "ce_ib": 6.612865447998047, + "ce_orig": 0.959779679775238, + "epoch": 0.48774174994607805, + "kl_loss": 0.12709426879882812, + "loss_ib": 0.001932229264639318, + "step": 1696 + }, + { + "ce_ib": 8.688838005065918, + "ce_orig": 1.6913176774978638, + "epoch": 0.48774174994607805, + "kl_loss": 0.07869170606136322, + "loss_ib": 0.0016558008501306176, + "step": 1696 + }, + { + "ce_ib": 6.032428741455078, + "ce_orig": 0.7015625834465027, + "epoch": 0.48774174994607805, + "kl_loss": 0.1078311875462532, + "loss_ib": 0.0016815547132864594, + "step": 1696 + }, + { + "ce_ib": 5.3064656257629395, + "ce_orig": 0.7509438395500183, + "epoch": 0.48774174994607805, + "kl_loss": 0.11211893707513809, + "loss_ib": 0.001651835860684514, + "step": 1696 + }, + { + "ce_ib": 6.3635969161987305, + "ce_orig": 0.776512086391449, + "epoch": 0.48802933352505573, + "kl_loss": 0.06719277799129486, + "loss_ib": 0.0013082873774692416, + "step": 1697 + }, + { + "ce_ib": 4.171963214874268, + "ce_orig": 0.8718737363815308, + "epoch": 0.48802933352505573, + "kl_loss": 0.08152373135089874, + "loss_ib": 0.0012324335984885693, + "step": 1697 + }, + { + "ce_ib": 5.139289379119873, + "ce_orig": 1.029739260673523, + "epoch": 0.48802933352505573, + "kl_loss": 0.0638296902179718, + "loss_ib": 0.0011522258864715695, + "step": 1697 + }, + { + "ce_ib": 5.043673038482666, + "ce_orig": 0.33531060814857483, + "epoch": 0.48802933352505573, + "kl_loss": 0.08638446033000946, + "loss_ib": 0.0013682118151336908, + "step": 1697 + }, + { + "ce_ib": 6.389647483825684, + "ce_orig": 0.9322385787963867, + "epoch": 0.48831691710403335, + "kl_loss": 0.12672433257102966, + "loss_ib": 0.0019062081119045615, + "step": 1698 + }, + { + "ce_ib": 4.389415264129639, + "ce_orig": 0.85787034034729, + "epoch": 0.48831691710403335, + "kl_loss": 0.14398439228534698, + "loss_ib": 0.0018787854351103306, + "step": 1698 + }, + { + "ce_ib": 3.691295862197876, + "ce_orig": 0.7792192101478577, + "epoch": 0.48831691710403335, + "kl_loss": 0.11892806738615036, + "loss_ib": 0.0015584102366119623, + "step": 1698 + }, + { + "ce_ib": 3.6638667583465576, + "ce_orig": 0.6036960482597351, + "epoch": 0.48831691710403335, + "kl_loss": 0.05969225987792015, + "loss_ib": 0.0009633092558942735, + "step": 1698 + }, + { + "ce_ib": 6.808166027069092, + "ce_orig": 0.6703436970710754, + "epoch": 0.488604500683011, + "kl_loss": 0.18792811036109924, + "loss_ib": 0.0025600974913686514, + "step": 1699 + }, + { + "ce_ib": 2.8434956073760986, + "ce_orig": 0.520474374294281, + "epoch": 0.488604500683011, + "kl_loss": 0.05746614933013916, + "loss_ib": 0.0008590110228396952, + "step": 1699 + }, + { + "ce_ib": 1.9282779693603516, + "ce_orig": 0.17116010189056396, + "epoch": 0.488604500683011, + "kl_loss": 0.1406831443309784, + "loss_ib": 0.001599659095518291, + "step": 1699 + }, + { + "ce_ib": 3.2885959148406982, + "ce_orig": 0.47084707021713257, + "epoch": 0.488604500683011, + "kl_loss": 0.11110000312328339, + "loss_ib": 0.001439859508536756, + "step": 1699 + }, + { + "epoch": 0.48889208426198866, + "grad_norm": 0.09877913445234299, + "learning_rate": 4.774079988386296e-05, + "loss": 0.798, + "step": 1700 + }, + { + "ce_ib": 6.030065536499023, + "ce_orig": 0.6858091950416565, + "epoch": 0.48889208426198866, + "kl_loss": 0.1047324389219284, + "loss_ib": 0.0016503309598192573, + "step": 1700 + }, + { + "ce_ib": 7.007495880126953, + "ce_orig": 1.272796392440796, + "epoch": 0.48889208426198866, + "kl_loss": 0.10383155196905136, + "loss_ib": 0.0017390650464221835, + "step": 1700 + }, + { + "ce_ib": 4.6936354637146, + "ce_orig": 0.605167806148529, + "epoch": 0.48889208426198866, + "kl_loss": 0.1023249700665474, + "loss_ib": 0.0014926132280379534, + "step": 1700 + }, + { + "ce_ib": 5.064792156219482, + "ce_orig": 0.6369750499725342, + "epoch": 0.48889208426198866, + "kl_loss": 0.09911276400089264, + "loss_ib": 0.0014976068632677197, + "step": 1700 + }, + { + "ce_ib": 2.4347925186157227, + "ce_orig": 0.5714754462242126, + "epoch": 0.4891796678409663, + "kl_loss": 0.04211336374282837, + "loss_ib": 0.0006646128604188561, + "step": 1701 + }, + { + "ce_ib": 4.696434020996094, + "ce_orig": 0.9259843230247498, + "epoch": 0.4891796678409663, + "kl_loss": 0.08944237977266312, + "loss_ib": 0.0013640671968460083, + "step": 1701 + }, + { + "ce_ib": 4.639354705810547, + "ce_orig": 0.41791123151779175, + "epoch": 0.4891796678409663, + "kl_loss": 0.14367453753948212, + "loss_ib": 0.0019006807124242187, + "step": 1701 + }, + { + "ce_ib": 3.529367446899414, + "ce_orig": 0.6275355815887451, + "epoch": 0.4891796678409663, + "kl_loss": 0.07067733258008957, + "loss_ib": 0.0010597100481390953, + "step": 1701 + }, + { + "ce_ib": 4.044938087463379, + "ce_orig": 0.6155127882957458, + "epoch": 0.4894672514199439, + "kl_loss": 0.10646021366119385, + "loss_ib": 0.0014690959360450506, + "step": 1702 + }, + { + "ce_ib": 4.144913673400879, + "ce_orig": 0.38047054409980774, + "epoch": 0.4894672514199439, + "kl_loss": 0.11583450436592102, + "loss_ib": 0.0015728363068774343, + "step": 1702 + }, + { + "ce_ib": 5.156937599182129, + "ce_orig": 0.7264214754104614, + "epoch": 0.4894672514199439, + "kl_loss": 0.09264038503170013, + "loss_ib": 0.0014420975930988789, + "step": 1702 + }, + { + "ce_ib": 5.102726936340332, + "ce_orig": 0.8257129192352295, + "epoch": 0.4894672514199439, + "kl_loss": 0.07331917434930801, + "loss_ib": 0.0012434644158929586, + "step": 1702 + }, + { + "ce_ib": 5.4420485496521, + "ce_orig": 0.9207541346549988, + "epoch": 0.4897548349989216, + "kl_loss": 0.07844462245702744, + "loss_ib": 0.0013286509783938527, + "step": 1703 + }, + { + "ce_ib": 4.702119827270508, + "ce_orig": 1.0226807594299316, + "epoch": 0.4897548349989216, + "kl_loss": 0.06926761567592621, + "loss_ib": 0.0011628881329670548, + "step": 1703 + }, + { + "ce_ib": 3.663991928100586, + "ce_orig": 0.45649799704551697, + "epoch": 0.4897548349989216, + "kl_loss": 0.09283428639173508, + "loss_ib": 0.0012947421055287123, + "step": 1703 + }, + { + "ce_ib": 4.174275875091553, + "ce_orig": 0.8040467500686646, + "epoch": 0.4897548349989216, + "kl_loss": 0.0706256851553917, + "loss_ib": 0.00112368434201926, + "step": 1703 + }, + { + "ce_ib": 6.629425525665283, + "ce_orig": 0.8473314642906189, + "epoch": 0.4900424185778992, + "kl_loss": 0.09696064889431, + "loss_ib": 0.0016325489850714803, + "step": 1704 + }, + { + "ce_ib": 5.4243903160095215, + "ce_orig": 1.089656949043274, + "epoch": 0.4900424185778992, + "kl_loss": 0.3412671387195587, + "loss_ib": 0.003955110441893339, + "step": 1704 + }, + { + "ce_ib": 6.530048847198486, + "ce_orig": 0.8586751818656921, + "epoch": 0.4900424185778992, + "kl_loss": 0.09860202670097351, + "loss_ib": 0.0016390251694247127, + "step": 1704 + }, + { + "ce_ib": 8.027305603027344, + "ce_orig": 1.5659441947937012, + "epoch": 0.4900424185778992, + "kl_loss": 0.07563519477844238, + "loss_ib": 0.0015590825350955129, + "step": 1704 + }, + { + "epoch": 0.49033000215687683, + "grad_norm": 0.1182684600353241, + "learning_rate": 4.77246526254417e-05, + "loss": 0.8203, + "step": 1705 + }, + { + "ce_ib": 5.907613277435303, + "ce_orig": 0.46297040581703186, + "epoch": 0.49033000215687683, + "kl_loss": 0.18043935298919678, + "loss_ib": 0.0023951546754688025, + "step": 1705 + }, + { + "ce_ib": 5.961239337921143, + "ce_orig": 0.775822103023529, + "epoch": 0.49033000215687683, + "kl_loss": 0.14223052561283112, + "loss_ib": 0.0020184291061013937, + "step": 1705 + }, + { + "ce_ib": 2.585102081298828, + "ce_orig": 0.7058547139167786, + "epoch": 0.49033000215687683, + "kl_loss": 0.04008246958255768, + "loss_ib": 0.0006593348807655275, + "step": 1705 + }, + { + "ce_ib": 4.843050479888916, + "ce_orig": 0.8218753337860107, + "epoch": 0.49033000215687683, + "kl_loss": 0.06913113594055176, + "loss_ib": 0.0011756164021790028, + "step": 1705 + }, + { + "ce_ib": 3.733308792114258, + "ce_orig": 0.35645195841789246, + "epoch": 0.49061758573585446, + "kl_loss": 0.12476247549057007, + "loss_ib": 0.0016209555324167013, + "step": 1706 + }, + { + "ce_ib": 4.944437503814697, + "ce_orig": 0.6046806573867798, + "epoch": 0.49061758573585446, + "kl_loss": 0.08307860791683197, + "loss_ib": 0.0013252298813313246, + "step": 1706 + }, + { + "ce_ib": 4.234106063842773, + "ce_orig": 1.0436577796936035, + "epoch": 0.49061758573585446, + "kl_loss": 0.07675043493509293, + "loss_ib": 0.0011909148888662457, + "step": 1706 + }, + { + "ce_ib": 8.569589614868164, + "ce_orig": 1.5979472398757935, + "epoch": 0.49061758573585446, + "kl_loss": 0.06934089958667755, + "loss_ib": 0.0015503679169341922, + "step": 1706 + }, + { + "ce_ib": 5.919309139251709, + "ce_orig": 0.86369788646698, + "epoch": 0.49090516931483213, + "kl_loss": 0.11062835156917572, + "loss_ib": 0.001698214327916503, + "step": 1707 + }, + { + "ce_ib": 5.436333179473877, + "ce_orig": 1.0853995084762573, + "epoch": 0.49090516931483213, + "kl_loss": 0.07969792187213898, + "loss_ib": 0.0013406124198809266, + "step": 1707 + }, + { + "ce_ib": 3.617490530014038, + "ce_orig": 0.621108889579773, + "epoch": 0.49090516931483213, + "kl_loss": 0.058278508484363556, + "loss_ib": 0.00094453408382833, + "step": 1707 + }, + { + "ce_ib": 3.143474817276001, + "ce_orig": 0.6646585464477539, + "epoch": 0.49090516931483213, + "kl_loss": 0.07173430174589157, + "loss_ib": 0.0010316905099898577, + "step": 1707 + }, + { + "ce_ib": 3.9088222980499268, + "ce_orig": 0.7539189457893372, + "epoch": 0.49119275289380976, + "kl_loss": 0.10687832534313202, + "loss_ib": 0.0014596653636544943, + "step": 1708 + }, + { + "ce_ib": 6.277700901031494, + "ce_orig": 0.9516663551330566, + "epoch": 0.49119275289380976, + "kl_loss": 0.06727858632802963, + "loss_ib": 0.0013005558867007494, + "step": 1708 + }, + { + "ce_ib": 7.151388645172119, + "ce_orig": 1.1528204679489136, + "epoch": 0.49119275289380976, + "kl_loss": 0.10567735135555267, + "loss_ib": 0.0017719122115522623, + "step": 1708 + }, + { + "ce_ib": 7.287378787994385, + "ce_orig": 1.3541531562805176, + "epoch": 0.49119275289380976, + "kl_loss": 0.06862370669841766, + "loss_ib": 0.001414974918588996, + "step": 1708 + }, + { + "ce_ib": 3.8237762451171875, + "ce_orig": 0.4707103669643402, + "epoch": 0.4914803364727874, + "kl_loss": 0.08159242570400238, + "loss_ib": 0.001198301906697452, + "step": 1709 + }, + { + "ce_ib": 6.453456401824951, + "ce_orig": 1.189288854598999, + "epoch": 0.4914803364727874, + "kl_loss": 0.07389305531978607, + "loss_ib": 0.0013842760818079114, + "step": 1709 + }, + { + "ce_ib": 4.2426300048828125, + "ce_orig": 0.9153368473052979, + "epoch": 0.4914803364727874, + "kl_loss": 0.0663858950138092, + "loss_ib": 0.0010881219059228897, + "step": 1709 + }, + { + "ce_ib": 3.6270675659179688, + "ce_orig": 0.5315833687782288, + "epoch": 0.4914803364727874, + "kl_loss": 0.09829245507717133, + "loss_ib": 0.0013456313172355294, + "step": 1709 + }, + { + "epoch": 0.49176792005176506, + "grad_norm": 0.08848048746585846, + "learning_rate": 4.770845061805775e-05, + "loss": 0.8688, + "step": 1710 + }, + { + "ce_ib": 6.177319526672363, + "ce_orig": 1.007494330406189, + "epoch": 0.49176792005176506, + "kl_loss": 0.09623667597770691, + "loss_ib": 0.001580098643898964, + "step": 1710 + }, + { + "ce_ib": 3.3212127685546875, + "ce_orig": 0.41536638140678406, + "epoch": 0.49176792005176506, + "kl_loss": 0.08198144286870956, + "loss_ib": 0.001151935663074255, + "step": 1710 + }, + { + "ce_ib": 5.9256792068481445, + "ce_orig": 0.8392384648323059, + "epoch": 0.49176792005176506, + "kl_loss": 0.09618404507637024, + "loss_ib": 0.0015544083435088396, + "step": 1710 + }, + { + "ce_ib": 6.940138816833496, + "ce_orig": 0.9474507570266724, + "epoch": 0.49176792005176506, + "kl_loss": 0.10211776196956635, + "loss_ib": 0.0017151914071291685, + "step": 1710 + }, + { + "ce_ib": 3.626868724822998, + "ce_orig": 0.8035648465156555, + "epoch": 0.4920555036307427, + "kl_loss": 0.1979731023311615, + "loss_ib": 0.00234241783618927, + "step": 1711 + }, + { + "ce_ib": 6.395865440368652, + "ce_orig": 1.2413008213043213, + "epoch": 0.4920555036307427, + "kl_loss": 0.3335525095462799, + "loss_ib": 0.003975111525505781, + "step": 1711 + }, + { + "ce_ib": 8.108835220336914, + "ce_orig": 1.6105319261550903, + "epoch": 0.4920555036307427, + "kl_loss": 0.09881055355072021, + "loss_ib": 0.0017989890184253454, + "step": 1711 + }, + { + "ce_ib": 7.2427473068237305, + "ce_orig": 1.1496469974517822, + "epoch": 0.4920555036307427, + "kl_loss": 0.09461656212806702, + "loss_ib": 0.0016704404260963202, + "step": 1711 + }, + { + "ce_ib": 5.122899055480957, + "ce_orig": 0.8324193358421326, + "epoch": 0.4923430872097203, + "kl_loss": 0.12759749591350555, + "loss_ib": 0.0017882647225633264, + "step": 1712 + }, + { + "ce_ib": 7.809453010559082, + "ce_orig": 1.3805549144744873, + "epoch": 0.4923430872097203, + "kl_loss": 0.11528217047452927, + "loss_ib": 0.0019337668782100081, + "step": 1712 + }, + { + "ce_ib": 4.2030487060546875, + "ce_orig": 0.579928457736969, + "epoch": 0.4923430872097203, + "kl_loss": 0.10705523192882538, + "loss_ib": 0.001490857102908194, + "step": 1712 + }, + { + "ce_ib": 6.199850559234619, + "ce_orig": 1.4160586595535278, + "epoch": 0.4923430872097203, + "kl_loss": 0.10000849515199661, + "loss_ib": 0.0016200699610635638, + "step": 1712 + }, + { + "ce_ib": 5.627732276916504, + "ce_orig": 0.9592882394790649, + "epoch": 0.492630670788698, + "kl_loss": 0.08628078550100327, + "loss_ib": 0.0014255809364840388, + "step": 1713 + }, + { + "ce_ib": 7.140882968902588, + "ce_orig": 1.2684378623962402, + "epoch": 0.492630670788698, + "kl_loss": 0.10064823925495148, + "loss_ib": 0.0017205706099048257, + "step": 1713 + }, + { + "ce_ib": 5.072722434997559, + "ce_orig": 0.6799240112304688, + "epoch": 0.492630670788698, + "kl_loss": 0.1084919348359108, + "loss_ib": 0.0015921915182843804, + "step": 1713 + }, + { + "ce_ib": 2.9020159244537354, + "ce_orig": 0.4580099284648895, + "epoch": 0.492630670788698, + "kl_loss": 0.08765124529600143, + "loss_ib": 0.0011667140061035752, + "step": 1713 + }, + { + "ce_ib": 7.194737911224365, + "ce_orig": 1.1575733423233032, + "epoch": 0.4929182543676756, + "kl_loss": 0.1034325510263443, + "loss_ib": 0.0017537992680445313, + "step": 1714 + }, + { + "ce_ib": 6.020695209503174, + "ce_orig": 0.7919808626174927, + "epoch": 0.4929182543676756, + "kl_loss": 0.14096687734127045, + "loss_ib": 0.0020117382518947124, + "step": 1714 + }, + { + "ce_ib": 5.99286413192749, + "ce_orig": 0.8782891035079956, + "epoch": 0.4929182543676756, + "kl_loss": 0.11489802598953247, + "loss_ib": 0.0017482666298747063, + "step": 1714 + }, + { + "ce_ib": 4.053781032562256, + "ce_orig": 0.8700904250144958, + "epoch": 0.4929182543676756, + "kl_loss": 0.08213506639003754, + "loss_ib": 0.0012267286656424403, + "step": 1714 + }, + { + "epoch": 0.49320583794665324, + "grad_norm": 0.1028653234243393, + "learning_rate": 4.769219390074552e-05, + "loss": 0.845, + "step": 1715 + }, + { + "ce_ib": 3.4252209663391113, + "ce_orig": 0.610955536365509, + "epoch": 0.49320583794665324, + "kl_loss": 0.06524240225553513, + "loss_ib": 0.0009949459927156568, + "step": 1715 + }, + { + "ce_ib": 5.640174865722656, + "ce_orig": 0.5120779871940613, + "epoch": 0.49320583794665324, + "kl_loss": 0.1626831591129303, + "loss_ib": 0.002190849045291543, + "step": 1715 + }, + { + "ce_ib": 3.915278196334839, + "ce_orig": 0.6436498761177063, + "epoch": 0.49320583794665324, + "kl_loss": 0.08531583100557327, + "loss_ib": 0.0012446860782802105, + "step": 1715 + }, + { + "ce_ib": 6.7824296951293945, + "ce_orig": 1.4045052528381348, + "epoch": 0.49320583794665324, + "kl_loss": 0.07271700352430344, + "loss_ib": 0.001405412913300097, + "step": 1715 + }, + { + "ce_ib": 5.168056011199951, + "ce_orig": 0.5805914998054504, + "epoch": 0.49349342152563086, + "kl_loss": 0.08370394259691238, + "loss_ib": 0.0013538450002670288, + "step": 1716 + }, + { + "ce_ib": 4.898796558380127, + "ce_orig": 1.1683212518692017, + "epoch": 0.49349342152563086, + "kl_loss": 0.11404580622911453, + "loss_ib": 0.0016303376760333776, + "step": 1716 + }, + { + "ce_ib": 5.379641056060791, + "ce_orig": 0.7235344052314758, + "epoch": 0.49349342152563086, + "kl_loss": 0.12031477689743042, + "loss_ib": 0.0017411118606105447, + "step": 1716 + }, + { + "ce_ib": 3.6582415103912354, + "ce_orig": 0.8831492066383362, + "epoch": 0.49349342152563086, + "kl_loss": 0.10175725817680359, + "loss_ib": 0.0013833966804668307, + "step": 1716 + }, + { + "ce_ib": 6.059337139129639, + "ce_orig": 1.0201056003570557, + "epoch": 0.49378100510460854, + "kl_loss": 0.10570092499256134, + "loss_ib": 0.0016629429301247, + "step": 1717 + }, + { + "ce_ib": 5.489735126495361, + "ce_orig": 0.6933445334434509, + "epoch": 0.49378100510460854, + "kl_loss": 0.1173461377620697, + "loss_ib": 0.0017224348848685622, + "step": 1717 + }, + { + "ce_ib": 3.64178204536438, + "ce_orig": 0.7821058630943298, + "epoch": 0.49378100510460854, + "kl_loss": 0.06855927407741547, + "loss_ib": 0.001049770857207477, + "step": 1717 + }, + { + "ce_ib": 2.676449775695801, + "ce_orig": 0.3551661968231201, + "epoch": 0.49378100510460854, + "kl_loss": 0.08692323416471481, + "loss_ib": 0.001136877341195941, + "step": 1717 + }, + { + "ce_ib": 4.280928134918213, + "ce_orig": 0.731511652469635, + "epoch": 0.49406858868358616, + "kl_loss": 0.1115126758813858, + "loss_ib": 0.001543219550512731, + "step": 1718 + }, + { + "ce_ib": 3.997147798538208, + "ce_orig": 0.6431476473808289, + "epoch": 0.49406858868358616, + "kl_loss": 0.06864126026630402, + "loss_ib": 0.0010861273622140288, + "step": 1718 + }, + { + "ce_ib": 7.510290622711182, + "ce_orig": 1.2061995267868042, + "epoch": 0.49406858868358616, + "kl_loss": 0.13138845562934875, + "loss_ib": 0.002064913511276245, + "step": 1718 + }, + { + "ce_ib": 7.386535167694092, + "ce_orig": 1.4880925416946411, + "epoch": 0.49406858868358616, + "kl_loss": 0.08654581010341644, + "loss_ib": 0.0016041113995015621, + "step": 1718 + }, + { + "ce_ib": 6.5107831954956055, + "ce_orig": 0.797114372253418, + "epoch": 0.4943561722625638, + "kl_loss": 0.11661705374717712, + "loss_ib": 0.0018172487616539001, + "step": 1719 + }, + { + "ce_ib": 7.089412689208984, + "ce_orig": 1.6429165601730347, + "epoch": 0.4943561722625638, + "kl_loss": 0.09090930223464966, + "loss_ib": 0.0016180342063307762, + "step": 1719 + }, + { + "ce_ib": 5.779914379119873, + "ce_orig": 0.9645195603370667, + "epoch": 0.4943561722625638, + "kl_loss": 0.10814248025417328, + "loss_ib": 0.001659416244365275, + "step": 1719 + }, + { + "ce_ib": 4.7071614265441895, + "ce_orig": 0.6813072562217712, + "epoch": 0.4943561722625638, + "kl_loss": 0.16601449251174927, + "loss_ib": 0.0021308609284460545, + "step": 1719 + }, + { + "epoch": 0.49464375584154147, + "grad_norm": 0.11418356001377106, + "learning_rate": 4.767588251267121e-05, + "loss": 0.8799, + "step": 1720 + }, + { + "ce_ib": 7.927281856536865, + "ce_orig": 1.36933171749115, + "epoch": 0.49464375584154147, + "kl_loss": 0.11673523485660553, + "loss_ib": 0.0019600805826485157, + "step": 1720 + }, + { + "ce_ib": 6.415172100067139, + "ce_orig": 1.2229522466659546, + "epoch": 0.49464375584154147, + "kl_loss": 0.09318080544471741, + "loss_ib": 0.0015733252512291074, + "step": 1720 + }, + { + "ce_ib": 4.362771034240723, + "ce_orig": 0.812308132648468, + "epoch": 0.49464375584154147, + "kl_loss": 0.07320106774568558, + "loss_ib": 0.0011682877084240317, + "step": 1720 + }, + { + "ce_ib": 2.6815884113311768, + "ce_orig": 0.522832453250885, + "epoch": 0.49464375584154147, + "kl_loss": 0.09877176582813263, + "loss_ib": 0.0012558763846755028, + "step": 1720 + }, + { + "ce_ib": 3.827521562576294, + "ce_orig": 0.7778903841972351, + "epoch": 0.4949313394205191, + "kl_loss": 0.06745895743370056, + "loss_ib": 0.0010573416948318481, + "step": 1721 + }, + { + "ce_ib": 5.313347816467285, + "ce_orig": 0.7767109870910645, + "epoch": 0.4949313394205191, + "kl_loss": 0.08251943439245224, + "loss_ib": 0.0013565290719270706, + "step": 1721 + }, + { + "ce_ib": 5.538110256195068, + "ce_orig": 0.909490168094635, + "epoch": 0.4949313394205191, + "kl_loss": 0.09886971116065979, + "loss_ib": 0.0015425081364810467, + "step": 1721 + }, + { + "ce_ib": 3.9210591316223145, + "ce_orig": 0.7207149863243103, + "epoch": 0.4949313394205191, + "kl_loss": 0.0924137607216835, + "loss_ib": 0.0013162435498088598, + "step": 1721 + }, + { + "ce_ib": 3.448528289794922, + "ce_orig": 0.7080965638160706, + "epoch": 0.4952189229994967, + "kl_loss": 0.07973489910364151, + "loss_ib": 0.0011422018287703395, + "step": 1722 + }, + { + "ce_ib": 4.894460678100586, + "ce_orig": 0.6460247039794922, + "epoch": 0.4952189229994967, + "kl_loss": 0.09368401020765305, + "loss_ib": 0.0014262860640883446, + "step": 1722 + }, + { + "ce_ib": 4.141251087188721, + "ce_orig": 0.6642805337905884, + "epoch": 0.4952189229994967, + "kl_loss": 0.04982329159975052, + "loss_ib": 0.0009123579948209226, + "step": 1722 + }, + { + "ce_ib": 8.591182708740234, + "ce_orig": 1.0843234062194824, + "epoch": 0.4952189229994967, + "kl_loss": 0.07051913440227509, + "loss_ib": 0.0015643095830455422, + "step": 1722 + }, + { + "ce_ib": 3.0200982093811035, + "ce_orig": 0.4031941890716553, + "epoch": 0.4955065065784744, + "kl_loss": 0.06621220707893372, + "loss_ib": 0.0009641318465583026, + "step": 1723 + }, + { + "ce_ib": 6.254401206970215, + "ce_orig": 0.5961822271347046, + "epoch": 0.4955065065784744, + "kl_loss": 0.13185912370681763, + "loss_ib": 0.0019440313335508108, + "step": 1723 + }, + { + "ce_ib": 4.667854309082031, + "ce_orig": 0.9180195331573486, + "epoch": 0.4955065065784744, + "kl_loss": 0.10923993587493896, + "loss_ib": 0.001559184747748077, + "step": 1723 + }, + { + "ce_ib": 3.7965359687805176, + "ce_orig": 0.6017652750015259, + "epoch": 0.4955065065784744, + "kl_loss": 0.04251881688833237, + "loss_ib": 0.0008048417512327433, + "step": 1723 + }, + { + "ce_ib": 3.188062906265259, + "ce_orig": 0.3921128213405609, + "epoch": 0.495794090157452, + "kl_loss": 0.09296722710132599, + "loss_ib": 0.0012484785402193666, + "step": 1724 + }, + { + "ce_ib": 4.953934669494629, + "ce_orig": 0.9897075295448303, + "epoch": 0.495794090157452, + "kl_loss": 0.10949830710887909, + "loss_ib": 0.0015903764870017767, + "step": 1724 + }, + { + "ce_ib": 4.355916976928711, + "ce_orig": 0.8166881799697876, + "epoch": 0.495794090157452, + "kl_loss": 0.09253980964422226, + "loss_ib": 0.0013609897578135133, + "step": 1724 + }, + { + "ce_ib": 5.418321132659912, + "ce_orig": 0.9988260865211487, + "epoch": 0.495794090157452, + "kl_loss": 0.10717599093914032, + "loss_ib": 0.001613592030480504, + "step": 1724 + }, + { + "epoch": 0.49608167373642964, + "grad_norm": 0.12532958388328552, + "learning_rate": 4.7659516493132747e-05, + "loss": 0.8544, + "step": 1725 + }, + { + "ce_ib": 3.9007813930511475, + "ce_orig": 0.6148179173469543, + "epoch": 0.49608167373642964, + "kl_loss": 0.0985482782125473, + "loss_ib": 0.0013755608815699816, + "step": 1725 + }, + { + "ce_ib": 3.115358829498291, + "ce_orig": 0.6273937225341797, + "epoch": 0.49608167373642964, + "kl_loss": 0.33341020345687866, + "loss_ib": 0.003645637771114707, + "step": 1725 + }, + { + "ce_ib": 4.1785759925842285, + "ce_orig": 0.5228098630905151, + "epoch": 0.49608167373642964, + "kl_loss": 0.10771578550338745, + "loss_ib": 0.0014950154582038522, + "step": 1725 + }, + { + "ce_ib": 4.085210800170898, + "ce_orig": 0.6929702758789062, + "epoch": 0.49608167373642964, + "kl_loss": 0.09579423815011978, + "loss_ib": 0.001366463373415172, + "step": 1725 + }, + { + "ce_ib": 4.264012336730957, + "ce_orig": 0.7581554055213928, + "epoch": 0.49636925731540726, + "kl_loss": 0.11144217848777771, + "loss_ib": 0.00154082290828228, + "step": 1726 + }, + { + "ce_ib": 3.3874831199645996, + "ce_orig": 0.7368152737617493, + "epoch": 0.49636925731540726, + "kl_loss": 0.07451638579368591, + "loss_ib": 0.001083912211470306, + "step": 1726 + }, + { + "ce_ib": 7.388004302978516, + "ce_orig": 1.2559181451797485, + "epoch": 0.49636925731540726, + "kl_loss": 0.08685200661420822, + "loss_ib": 0.0016073203878477216, + "step": 1726 + }, + { + "ce_ib": 8.50687313079834, + "ce_orig": 1.7088189125061035, + "epoch": 0.49636925731540726, + "kl_loss": 0.08574852347373962, + "loss_ib": 0.001708172494545579, + "step": 1726 + }, + { + "ce_ib": 4.916175842285156, + "ce_orig": 0.60872483253479, + "epoch": 0.49665684089438494, + "kl_loss": 0.10028906166553497, + "loss_ib": 0.0014945082366466522, + "step": 1727 + }, + { + "ce_ib": 5.262096881866455, + "ce_orig": 0.9867510795593262, + "epoch": 0.49665684089438494, + "kl_loss": 0.10700102150440216, + "loss_ib": 0.0015962198376655579, + "step": 1727 + }, + { + "ce_ib": 6.419177055358887, + "ce_orig": 1.1166459321975708, + "epoch": 0.49665684089438494, + "kl_loss": 0.09437519311904907, + "loss_ib": 0.0015856694662943482, + "step": 1727 + }, + { + "ce_ib": 6.515366554260254, + "ce_orig": 0.6274426579475403, + "epoch": 0.49665684089438494, + "kl_loss": 0.09725320339202881, + "loss_ib": 0.0016240685945376754, + "step": 1727 + }, + { + "ce_ib": 6.484439373016357, + "ce_orig": 0.7738701701164246, + "epoch": 0.49694442447336257, + "kl_loss": 0.11813853681087494, + "loss_ib": 0.0018298291834071279, + "step": 1728 + }, + { + "ce_ib": 4.05966329574585, + "ce_orig": 0.7444210648536682, + "epoch": 0.49694442447336257, + "kl_loss": 0.1241145133972168, + "loss_ib": 0.0016471114940941334, + "step": 1728 + }, + { + "ce_ib": 3.5905139446258545, + "ce_orig": 0.6649654507637024, + "epoch": 0.49694442447336257, + "kl_loss": 0.06089675799012184, + "loss_ib": 0.0009680188959464431, + "step": 1728 + }, + { + "ce_ib": 6.308597564697266, + "ce_orig": 1.0921988487243652, + "epoch": 0.49694442447336257, + "kl_loss": 0.09195709228515625, + "loss_ib": 0.001550430664792657, + "step": 1728 + }, + { + "ce_ib": 4.000677585601807, + "ce_orig": 0.46061423420906067, + "epoch": 0.4972320080523402, + "kl_loss": 0.13731661438941956, + "loss_ib": 0.0017732338747009635, + "step": 1729 + }, + { + "ce_ib": 4.311793327331543, + "ce_orig": 0.8838813900947571, + "epoch": 0.4972320080523402, + "kl_loss": 0.12885144352912903, + "loss_ib": 0.0017196937697008252, + "step": 1729 + }, + { + "ce_ib": 6.728981971740723, + "ce_orig": 1.2123677730560303, + "epoch": 0.4972320080523402, + "kl_loss": 0.11881040036678314, + "loss_ib": 0.0018610020633786917, + "step": 1729 + }, + { + "ce_ib": 5.378375053405762, + "ce_orig": 0.8194847106933594, + "epoch": 0.4972320080523402, + "kl_loss": 0.0916559249162674, + "loss_ib": 0.0014543966390192509, + "step": 1729 + }, + { + "epoch": 0.49751959163131787, + "grad_norm": 0.08386300504207611, + "learning_rate": 4.764309588155966e-05, + "loss": 0.9238, + "step": 1730 + }, + { + "ce_ib": 5.920670986175537, + "ce_orig": 1.1291418075561523, + "epoch": 0.49751959163131787, + "kl_loss": 0.09754973649978638, + "loss_ib": 0.0015675644390285015, + "step": 1730 + }, + { + "ce_ib": 8.217052459716797, + "ce_orig": 1.4239951372146606, + "epoch": 0.49751959163131787, + "kl_loss": 0.09117145836353302, + "loss_ib": 0.0017334198346361518, + "step": 1730 + }, + { + "ce_ib": 4.219716548919678, + "ce_orig": 0.5232949256896973, + "epoch": 0.49751959163131787, + "kl_loss": 0.07372719049453735, + "loss_ib": 0.001159243518486619, + "step": 1730 + }, + { + "ce_ib": 4.065614223480225, + "ce_orig": 0.8240973353385925, + "epoch": 0.49751959163131787, + "kl_loss": 0.09847597032785416, + "loss_ib": 0.0013913210714235902, + "step": 1730 + }, + { + "ce_ib": 3.806094169616699, + "ce_orig": 0.5833454132080078, + "epoch": 0.4978071752102955, + "kl_loss": 0.056959860026836395, + "loss_ib": 0.0009502079919911921, + "step": 1731 + }, + { + "ce_ib": 3.1472842693328857, + "ce_orig": 0.6324117183685303, + "epoch": 0.4978071752102955, + "kl_loss": 0.0672142282128334, + "loss_ib": 0.0009868706110864878, + "step": 1731 + }, + { + "ce_ib": 5.9536824226379395, + "ce_orig": 0.663674533367157, + "epoch": 0.4978071752102955, + "kl_loss": 0.10824564844369888, + "loss_ib": 0.0016778246499598026, + "step": 1731 + }, + { + "ce_ib": 3.2498745918273926, + "ce_orig": 0.5611225366592407, + "epoch": 0.4978071752102955, + "kl_loss": 0.08438904583454132, + "loss_ib": 0.0011688779341056943, + "step": 1731 + }, + { + "ce_ib": 7.705049991607666, + "ce_orig": 1.7780629396438599, + "epoch": 0.4980947587892731, + "kl_loss": 0.10265342146158218, + "loss_ib": 0.001797039178200066, + "step": 1732 + }, + { + "ce_ib": 3.9708917140960693, + "ce_orig": 0.6379204392433167, + "epoch": 0.4980947587892731, + "kl_loss": 0.12149473279714584, + "loss_ib": 0.0016120364889502525, + "step": 1732 + }, + { + "ce_ib": 4.439757347106934, + "ce_orig": 0.9767668843269348, + "epoch": 0.4980947587892731, + "kl_loss": 0.06957311928272247, + "loss_ib": 0.0011397069320082664, + "step": 1732 + }, + { + "ce_ib": 5.312673091888428, + "ce_orig": 0.668224036693573, + "epoch": 0.4980947587892731, + "kl_loss": 0.09812991321086884, + "loss_ib": 0.0015125664649531245, + "step": 1732 + }, + { + "ce_ib": 5.367145538330078, + "ce_orig": 0.8808696269989014, + "epoch": 0.4983823423682508, + "kl_loss": 0.13414731621742249, + "loss_ib": 0.0018781876424327493, + "step": 1733 + }, + { + "ce_ib": 2.9793570041656494, + "ce_orig": 0.5071747303009033, + "epoch": 0.4983823423682508, + "kl_loss": 0.09389454126358032, + "loss_ib": 0.0012368810130283237, + "step": 1733 + }, + { + "ce_ib": 4.428302764892578, + "ce_orig": 0.6844320297241211, + "epoch": 0.4983823423682508, + "kl_loss": 0.10418913513422012, + "loss_ib": 0.0014847215497866273, + "step": 1733 + }, + { + "ce_ib": 4.0619916915893555, + "ce_orig": 0.6804245114326477, + "epoch": 0.4983823423682508, + "kl_loss": 0.0907549038529396, + "loss_ib": 0.0013137481873854995, + "step": 1733 + }, + { + "ce_ib": 6.571776866912842, + "ce_orig": 1.0220751762390137, + "epoch": 0.4986699259472284, + "kl_loss": 0.08240627497434616, + "loss_ib": 0.0014812403824180365, + "step": 1734 + }, + { + "ce_ib": 6.284444808959961, + "ce_orig": 1.2681993246078491, + "epoch": 0.4986699259472284, + "kl_loss": 0.08951609581708908, + "loss_ib": 0.0015236054314300418, + "step": 1734 + }, + { + "ce_ib": 4.447787284851074, + "ce_orig": 0.6758273243904114, + "epoch": 0.4986699259472284, + "kl_loss": 0.1032543033361435, + "loss_ib": 0.0014773216098546982, + "step": 1734 + }, + { + "ce_ib": 2.6137309074401855, + "ce_orig": 0.2851305603981018, + "epoch": 0.4986699259472284, + "kl_loss": 0.2721288204193115, + "loss_ib": 0.002982661360874772, + "step": 1734 + }, + { + "epoch": 0.49895750952620604, + "grad_norm": 0.10425584763288498, + "learning_rate": 4.7626620717513035e-05, + "loss": 0.8514, + "step": 1735 + }, + { + "ce_ib": 7.496507167816162, + "ce_orig": 1.3989319801330566, + "epoch": 0.49895750952620604, + "kl_loss": 0.11045782268047333, + "loss_ib": 0.0018542289035394788, + "step": 1735 + }, + { + "ce_ib": 5.036450386047363, + "ce_orig": 0.6575415134429932, + "epoch": 0.49895750952620604, + "kl_loss": 0.09376905858516693, + "loss_ib": 0.0014413356548175216, + "step": 1735 + }, + { + "ce_ib": 3.7786402702331543, + "ce_orig": 0.60782790184021, + "epoch": 0.49895750952620604, + "kl_loss": 0.07640475034713745, + "loss_ib": 0.0011419114889577031, + "step": 1735 + }, + { + "ce_ib": 3.3764452934265137, + "ce_orig": 0.5685099363327026, + "epoch": 0.49895750952620604, + "kl_loss": 0.08092882484197617, + "loss_ib": 0.0011469327146187425, + "step": 1735 + }, + { + "ce_ib": 7.066707134246826, + "ce_orig": 1.1540577411651611, + "epoch": 0.49924509310518367, + "kl_loss": 0.08824757486581802, + "loss_ib": 0.0015891465591266751, + "step": 1736 + }, + { + "ce_ib": 3.61773419380188, + "ce_orig": 0.7805092930793762, + "epoch": 0.49924509310518367, + "kl_loss": 0.06975187361240387, + "loss_ib": 0.0010592921171337366, + "step": 1736 + }, + { + "ce_ib": 5.326161861419678, + "ce_orig": 1.0745964050292969, + "epoch": 0.49924509310518367, + "kl_loss": 0.09485232830047607, + "loss_ib": 0.0014811394503340125, + "step": 1736 + }, + { + "ce_ib": 7.341875076293945, + "ce_orig": 1.2433197498321533, + "epoch": 0.49924509310518367, + "kl_loss": 0.13118135929107666, + "loss_ib": 0.0020460011437535286, + "step": 1736 + }, + { + "ce_ib": 4.627169609069824, + "ce_orig": 0.8437621593475342, + "epoch": 0.49953267668416135, + "kl_loss": 0.058301813900470734, + "loss_ib": 0.0010457350872457027, + "step": 1737 + }, + { + "ce_ib": 4.742276191711426, + "ce_orig": 0.8830808997154236, + "epoch": 0.49953267668416135, + "kl_loss": 0.07355999946594238, + "loss_ib": 0.0012098276056349277, + "step": 1737 + }, + { + "ce_ib": 4.208890914916992, + "ce_orig": 0.7050108313560486, + "epoch": 0.49953267668416135, + "kl_loss": 0.08421315252780914, + "loss_ib": 0.0012630205601453781, + "step": 1737 + }, + { + "ce_ib": 6.067041397094727, + "ce_orig": 1.0087031126022339, + "epoch": 0.49953267668416135, + "kl_loss": 0.11520966142416, + "loss_ib": 0.0017588007031008601, + "step": 1737 + }, + { + "ce_ib": 3.1678125858306885, + "ce_orig": 0.3288748562335968, + "epoch": 0.49982026026313897, + "kl_loss": 0.10716494917869568, + "loss_ib": 0.001388430711813271, + "step": 1738 + }, + { + "ce_ib": 5.5058441162109375, + "ce_orig": 1.189884066581726, + "epoch": 0.49982026026313897, + "kl_loss": 0.059209782630205154, + "loss_ib": 0.0011426821583881974, + "step": 1738 + }, + { + "ce_ib": 5.345909595489502, + "ce_orig": 1.0387386083602905, + "epoch": 0.49982026026313897, + "kl_loss": 0.1309211552143097, + "loss_ib": 0.001843802398070693, + "step": 1738 + }, + { + "ce_ib": 3.629713296890259, + "ce_orig": 0.6806856393814087, + "epoch": 0.49982026026313897, + "kl_loss": 0.10249398648738861, + "loss_ib": 0.0013879111502319574, + "step": 1738 + }, + { + "ce_ib": 7.156620025634766, + "ce_orig": 1.1354777812957764, + "epoch": 0.5001078438421166, + "kl_loss": 0.12062878161668777, + "loss_ib": 0.0019219497917219996, + "step": 1739 + }, + { + "ce_ib": 4.831526756286621, + "ce_orig": 0.6152259707450867, + "epoch": 0.5001078438421166, + "kl_loss": 0.09995514899492264, + "loss_ib": 0.0014827040722593665, + "step": 1739 + }, + { + "ce_ib": 4.5185370445251465, + "ce_orig": 0.5978391766548157, + "epoch": 0.5001078438421166, + "kl_loss": 0.12913918495178223, + "loss_ib": 0.0017432455206289887, + "step": 1739 + }, + { + "ce_ib": 4.2306294441223145, + "ce_orig": 0.968773603439331, + "epoch": 0.5001078438421166, + "kl_loss": 0.0869239866733551, + "loss_ib": 0.0012923027388751507, + "step": 1739 + }, + { + "epoch": 0.5003954274210942, + "grad_norm": 0.09850291162729263, + "learning_rate": 4.761009104068533e-05, + "loss": 0.8735, + "step": 1740 + }, + { + "ce_ib": 9.65589427947998, + "ce_orig": 1.4569464921951294, + "epoch": 0.5003954274210942, + "kl_loss": 0.11200807988643646, + "loss_ib": 0.002085670130327344, + "step": 1740 + }, + { + "ce_ib": 5.827742099761963, + "ce_orig": 0.8056154847145081, + "epoch": 0.5003954274210942, + "kl_loss": 0.07450857758522034, + "loss_ib": 0.0013278600526973605, + "step": 1740 + }, + { + "ce_ib": 5.772516250610352, + "ce_orig": 1.1650618314743042, + "epoch": 0.5003954274210942, + "kl_loss": 0.15006515383720398, + "loss_ib": 0.0020779031328856945, + "step": 1740 + }, + { + "ce_ib": 3.095163106918335, + "ce_orig": 0.6501392126083374, + "epoch": 0.5003954274210942, + "kl_loss": 0.10721646249294281, + "loss_ib": 0.0013816809514537454, + "step": 1740 + }, + { + "ce_ib": 5.9743170738220215, + "ce_orig": 1.2719261646270752, + "epoch": 0.5006830110000718, + "kl_loss": 0.12999939918518066, + "loss_ib": 0.0018974256236106157, + "step": 1741 + }, + { + "ce_ib": 6.541214466094971, + "ce_orig": 0.7686718106269836, + "epoch": 0.5006830110000718, + "kl_loss": 0.08610180020332336, + "loss_ib": 0.0015151393599808216, + "step": 1741 + }, + { + "ce_ib": 4.405296325683594, + "ce_orig": 0.9371830821037292, + "epoch": 0.5006830110000718, + "kl_loss": 0.08713194727897644, + "loss_ib": 0.0013118489878252149, + "step": 1741 + }, + { + "ce_ib": 4.9763617515563965, + "ce_orig": 0.673858106136322, + "epoch": 0.5006830110000718, + "kl_loss": 0.16543608903884888, + "loss_ib": 0.002151997061446309, + "step": 1741 + }, + { + "ce_ib": 8.608048439025879, + "ce_orig": 1.1199445724487305, + "epoch": 0.5009705945790496, + "kl_loss": 0.12141510099172592, + "loss_ib": 0.00207495572976768, + "step": 1742 + }, + { + "ce_ib": 3.103990316390991, + "ce_orig": 0.6160081624984741, + "epoch": 0.5009705945790496, + "kl_loss": 0.08121630549430847, + "loss_ib": 0.0011225620983168483, + "step": 1742 + }, + { + "ce_ib": 3.39253306388855, + "ce_orig": 0.5802491903305054, + "epoch": 0.5009705945790496, + "kl_loss": 0.07396113127470016, + "loss_ib": 0.0010788645595312119, + "step": 1742 + }, + { + "ce_ib": 3.377547025680542, + "ce_orig": 0.4776403307914734, + "epoch": 0.5009705945790496, + "kl_loss": 0.12740077078342438, + "loss_ib": 0.00161176233086735, + "step": 1742 + }, + { + "ce_ib": 5.8947014808654785, + "ce_orig": 1.1852939128875732, + "epoch": 0.5012581781580272, + "kl_loss": 0.09662100672721863, + "loss_ib": 0.0015556801808997989, + "step": 1743 + }, + { + "ce_ib": 4.117053031921387, + "ce_orig": 0.6870872974395752, + "epoch": 0.5012581781580272, + "kl_loss": 0.055195923894643784, + "loss_ib": 0.0009636644972488284, + "step": 1743 + }, + { + "ce_ib": 3.4533183574676514, + "ce_orig": 0.5691440105438232, + "epoch": 0.5012581781580272, + "kl_loss": 0.07171206921339035, + "loss_ib": 0.0010624524438753724, + "step": 1743 + }, + { + "ce_ib": 5.087991714477539, + "ce_orig": 0.6893404722213745, + "epoch": 0.5012581781580272, + "kl_loss": 0.11073730140924454, + "loss_ib": 0.0016161721432581544, + "step": 1743 + }, + { + "ce_ib": 6.909459590911865, + "ce_orig": 0.6637486815452576, + "epoch": 0.5015457617370048, + "kl_loss": 0.24058911204338074, + "loss_ib": 0.0030968370847404003, + "step": 1744 + }, + { + "ce_ib": 4.333632946014404, + "ce_orig": 0.8570219278335571, + "epoch": 0.5015457617370048, + "kl_loss": 0.15820840001106262, + "loss_ib": 0.0020154472440481186, + "step": 1744 + }, + { + "ce_ib": 6.000399112701416, + "ce_orig": 1.0544719696044922, + "epoch": 0.5015457617370048, + "kl_loss": 0.12396804988384247, + "loss_ib": 0.0018397202948108315, + "step": 1744 + }, + { + "ce_ib": 3.3837006092071533, + "ce_orig": 0.8157131671905518, + "epoch": 0.5015457617370048, + "kl_loss": 0.07416720688343048, + "loss_ib": 0.0010800421005114913, + "step": 1744 + }, + { + "epoch": 0.5018333453159824, + "grad_norm": 0.09900876134634018, + "learning_rate": 4.7593506890900405e-05, + "loss": 0.864, + "step": 1745 + }, + { + "ce_ib": 4.263499736785889, + "ce_orig": 0.5904655456542969, + "epoch": 0.5018333453159824, + "kl_loss": 0.06423919647932053, + "loss_ib": 0.0010687418980523944, + "step": 1745 + }, + { + "ce_ib": 3.546686887741089, + "ce_orig": 0.3186183571815491, + "epoch": 0.5018333453159824, + "kl_loss": 0.21409821510314941, + "loss_ib": 0.0024956506676971912, + "step": 1745 + }, + { + "ce_ib": 5.971281051635742, + "ce_orig": 1.1855957508087158, + "epoch": 0.5018333453159824, + "kl_loss": 0.10204104334115982, + "loss_ib": 0.0016175383934751153, + "step": 1745 + }, + { + "ce_ib": 3.1599652767181396, + "ce_orig": 0.5992884039878845, + "epoch": 0.5018333453159824, + "kl_loss": 0.08075752854347229, + "loss_ib": 0.0011235717684030533, + "step": 1745 + }, + { + "ce_ib": 3.6288797855377197, + "ce_orig": 0.6760779023170471, + "epoch": 0.5021209288949601, + "kl_loss": 0.07093308866024017, + "loss_ib": 0.0010722188744693995, + "step": 1746 + }, + { + "ce_ib": 5.8623433113098145, + "ce_orig": 1.203891634941101, + "epoch": 0.5021209288949601, + "kl_loss": 0.12156254053115845, + "loss_ib": 0.0018018597038462758, + "step": 1746 + }, + { + "ce_ib": 3.1267528533935547, + "ce_orig": 0.5984734892845154, + "epoch": 0.5021209288949601, + "kl_loss": 0.051463332027196884, + "loss_ib": 0.0008273085695691407, + "step": 1746 + }, + { + "ce_ib": 5.515877723693848, + "ce_orig": 0.7595711946487427, + "epoch": 0.5021209288949601, + "kl_loss": 0.14890992641448975, + "loss_ib": 0.0020406870171427727, + "step": 1746 + }, + { + "ce_ib": 2.8583717346191406, + "ce_orig": 0.43717148900032043, + "epoch": 0.5024085124739377, + "kl_loss": 0.06228368729352951, + "loss_ib": 0.0009086739737540483, + "step": 1747 + }, + { + "ce_ib": 3.9720606803894043, + "ce_orig": 0.3974498212337494, + "epoch": 0.5024085124739377, + "kl_loss": 0.08765491843223572, + "loss_ib": 0.0012737552169710398, + "step": 1747 + }, + { + "ce_ib": 4.238058090209961, + "ce_orig": 0.7524107694625854, + "epoch": 0.5024085124739377, + "kl_loss": 0.06884914636611938, + "loss_ib": 0.0011122971773147583, + "step": 1747 + }, + { + "ce_ib": 4.900828838348389, + "ce_orig": 0.838923990726471, + "epoch": 0.5024085124739377, + "kl_loss": 0.09693875908851624, + "loss_ib": 0.0014594703679904342, + "step": 1747 + }, + { + "ce_ib": 5.5714192390441895, + "ce_orig": 0.9417537450790405, + "epoch": 0.5026960960529154, + "kl_loss": 0.10547983646392822, + "loss_ib": 0.0016119402134791017, + "step": 1748 + }, + { + "ce_ib": 7.233001708984375, + "ce_orig": 0.5274019837379456, + "epoch": 0.5026960960529154, + "kl_loss": 0.10658366978168488, + "loss_ib": 0.001789136789739132, + "step": 1748 + }, + { + "ce_ib": 3.8977789878845215, + "ce_orig": 0.6983909606933594, + "epoch": 0.5026960960529154, + "kl_loss": 0.10827343165874481, + "loss_ib": 0.001472512143664062, + "step": 1748 + }, + { + "ce_ib": 7.986837863922119, + "ce_orig": 1.6704212427139282, + "epoch": 0.5026960960529154, + "kl_loss": 0.11359195411205292, + "loss_ib": 0.0019346032058820128, + "step": 1748 + }, + { + "ce_ib": 5.164767742156982, + "ce_orig": 0.742429792881012, + "epoch": 0.502983679631893, + "kl_loss": 0.11558070778846741, + "loss_ib": 0.0016722838627174497, + "step": 1749 + }, + { + "ce_ib": 4.2510552406311035, + "ce_orig": 0.6786426305770874, + "epoch": 0.502983679631893, + "kl_loss": 0.08232644200325012, + "loss_ib": 0.001248369924724102, + "step": 1749 + }, + { + "ce_ib": 3.428544759750366, + "ce_orig": 0.5286094546318054, + "epoch": 0.502983679631893, + "kl_loss": 0.07765693217515945, + "loss_ib": 0.0011194237740710378, + "step": 1749 + }, + { + "ce_ib": 5.1926374435424805, + "ce_orig": 1.127622365951538, + "epoch": 0.502983679631893, + "kl_loss": 0.09658324718475342, + "loss_ib": 0.0014850961742922664, + "step": 1749 + }, + { + "epoch": 0.5032712632108707, + "grad_norm": 0.08961226791143417, + "learning_rate": 4.757686830811332e-05, + "loss": 0.8021, + "step": 1750 + }, + { + "ce_ib": 9.975054740905762, + "ce_orig": 1.9483603239059448, + "epoch": 0.5032712632108707, + "kl_loss": 0.10372290015220642, + "loss_ib": 0.002034734468907118, + "step": 1750 + }, + { + "ce_ib": 5.111995220184326, + "ce_orig": 0.7818955183029175, + "epoch": 0.5032712632108707, + "kl_loss": 0.08424295485019684, + "loss_ib": 0.0013536290498450398, + "step": 1750 + }, + { + "ce_ib": 2.9647223949432373, + "ce_orig": 0.4814760088920593, + "epoch": 0.5032712632108707, + "kl_loss": 0.09579072892665863, + "loss_ib": 0.0012543794000521302, + "step": 1750 + }, + { + "ce_ib": 4.7183146476745605, + "ce_orig": 0.7111132144927979, + "epoch": 0.5032712632108707, + "kl_loss": 0.08042638748884201, + "loss_ib": 0.001276095281355083, + "step": 1750 + }, + { + "ce_ib": 5.344364643096924, + "ce_orig": 0.9831393361091614, + "epoch": 0.5035588467898483, + "kl_loss": 0.1293076127767563, + "loss_ib": 0.0018275125185027719, + "step": 1751 + }, + { + "ce_ib": 5.932323455810547, + "ce_orig": 0.8263015747070312, + "epoch": 0.5035588467898483, + "kl_loss": 0.11434921622276306, + "loss_ib": 0.001736724516376853, + "step": 1751 + }, + { + "ce_ib": 4.607931613922119, + "ce_orig": 0.5439244508743286, + "epoch": 0.5035588467898483, + "kl_loss": 0.13249653577804565, + "loss_ib": 0.0017857584170997143, + "step": 1751 + }, + { + "ce_ib": 5.876941204071045, + "ce_orig": 0.7904411554336548, + "epoch": 0.5035588467898483, + "kl_loss": 0.14947941899299622, + "loss_ib": 0.00208248826675117, + "step": 1751 + }, + { + "ce_ib": 5.389278888702393, + "ce_orig": 1.070987343788147, + "epoch": 0.5038464303688259, + "kl_loss": 0.1790669560432434, + "loss_ib": 0.002329597482457757, + "step": 1752 + }, + { + "ce_ib": 5.152892112731934, + "ce_orig": 0.8494266867637634, + "epoch": 0.5038464303688259, + "kl_loss": 0.07320482283830643, + "loss_ib": 0.001247337437234819, + "step": 1752 + }, + { + "ce_ib": 3.5102367401123047, + "ce_orig": 0.6085705757141113, + "epoch": 0.5038464303688259, + "kl_loss": 0.07244947552680969, + "loss_ib": 0.0010755184339359403, + "step": 1752 + }, + { + "ce_ib": 4.0515570640563965, + "ce_orig": 0.717208981513977, + "epoch": 0.5038464303688259, + "kl_loss": 0.10721513628959656, + "loss_ib": 0.0014773070579394698, + "step": 1752 + }, + { + "ce_ib": 3.761439561843872, + "ce_orig": 0.6423521041870117, + "epoch": 0.5041340139478035, + "kl_loss": 0.10668110847473145, + "loss_ib": 0.0014429549919441342, + "step": 1753 + }, + { + "ce_ib": 4.898961067199707, + "ce_orig": 1.082966685295105, + "epoch": 0.5041340139478035, + "kl_loss": 0.06628865003585815, + "loss_ib": 0.001152782584540546, + "step": 1753 + }, + { + "ce_ib": 4.415982246398926, + "ce_orig": 0.7205655574798584, + "epoch": 0.5041340139478035, + "kl_loss": 0.0771339014172554, + "loss_ib": 0.001212937175296247, + "step": 1753 + }, + { + "ce_ib": 5.591447353363037, + "ce_orig": 1.052877426147461, + "epoch": 0.5041340139478035, + "kl_loss": 0.09695450961589813, + "loss_ib": 0.0015286898706108332, + "step": 1753 + }, + { + "ce_ib": 5.605398654937744, + "ce_orig": 0.9697411060333252, + "epoch": 0.5044215975267812, + "kl_loss": 0.06414365768432617, + "loss_ib": 0.0012019763235002756, + "step": 1754 + }, + { + "ce_ib": 6.231783390045166, + "ce_orig": 1.501328945159912, + "epoch": 0.5044215975267812, + "kl_loss": 0.10558497905731201, + "loss_ib": 0.0016790280351415277, + "step": 1754 + }, + { + "ce_ib": 8.4285306930542, + "ce_orig": 1.5399643182754517, + "epoch": 0.5044215975267812, + "kl_loss": 0.10128645598888397, + "loss_ib": 0.0018557175062596798, + "step": 1754 + }, + { + "ce_ib": 3.882122039794922, + "ce_orig": 0.7142038345336914, + "epoch": 0.5044215975267812, + "kl_loss": 0.09716086834669113, + "loss_ib": 0.0013598209479823709, + "step": 1754 + }, + { + "epoch": 0.5047091811057589, + "grad_norm": 0.10455439984798431, + "learning_rate": 4.756017533241027e-05, + "loss": 0.8484, + "step": 1755 + }, + { + "ce_ib": 5.680105209350586, + "ce_orig": 0.7889714241027832, + "epoch": 0.5047091811057589, + "kl_loss": 0.1282590925693512, + "loss_ib": 0.0018506014021113515, + "step": 1755 + }, + { + "ce_ib": 5.770727157592773, + "ce_orig": 0.9311279058456421, + "epoch": 0.5047091811057589, + "kl_loss": 0.1011575236916542, + "loss_ib": 0.0015886479523032904, + "step": 1755 + }, + { + "ce_ib": 6.819508075714111, + "ce_orig": 1.1357989311218262, + "epoch": 0.5047091811057589, + "kl_loss": 0.10741980373859406, + "loss_ib": 0.0017561488784849644, + "step": 1755 + }, + { + "ce_ib": 5.206460952758789, + "ce_orig": 0.9904372692108154, + "epoch": 0.5047091811057589, + "kl_loss": 0.11252833157777786, + "loss_ib": 0.0016459292965009809, + "step": 1755 + }, + { + "ce_ib": 3.3703248500823975, + "ce_orig": 0.8458900451660156, + "epoch": 0.5049967646847365, + "kl_loss": 0.07917517423629761, + "loss_ib": 0.0011287842644378543, + "step": 1756 + }, + { + "ce_ib": 3.802079916000366, + "ce_orig": 0.6954262256622314, + "epoch": 0.5049967646847365, + "kl_loss": 0.08914148807525635, + "loss_ib": 0.001271622721105814, + "step": 1756 + }, + { + "ce_ib": 4.1940412521362305, + "ce_orig": 0.6664143800735474, + "epoch": 0.5049967646847365, + "kl_loss": 0.059560492634773254, + "loss_ib": 0.0010150090092793107, + "step": 1756 + }, + { + "ce_ib": 4.276806354522705, + "ce_orig": 0.6618050336837769, + "epoch": 0.5049967646847365, + "kl_loss": 0.05484602227807045, + "loss_ib": 0.0009761407854966819, + "step": 1756 + }, + { + "ce_ib": 2.7642500400543213, + "ce_orig": 0.4152410328388214, + "epoch": 0.5052843482637142, + "kl_loss": 0.10613086819648743, + "loss_ib": 0.0013377337018027902, + "step": 1757 + }, + { + "ce_ib": 6.457054615020752, + "ce_orig": 0.8633922338485718, + "epoch": 0.5052843482637142, + "kl_loss": 0.10131816565990448, + "loss_ib": 0.0016588871367275715, + "step": 1757 + }, + { + "ce_ib": 4.437014102935791, + "ce_orig": 1.032947063446045, + "epoch": 0.5052843482637142, + "kl_loss": 0.07439582049846649, + "loss_ib": 0.0011876595672219992, + "step": 1757 + }, + { + "ce_ib": 3.927295446395874, + "ce_orig": 0.8213032484054565, + "epoch": 0.5052843482637142, + "kl_loss": 0.06984390318393707, + "loss_ib": 0.0010911684948951006, + "step": 1757 + }, + { + "ce_ib": 6.9278693199157715, + "ce_orig": 1.4048984050750732, + "epoch": 0.5055719318426918, + "kl_loss": 0.19950547814369202, + "loss_ib": 0.0026878416538238525, + "step": 1758 + }, + { + "ce_ib": 4.840634346008301, + "ce_orig": 0.7703735828399658, + "epoch": 0.5055719318426918, + "kl_loss": 0.09456930309534073, + "loss_ib": 0.0014297564048320055, + "step": 1758 + }, + { + "ce_ib": 6.920482635498047, + "ce_orig": 0.8598986268043518, + "epoch": 0.5055719318426918, + "kl_loss": 0.058769647032022476, + "loss_ib": 0.0012797446688637137, + "step": 1758 + }, + { + "ce_ib": 7.270328044891357, + "ce_orig": 0.8928629755973816, + "epoch": 0.5055719318426918, + "kl_loss": 0.16049724817276, + "loss_ib": 0.0023320051841437817, + "step": 1758 + }, + { + "ce_ib": 7.589656829833984, + "ce_orig": 1.0071818828582764, + "epoch": 0.5058595154216694, + "kl_loss": 0.09681051969528198, + "loss_ib": 0.0017270707758143544, + "step": 1759 + }, + { + "ce_ib": 3.481691598892212, + "ce_orig": 0.4423674941062927, + "epoch": 0.5058595154216694, + "kl_loss": 0.1583106815814972, + "loss_ib": 0.0019312759395688772, + "step": 1759 + }, + { + "ce_ib": 6.392866134643555, + "ce_orig": 1.102768898010254, + "epoch": 0.5058595154216694, + "kl_loss": 0.07312214374542236, + "loss_ib": 0.0013705079909414053, + "step": 1759 + }, + { + "ce_ib": 4.338046550750732, + "ce_orig": 0.7104328274726868, + "epoch": 0.5058595154216694, + "kl_loss": 0.0648549348115921, + "loss_ib": 0.0010823539923876524, + "step": 1759 + }, + { + "epoch": 0.506147099000647, + "grad_norm": 0.09612871706485748, + "learning_rate": 4.754342800400852e-05, + "loss": 0.8668, + "step": 1760 + }, + { + "ce_ib": 4.129088878631592, + "ce_orig": 1.001846432685852, + "epoch": 0.506147099000647, + "kl_loss": 0.051527801901102066, + "loss_ib": 0.00092818692792207, + "step": 1760 + }, + { + "ce_ib": 4.406821250915527, + "ce_orig": 0.8956955671310425, + "epoch": 0.506147099000647, + "kl_loss": 0.1283273994922638, + "loss_ib": 0.0017239560838788748, + "step": 1760 + }, + { + "ce_ib": 4.636455059051514, + "ce_orig": 0.7169792652130127, + "epoch": 0.506147099000647, + "kl_loss": 0.11588340252637863, + "loss_ib": 0.0016224795253947377, + "step": 1760 + }, + { + "ce_ib": 5.2470808029174805, + "ce_orig": 0.9945482015609741, + "epoch": 0.506147099000647, + "kl_loss": 0.0669299066066742, + "loss_ib": 0.0011940071126446128, + "step": 1760 + }, + { + "ce_ib": 3.908897638320923, + "ce_orig": 0.9324092864990234, + "epoch": 0.5064346825796247, + "kl_loss": 0.1094617024064064, + "loss_ib": 0.0014855066547170281, + "step": 1761 + }, + { + "ce_ib": 5.8706841468811035, + "ce_orig": 1.096620798110962, + "epoch": 0.5064346825796247, + "kl_loss": 0.0675792470574379, + "loss_ib": 0.0012628608383238316, + "step": 1761 + }, + { + "ce_ib": 6.186111927032471, + "ce_orig": 0.8521334528923035, + "epoch": 0.5064346825796247, + "kl_loss": 0.10772758722305298, + "loss_ib": 0.0016958869528025389, + "step": 1761 + }, + { + "ce_ib": 3.482836961746216, + "ce_orig": 0.49571141600608826, + "epoch": 0.5064346825796247, + "kl_loss": 0.06385008245706558, + "loss_ib": 0.0009867845801636577, + "step": 1761 + }, + { + "ce_ib": 3.7395946979522705, + "ce_orig": 0.7395663261413574, + "epoch": 0.5067222661586024, + "kl_loss": 0.07331500947475433, + "loss_ib": 0.0011071095941588283, + "step": 1762 + }, + { + "ce_ib": 7.151701927185059, + "ce_orig": 1.1692900657653809, + "epoch": 0.5067222661586024, + "kl_loss": 0.05847446992993355, + "loss_ib": 0.0012999147875234485, + "step": 1762 + }, + { + "ce_ib": 5.947727203369141, + "ce_orig": 1.0413905382156372, + "epoch": 0.5067222661586024, + "kl_loss": 0.062116026878356934, + "loss_ib": 0.0012159328907728195, + "step": 1762 + }, + { + "ce_ib": 1.257192611694336, + "ce_orig": 0.12802673876285553, + "epoch": 0.5067222661586024, + "kl_loss": 0.20849010348320007, + "loss_ib": 0.0022106203250586987, + "step": 1762 + }, + { + "ce_ib": 5.67080020904541, + "ce_orig": 1.1660603284835815, + "epoch": 0.50700984973758, + "kl_loss": 0.07760757207870483, + "loss_ib": 0.0013431557454168797, + "step": 1763 + }, + { + "ce_ib": 6.104028701782227, + "ce_orig": 1.1988955736160278, + "epoch": 0.50700984973758, + "kl_loss": 0.11322954297065735, + "loss_ib": 0.0017426982522010803, + "step": 1763 + }, + { + "ce_ib": 6.720372676849365, + "ce_orig": 1.2614942789077759, + "epoch": 0.50700984973758, + "kl_loss": 0.0781300812959671, + "loss_ib": 0.0014533379580825567, + "step": 1763 + }, + { + "ce_ib": 6.195744037628174, + "ce_orig": 0.9090049266815186, + "epoch": 0.50700984973758, + "kl_loss": 0.09819072484970093, + "loss_ib": 0.0016014815773814917, + "step": 1763 + }, + { + "ce_ib": 4.931328296661377, + "ce_orig": 0.6541999578475952, + "epoch": 0.5072974333165576, + "kl_loss": 0.07424721121788025, + "loss_ib": 0.0012356048682704568, + "step": 1764 + }, + { + "ce_ib": 4.325194358825684, + "ce_orig": 0.9135701060295105, + "epoch": 0.5072974333165576, + "kl_loss": 0.09098568558692932, + "loss_ib": 0.0013423763448372483, + "step": 1764 + }, + { + "ce_ib": 4.428623676300049, + "ce_orig": 0.9130421876907349, + "epoch": 0.5072974333165576, + "kl_loss": 0.10139874368906021, + "loss_ib": 0.0014568498590961099, + "step": 1764 + }, + { + "ce_ib": 3.9288747310638428, + "ce_orig": 0.27669161558151245, + "epoch": 0.5072974333165576, + "kl_loss": 0.08933092653751373, + "loss_ib": 0.0012861966388300061, + "step": 1764 + }, + { + "epoch": 0.5075850168955353, + "grad_norm": 0.08229364454746246, + "learning_rate": 4.7526626363256264e-05, + "loss": 0.822, + "step": 1765 + }, + { + "ce_ib": 3.862168788909912, + "ce_orig": 0.7066456079483032, + "epoch": 0.5075850168955353, + "kl_loss": 0.08449746668338776, + "loss_ib": 0.0012311914470046759, + "step": 1765 + }, + { + "ce_ib": 5.854787826538086, + "ce_orig": 0.4140854775905609, + "epoch": 0.5075850168955353, + "kl_loss": 0.0997677743434906, + "loss_ib": 0.001583156525157392, + "step": 1765 + }, + { + "ce_ib": 3.8967857360839844, + "ce_orig": 0.7378742694854736, + "epoch": 0.5075850168955353, + "kl_loss": 0.055556364357471466, + "loss_ib": 0.0009452421800233424, + "step": 1765 + }, + { + "ce_ib": 4.762308597564697, + "ce_orig": 0.7006094455718994, + "epoch": 0.5075850168955353, + "kl_loss": 0.07786941528320312, + "loss_ib": 0.0012549249222502112, + "step": 1765 + }, + { + "ce_ib": 4.998976230621338, + "ce_orig": 0.9930461645126343, + "epoch": 0.5078726004745129, + "kl_loss": 0.06878222525119781, + "loss_ib": 0.0011877198703587055, + "step": 1766 + }, + { + "ce_ib": 6.143427848815918, + "ce_orig": 1.1252905130386353, + "epoch": 0.5078726004745129, + "kl_loss": 0.09324176609516144, + "loss_ib": 0.0015467604389414191, + "step": 1766 + }, + { + "ce_ib": 4.538148880004883, + "ce_orig": 0.6679875254631042, + "epoch": 0.5078726004745129, + "kl_loss": 0.07794864475727081, + "loss_ib": 0.0012333012418821454, + "step": 1766 + }, + { + "ce_ib": 8.552364349365234, + "ce_orig": 1.1823530197143555, + "epoch": 0.5078726004745129, + "kl_loss": 0.1540687382221222, + "loss_ib": 0.002395923715084791, + "step": 1766 + }, + { + "ce_ib": 2.8883609771728516, + "ce_orig": 0.2317194640636444, + "epoch": 0.5081601840534905, + "kl_loss": 0.14492864906787872, + "loss_ib": 0.0017381225479766726, + "step": 1767 + }, + { + "ce_ib": 3.89682674407959, + "ce_orig": 0.6030203104019165, + "epoch": 0.5081601840534905, + "kl_loss": 0.08308561146259308, + "loss_ib": 0.0012205387465655804, + "step": 1767 + }, + { + "ce_ib": 5.565364360809326, + "ce_orig": 1.4104336500167847, + "epoch": 0.5081601840534905, + "kl_loss": 0.08605089783668518, + "loss_ib": 0.001417045365087688, + "step": 1767 + }, + { + "ce_ib": 4.66987943649292, + "ce_orig": 0.809913158416748, + "epoch": 0.5081601840534905, + "kl_loss": 0.0784648209810257, + "loss_ib": 0.0012516360729932785, + "step": 1767 + }, + { + "ce_ib": 4.872735500335693, + "ce_orig": 0.898309051990509, + "epoch": 0.5084477676324682, + "kl_loss": 0.08341196179389954, + "loss_ib": 0.0013213930651545525, + "step": 1768 + }, + { + "ce_ib": 5.409481525421143, + "ce_orig": 1.023613452911377, + "epoch": 0.5084477676324682, + "kl_loss": 0.10570819675922394, + "loss_ib": 0.0015980299795046449, + "step": 1768 + }, + { + "ce_ib": 3.2785403728485107, + "ce_orig": 0.6372849941253662, + "epoch": 0.5084477676324682, + "kl_loss": 0.1252453625202179, + "loss_ib": 0.0015803076094016433, + "step": 1768 + }, + { + "ce_ib": 3.1991066932678223, + "ce_orig": 0.641861081123352, + "epoch": 0.5084477676324682, + "kl_loss": 0.06867444515228271, + "loss_ib": 0.0010066550457850099, + "step": 1768 + }, + { + "ce_ib": 3.2764604091644287, + "ce_orig": 0.47703030705451965, + "epoch": 0.5087353512114459, + "kl_loss": 0.09576079249382019, + "loss_ib": 0.0012852539075538516, + "step": 1769 + }, + { + "ce_ib": 3.566113233566284, + "ce_orig": 0.7106217741966248, + "epoch": 0.5087353512114459, + "kl_loss": 0.11011755466461182, + "loss_ib": 0.001457786769606173, + "step": 1769 + }, + { + "ce_ib": 3.5261590480804443, + "ce_orig": 0.6616961359977722, + "epoch": 0.5087353512114459, + "kl_loss": 0.11149020493030548, + "loss_ib": 0.0014675179263576865, + "step": 1769 + }, + { + "ce_ib": 4.568676471710205, + "ce_orig": 0.4977152347564697, + "epoch": 0.5087353512114459, + "kl_loss": 0.15457169711589813, + "loss_ib": 0.0020025845151394606, + "step": 1769 + }, + { + "epoch": 0.5090229347904235, + "grad_norm": 0.08699528872966766, + "learning_rate": 4.750977045063256e-05, + "loss": 0.8033, + "step": 1770 + }, + { + "ce_ib": 5.102743625640869, + "ce_orig": 0.6232215762138367, + "epoch": 0.5090229347904235, + "kl_loss": 0.08821675181388855, + "loss_ib": 0.001392441801726818, + "step": 1770 + }, + { + "ce_ib": 5.086276054382324, + "ce_orig": 0.9885613322257996, + "epoch": 0.5090229347904235, + "kl_loss": 0.0929737389087677, + "loss_ib": 0.0014383649686351418, + "step": 1770 + }, + { + "ce_ib": 2.0016775131225586, + "ce_orig": 0.24005869030952454, + "epoch": 0.5090229347904235, + "kl_loss": 0.0741710215806961, + "loss_ib": 0.0009418779518455267, + "step": 1770 + }, + { + "ce_ib": 2.505354642868042, + "ce_orig": 0.3593565821647644, + "epoch": 0.5090229347904235, + "kl_loss": 0.0835561454296112, + "loss_ib": 0.0010860968613997102, + "step": 1770 + }, + { + "ce_ib": 3.728950023651123, + "ce_orig": 0.3645147979259491, + "epoch": 0.5093105183694011, + "kl_loss": 0.07489664107561111, + "loss_ib": 0.001121861394494772, + "step": 1771 + }, + { + "ce_ib": 8.625306129455566, + "ce_orig": 2.0382447242736816, + "epoch": 0.5093105183694011, + "kl_loss": 0.11205710470676422, + "loss_ib": 0.001983101712539792, + "step": 1771 + }, + { + "ce_ib": 3.5968687534332275, + "ce_orig": 0.8689912557601929, + "epoch": 0.5093105183694011, + "kl_loss": 0.072394959628582, + "loss_ib": 0.001083636423572898, + "step": 1771 + }, + { + "ce_ib": 3.425039529800415, + "ce_orig": 0.5296727418899536, + "epoch": 0.5093105183694011, + "kl_loss": 0.04901398345828056, + "loss_ib": 0.0008326437673531473, + "step": 1771 + }, + { + "ce_ib": 4.597757816314697, + "ce_orig": 0.8231284022331238, + "epoch": 0.5095981019483787, + "kl_loss": 0.09374092519283295, + "loss_ib": 0.0013971850275993347, + "step": 1772 + }, + { + "ce_ib": 4.463605880737305, + "ce_orig": 0.6693673729896545, + "epoch": 0.5095981019483787, + "kl_loss": 0.08627757430076599, + "loss_ib": 0.0013091362779960036, + "step": 1772 + }, + { + "ce_ib": 7.218458652496338, + "ce_orig": 1.4844129085540771, + "epoch": 0.5095981019483787, + "kl_loss": 0.11206276714801788, + "loss_ib": 0.0018424734007567167, + "step": 1772 + }, + { + "ce_ib": 4.386680603027344, + "ce_orig": 0.9053648710250854, + "epoch": 0.5095981019483787, + "kl_loss": 0.22263477742671967, + "loss_ib": 0.002665015636011958, + "step": 1772 + }, + { + "ce_ib": 4.831755638122559, + "ce_orig": 0.9517534971237183, + "epoch": 0.5098856855273564, + "kl_loss": 0.09549269825220108, + "loss_ib": 0.001438102568499744, + "step": 1773 + }, + { + "ce_ib": 4.96191930770874, + "ce_orig": 0.42862173914909363, + "epoch": 0.5098856855273564, + "kl_loss": 0.15376532077789307, + "loss_ib": 0.00203384505584836, + "step": 1773 + }, + { + "ce_ib": 4.940501689910889, + "ce_orig": 0.8946219086647034, + "epoch": 0.5098856855273564, + "kl_loss": 0.056947022676467896, + "loss_ib": 0.0010635203216224909, + "step": 1773 + }, + { + "ce_ib": 8.19971752166748, + "ce_orig": 1.5476789474487305, + "epoch": 0.5098856855273564, + "kl_loss": 0.07117143273353577, + "loss_ib": 0.001531686051748693, + "step": 1773 + }, + { + "ce_ib": 5.3299407958984375, + "ce_orig": 0.8949192762374878, + "epoch": 0.510173269106334, + "kl_loss": 0.090696319937706, + "loss_ib": 0.0014399572974070907, + "step": 1774 + }, + { + "ce_ib": 2.814570188522339, + "ce_orig": 0.6373908519744873, + "epoch": 0.510173269106334, + "kl_loss": 0.07348345220088959, + "loss_ib": 0.001016291556879878, + "step": 1774 + }, + { + "ce_ib": 3.8231544494628906, + "ce_orig": 0.702505886554718, + "epoch": 0.510173269106334, + "kl_loss": 0.07547560334205627, + "loss_ib": 0.0011370714055374265, + "step": 1774 + }, + { + "ce_ib": 6.458587646484375, + "ce_orig": 1.2107186317443848, + "epoch": 0.510173269106334, + "kl_loss": 0.09314244985580444, + "loss_ib": 0.0015772831393405795, + "step": 1774 + }, + { + "epoch": 0.5104608526853117, + "grad_norm": 0.08893676847219467, + "learning_rate": 4.749286030674721e-05, + "loss": 0.8205, + "step": 1775 + }, + { + "ce_ib": 6.961551189422607, + "ce_orig": 0.9440939426422119, + "epoch": 0.5104608526853117, + "kl_loss": 0.07369294762611389, + "loss_ib": 0.0014330846024677157, + "step": 1775 + }, + { + "ce_ib": 4.587409496307373, + "ce_orig": 0.9834845066070557, + "epoch": 0.5104608526853117, + "kl_loss": 0.06986919045448303, + "loss_ib": 0.0011574327945709229, + "step": 1775 + }, + { + "ce_ib": 4.577333450317383, + "ce_orig": 0.3774460256099701, + "epoch": 0.5104608526853117, + "kl_loss": 0.11621811985969543, + "loss_ib": 0.001619914430193603, + "step": 1775 + }, + { + "ce_ib": 7.149835586547852, + "ce_orig": 1.6160179376602173, + "epoch": 0.5104608526853117, + "kl_loss": 0.11098894476890564, + "loss_ib": 0.0018248730339109898, + "step": 1775 + }, + { + "ce_ib": 1.6233744621276855, + "ce_orig": 0.16389018297195435, + "epoch": 0.5107484362642893, + "kl_loss": 0.21676485240459442, + "loss_ib": 0.0023299858439713717, + "step": 1776 + }, + { + "ce_ib": 4.949944019317627, + "ce_orig": 0.9170655012130737, + "epoch": 0.5107484362642893, + "kl_loss": 0.08072083443403244, + "loss_ib": 0.001302202814258635, + "step": 1776 + }, + { + "ce_ib": 5.64072847366333, + "ce_orig": 0.9654892086982727, + "epoch": 0.5107484362642893, + "kl_loss": 0.1329715996980667, + "loss_ib": 0.0018937888089567423, + "step": 1776 + }, + { + "ce_ib": 5.414496421813965, + "ce_orig": 1.1029136180877686, + "epoch": 0.5107484362642893, + "kl_loss": 0.11805348843336105, + "loss_ib": 0.001721984357573092, + "step": 1776 + }, + { + "ce_ib": 4.067188262939453, + "ce_orig": 0.8657865524291992, + "epoch": 0.511036019843267, + "kl_loss": 0.0842527374625206, + "loss_ib": 0.0012492460664361715, + "step": 1777 + }, + { + "ce_ib": 3.8845126628875732, + "ce_orig": 0.889708399772644, + "epoch": 0.511036019843267, + "kl_loss": 0.09745290875434875, + "loss_ib": 0.0013629802269861102, + "step": 1777 + }, + { + "ce_ib": 4.470139980316162, + "ce_orig": 0.692625105381012, + "epoch": 0.511036019843267, + "kl_loss": 0.2262895554304123, + "loss_ib": 0.00270990957506001, + "step": 1777 + }, + { + "ce_ib": 5.353429317474365, + "ce_orig": 0.6145848631858826, + "epoch": 0.511036019843267, + "kl_loss": 0.10075217485427856, + "loss_ib": 0.0015428647166118026, + "step": 1777 + }, + { + "ce_ib": 4.671683311462402, + "ce_orig": 1.044663906097412, + "epoch": 0.5113236034222446, + "kl_loss": 0.06150062382221222, + "loss_ib": 0.0010821744799613953, + "step": 1778 + }, + { + "ce_ib": 2.9225199222564697, + "ce_orig": 0.41982778906822205, + "epoch": 0.5113236034222446, + "kl_loss": 0.1266959011554718, + "loss_ib": 0.001559210941195488, + "step": 1778 + }, + { + "ce_ib": 4.2299370765686035, + "ce_orig": 0.8129241466522217, + "epoch": 0.5113236034222446, + "kl_loss": 0.08659450709819794, + "loss_ib": 0.0012889388017356396, + "step": 1778 + }, + { + "ce_ib": 2.318153142929077, + "ce_orig": 0.24173253774642944, + "epoch": 0.5113236034222446, + "kl_loss": 0.21548357605934143, + "loss_ib": 0.002386651001870632, + "step": 1778 + }, + { + "ce_ib": 5.922190189361572, + "ce_orig": 0.9133435487747192, + "epoch": 0.5116111870012222, + "kl_loss": 0.15004266798496246, + "loss_ib": 0.002092645736411214, + "step": 1779 + }, + { + "ce_ib": 4.460919380187988, + "ce_orig": 0.5385552644729614, + "epoch": 0.5116111870012222, + "kl_loss": 0.11327634751796722, + "loss_ib": 0.0015788553282618523, + "step": 1779 + }, + { + "ce_ib": 4.0045037269592285, + "ce_orig": 0.8596768975257874, + "epoch": 0.5116111870012222, + "kl_loss": 0.10909485816955566, + "loss_ib": 0.0014913988998159766, + "step": 1779 + }, + { + "ce_ib": 5.77869176864624, + "ce_orig": 1.0787780284881592, + "epoch": 0.5116111870012222, + "kl_loss": 0.09324748814105988, + "loss_ib": 0.0015103439800441265, + "step": 1779 + }, + { + "epoch": 0.5118987705801998, + "grad_norm": 0.09148537367582321, + "learning_rate": 4.747589597234068e-05, + "loss": 0.8597, + "step": 1780 + }, + { + "ce_ib": 5.600949287414551, + "ce_orig": 0.878082811832428, + "epoch": 0.5118987705801998, + "kl_loss": 0.32276013493537903, + "loss_ib": 0.003787696361541748, + "step": 1780 + }, + { + "ce_ib": 3.198049783706665, + "ce_orig": 0.49984225630760193, + "epoch": 0.5118987705801998, + "kl_loss": 0.06508980691432953, + "loss_ib": 0.0009707030258141458, + "step": 1780 + }, + { + "ce_ib": 4.201373100280762, + "ce_orig": 0.8684957027435303, + "epoch": 0.5118987705801998, + "kl_loss": 0.07928602397441864, + "loss_ib": 0.0012129975948482752, + "step": 1780 + }, + { + "ce_ib": 4.552159786224365, + "ce_orig": 1.0664016008377075, + "epoch": 0.5118987705801998, + "kl_loss": 0.06013708934187889, + "loss_ib": 0.0010565868578851223, + "step": 1780 + }, + { + "ce_ib": 5.037264823913574, + "ce_orig": 0.7394843697547913, + "epoch": 0.5121863541591775, + "kl_loss": 0.08530676364898682, + "loss_ib": 0.0013567940331995487, + "step": 1781 + }, + { + "ce_ib": 4.759026527404785, + "ce_orig": 0.7003152966499329, + "epoch": 0.5121863541591775, + "kl_loss": 0.09705665707588196, + "loss_ib": 0.001446469104848802, + "step": 1781 + }, + { + "ce_ib": 6.053905963897705, + "ce_orig": 1.0476199388504028, + "epoch": 0.5121863541591775, + "kl_loss": 0.10392187535762787, + "loss_ib": 0.0016446092631667852, + "step": 1781 + }, + { + "ce_ib": 5.060477256774902, + "ce_orig": 0.6719687581062317, + "epoch": 0.5121863541591775, + "kl_loss": 0.07917675375938416, + "loss_ib": 0.0012978152371942997, + "step": 1781 + }, + { + "ce_ib": 7.035765171051025, + "ce_orig": 1.5783270597457886, + "epoch": 0.5124739377381552, + "kl_loss": 0.05607299134135246, + "loss_ib": 0.0012643064837902784, + "step": 1782 + }, + { + "ce_ib": 3.236018657684326, + "ce_orig": 0.6313103437423706, + "epoch": 0.5124739377381552, + "kl_loss": 0.08075962960720062, + "loss_ib": 0.0011311981361359358, + "step": 1782 + }, + { + "ce_ib": 4.3718953132629395, + "ce_orig": 0.5801230669021606, + "epoch": 0.5124739377381552, + "kl_loss": 0.12816086411476135, + "loss_ib": 0.0017187980702146888, + "step": 1782 + }, + { + "ce_ib": 6.645564556121826, + "ce_orig": 1.6162117719650269, + "epoch": 0.5124739377381552, + "kl_loss": 0.08607015013694763, + "loss_ib": 0.0015252579469233751, + "step": 1782 + }, + { + "ce_ib": 2.7030863761901855, + "ce_orig": 0.5469380617141724, + "epoch": 0.5127615213171328, + "kl_loss": 0.06018144637346268, + "loss_ib": 0.0008721230551600456, + "step": 1783 + }, + { + "ce_ib": 4.05257511138916, + "ce_orig": 0.6803352236747742, + "epoch": 0.5127615213171328, + "kl_loss": 0.06617367267608643, + "loss_ib": 0.0010669941548258066, + "step": 1783 + }, + { + "ce_ib": 3.9796226024627686, + "ce_orig": 0.7345646023750305, + "epoch": 0.5127615213171328, + "kl_loss": 0.05275813490152359, + "loss_ib": 0.0009255436016246676, + "step": 1783 + }, + { + "ce_ib": 6.033324241638184, + "ce_orig": 1.1279188394546509, + "epoch": 0.5127615213171328, + "kl_loss": 0.11568685621023178, + "loss_ib": 0.0017602009465917945, + "step": 1783 + }, + { + "ce_ib": 6.469935894012451, + "ce_orig": 0.9484708905220032, + "epoch": 0.5130491048961104, + "kl_loss": 0.08917199820280075, + "loss_ib": 0.0015387135790660977, + "step": 1784 + }, + { + "ce_ib": 5.98094367980957, + "ce_orig": 0.7051974534988403, + "epoch": 0.5130491048961104, + "kl_loss": 0.06926282495260239, + "loss_ib": 0.0012907226337119937, + "step": 1784 + }, + { + "ce_ib": 2.932389259338379, + "ce_orig": 0.3347587287425995, + "epoch": 0.5130491048961104, + "kl_loss": 0.14469224214553833, + "loss_ib": 0.0017401613295078278, + "step": 1784 + }, + { + "ce_ib": 3.5781795978546143, + "ce_orig": 0.5990049839019775, + "epoch": 0.5130491048961104, + "kl_loss": 0.0620713047683239, + "loss_ib": 0.0009785309666767716, + "step": 1784 + }, + { + "epoch": 0.5133366884750881, + "grad_norm": 0.08898758888244629, + "learning_rate": 4.745887748828398e-05, + "loss": 0.9157, + "step": 1785 + }, + { + "ce_ib": 4.582487106323242, + "ce_orig": 0.759151041507721, + "epoch": 0.5133366884750881, + "kl_loss": 0.06483379006385803, + "loss_ib": 0.0011065865401178598, + "step": 1785 + }, + { + "ce_ib": 5.357223033905029, + "ce_orig": 0.9177649617195129, + "epoch": 0.5133366884750881, + "kl_loss": 0.06491922587156296, + "loss_ib": 0.00118491449393332, + "step": 1785 + }, + { + "ce_ib": 2.4972574710845947, + "ce_orig": 0.550807774066925, + "epoch": 0.5133366884750881, + "kl_loss": 0.054967381060123444, + "loss_ib": 0.0007993995095603168, + "step": 1785 + }, + { + "ce_ib": 4.205108165740967, + "ce_orig": 0.6275907754898071, + "epoch": 0.5133366884750881, + "kl_loss": 0.10267739742994308, + "loss_ib": 0.0014472848270088434, + "step": 1785 + }, + { + "ce_ib": 3.576864004135132, + "ce_orig": 0.5575857758522034, + "epoch": 0.5136242720540657, + "kl_loss": 0.12365647405385971, + "loss_ib": 0.0015942510217428207, + "step": 1786 + }, + { + "ce_ib": 5.023237228393555, + "ce_orig": 0.9665496349334717, + "epoch": 0.5136242720540657, + "kl_loss": 0.06915776431560516, + "loss_ib": 0.001193901291117072, + "step": 1786 + }, + { + "ce_ib": 5.6870245933532715, + "ce_orig": 0.9943622350692749, + "epoch": 0.5136242720540657, + "kl_loss": 0.13347890973091125, + "loss_ib": 0.00190349156036973, + "step": 1786 + }, + { + "ce_ib": 6.550649642944336, + "ce_orig": 0.9684162735939026, + "epoch": 0.5136242720540657, + "kl_loss": 0.06809549033641815, + "loss_ib": 0.0013360199518501759, + "step": 1786 + }, + { + "ce_ib": 5.541101932525635, + "ce_orig": 0.6955784559249878, + "epoch": 0.5139118556330433, + "kl_loss": 0.1498645544052124, + "loss_ib": 0.0020527555607259274, + "step": 1787 + }, + { + "ce_ib": 5.258474349975586, + "ce_orig": 0.629980206489563, + "epoch": 0.5139118556330433, + "kl_loss": 0.14708316326141357, + "loss_ib": 0.001996678998693824, + "step": 1787 + }, + { + "ce_ib": 6.486989974975586, + "ce_orig": 1.3173272609710693, + "epoch": 0.5139118556330433, + "kl_loss": 0.07136699557304382, + "loss_ib": 0.001362368930131197, + "step": 1787 + }, + { + "ce_ib": 3.2799532413482666, + "ce_orig": 0.48049771785736084, + "epoch": 0.5139118556330433, + "kl_loss": 0.0726211667060852, + "loss_ib": 0.0010542068630456924, + "step": 1787 + }, + { + "ce_ib": 4.979735374450684, + "ce_orig": 0.6353864073753357, + "epoch": 0.514199439212021, + "kl_loss": 0.2991983890533447, + "loss_ib": 0.0034899574238806963, + "step": 1788 + }, + { + "ce_ib": 4.659762382507324, + "ce_orig": 0.6701453924179077, + "epoch": 0.514199439212021, + "kl_loss": 0.1031075268983841, + "loss_ib": 0.0014970513293519616, + "step": 1788 + }, + { + "ce_ib": 6.794959545135498, + "ce_orig": 0.620962381362915, + "epoch": 0.514199439212021, + "kl_loss": 0.13857750594615936, + "loss_ib": 0.002065270906314254, + "step": 1788 + }, + { + "ce_ib": 5.957469463348389, + "ce_orig": 0.6336969137191772, + "epoch": 0.514199439212021, + "kl_loss": 0.1691070944070816, + "loss_ib": 0.002286817878484726, + "step": 1788 + }, + { + "ce_ib": 8.929947853088379, + "ce_orig": 1.8512810468673706, + "epoch": 0.5144870227909987, + "kl_loss": 0.12855695188045502, + "loss_ib": 0.0021785644348710775, + "step": 1789 + }, + { + "ce_ib": 5.093568325042725, + "ce_orig": 0.7419148087501526, + "epoch": 0.5144870227909987, + "kl_loss": 0.06420955061912537, + "loss_ib": 0.0011514523066580296, + "step": 1789 + }, + { + "ce_ib": 2.9192957878112793, + "ce_orig": 0.5708819031715393, + "epoch": 0.5144870227909987, + "kl_loss": 0.05380064249038696, + "loss_ib": 0.0008299359469674528, + "step": 1789 + }, + { + "ce_ib": 3.4499402046203613, + "ce_orig": 0.7110227942466736, + "epoch": 0.5144870227909987, + "kl_loss": 0.0810091644525528, + "loss_ib": 0.0011550856288522482, + "step": 1789 + }, + { + "epoch": 0.5147746063699763, + "grad_norm": 0.09902392327785492, + "learning_rate": 4.7441804895578596e-05, + "loss": 0.851, + "step": 1790 + }, + { + "ce_ib": 4.804992198944092, + "ce_orig": 0.8432773947715759, + "epoch": 0.5147746063699763, + "kl_loss": 0.06772968173027039, + "loss_ib": 0.0011577960103750229, + "step": 1790 + }, + { + "ce_ib": 4.891820907592773, + "ce_orig": 0.8863018751144409, + "epoch": 0.5147746063699763, + "kl_loss": 0.06498017907142639, + "loss_ib": 0.0011389837600290775, + "step": 1790 + }, + { + "ce_ib": 5.202576160430908, + "ce_orig": 0.49562036991119385, + "epoch": 0.5147746063699763, + "kl_loss": 0.10109511762857437, + "loss_ib": 0.0015312087489292026, + "step": 1790 + }, + { + "ce_ib": 5.438233375549316, + "ce_orig": 0.9678243398666382, + "epoch": 0.5147746063699763, + "kl_loss": 0.09614156931638718, + "loss_ib": 0.0015052389353513718, + "step": 1790 + }, + { + "ce_ib": 7.572583198547363, + "ce_orig": 1.5677707195281982, + "epoch": 0.5150621899489539, + "kl_loss": 0.06213182210922241, + "loss_ib": 0.0013785763876512647, + "step": 1791 + }, + { + "ce_ib": 3.577009677886963, + "ce_orig": 0.35877156257629395, + "epoch": 0.5150621899489539, + "kl_loss": 0.08650585263967514, + "loss_ib": 0.001222759485244751, + "step": 1791 + }, + { + "ce_ib": 3.016291379928589, + "ce_orig": 0.5378506779670715, + "epoch": 0.5150621899489539, + "kl_loss": 0.06493708491325378, + "loss_ib": 0.0009509999654255807, + "step": 1791 + }, + { + "ce_ib": 7.297212600708008, + "ce_orig": 1.480816125869751, + "epoch": 0.5150621899489539, + "kl_loss": 0.09432238340377808, + "loss_ib": 0.0016729449853301048, + "step": 1791 + }, + { + "ce_ib": 5.155754566192627, + "ce_orig": 0.6995849609375, + "epoch": 0.5153497735279315, + "kl_loss": 0.059285104274749756, + "loss_ib": 0.0011084264842793345, + "step": 1792 + }, + { + "ce_ib": 5.2729949951171875, + "ce_orig": 0.5867595672607422, + "epoch": 0.5153497735279315, + "kl_loss": 0.08403134346008301, + "loss_ib": 0.0013676128583028913, + "step": 1792 + }, + { + "ce_ib": 5.368934631347656, + "ce_orig": 0.8686208128929138, + "epoch": 0.5153497735279315, + "kl_loss": 0.08417310565710068, + "loss_ib": 0.0013786243507638574, + "step": 1792 + }, + { + "ce_ib": 4.690865993499756, + "ce_orig": 0.7399212121963501, + "epoch": 0.5153497735279315, + "kl_loss": 0.09189394116401672, + "loss_ib": 0.0013880260521546006, + "step": 1792 + }, + { + "ce_ib": 5.363914489746094, + "ce_orig": 0.9444307684898376, + "epoch": 0.5156373571069092, + "kl_loss": 0.057573676109313965, + "loss_ib": 0.0011121281422674656, + "step": 1793 + }, + { + "ce_ib": 3.1041059494018555, + "ce_orig": 0.5543800592422485, + "epoch": 0.5156373571069092, + "kl_loss": 0.11022639274597168, + "loss_ib": 0.0014126744354143739, + "step": 1793 + }, + { + "ce_ib": 6.091623306274414, + "ce_orig": 1.169345498085022, + "epoch": 0.5156373571069092, + "kl_loss": 0.08374954015016556, + "loss_ib": 0.0014466576976701617, + "step": 1793 + }, + { + "ce_ib": 4.993160247802734, + "ce_orig": 0.40856286883354187, + "epoch": 0.5156373571069092, + "kl_loss": 0.0718187466263771, + "loss_ib": 0.0012175034498795867, + "step": 1793 + }, + { + "ce_ib": 2.587731122970581, + "ce_orig": 0.32825371623039246, + "epoch": 0.5159249406858868, + "kl_loss": 0.1468496471643448, + "loss_ib": 0.001727269496768713, + "step": 1794 + }, + { + "ce_ib": 6.132460594177246, + "ce_orig": 1.0242807865142822, + "epoch": 0.5159249406858868, + "kl_loss": 0.07819671928882599, + "loss_ib": 0.001395213301293552, + "step": 1794 + }, + { + "ce_ib": 6.823421955108643, + "ce_orig": 1.1650936603546143, + "epoch": 0.5159249406858868, + "kl_loss": 0.09522058069705963, + "loss_ib": 0.0016345479525625706, + "step": 1794 + }, + { + "ce_ib": 5.848074436187744, + "ce_orig": 1.1733639240264893, + "epoch": 0.5159249406858868, + "kl_loss": 0.11129340529441833, + "loss_ib": 0.001697741448879242, + "step": 1794 + }, + { + "epoch": 0.5162125242648645, + "grad_norm": 0.08269287645816803, + "learning_rate": 4.7424678235356365e-05, + "loss": 0.8264, + "step": 1795 + }, + { + "ce_ib": 5.553140640258789, + "ce_orig": 1.3773504495620728, + "epoch": 0.5162125242648645, + "kl_loss": 0.07974319159984589, + "loss_ib": 0.0013527458067983389, + "step": 1795 + }, + { + "ce_ib": 5.826701641082764, + "ce_orig": 0.48632243275642395, + "epoch": 0.5162125242648645, + "kl_loss": 0.10959632694721222, + "loss_ib": 0.0016786333872005343, + "step": 1795 + }, + { + "ce_ib": 4.373113632202148, + "ce_orig": 0.9363877773284912, + "epoch": 0.5162125242648645, + "kl_loss": 0.080726258456707, + "loss_ib": 0.0012445738539099693, + "step": 1795 + }, + { + "ce_ib": 5.803233623504639, + "ce_orig": 0.8773140907287598, + "epoch": 0.5162125242648645, + "kl_loss": 0.10161232203245163, + "loss_ib": 0.0015964466147124767, + "step": 1795 + }, + { + "ce_ib": 5.468360424041748, + "ce_orig": 0.5933574438095093, + "epoch": 0.5165001078438421, + "kl_loss": 0.12443482130765915, + "loss_ib": 0.0017911841860041022, + "step": 1796 + }, + { + "ce_ib": 8.15287971496582, + "ce_orig": 1.226313829421997, + "epoch": 0.5165001078438421, + "kl_loss": 0.12663553655147552, + "loss_ib": 0.0020816433243453503, + "step": 1796 + }, + { + "ce_ib": 6.532032489776611, + "ce_orig": 1.2739646434783936, + "epoch": 0.5165001078438421, + "kl_loss": 0.10157695412635803, + "loss_ib": 0.0016689726617187262, + "step": 1796 + }, + { + "ce_ib": 5.628530025482178, + "ce_orig": 1.185591697692871, + "epoch": 0.5165001078438421, + "kl_loss": 0.07102993875741959, + "loss_ib": 0.001273152302019298, + "step": 1796 + }, + { + "ce_ib": 4.403310298919678, + "ce_orig": 0.608948826789856, + "epoch": 0.5167876914228198, + "kl_loss": 0.08276194334030151, + "loss_ib": 0.0012679505161941051, + "step": 1797 + }, + { + "ce_ib": 4.430848121643066, + "ce_orig": 0.7969364523887634, + "epoch": 0.5167876914228198, + "kl_loss": 0.10147301852703094, + "loss_ib": 0.0014578149421140552, + "step": 1797 + }, + { + "ce_ib": 4.255521297454834, + "ce_orig": 0.7000121474266052, + "epoch": 0.5167876914228198, + "kl_loss": 0.09942778944969177, + "loss_ib": 0.0014198300195857882, + "step": 1797 + }, + { + "ce_ib": 5.171020984649658, + "ce_orig": 0.6277419924736023, + "epoch": 0.5167876914228198, + "kl_loss": 0.1318666636943817, + "loss_ib": 0.0018357685767114162, + "step": 1797 + }, + { + "ce_ib": 6.474034309387207, + "ce_orig": 1.3310670852661133, + "epoch": 0.5170752750017974, + "kl_loss": 0.08396780490875244, + "loss_ib": 0.001487081521190703, + "step": 1798 + }, + { + "ce_ib": 4.748281002044678, + "ce_orig": 0.7798356413841248, + "epoch": 0.5170752750017974, + "kl_loss": 0.07680720090866089, + "loss_ib": 0.0012429000344127417, + "step": 1798 + }, + { + "ce_ib": 3.959460735321045, + "ce_orig": 0.6202555894851685, + "epoch": 0.5170752750017974, + "kl_loss": 0.05460125207901001, + "loss_ib": 0.0009419585694558918, + "step": 1798 + }, + { + "ce_ib": 5.445580959320068, + "ce_orig": 0.9552915096282959, + "epoch": 0.5170752750017974, + "kl_loss": 0.07115896046161652, + "loss_ib": 0.0012561476323753595, + "step": 1798 + }, + { + "ce_ib": 6.414239406585693, + "ce_orig": 0.9398961067199707, + "epoch": 0.517362858580775, + "kl_loss": 0.07086366415023804, + "loss_ib": 0.0013500605709850788, + "step": 1799 + }, + { + "ce_ib": 6.406497955322266, + "ce_orig": 0.9640332460403442, + "epoch": 0.517362858580775, + "kl_loss": 0.0996541678905487, + "loss_ib": 0.0016371913952752948, + "step": 1799 + }, + { + "ce_ib": 6.483564853668213, + "ce_orig": 1.2220382690429688, + "epoch": 0.517362858580775, + "kl_loss": 0.09754916280508041, + "loss_ib": 0.0016238479875028133, + "step": 1799 + }, + { + "ce_ib": 6.463403701782227, + "ce_orig": 0.6011400818824768, + "epoch": 0.517362858580775, + "kl_loss": 0.15577423572540283, + "loss_ib": 0.0022040826734155416, + "step": 1799 + }, + { + "epoch": 0.5176504421597526, + "grad_norm": 0.08627192676067352, + "learning_rate": 4.7407497548879384e-05, + "loss": 0.8859, + "step": 1800 + }, + { + "ce_ib": 3.4012506008148193, + "ce_orig": 0.4643048644065857, + "epoch": 0.5176504421597526, + "kl_loss": 0.11117995530366898, + "loss_ib": 0.001451924559660256, + "step": 1800 + }, + { + "ce_ib": 5.989325046539307, + "ce_orig": 0.9873146414756775, + "epoch": 0.5176504421597526, + "kl_loss": 0.10967859625816345, + "loss_ib": 0.0016957183834165335, + "step": 1800 + }, + { + "ce_ib": 3.348367691040039, + "ce_orig": 0.42767611145973206, + "epoch": 0.5176504421597526, + "kl_loss": 0.10696423053741455, + "loss_ib": 0.0014044791460037231, + "step": 1800 + }, + { + "ce_ib": 4.912835121154785, + "ce_orig": 0.9151213765144348, + "epoch": 0.5176504421597526, + "kl_loss": 0.1404777318239212, + "loss_ib": 0.0018960607703775167, + "step": 1800 + }, + { + "ce_ib": 2.7078704833984375, + "ce_orig": 0.4498378336429596, + "epoch": 0.5179380257387303, + "kl_loss": 0.20132394134998322, + "loss_ib": 0.0022840264718979597, + "step": 1801 + }, + { + "ce_ib": 5.304815292358398, + "ce_orig": 0.6368709802627563, + "epoch": 0.5179380257387303, + "kl_loss": 0.1034865528345108, + "loss_ib": 0.001565346959978342, + "step": 1801 + }, + { + "ce_ib": 3.205249547958374, + "ce_orig": 0.5847539901733398, + "epoch": 0.5179380257387303, + "kl_loss": 0.06835612654685974, + "loss_ib": 0.0010040862252935767, + "step": 1801 + }, + { + "ce_ib": 5.289492607116699, + "ce_orig": 0.6419307589530945, + "epoch": 0.5179380257387303, + "kl_loss": 0.08543986827135086, + "loss_ib": 0.0013833479024469852, + "step": 1801 + }, + { + "ce_ib": 3.7555058002471924, + "ce_orig": 0.543796718120575, + "epoch": 0.518225609317708, + "kl_loss": 0.09083995968103409, + "loss_ib": 0.0012839501723647118, + "step": 1802 + }, + { + "ce_ib": 3.7138619422912598, + "ce_orig": 0.6342188119888306, + "epoch": 0.518225609317708, + "kl_loss": 0.09891486167907715, + "loss_ib": 0.0013605346903204918, + "step": 1802 + }, + { + "ce_ib": 7.458725929260254, + "ce_orig": 1.1483128070831299, + "epoch": 0.518225609317708, + "kl_loss": 0.15071004629135132, + "loss_ib": 0.0022529729176312685, + "step": 1802 + }, + { + "ce_ib": 4.87919807434082, + "ce_orig": 0.7931258678436279, + "epoch": 0.518225609317708, + "kl_loss": 0.08113189786672592, + "loss_ib": 0.0012992387637495995, + "step": 1802 + }, + { + "ce_ib": 3.0352590084075928, + "ce_orig": 0.6244330406188965, + "epoch": 0.5185131928966856, + "kl_loss": 0.09762992709875107, + "loss_ib": 0.0012798251118510962, + "step": 1803 + }, + { + "ce_ib": 5.82951545715332, + "ce_orig": 0.6780531406402588, + "epoch": 0.5185131928966856, + "kl_loss": 0.198766827583313, + "loss_ib": 0.0025706198066473007, + "step": 1803 + }, + { + "ce_ib": 4.321685314178467, + "ce_orig": 0.6965339183807373, + "epoch": 0.5185131928966856, + "kl_loss": 0.09260575473308563, + "loss_ib": 0.0013582260580733418, + "step": 1803 + }, + { + "ce_ib": 5.852208137512207, + "ce_orig": 1.1622402667999268, + "epoch": 0.5185131928966856, + "kl_loss": 0.06015312299132347, + "loss_ib": 0.0011867519933730364, + "step": 1803 + }, + { + "ce_ib": 4.550256252288818, + "ce_orig": 0.6395211815834045, + "epoch": 0.5188007764756633, + "kl_loss": 0.08066686242818832, + "loss_ib": 0.0012616942403838038, + "step": 1804 + }, + { + "ce_ib": 3.471066951751709, + "ce_orig": 0.7321628332138062, + "epoch": 0.5188007764756633, + "kl_loss": 0.046288181096315384, + "loss_ib": 0.000809988530818373, + "step": 1804 + }, + { + "ce_ib": 5.920793533325195, + "ce_orig": 1.1064788103103638, + "epoch": 0.5188007764756633, + "kl_loss": 0.09294071048498154, + "loss_ib": 0.001521486439742148, + "step": 1804 + }, + { + "ce_ib": 4.934189319610596, + "ce_orig": 1.2711670398712158, + "epoch": 0.5188007764756633, + "kl_loss": 0.10551372170448303, + "loss_ib": 0.0015485560288652778, + "step": 1804 + }, + { + "epoch": 0.5190883600546409, + "grad_norm": 0.10114715993404388, + "learning_rate": 4.739026287753991e-05, + "loss": 0.8662, + "step": 1805 + }, + { + "ce_ib": 3.756671905517578, + "ce_orig": 0.7044788599014282, + "epoch": 0.5190883600546409, + "kl_loss": 0.08393668383359909, + "loss_ib": 0.001215033931657672, + "step": 1805 + }, + { + "ce_ib": 5.312620639801025, + "ce_orig": 0.7455626726150513, + "epoch": 0.5190883600546409, + "kl_loss": 0.08651909232139587, + "loss_ib": 0.0013964528916403651, + "step": 1805 + }, + { + "ce_ib": 4.267414569854736, + "ce_orig": 0.6703543066978455, + "epoch": 0.5190883600546409, + "kl_loss": 0.051903557032346725, + "loss_ib": 0.0009457769920118153, + "step": 1805 + }, + { + "ce_ib": 6.670166015625, + "ce_orig": 1.2992558479309082, + "epoch": 0.5190883600546409, + "kl_loss": 0.0653977245092392, + "loss_ib": 0.001320993877016008, + "step": 1805 + }, + { + "ce_ib": 7.5734100341796875, + "ce_orig": 1.5710968971252441, + "epoch": 0.5193759436336185, + "kl_loss": 0.10187701135873795, + "loss_ib": 0.0017761110793799162, + "step": 1806 + }, + { + "ce_ib": 3.6703009605407715, + "ce_orig": 0.7268949747085571, + "epoch": 0.5193759436336185, + "kl_loss": 0.0769713968038559, + "loss_ib": 0.0011367440456524491, + "step": 1806 + }, + { + "ce_ib": 3.8246397972106934, + "ce_orig": 1.0897506475448608, + "epoch": 0.5193759436336185, + "kl_loss": 0.06341678649187088, + "loss_ib": 0.0010166318388655782, + "step": 1806 + }, + { + "ce_ib": 5.007676601409912, + "ce_orig": 1.2937309741973877, + "epoch": 0.5193759436336185, + "kl_loss": 0.03793942928314209, + "loss_ib": 0.0008801619405858219, + "step": 1806 + }, + { + "ce_ib": 4.149056911468506, + "ce_orig": 0.6792333722114563, + "epoch": 0.5196635272125961, + "kl_loss": 0.1577262133359909, + "loss_ib": 0.0019921676721423864, + "step": 1807 + }, + { + "ce_ib": 6.604621410369873, + "ce_orig": 1.3342257738113403, + "epoch": 0.5196635272125961, + "kl_loss": 0.11868235468864441, + "loss_ib": 0.001847285544499755, + "step": 1807 + }, + { + "ce_ib": 5.724687576293945, + "ce_orig": 1.0005601644515991, + "epoch": 0.5196635272125961, + "kl_loss": 0.22801947593688965, + "loss_ib": 0.0028526633977890015, + "step": 1807 + }, + { + "ce_ib": 3.9214529991149902, + "ce_orig": 0.34302622079849243, + "epoch": 0.5196635272125961, + "kl_loss": 0.09325598925352097, + "loss_ib": 0.0013247051974758506, + "step": 1807 + }, + { + "ce_ib": 4.380338668823242, + "ce_orig": 0.6630991697311401, + "epoch": 0.5199511107915739, + "kl_loss": 0.10154488682746887, + "loss_ib": 0.0014534827787429094, + "step": 1808 + }, + { + "ce_ib": 4.409718036651611, + "ce_orig": 0.6710807085037231, + "epoch": 0.5199511107915739, + "kl_loss": 0.15432900190353394, + "loss_ib": 0.0019842619076371193, + "step": 1808 + }, + { + "ce_ib": 3.4346773624420166, + "ce_orig": 0.5531025528907776, + "epoch": 0.5199511107915739, + "kl_loss": 0.10229062288999557, + "loss_ib": 0.0013663738500326872, + "step": 1808 + }, + { + "ce_ib": 2.7428243160247803, + "ce_orig": 0.47916510701179504, + "epoch": 0.5199511107915739, + "kl_loss": 0.0955585241317749, + "loss_ib": 0.00122986757196486, + "step": 1808 + }, + { + "ce_ib": 7.068719387054443, + "ce_orig": 1.453889012336731, + "epoch": 0.5202386943705515, + "kl_loss": 0.3483327627182007, + "loss_ib": 0.004190199077129364, + "step": 1809 + }, + { + "ce_ib": 5.409357070922852, + "ce_orig": 0.6340644359588623, + "epoch": 0.5202386943705515, + "kl_loss": 0.12500275671482086, + "loss_ib": 0.0017909632297232747, + "step": 1809 + }, + { + "ce_ib": 3.1178486347198486, + "ce_orig": 0.693834125995636, + "epoch": 0.5202386943705515, + "kl_loss": 0.2148078978061676, + "loss_ib": 0.0024598638992756605, + "step": 1809 + }, + { + "ce_ib": 3.2665345668792725, + "ce_orig": 0.38246142864227295, + "epoch": 0.5202386943705515, + "kl_loss": 0.08975034952163696, + "loss_ib": 0.0012241569347679615, + "step": 1809 + }, + { + "epoch": 0.5205262779495291, + "grad_norm": 0.10595458000898361, + "learning_rate": 4.7372974262860294e-05, + "loss": 0.8162, + "step": 1810 + }, + { + "ce_ib": 6.231423377990723, + "ce_orig": 1.1110771894454956, + "epoch": 0.5205262779495291, + "kl_loss": 0.08919298648834229, + "loss_ib": 0.0015150720719248056, + "step": 1810 + }, + { + "ce_ib": 3.4507579803466797, + "ce_orig": 0.683596670627594, + "epoch": 0.5205262779495291, + "kl_loss": 0.06465554982423782, + "loss_ib": 0.0009916312992572784, + "step": 1810 + }, + { + "ce_ib": 6.258065700531006, + "ce_orig": 1.283189296722412, + "epoch": 0.5205262779495291, + "kl_loss": 0.08220970630645752, + "loss_ib": 0.0014479034580290318, + "step": 1810 + }, + { + "ce_ib": 3.2762346267700195, + "ce_orig": 0.6312574148178101, + "epoch": 0.5205262779495291, + "kl_loss": 0.06733869016170502, + "loss_ib": 0.0010010104160755873, + "step": 1810 + }, + { + "ce_ib": 4.352741241455078, + "ce_orig": 0.3797578811645508, + "epoch": 0.5208138615285067, + "kl_loss": 0.07618406414985657, + "loss_ib": 0.001197114703245461, + "step": 1811 + }, + { + "ce_ib": 3.130756139755249, + "ce_orig": 0.7292548418045044, + "epoch": 0.5208138615285067, + "kl_loss": 0.04529916122555733, + "loss_ib": 0.0007660671835765243, + "step": 1811 + }, + { + "ce_ib": 5.736673355102539, + "ce_orig": 1.0384514331817627, + "epoch": 0.5208138615285067, + "kl_loss": 0.1427643895149231, + "loss_ib": 0.0020013111643493176, + "step": 1811 + }, + { + "ce_ib": 5.874814033508301, + "ce_orig": 1.473393201828003, + "epoch": 0.5208138615285067, + "kl_loss": 0.058076292276382446, + "loss_ib": 0.0011682442855089903, + "step": 1811 + }, + { + "ce_ib": 4.232322692871094, + "ce_orig": 0.8905428051948547, + "epoch": 0.5211014451074844, + "kl_loss": 0.07042574137449265, + "loss_ib": 0.001127489609643817, + "step": 1812 + }, + { + "ce_ib": 4.064655780792236, + "ce_orig": 0.469370037317276, + "epoch": 0.5211014451074844, + "kl_loss": 0.12562108039855957, + "loss_ib": 0.0016626763390377164, + "step": 1812 + }, + { + "ce_ib": 6.380324840545654, + "ce_orig": 1.3931618928909302, + "epoch": 0.5211014451074844, + "kl_loss": 0.08758416026830673, + "loss_ib": 0.0015138740418478847, + "step": 1812 + }, + { + "ce_ib": 6.653614044189453, + "ce_orig": 1.1573448181152344, + "epoch": 0.5211014451074844, + "kl_loss": 0.09285688400268555, + "loss_ib": 0.0015939300647005439, + "step": 1812 + }, + { + "ce_ib": 4.301087379455566, + "ce_orig": 0.7379696369171143, + "epoch": 0.521389028686462, + "kl_loss": 0.05680270865559578, + "loss_ib": 0.0009981357725337148, + "step": 1813 + }, + { + "ce_ib": 6.014238357543945, + "ce_orig": 0.7989896535873413, + "epoch": 0.521389028686462, + "kl_loss": 0.11396847665309906, + "loss_ib": 0.0017411086009815335, + "step": 1813 + }, + { + "ce_ib": 3.2849302291870117, + "ce_orig": 0.6306245923042297, + "epoch": 0.521389028686462, + "kl_loss": 0.051529496908187866, + "loss_ib": 0.0008437879150733352, + "step": 1813 + }, + { + "ce_ib": 4.725744247436523, + "ce_orig": 0.8219425678253174, + "epoch": 0.521389028686462, + "kl_loss": 0.10422439873218536, + "loss_ib": 0.001514818286523223, + "step": 1813 + }, + { + "ce_ib": 6.492020606994629, + "ce_orig": 1.038405179977417, + "epoch": 0.5216766122654396, + "kl_loss": 0.08631042391061783, + "loss_ib": 0.0015123062767088413, + "step": 1814 + }, + { + "ce_ib": 4.684038162231445, + "ce_orig": 0.6555771231651306, + "epoch": 0.5216766122654396, + "kl_loss": 0.07601496577262878, + "loss_ib": 0.0012285534758120775, + "step": 1814 + }, + { + "ce_ib": 3.673503875732422, + "ce_orig": 0.6698758602142334, + "epoch": 0.5216766122654396, + "kl_loss": 0.07869471609592438, + "loss_ib": 0.0011542976135388017, + "step": 1814 + }, + { + "ce_ib": 2.7217767238616943, + "ce_orig": 0.3834896385669708, + "epoch": 0.5216766122654396, + "kl_loss": 0.1019369438290596, + "loss_ib": 0.001291547087021172, + "step": 1814 + }, + { + "epoch": 0.5219641958444173, + "grad_norm": 0.11708398908376694, + "learning_rate": 4.735563174649278e-05, + "loss": 0.8213, + "step": 1815 + }, + { + "ce_ib": 8.318763732910156, + "ce_orig": 1.6304057836532593, + "epoch": 0.5219641958444173, + "kl_loss": 0.11775930225849152, + "loss_ib": 0.0020094693172723055, + "step": 1815 + }, + { + "ce_ib": 3.7222695350646973, + "ce_orig": 0.6264578700065613, + "epoch": 0.5219641958444173, + "kl_loss": 0.07069855183362961, + "loss_ib": 0.0010792124085128307, + "step": 1815 + }, + { + "ce_ib": 7.6702656745910645, + "ce_orig": 1.5261114835739136, + "epoch": 0.5219641958444173, + "kl_loss": 0.09343883395195007, + "loss_ib": 0.001701414817944169, + "step": 1815 + }, + { + "ce_ib": 4.340613842010498, + "ce_orig": 0.6966679692268372, + "epoch": 0.5219641958444173, + "kl_loss": 0.0969456136226654, + "loss_ib": 0.0014035174390301108, + "step": 1815 + }, + { + "ce_ib": 2.6370952129364014, + "ce_orig": 0.5362669229507446, + "epoch": 0.522251779423395, + "kl_loss": 0.05210626870393753, + "loss_ib": 0.0007847721572034061, + "step": 1816 + }, + { + "ce_ib": 6.7394795417785645, + "ce_orig": 1.3703293800354004, + "epoch": 0.522251779423395, + "kl_loss": 0.10384607315063477, + "loss_ib": 0.0017124086152762175, + "step": 1816 + }, + { + "ce_ib": 4.034965515136719, + "ce_orig": 0.4399716854095459, + "epoch": 0.522251779423395, + "kl_loss": 0.12095208466053009, + "loss_ib": 0.0016130172880366445, + "step": 1816 + }, + { + "ce_ib": 4.519737243652344, + "ce_orig": 1.009818434715271, + "epoch": 0.522251779423395, + "kl_loss": 0.06268458813428879, + "loss_ib": 0.0010788196232169867, + "step": 1816 + }, + { + "ce_ib": 9.268354415893555, + "ce_orig": 1.8635910749435425, + "epoch": 0.5225393630023726, + "kl_loss": 0.11157218366861343, + "loss_ib": 0.002042557345703244, + "step": 1817 + }, + { + "ce_ib": 5.600277900695801, + "ce_orig": 1.0463265180587769, + "epoch": 0.5225393630023726, + "kl_loss": 0.09952437877655029, + "loss_ib": 0.0015552715631201863, + "step": 1817 + }, + { + "ce_ib": 6.125753402709961, + "ce_orig": 1.0574010610580444, + "epoch": 0.5225393630023726, + "kl_loss": 0.16778095066547394, + "loss_ib": 0.0022903846111148596, + "step": 1817 + }, + { + "ce_ib": 7.646019458770752, + "ce_orig": 1.281021237373352, + "epoch": 0.5225393630023726, + "kl_loss": 0.09393389523029327, + "loss_ib": 0.0017039409140124917, + "step": 1817 + }, + { + "ce_ib": 4.273820400238037, + "ce_orig": 0.8420512676239014, + "epoch": 0.5228269465813502, + "kl_loss": 0.09307074546813965, + "loss_ib": 0.001358089386485517, + "step": 1818 + }, + { + "ce_ib": 5.788002967834473, + "ce_orig": 0.9121771454811096, + "epoch": 0.5228269465813502, + "kl_loss": 0.07878164947032928, + "loss_ib": 0.00136661680880934, + "step": 1818 + }, + { + "ce_ib": 5.830178737640381, + "ce_orig": 0.9008325338363647, + "epoch": 0.5228269465813502, + "kl_loss": 0.056575413793325424, + "loss_ib": 0.0011487719602882862, + "step": 1818 + }, + { + "ce_ib": 5.574953079223633, + "ce_orig": 0.8771875500679016, + "epoch": 0.5228269465813502, + "kl_loss": 0.13762995600700378, + "loss_ib": 0.0019337948178872466, + "step": 1818 + }, + { + "ce_ib": 3.760028123855591, + "ce_orig": 0.6475303173065186, + "epoch": 0.5231145301603278, + "kl_loss": 0.07815948873758316, + "loss_ib": 0.0011575976386666298, + "step": 1819 + }, + { + "ce_ib": 5.742321968078613, + "ce_orig": 0.7745041847229004, + "epoch": 0.5231145301603278, + "kl_loss": 0.09128974378108978, + "loss_ib": 0.0014871296007186174, + "step": 1819 + }, + { + "ce_ib": 5.6013078689575195, + "ce_orig": 1.0209522247314453, + "epoch": 0.5231145301603278, + "kl_loss": 0.13916921615600586, + "loss_ib": 0.0019518228946253657, + "step": 1819 + }, + { + "ce_ib": 5.397923469543457, + "ce_orig": 0.8855265974998474, + "epoch": 0.5231145301603278, + "kl_loss": 0.08570579439401627, + "loss_ib": 0.0013968503335490823, + "step": 1819 + }, + { + "epoch": 0.5234021137393055, + "grad_norm": 0.09577493369579315, + "learning_rate": 4.733823537021955e-05, + "loss": 0.9174, + "step": 1820 + }, + { + "ce_ib": 3.7873284816741943, + "ce_orig": 0.6859638690948486, + "epoch": 0.5234021137393055, + "kl_loss": 0.061896104365587234, + "loss_ib": 0.0009976938599720597, + "step": 1820 + }, + { + "ce_ib": 5.275213718414307, + "ce_orig": 1.0465030670166016, + "epoch": 0.5234021137393055, + "kl_loss": 0.08003317564725876, + "loss_ib": 0.0013278531841933727, + "step": 1820 + }, + { + "ce_ib": 5.358640670776367, + "ce_orig": 0.8145259618759155, + "epoch": 0.5234021137393055, + "kl_loss": 0.15040791034698486, + "loss_ib": 0.0020399431232362986, + "step": 1820 + }, + { + "ce_ib": 3.570126533508301, + "ce_orig": 0.787944495677948, + "epoch": 0.5234021137393055, + "kl_loss": 0.06554581224918365, + "loss_ib": 0.0010124706896021962, + "step": 1820 + }, + { + "ce_ib": 5.516453742980957, + "ce_orig": 0.8835792541503906, + "epoch": 0.5236896973182831, + "kl_loss": 0.12309664487838745, + "loss_ib": 0.0017826117109507322, + "step": 1821 + }, + { + "ce_ib": 6.123493671417236, + "ce_orig": 0.9543690085411072, + "epoch": 0.5236896973182831, + "kl_loss": 0.0857853889465332, + "loss_ib": 0.0014702031621709466, + "step": 1821 + }, + { + "ce_ib": 7.702580451965332, + "ce_orig": 1.3237342834472656, + "epoch": 0.5236896973182831, + "kl_loss": 0.18452784419059753, + "loss_ib": 0.002615536330267787, + "step": 1821 + }, + { + "ce_ib": 5.015350341796875, + "ce_orig": 0.7903554439544678, + "epoch": 0.5236896973182831, + "kl_loss": 0.07133451104164124, + "loss_ib": 0.0012148801470175385, + "step": 1821 + }, + { + "ce_ib": 4.284536838531494, + "ce_orig": 0.8054338097572327, + "epoch": 0.5239772808972608, + "kl_loss": 0.06689270585775375, + "loss_ib": 0.0010973807657137513, + "step": 1822 + }, + { + "ce_ib": 5.16529655456543, + "ce_orig": 0.6549634337425232, + "epoch": 0.5239772808972608, + "kl_loss": 0.11298231035470963, + "loss_ib": 0.0016463526990264654, + "step": 1822 + }, + { + "ce_ib": 3.342257499694824, + "ce_orig": 0.6846990585327148, + "epoch": 0.5239772808972608, + "kl_loss": 0.049947358667850494, + "loss_ib": 0.0008336993050761521, + "step": 1822 + }, + { + "ce_ib": 4.5733113288879395, + "ce_orig": 0.9374260306358337, + "epoch": 0.5239772808972608, + "kl_loss": 0.09508390724658966, + "loss_ib": 0.0014081700937822461, + "step": 1822 + }, + { + "ce_ib": 3.3939642906188965, + "ce_orig": 0.7487550377845764, + "epoch": 0.5242648644762384, + "kl_loss": 0.07913435250520706, + "loss_ib": 0.001130739925429225, + "step": 1823 + }, + { + "ce_ib": 3.4242751598358154, + "ce_orig": 0.6800208687782288, + "epoch": 0.5242648644762384, + "kl_loss": 0.07598484307527542, + "loss_ib": 0.001102275913581252, + "step": 1823 + }, + { + "ce_ib": 3.9166855812072754, + "ce_orig": 0.293745219707489, + "epoch": 0.5242648644762384, + "kl_loss": 0.2236151248216629, + "loss_ib": 0.002627819776535034, + "step": 1823 + }, + { + "ce_ib": 6.284453868865967, + "ce_orig": 0.7588310837745667, + "epoch": 0.5242648644762384, + "kl_loss": 0.19608229398727417, + "loss_ib": 0.0025892683770507574, + "step": 1823 + }, + { + "ce_ib": 6.089869022369385, + "ce_orig": 0.7041245102882385, + "epoch": 0.5245524480552161, + "kl_loss": 0.17428812384605408, + "loss_ib": 0.002351868199184537, + "step": 1824 + }, + { + "ce_ib": 4.536786079406738, + "ce_orig": 0.6655839085578918, + "epoch": 0.5245524480552161, + "kl_loss": 0.06658326089382172, + "loss_ib": 0.0011195112019777298, + "step": 1824 + }, + { + "ce_ib": 6.671474933624268, + "ce_orig": 1.1460767984390259, + "epoch": 0.5245524480552161, + "kl_loss": 0.09520304948091507, + "loss_ib": 0.0016191779868677258, + "step": 1824 + }, + { + "ce_ib": 3.272091865539551, + "ce_orig": 0.5087643265724182, + "epoch": 0.5245524480552161, + "kl_loss": 0.07615938782691956, + "loss_ib": 0.001088803051970899, + "step": 1824 + }, + { + "epoch": 0.5248400316341937, + "grad_norm": 0.10159893333911896, + "learning_rate": 4.7320785175952496e-05, + "loss": 0.8327, + "step": 1825 + }, + { + "ce_ib": 4.874035835266113, + "ce_orig": 1.0581839084625244, + "epoch": 0.5248400316341937, + "kl_loss": 0.06972561776638031, + "loss_ib": 0.0011846597772091627, + "step": 1825 + }, + { + "ce_ib": 4.534748554229736, + "ce_orig": 0.5559304356575012, + "epoch": 0.5248400316341937, + "kl_loss": 0.11386772990226746, + "loss_ib": 0.001592152053490281, + "step": 1825 + }, + { + "ce_ib": 3.317976713180542, + "ce_orig": 0.8454773426055908, + "epoch": 0.5248400316341937, + "kl_loss": 0.06905925273895264, + "loss_ib": 0.0010223902063444257, + "step": 1825 + }, + { + "ce_ib": 7.761319637298584, + "ce_orig": 1.4961448907852173, + "epoch": 0.5248400316341937, + "kl_loss": 0.07911602407693863, + "loss_ib": 0.0015672921435907483, + "step": 1825 + }, + { + "ce_ib": 3.8309166431427, + "ce_orig": 0.8245151042938232, + "epoch": 0.5251276152131713, + "kl_loss": 0.05368543416261673, + "loss_ib": 0.0009199459454976022, + "step": 1826 + }, + { + "ce_ib": 3.9119369983673096, + "ce_orig": 0.8872231841087341, + "epoch": 0.5251276152131713, + "kl_loss": 0.1055692583322525, + "loss_ib": 0.0014468863373622298, + "step": 1826 + }, + { + "ce_ib": 9.423162460327148, + "ce_orig": 1.8931254148483276, + "epoch": 0.5251276152131713, + "kl_loss": 0.24540795385837555, + "loss_ib": 0.003396395593881607, + "step": 1826 + }, + { + "ce_ib": 5.124600887298584, + "ce_orig": 0.9763220548629761, + "epoch": 0.5251276152131713, + "kl_loss": 0.1308102011680603, + "loss_ib": 0.0018205619417130947, + "step": 1826 + }, + { + "ce_ib": 4.121662616729736, + "ce_orig": 0.765508770942688, + "epoch": 0.5254151987921489, + "kl_loss": 0.11214081943035126, + "loss_ib": 0.0015335744246840477, + "step": 1827 + }, + { + "ce_ib": 4.465938091278076, + "ce_orig": 0.9244667887687683, + "epoch": 0.5254151987921489, + "kl_loss": 0.08366161584854126, + "loss_ib": 0.0012832098873332143, + "step": 1827 + }, + { + "ce_ib": 4.965151786804199, + "ce_orig": 0.8177745938301086, + "epoch": 0.5254151987921489, + "kl_loss": 0.07644996047019958, + "loss_ib": 0.0012610147241503, + "step": 1827 + }, + { + "ce_ib": 2.370084524154663, + "ce_orig": 0.299524188041687, + "epoch": 0.5254151987921489, + "kl_loss": 0.05783846974372864, + "loss_ib": 0.0008153931703418493, + "step": 1827 + }, + { + "ce_ib": 4.3341522216796875, + "ce_orig": 0.5957595109939575, + "epoch": 0.5257027823711266, + "kl_loss": 0.10297037661075592, + "loss_ib": 0.0014631189405918121, + "step": 1828 + }, + { + "ce_ib": 5.021320343017578, + "ce_orig": 0.8513695597648621, + "epoch": 0.5257027823711266, + "kl_loss": 0.10355868935585022, + "loss_ib": 0.0015377189265564084, + "step": 1828 + }, + { + "ce_ib": 4.681314945220947, + "ce_orig": 0.8675044775009155, + "epoch": 0.5257027823711266, + "kl_loss": 0.1169811338186264, + "loss_ib": 0.0016379427397623658, + "step": 1828 + }, + { + "ce_ib": 7.842348098754883, + "ce_orig": 1.0261859893798828, + "epoch": 0.5257027823711266, + "kl_loss": 0.09225030988454819, + "loss_ib": 0.0017067377921193838, + "step": 1828 + }, + { + "ce_ib": 3.2080273628234863, + "ce_orig": 0.6056621670722961, + "epoch": 0.5259903659501043, + "kl_loss": 0.06339588016271591, + "loss_ib": 0.0009547614608891308, + "step": 1829 + }, + { + "ce_ib": 4.580254554748535, + "ce_orig": 0.9237298965454102, + "epoch": 0.5259903659501043, + "kl_loss": 0.18489813804626465, + "loss_ib": 0.0023070068564265966, + "step": 1829 + }, + { + "ce_ib": 5.00038480758667, + "ce_orig": 0.8777315020561218, + "epoch": 0.5259903659501043, + "kl_loss": 0.14294299483299255, + "loss_ib": 0.0019294683588668704, + "step": 1829 + }, + { + "ce_ib": 7.001865863800049, + "ce_orig": 1.0818897485733032, + "epoch": 0.5259903659501043, + "kl_loss": 0.06203640624880791, + "loss_ib": 0.001320550567470491, + "step": 1829 + }, + { + "epoch": 0.5262779495290819, + "grad_norm": 0.09504136443138123, + "learning_rate": 4.730328120573318e-05, + "loss": 0.8582, + "step": 1830 + }, + { + "ce_ib": 7.221059322357178, + "ce_orig": 1.3277822732925415, + "epoch": 0.5262779495290819, + "kl_loss": 0.0993356853723526, + "loss_ib": 0.001715462771244347, + "step": 1830 + }, + { + "ce_ib": 4.691574573516846, + "ce_orig": 0.6713263988494873, + "epoch": 0.5262779495290819, + "kl_loss": 0.05912518873810768, + "loss_ib": 0.0010604093549773097, + "step": 1830 + }, + { + "ce_ib": 8.157382011413574, + "ce_orig": 1.2939140796661377, + "epoch": 0.5262779495290819, + "kl_loss": 0.10488729178905487, + "loss_ib": 0.0018646111711859703, + "step": 1830 + }, + { + "ce_ib": 5.157676696777344, + "ce_orig": 0.4529913365840912, + "epoch": 0.5262779495290819, + "kl_loss": 0.11135273426771164, + "loss_ib": 0.0016292950604110956, + "step": 1830 + }, + { + "ce_ib": 2.7440807819366455, + "ce_orig": 0.26964977383613586, + "epoch": 0.5265655331080595, + "kl_loss": 0.09464089572429657, + "loss_ib": 0.0012208169791847467, + "step": 1831 + }, + { + "ce_ib": 7.976547718048096, + "ce_orig": 1.1179219484329224, + "epoch": 0.5265655331080595, + "kl_loss": 0.1357959508895874, + "loss_ib": 0.002155614085495472, + "step": 1831 + }, + { + "ce_ib": 4.7736592292785645, + "ce_orig": 0.8849498629570007, + "epoch": 0.5265655331080595, + "kl_loss": 0.10312657803297043, + "loss_ib": 0.0015086316270753741, + "step": 1831 + }, + { + "ce_ib": 2.536576271057129, + "ce_orig": 0.4652963876724243, + "epoch": 0.5265655331080595, + "kl_loss": 0.060768578201532364, + "loss_ib": 0.0008613434038124979, + "step": 1831 + }, + { + "ce_ib": 5.091498851776123, + "ce_orig": 1.1292777061462402, + "epoch": 0.5268531166870372, + "kl_loss": 0.0927148163318634, + "loss_ib": 0.0014362980145961046, + "step": 1832 + }, + { + "ce_ib": 5.313485622406006, + "ce_orig": 0.9638594388961792, + "epoch": 0.5268531166870372, + "kl_loss": 0.09165152907371521, + "loss_ib": 0.0014478638768196106, + "step": 1832 + }, + { + "ce_ib": 3.8530848026275635, + "ce_orig": 0.7995981574058533, + "epoch": 0.5268531166870372, + "kl_loss": 0.058659665286540985, + "loss_ib": 0.0009719051304273307, + "step": 1832 + }, + { + "ce_ib": 3.8872439861297607, + "ce_orig": 0.5904503464698792, + "epoch": 0.5268531166870372, + "kl_loss": 0.07291679829359055, + "loss_ib": 0.0011178924469277263, + "step": 1832 + }, + { + "ce_ib": 4.028828144073486, + "ce_orig": 0.39750203490257263, + "epoch": 0.5271407002660148, + "kl_loss": 0.08351051807403564, + "loss_ib": 0.001237988006323576, + "step": 1833 + }, + { + "ce_ib": 6.441838264465332, + "ce_orig": 1.1215847730636597, + "epoch": 0.5271407002660148, + "kl_loss": 0.0842265859246254, + "loss_ib": 0.0014864496188238263, + "step": 1833 + }, + { + "ce_ib": 3.721705436706543, + "ce_orig": 0.5534646511077881, + "epoch": 0.5271407002660148, + "kl_loss": 0.07778242975473404, + "loss_ib": 0.0011499949032440782, + "step": 1833 + }, + { + "ce_ib": 4.957102298736572, + "ce_orig": 0.9665647745132446, + "epoch": 0.5271407002660148, + "kl_loss": 0.12029129266738892, + "loss_ib": 0.0016986230621114373, + "step": 1833 + }, + { + "ce_ib": 5.595959186553955, + "ce_orig": 0.9888685345649719, + "epoch": 0.5274282838449924, + "kl_loss": 0.07677915692329407, + "loss_ib": 0.0013273874064907432, + "step": 1834 + }, + { + "ce_ib": 5.263946533203125, + "ce_orig": 0.9644874334335327, + "epoch": 0.5274282838449924, + "kl_loss": 0.06487447023391724, + "loss_ib": 0.001175139332190156, + "step": 1834 + }, + { + "ce_ib": 3.2343273162841797, + "ce_orig": 0.694250762462616, + "epoch": 0.5274282838449924, + "kl_loss": 0.08413250744342804, + "loss_ib": 0.0011647577630355954, + "step": 1834 + }, + { + "ce_ib": 3.0933871269226074, + "ce_orig": 0.7096502184867859, + "epoch": 0.5274282838449924, + "kl_loss": 0.0822378471493721, + "loss_ib": 0.0011317171156406403, + "step": 1834 + }, + { + "epoch": 0.5277158674239701, + "grad_norm": 0.07885103672742844, + "learning_rate": 4.7285723501732746e-05, + "loss": 0.8077, + "step": 1835 + }, + { + "ce_ib": 5.286362648010254, + "ce_orig": 0.9879306554794312, + "epoch": 0.5277158674239701, + "kl_loss": 0.10594040155410767, + "loss_ib": 0.0015880402643233538, + "step": 1835 + }, + { + "ce_ib": 3.7081973552703857, + "ce_orig": 0.6360011100769043, + "epoch": 0.5277158674239701, + "kl_loss": 0.10041598975658417, + "loss_ib": 0.0013749796198680997, + "step": 1835 + }, + { + "ce_ib": 4.464404106140137, + "ce_orig": 0.8524314761161804, + "epoch": 0.5277158674239701, + "kl_loss": 0.08028027415275574, + "loss_ib": 0.0012492431560531259, + "step": 1835 + }, + { + "ce_ib": 5.482434272766113, + "ce_orig": 0.7572631239891052, + "epoch": 0.5277158674239701, + "kl_loss": 0.06954094022512436, + "loss_ib": 0.0012436527758836746, + "step": 1835 + }, + { + "ce_ib": 3.4663188457489014, + "ce_orig": 0.5446067452430725, + "epoch": 0.5280034510029478, + "kl_loss": 0.09966419637203217, + "loss_ib": 0.0013432737905532122, + "step": 1836 + }, + { + "ce_ib": 3.5433011054992676, + "ce_orig": 0.8401714563369751, + "epoch": 0.5280034510029478, + "kl_loss": 0.08658905327320099, + "loss_ib": 0.0012202205834910274, + "step": 1836 + }, + { + "ce_ib": 5.753762722015381, + "ce_orig": 0.9532781839370728, + "epoch": 0.5280034510029478, + "kl_loss": 0.09068961441516876, + "loss_ib": 0.0014822724042460322, + "step": 1836 + }, + { + "ce_ib": 5.587639331817627, + "ce_orig": 0.7353339195251465, + "epoch": 0.5280034510029478, + "kl_loss": 0.12940078973770142, + "loss_ib": 0.001852771732956171, + "step": 1836 + }, + { + "ce_ib": 4.8555169105529785, + "ce_orig": 0.77618807554245, + "epoch": 0.5282910345819254, + "kl_loss": 0.11298365890979767, + "loss_ib": 0.0016153882024809718, + "step": 1837 + }, + { + "ce_ib": 7.698131084442139, + "ce_orig": 0.5491910576820374, + "epoch": 0.5282910345819254, + "kl_loss": 0.12775568664073944, + "loss_ib": 0.0020473699551075697, + "step": 1837 + }, + { + "ce_ib": 3.672185182571411, + "ce_orig": 0.6776784658432007, + "epoch": 0.5282910345819254, + "kl_loss": 0.09392140805721283, + "loss_ib": 0.001306432648561895, + "step": 1837 + }, + { + "ce_ib": 5.370136737823486, + "ce_orig": 0.7923865914344788, + "epoch": 0.5282910345819254, + "kl_loss": 0.10201041400432587, + "loss_ib": 0.0015571177937090397, + "step": 1837 + }, + { + "ce_ib": 7.21496057510376, + "ce_orig": 1.2689510583877563, + "epoch": 0.528578618160903, + "kl_loss": 0.07586412876844406, + "loss_ib": 0.0014801373472437263, + "step": 1838 + }, + { + "ce_ib": 5.920375347137451, + "ce_orig": 0.8841639757156372, + "epoch": 0.528578618160903, + "kl_loss": 0.07994547486305237, + "loss_ib": 0.0013914922019466758, + "step": 1838 + }, + { + "ce_ib": 7.445903778076172, + "ce_orig": 1.4510369300842285, + "epoch": 0.528578618160903, + "kl_loss": 0.10795219242572784, + "loss_ib": 0.001824112143367529, + "step": 1838 + }, + { + "ce_ib": 5.009032726287842, + "ce_orig": 1.2325526475906372, + "epoch": 0.528578618160903, + "kl_loss": 0.05412755906581879, + "loss_ib": 0.0010421788319945335, + "step": 1838 + }, + { + "ce_ib": 5.748366355895996, + "ce_orig": 0.8171243667602539, + "epoch": 0.5288662017398806, + "kl_loss": 0.0819912850856781, + "loss_ib": 0.0013947493862360716, + "step": 1839 + }, + { + "ce_ib": 3.782522678375244, + "ce_orig": 0.6349021792411804, + "epoch": 0.5288662017398806, + "kl_loss": 0.06724929809570312, + "loss_ib": 0.0010507452534511685, + "step": 1839 + }, + { + "ce_ib": 3.1685445308685303, + "ce_orig": 0.5304456949234009, + "epoch": 0.5288662017398806, + "kl_loss": 0.06438175588846207, + "loss_ib": 0.000960672041401267, + "step": 1839 + }, + { + "ce_ib": 4.833837509155273, + "ce_orig": 0.7314773797988892, + "epoch": 0.5288662017398806, + "kl_loss": 0.11642509698867798, + "loss_ib": 0.0016476346645504236, + "step": 1839 + }, + { + "epoch": 0.5291537853188583, + "grad_norm": 0.09651792794466019, + "learning_rate": 4.726811210625176e-05, + "loss": 0.8705, + "step": 1840 + }, + { + "ce_ib": 5.7855353355407715, + "ce_orig": 1.5065516233444214, + "epoch": 0.5291537853188583, + "kl_loss": 0.07023538649082184, + "loss_ib": 0.0012809073086827993, + "step": 1840 + }, + { + "ce_ib": 6.015461444854736, + "ce_orig": 1.1647579669952393, + "epoch": 0.5291537853188583, + "kl_loss": 0.07269640266895294, + "loss_ib": 0.0013285101158544421, + "step": 1840 + }, + { + "ce_ib": 4.277002334594727, + "ce_orig": 0.6749783158302307, + "epoch": 0.5291537853188583, + "kl_loss": 0.05342990159988403, + "loss_ib": 0.0009619991760700941, + "step": 1840 + }, + { + "ce_ib": 5.18788480758667, + "ce_orig": 0.5327911376953125, + "epoch": 0.5291537853188583, + "kl_loss": 0.12858743965625763, + "loss_ib": 0.0018046628683805466, + "step": 1840 + }, + { + "ce_ib": 5.78037691116333, + "ce_orig": 1.2208914756774902, + "epoch": 0.5294413688978359, + "kl_loss": 0.12210308015346527, + "loss_ib": 0.0017990684136748314, + "step": 1841 + }, + { + "ce_ib": 3.694746732711792, + "ce_orig": 0.6938146352767944, + "epoch": 0.5294413688978359, + "kl_loss": 0.08981604129076004, + "loss_ib": 0.0012676350306719542, + "step": 1841 + }, + { + "ce_ib": 6.774399280548096, + "ce_orig": 1.000654935836792, + "epoch": 0.5294413688978359, + "kl_loss": 0.10995084792375565, + "loss_ib": 0.0017769483383744955, + "step": 1841 + }, + { + "ce_ib": 3.979309558868408, + "ce_orig": 0.7353513240814209, + "epoch": 0.5294413688978359, + "kl_loss": 0.08279234170913696, + "loss_ib": 0.00122585438657552, + "step": 1841 + }, + { + "ce_ib": 4.40796422958374, + "ce_orig": 0.7580081224441528, + "epoch": 0.5297289524768136, + "kl_loss": 0.09894724190235138, + "loss_ib": 0.0014302688650786877, + "step": 1842 + }, + { + "ce_ib": 1.4312268495559692, + "ce_orig": 0.1202683225274086, + "epoch": 0.5297289524768136, + "kl_loss": 0.21160447597503662, + "loss_ib": 0.0022591673769056797, + "step": 1842 + }, + { + "ce_ib": 3.678631067276001, + "ce_orig": 0.6522001624107361, + "epoch": 0.5297289524768136, + "kl_loss": 0.06645625829696655, + "loss_ib": 0.0010324256727471948, + "step": 1842 + }, + { + "ce_ib": 3.3446578979492188, + "ce_orig": 0.6396112442016602, + "epoch": 0.5297289524768136, + "kl_loss": 0.11764278262853622, + "loss_ib": 0.0015108935767784715, + "step": 1842 + }, + { + "ce_ib": 3.6602888107299805, + "ce_orig": 0.9720301628112793, + "epoch": 0.5300165360557912, + "kl_loss": 0.06074594706296921, + "loss_ib": 0.000973488378804177, + "step": 1843 + }, + { + "ce_ib": 2.968815326690674, + "ce_orig": 0.6054991483688354, + "epoch": 0.5300165360557912, + "kl_loss": 0.0862845927476883, + "loss_ib": 0.0011597273405641317, + "step": 1843 + }, + { + "ce_ib": 5.470424175262451, + "ce_orig": 1.1465959548950195, + "epoch": 0.5300165360557912, + "kl_loss": 0.07891489565372467, + "loss_ib": 0.001336191315203905, + "step": 1843 + }, + { + "ce_ib": 3.486302137374878, + "ce_orig": 0.6792863607406616, + "epoch": 0.5300165360557912, + "kl_loss": 0.07117938995361328, + "loss_ib": 0.0010604241397231817, + "step": 1843 + }, + { + "ce_ib": 6.203835487365723, + "ce_orig": 1.0007436275482178, + "epoch": 0.5303041196347689, + "kl_loss": 0.10201396048069, + "loss_ib": 0.0016405230853706598, + "step": 1844 + }, + { + "ce_ib": 3.689505100250244, + "ce_orig": 0.8046457171440125, + "epoch": 0.5303041196347689, + "kl_loss": 0.07047881931066513, + "loss_ib": 0.00107373867649585, + "step": 1844 + }, + { + "ce_ib": 5.467555046081543, + "ce_orig": 1.2114951610565186, + "epoch": 0.5303041196347689, + "kl_loss": 0.09876742959022522, + "loss_ib": 0.0015344297280535102, + "step": 1844 + }, + { + "ce_ib": 6.061855792999268, + "ce_orig": 0.7467073798179626, + "epoch": 0.5303041196347689, + "kl_loss": 0.07543133199214935, + "loss_ib": 0.001360498950816691, + "step": 1844 + }, + { + "epoch": 0.5305917032137465, + "grad_norm": 0.082877516746521, + "learning_rate": 4.725044706172018e-05, + "loss": 0.8484, + "step": 1845 + }, + { + "ce_ib": 6.732047080993652, + "ce_orig": 1.2095576524734497, + "epoch": 0.5305917032137465, + "kl_loss": 0.05563550814986229, + "loss_ib": 0.0012295597698539495, + "step": 1845 + }, + { + "ce_ib": 5.336376190185547, + "ce_orig": 0.773193359375, + "epoch": 0.5305917032137465, + "kl_loss": 0.05108676105737686, + "loss_ib": 0.0010445051593706012, + "step": 1845 + }, + { + "ce_ib": 3.920708179473877, + "ce_orig": 0.9297981858253479, + "epoch": 0.5305917032137465, + "kl_loss": 0.0542929582297802, + "loss_ib": 0.000935000367462635, + "step": 1845 + }, + { + "ce_ib": 5.009018421173096, + "ce_orig": 1.1416187286376953, + "epoch": 0.5305917032137465, + "kl_loss": 0.07950013875961304, + "loss_ib": 0.0012959031155332923, + "step": 1845 + }, + { + "ce_ib": 3.579012155532837, + "ce_orig": 0.6931934952735901, + "epoch": 0.5308792867927241, + "kl_loss": 0.0787254124879837, + "loss_ib": 0.0011451552854850888, + "step": 1846 + }, + { + "ce_ib": 4.071962833404541, + "ce_orig": 0.7220334410667419, + "epoch": 0.5308792867927241, + "kl_loss": 0.05628364905714989, + "loss_ib": 0.0009700327645987272, + "step": 1846 + }, + { + "ce_ib": 5.777100563049316, + "ce_orig": 0.7151867151260376, + "epoch": 0.5308792867927241, + "kl_loss": 0.07195407152175903, + "loss_ib": 0.001297250622883439, + "step": 1846 + }, + { + "ce_ib": 5.150516986846924, + "ce_orig": 0.6828787326812744, + "epoch": 0.5308792867927241, + "kl_loss": 0.10526735335588455, + "loss_ib": 0.0015677252085879445, + "step": 1846 + }, + { + "ce_ib": 5.555667400360107, + "ce_orig": 1.1619387865066528, + "epoch": 0.5311668703717017, + "kl_loss": 0.10827607661485672, + "loss_ib": 0.0016383274924010038, + "step": 1847 + }, + { + "ce_ib": 7.176548004150391, + "ce_orig": 0.9737242460250854, + "epoch": 0.5311668703717017, + "kl_loss": 0.13908283412456512, + "loss_ib": 0.00210848287679255, + "step": 1847 + }, + { + "ce_ib": 4.510713577270508, + "ce_orig": 0.6394917368888855, + "epoch": 0.5311668703717017, + "kl_loss": 0.10627682507038116, + "loss_ib": 0.0015138395829126239, + "step": 1847 + }, + { + "ce_ib": 5.125590801239014, + "ce_orig": 1.008892297744751, + "epoch": 0.5311668703717017, + "kl_loss": 0.14957037568092346, + "loss_ib": 0.002008262788876891, + "step": 1847 + }, + { + "ce_ib": 8.029643058776855, + "ce_orig": 0.5685223340988159, + "epoch": 0.5314544539506794, + "kl_loss": 0.07618086040019989, + "loss_ib": 0.0015647727996110916, + "step": 1848 + }, + { + "ce_ib": 3.460737943649292, + "ce_orig": 0.6284685134887695, + "epoch": 0.5314544539506794, + "kl_loss": 0.06853239238262177, + "loss_ib": 0.001031397725455463, + "step": 1848 + }, + { + "ce_ib": 4.254874229431152, + "ce_orig": 0.762387216091156, + "epoch": 0.5314544539506794, + "kl_loss": 0.08328486233949661, + "loss_ib": 0.0012583360075950623, + "step": 1848 + }, + { + "ce_ib": 4.809506416320801, + "ce_orig": 0.8023175597190857, + "epoch": 0.5314544539506794, + "kl_loss": 0.055585190653800964, + "loss_ib": 0.001036802539601922, + "step": 1848 + }, + { + "ce_ib": 2.8356399536132812, + "ce_orig": 0.4932306706905365, + "epoch": 0.5317420375296571, + "kl_loss": 0.07694998383522034, + "loss_ib": 0.0010530638974159956, + "step": 1849 + }, + { + "ce_ib": 7.100836753845215, + "ce_orig": 1.1024192571640015, + "epoch": 0.5317420375296571, + "kl_loss": 0.08932968974113464, + "loss_ib": 0.0016033805441111326, + "step": 1849 + }, + { + "ce_ib": 2.1208558082580566, + "ce_orig": 0.4015112519264221, + "epoch": 0.5317420375296571, + "kl_loss": 0.057552531361579895, + "loss_ib": 0.0007876108284108341, + "step": 1849 + }, + { + "ce_ib": 4.710927486419678, + "ce_orig": 0.8208847045898438, + "epoch": 0.5317420375296571, + "kl_loss": 0.1187567263841629, + "loss_ib": 0.0016586600104346871, + "step": 1849 + }, + { + "epoch": 0.5320296211086347, + "grad_norm": 0.08115453273057938, + "learning_rate": 4.723272841069719e-05, + "loss": 0.7952, + "step": 1850 + }, + { + "ce_ib": 2.919970750808716, + "ce_orig": 0.5886723399162292, + "epoch": 0.5320296211086347, + "kl_loss": 0.0843060165643692, + "loss_ib": 0.0011350571876391768, + "step": 1850 + }, + { + "ce_ib": 6.539360523223877, + "ce_orig": 0.9294676780700684, + "epoch": 0.5320296211086347, + "kl_loss": 0.11889328807592392, + "loss_ib": 0.0018428688636049628, + "step": 1850 + }, + { + "ce_ib": 6.381986141204834, + "ce_orig": 0.7286571860313416, + "epoch": 0.5320296211086347, + "kl_loss": 0.07944625616073608, + "loss_ib": 0.0014326610835269094, + "step": 1850 + }, + { + "ce_ib": 4.113337993621826, + "ce_orig": 0.9276814460754395, + "epoch": 0.5320296211086347, + "kl_loss": 0.11057782173156738, + "loss_ib": 0.0015171119011938572, + "step": 1850 + }, + { + "ce_ib": 6.601346969604492, + "ce_orig": 0.6478778719902039, + "epoch": 0.5323172046876123, + "kl_loss": 0.13244323432445526, + "loss_ib": 0.001984566915780306, + "step": 1851 + }, + { + "ce_ib": 2.873537302017212, + "ce_orig": 0.3240191638469696, + "epoch": 0.5323172046876123, + "kl_loss": 0.05880070477724075, + "loss_ib": 0.0008753607980906963, + "step": 1851 + }, + { + "ce_ib": 3.41331148147583, + "ce_orig": 0.7288956046104431, + "epoch": 0.5323172046876123, + "kl_loss": 0.08343812823295593, + "loss_ib": 0.00117571244481951, + "step": 1851 + }, + { + "ce_ib": 5.750965118408203, + "ce_orig": 0.6333882212638855, + "epoch": 0.5323172046876123, + "kl_loss": 0.08162769675254822, + "loss_ib": 0.0013913733419030905, + "step": 1851 + }, + { + "ce_ib": 4.178093433380127, + "ce_orig": 0.22672997415065765, + "epoch": 0.53260478826659, + "kl_loss": 0.07534828782081604, + "loss_ib": 0.0011712921550497413, + "step": 1852 + }, + { + "ce_ib": 8.764464378356934, + "ce_orig": 1.1781435012817383, + "epoch": 0.53260478826659, + "kl_loss": 0.10185735672712326, + "loss_ib": 0.001895020017400384, + "step": 1852 + }, + { + "ce_ib": 3.382254123687744, + "ce_orig": 0.6801653504371643, + "epoch": 0.53260478826659, + "kl_loss": 0.07063452899456024, + "loss_ib": 0.001044570584781468, + "step": 1852 + }, + { + "ce_ib": 6.7069501876831055, + "ce_orig": 1.0321285724639893, + "epoch": 0.53260478826659, + "kl_loss": 0.08821718394756317, + "loss_ib": 0.0015528667718172073, + "step": 1852 + }, + { + "ce_ib": 3.930634021759033, + "ce_orig": 0.7123586535453796, + "epoch": 0.5328923718455676, + "kl_loss": 0.09519091248512268, + "loss_ib": 0.0013449725229293108, + "step": 1853 + }, + { + "ce_ib": 3.0413570404052734, + "ce_orig": 0.803445041179657, + "epoch": 0.5328923718455676, + "kl_loss": 0.05303191393613815, + "loss_ib": 0.000834454782307148, + "step": 1853 + }, + { + "ce_ib": 3.5827205181121826, + "ce_orig": 0.8146140575408936, + "epoch": 0.5328923718455676, + "kl_loss": 0.07105029374361038, + "loss_ib": 0.0010687749600037932, + "step": 1853 + }, + { + "ce_ib": 4.103414535522461, + "ce_orig": 0.3186612129211426, + "epoch": 0.5328923718455676, + "kl_loss": 0.08719896525144577, + "loss_ib": 0.0012823310680687428, + "step": 1853 + }, + { + "ce_ib": 3.9935455322265625, + "ce_orig": 0.715681791305542, + "epoch": 0.5331799554245452, + "kl_loss": 0.10451751947402954, + "loss_ib": 0.0014445297420024872, + "step": 1854 + }, + { + "ce_ib": 3.282869815826416, + "ce_orig": 0.4870489537715912, + "epoch": 0.5331799554245452, + "kl_loss": 0.06514561921358109, + "loss_ib": 0.0009797430830076337, + "step": 1854 + }, + { + "ce_ib": 4.108365058898926, + "ce_orig": 0.6425372362136841, + "epoch": 0.5331799554245452, + "kl_loss": 0.07368110865354538, + "loss_ib": 0.0011476475046947598, + "step": 1854 + }, + { + "ce_ib": 3.965712547302246, + "ce_orig": 0.7305511236190796, + "epoch": 0.5331799554245452, + "kl_loss": 0.05856510251760483, + "loss_ib": 0.00098222226370126, + "step": 1854 + }, + { + "epoch": 0.533467539003523, + "grad_norm": 0.09198537468910217, + "learning_rate": 4.721495619587112e-05, + "loss": 0.7962, + "step": 1855 + }, + { + "ce_ib": 5.567660808563232, + "ce_orig": 1.3031708002090454, + "epoch": 0.533467539003523, + "kl_loss": 0.08030009269714355, + "loss_ib": 0.0013597669312730432, + "step": 1855 + }, + { + "ce_ib": 2.4188318252563477, + "ce_orig": 0.31718146800994873, + "epoch": 0.533467539003523, + "kl_loss": 0.07879798114299774, + "loss_ib": 0.0010298629058524966, + "step": 1855 + }, + { + "ce_ib": 5.0108962059021, + "ce_orig": 0.7286996245384216, + "epoch": 0.533467539003523, + "kl_loss": 0.07876469939947128, + "loss_ib": 0.0012887365883216262, + "step": 1855 + }, + { + "ce_ib": 6.256479263305664, + "ce_orig": 1.3145567178726196, + "epoch": 0.533467539003523, + "kl_loss": 0.0896788015961647, + "loss_ib": 0.0015224358066916466, + "step": 1855 + }, + { + "ce_ib": 4.103048324584961, + "ce_orig": 0.49328261613845825, + "epoch": 0.5337551225825006, + "kl_loss": 0.05637562274932861, + "loss_ib": 0.0009740610257722437, + "step": 1856 + }, + { + "ce_ib": 5.4311041831970215, + "ce_orig": 0.526505172252655, + "epoch": 0.5337551225825006, + "kl_loss": 0.16930055618286133, + "loss_ib": 0.0022361159790307283, + "step": 1856 + }, + { + "ce_ib": 9.534915924072266, + "ce_orig": 1.6566290855407715, + "epoch": 0.5337551225825006, + "kl_loss": 0.0850403904914856, + "loss_ib": 0.0018038954585790634, + "step": 1856 + }, + { + "ce_ib": 4.010655879974365, + "ce_orig": 0.619281530380249, + "epoch": 0.5337551225825006, + "kl_loss": 0.11700724065303802, + "loss_ib": 0.001571137923747301, + "step": 1856 + }, + { + "ce_ib": 3.6652016639709473, + "ce_orig": 0.5731359720230103, + "epoch": 0.5340427061614782, + "kl_loss": 0.06187981739640236, + "loss_ib": 0.0009853183291852474, + "step": 1857 + }, + { + "ce_ib": 3.9815115928649902, + "ce_orig": 0.6923850178718567, + "epoch": 0.5340427061614782, + "kl_loss": 0.10100679099559784, + "loss_ib": 0.0014082189882174134, + "step": 1857 + }, + { + "ce_ib": 4.362298965454102, + "ce_orig": 0.6413350105285645, + "epoch": 0.5340427061614782, + "kl_loss": 0.13990770280361176, + "loss_ib": 0.0018353068735450506, + "step": 1857 + }, + { + "ce_ib": 4.393571853637695, + "ce_orig": 0.857864499092102, + "epoch": 0.5340427061614782, + "kl_loss": 0.09159405529499054, + "loss_ib": 0.0013552976306527853, + "step": 1857 + }, + { + "ce_ib": 6.145808696746826, + "ce_orig": 1.2391997575759888, + "epoch": 0.5343302897404558, + "kl_loss": 0.09299813956022263, + "loss_ib": 0.001544562284834683, + "step": 1858 + }, + { + "ce_ib": 4.470007419586182, + "ce_orig": 0.9444975256919861, + "epoch": 0.5343302897404558, + "kl_loss": 0.09241517633199692, + "loss_ib": 0.0013711524661630392, + "step": 1858 + }, + { + "ce_ib": 4.079993724822998, + "ce_orig": 0.7647038102149963, + "epoch": 0.5343302897404558, + "kl_loss": 0.06401269137859344, + "loss_ib": 0.001048126257956028, + "step": 1858 + }, + { + "ce_ib": 4.955940246582031, + "ce_orig": 0.4836491048336029, + "epoch": 0.5343302897404558, + "kl_loss": 0.15141108632087708, + "loss_ib": 0.0020097047090530396, + "step": 1858 + }, + { + "ce_ib": 4.377822399139404, + "ce_orig": 0.7177245020866394, + "epoch": 0.5346178733194334, + "kl_loss": 0.04567714035511017, + "loss_ib": 0.0008945536683313549, + "step": 1859 + }, + { + "ce_ib": 7.762528896331787, + "ce_orig": 1.4742891788482666, + "epoch": 0.5346178733194334, + "kl_loss": 0.15391871333122253, + "loss_ib": 0.00231543998233974, + "step": 1859 + }, + { + "ce_ib": 3.7290639877319336, + "ce_orig": 0.3724476993083954, + "epoch": 0.5346178733194334, + "kl_loss": 0.0905434638261795, + "loss_ib": 0.0012783410493284464, + "step": 1859 + }, + { + "ce_ib": 4.995687484741211, + "ce_orig": 0.4139731526374817, + "epoch": 0.5346178733194334, + "kl_loss": 0.07245605438947678, + "loss_ib": 0.0012241292279213667, + "step": 1859 + }, + { + "epoch": 0.5349054568984111, + "grad_norm": 0.090834841132164, + "learning_rate": 4.719713046005938e-05, + "loss": 0.8223, + "step": 1860 + }, + { + "ce_ib": 4.0647454261779785, + "ce_orig": 0.6698126792907715, + "epoch": 0.5349054568984111, + "kl_loss": 0.07729653269052505, + "loss_ib": 0.0011794398305937648, + "step": 1860 + }, + { + "ce_ib": 3.361959457397461, + "ce_orig": 0.6456858515739441, + "epoch": 0.5349054568984111, + "kl_loss": 0.08039053529500961, + "loss_ib": 0.0011401012307032943, + "step": 1860 + }, + { + "ce_ib": 6.0044355392456055, + "ce_orig": 1.1099152565002441, + "epoch": 0.5349054568984111, + "kl_loss": 0.07986097037792206, + "loss_ib": 0.0013990532606840134, + "step": 1860 + }, + { + "ce_ib": 5.856914520263672, + "ce_orig": 0.45476415753364563, + "epoch": 0.5349054568984111, + "kl_loss": 0.1881665289402008, + "loss_ib": 0.0024673566222190857, + "step": 1860 + }, + { + "ce_ib": 5.325282573699951, + "ce_orig": 1.0000052452087402, + "epoch": 0.5351930404773887, + "kl_loss": 0.08844589442014694, + "loss_ib": 0.0014169871574267745, + "step": 1861 + }, + { + "ce_ib": 2.823394775390625, + "ce_orig": 0.5317319631576538, + "epoch": 0.5351930404773887, + "kl_loss": 0.07738970220088959, + "loss_ib": 0.0010562364477664232, + "step": 1861 + }, + { + "ce_ib": 4.384865760803223, + "ce_orig": 0.7221093773841858, + "epoch": 0.5351930404773887, + "kl_loss": 0.3226340413093567, + "loss_ib": 0.003664826974272728, + "step": 1861 + }, + { + "ce_ib": 5.176585674285889, + "ce_orig": 0.5063703656196594, + "epoch": 0.5351930404773887, + "kl_loss": 0.07601437717676163, + "loss_ib": 0.0012778022792190313, + "step": 1861 + }, + { + "ce_ib": 5.447780132293701, + "ce_orig": 0.9343679547309875, + "epoch": 0.5354806240563664, + "kl_loss": 0.07958365976810455, + "loss_ib": 0.0013406145153567195, + "step": 1862 + }, + { + "ce_ib": 5.593994617462158, + "ce_orig": 1.3064777851104736, + "epoch": 0.5354806240563664, + "kl_loss": 0.16426706314086914, + "loss_ib": 0.0022020700853317976, + "step": 1862 + }, + { + "ce_ib": 3.979562997817993, + "ce_orig": 0.5550950765609741, + "epoch": 0.5354806240563664, + "kl_loss": 0.08275218307971954, + "loss_ib": 0.0012254781322553754, + "step": 1862 + }, + { + "ce_ib": 4.1164422035217285, + "ce_orig": 0.801969587802887, + "epoch": 0.5354806240563664, + "kl_loss": 0.08719400316476822, + "loss_ib": 0.0012835841625928879, + "step": 1862 + }, + { + "ce_ib": 4.288140773773193, + "ce_orig": 0.8692682981491089, + "epoch": 0.535768207635344, + "kl_loss": 0.062317490577697754, + "loss_ib": 0.0010519889183342457, + "step": 1863 + }, + { + "ce_ib": 3.639946222305298, + "ce_orig": 0.5509376525878906, + "epoch": 0.535768207635344, + "kl_loss": 0.07786630094051361, + "loss_ib": 0.001142657594755292, + "step": 1863 + }, + { + "ce_ib": 3.9248785972595215, + "ce_orig": 0.8734833598136902, + "epoch": 0.535768207635344, + "kl_loss": 0.07180636376142502, + "loss_ib": 0.0011105515295639634, + "step": 1863 + }, + { + "ce_ib": 5.362997055053711, + "ce_orig": 0.9866791367530823, + "epoch": 0.535768207635344, + "kl_loss": 0.07493927329778671, + "loss_ib": 0.0012856924440711737, + "step": 1863 + }, + { + "ce_ib": 7.0346503257751465, + "ce_orig": 1.416372537612915, + "epoch": 0.5360557912143217, + "kl_loss": 0.1120106652379036, + "loss_ib": 0.0018235716270282865, + "step": 1864 + }, + { + "ce_ib": 3.537402868270874, + "ce_orig": 0.4890269339084625, + "epoch": 0.5360557912143217, + "kl_loss": 0.06066582351922989, + "loss_ib": 0.0009603984653949738, + "step": 1864 + }, + { + "ce_ib": 4.326279640197754, + "ce_orig": 0.6598331928253174, + "epoch": 0.5360557912143217, + "kl_loss": 0.05674532428383827, + "loss_ib": 0.0010000810725614429, + "step": 1864 + }, + { + "ce_ib": 5.022051811218262, + "ce_orig": 1.2499855756759644, + "epoch": 0.5360557912143217, + "kl_loss": 0.08580147475004196, + "loss_ib": 0.00136021978687495, + "step": 1864 + }, + { + "epoch": 0.5363433747932993, + "grad_norm": 0.11152894794940948, + "learning_rate": 4.7179251246208303e-05, + "loss": 0.8425, + "step": 1865 + }, + { + "ce_ib": 3.3266448974609375, + "ce_orig": 0.5198007822036743, + "epoch": 0.5363433747932993, + "kl_loss": 0.050374094396829605, + "loss_ib": 0.0008364054374396801, + "step": 1865 + }, + { + "ce_ib": 5.248294830322266, + "ce_orig": 0.6830242872238159, + "epoch": 0.5363433747932993, + "kl_loss": 0.07221022248268127, + "loss_ib": 0.001246931729838252, + "step": 1865 + }, + { + "ce_ib": 4.352480411529541, + "ce_orig": 0.8697564005851746, + "epoch": 0.5363433747932993, + "kl_loss": 0.18739229440689087, + "loss_ib": 0.0023091710172593594, + "step": 1865 + }, + { + "ce_ib": 5.229000568389893, + "ce_orig": 0.7954290509223938, + "epoch": 0.5363433747932993, + "kl_loss": 0.11133542656898499, + "loss_ib": 0.001636254251934588, + "step": 1865 + }, + { + "ce_ib": 7.480777263641357, + "ce_orig": 1.5291461944580078, + "epoch": 0.5366309583722769, + "kl_loss": 0.09287488460540771, + "loss_ib": 0.0016768263885751367, + "step": 1866 + }, + { + "ce_ib": 5.935614585876465, + "ce_orig": 0.9175684452056885, + "epoch": 0.5366309583722769, + "kl_loss": 0.09335638582706451, + "loss_ib": 0.0015271252486854792, + "step": 1866 + }, + { + "ce_ib": 6.934662818908691, + "ce_orig": 0.941277027130127, + "epoch": 0.5366309583722769, + "kl_loss": 0.06205186992883682, + "loss_ib": 0.0013139849761500955, + "step": 1866 + }, + { + "ce_ib": 8.533012390136719, + "ce_orig": 1.1701033115386963, + "epoch": 0.5366309583722769, + "kl_loss": 0.08932290971279144, + "loss_ib": 0.0017465301789343357, + "step": 1866 + }, + { + "ce_ib": 6.125261306762695, + "ce_orig": 1.0572534799575806, + "epoch": 0.5369185419512545, + "kl_loss": 0.046793870627880096, + "loss_ib": 0.001080464804545045, + "step": 1867 + }, + { + "ce_ib": 7.80275821685791, + "ce_orig": 1.5057945251464844, + "epoch": 0.5369185419512545, + "kl_loss": 0.06967668235301971, + "loss_ib": 0.0014770426787436008, + "step": 1867 + }, + { + "ce_ib": 3.7662670612335205, + "ce_orig": 0.6024259328842163, + "epoch": 0.5369185419512545, + "kl_loss": 0.06198422610759735, + "loss_ib": 0.0009964689379557967, + "step": 1867 + }, + { + "ce_ib": 3.9333996772766113, + "ce_orig": 0.8519479632377625, + "epoch": 0.5369185419512545, + "kl_loss": 0.053352952003479004, + "loss_ib": 0.0009268695139326155, + "step": 1867 + }, + { + "ce_ib": 4.49202823638916, + "ce_orig": 1.0928200483322144, + "epoch": 0.5372061255302322, + "kl_loss": 0.06097971647977829, + "loss_ib": 0.001058999914675951, + "step": 1868 + }, + { + "ce_ib": 4.743061065673828, + "ce_orig": 0.8736206889152527, + "epoch": 0.5372061255302322, + "kl_loss": 0.06666283309459686, + "loss_ib": 0.0011409342987462878, + "step": 1868 + }, + { + "ce_ib": 6.196759223937988, + "ce_orig": 1.2826341390609741, + "epoch": 0.5372061255302322, + "kl_loss": 0.06491327285766602, + "loss_ib": 0.0012688086135312915, + "step": 1868 + }, + { + "ce_ib": 6.093201637268066, + "ce_orig": 1.0684585571289062, + "epoch": 0.5372061255302322, + "kl_loss": 0.16246895492076874, + "loss_ib": 0.0022340095601975918, + "step": 1868 + }, + { + "ce_ib": 4.604904651641846, + "ce_orig": 0.6252310872077942, + "epoch": 0.5374937091092099, + "kl_loss": 0.06963659822940826, + "loss_ib": 0.0011568564223125577, + "step": 1869 + }, + { + "ce_ib": 6.108121871948242, + "ce_orig": 0.7665905952453613, + "epoch": 0.5374937091092099, + "kl_loss": 0.06648684293031693, + "loss_ib": 0.0012756806099787354, + "step": 1869 + }, + { + "ce_ib": 4.63380765914917, + "ce_orig": 1.1782315969467163, + "epoch": 0.5374937091092099, + "kl_loss": 0.037484291940927505, + "loss_ib": 0.0008382236701436341, + "step": 1869 + }, + { + "ce_ib": 6.929873466491699, + "ce_orig": 1.9465168714523315, + "epoch": 0.5374937091092099, + "kl_loss": 0.3356351852416992, + "loss_ib": 0.004049339331686497, + "step": 1869 + }, + { + "epoch": 0.5377812926881875, + "grad_norm": 0.11187389492988586, + "learning_rate": 4.7161318597393054e-05, + "loss": 0.881, + "step": 1870 + }, + { + "ce_ib": 2.8059377670288086, + "ce_orig": 0.6418023705482483, + "epoch": 0.5377812926881875, + "kl_loss": 0.06153927743434906, + "loss_ib": 0.0008959865081124008, + "step": 1870 + }, + { + "ce_ib": 6.832296371459961, + "ce_orig": 0.8005569577217102, + "epoch": 0.5377812926881875, + "kl_loss": 0.14064443111419678, + "loss_ib": 0.002089673886075616, + "step": 1870 + }, + { + "ce_ib": 5.314372539520264, + "ce_orig": 0.7052491307258606, + "epoch": 0.5377812926881875, + "kl_loss": 0.09595352411270142, + "loss_ib": 0.0014909724704921246, + "step": 1870 + }, + { + "ce_ib": 8.3805513381958, + "ce_orig": 1.433889627456665, + "epoch": 0.5377812926881875, + "kl_loss": 0.08600347489118576, + "loss_ib": 0.0016980897635221481, + "step": 1870 + }, + { + "ce_ib": 3.5613880157470703, + "ce_orig": 0.5242851376533508, + "epoch": 0.5380688762671652, + "kl_loss": 0.05114305019378662, + "loss_ib": 0.0008675692952238023, + "step": 1871 + }, + { + "ce_ib": 3.1910884380340576, + "ce_orig": 0.7214317321777344, + "epoch": 0.5380688762671652, + "kl_loss": 0.03734808415174484, + "loss_ib": 0.0006925897323526442, + "step": 1871 + }, + { + "ce_ib": 6.119264602661133, + "ce_orig": 1.1227083206176758, + "epoch": 0.5380688762671652, + "kl_loss": 0.11017464846372604, + "loss_ib": 0.001713672885671258, + "step": 1871 + }, + { + "ce_ib": 3.0809619426727295, + "ce_orig": 0.6351144313812256, + "epoch": 0.5380688762671652, + "kl_loss": 0.09793555736541748, + "loss_ib": 0.0012874517124146223, + "step": 1871 + }, + { + "ce_ib": 2.5573227405548096, + "ce_orig": 0.4840497374534607, + "epoch": 0.5383564598461428, + "kl_loss": 0.08007337898015976, + "loss_ib": 0.0010564661351963878, + "step": 1872 + }, + { + "ce_ib": 5.343964099884033, + "ce_orig": 0.9847210049629211, + "epoch": 0.5383564598461428, + "kl_loss": 0.07772941887378693, + "loss_ib": 0.0013116904301568866, + "step": 1872 + }, + { + "ce_ib": 6.919244766235352, + "ce_orig": 1.3753911256790161, + "epoch": 0.5383564598461428, + "kl_loss": 0.08101324737071991, + "loss_ib": 0.0015020569553598762, + "step": 1872 + }, + { + "ce_ib": 1.9719284772872925, + "ce_orig": 0.2595716714859009, + "epoch": 0.5383564598461428, + "kl_loss": 0.15304377675056458, + "loss_ib": 0.0017276306170970201, + "step": 1872 + }, + { + "ce_ib": 5.512475490570068, + "ce_orig": 0.9804131388664246, + "epoch": 0.5386440434251204, + "kl_loss": 0.11799289286136627, + "loss_ib": 0.001731176394969225, + "step": 1873 + }, + { + "ce_ib": 6.971649646759033, + "ce_orig": 1.445634126663208, + "epoch": 0.5386440434251204, + "kl_loss": 0.13900133967399597, + "loss_ib": 0.0020871784072369337, + "step": 1873 + }, + { + "ce_ib": 4.9493913650512695, + "ce_orig": 1.2242472171783447, + "epoch": 0.5386440434251204, + "kl_loss": 0.0836649239063263, + "loss_ib": 0.001331588253378868, + "step": 1873 + }, + { + "ce_ib": 4.99583101272583, + "ce_orig": 0.7371603846549988, + "epoch": 0.5386440434251204, + "kl_loss": 0.11052550375461578, + "loss_ib": 0.0016048380639404058, + "step": 1873 + }, + { + "ce_ib": 5.664217472076416, + "ce_orig": 0.9064738750457764, + "epoch": 0.538931627004098, + "kl_loss": 0.11258955299854279, + "loss_ib": 0.0016923171933740377, + "step": 1874 + }, + { + "ce_ib": 7.241815090179443, + "ce_orig": 1.7119221687316895, + "epoch": 0.538931627004098, + "kl_loss": 0.05008301883935928, + "loss_ib": 0.0012250116560608149, + "step": 1874 + }, + { + "ce_ib": 5.434731483459473, + "ce_orig": 0.8272178769111633, + "epoch": 0.538931627004098, + "kl_loss": 0.04766685515642166, + "loss_ib": 0.0010201416444033384, + "step": 1874 + }, + { + "ce_ib": 4.582449436187744, + "ce_orig": 0.6659607887268066, + "epoch": 0.538931627004098, + "kl_loss": 0.13097268342971802, + "loss_ib": 0.0017679717857390642, + "step": 1874 + }, + { + "epoch": 0.5392192105830758, + "grad_norm": 0.10883186012506485, + "learning_rate": 4.714333255681755e-05, + "loss": 0.8448, + "step": 1875 + }, + { + "ce_ib": 2.938021421432495, + "ce_orig": 0.5171647071838379, + "epoch": 0.5392192105830758, + "kl_loss": 0.06409141421318054, + "loss_ib": 0.0009347163140773773, + "step": 1875 + }, + { + "ce_ib": 6.743963718414307, + "ce_orig": 0.8710337281227112, + "epoch": 0.5392192105830758, + "kl_loss": 0.1021910309791565, + "loss_ib": 0.0016963067464530468, + "step": 1875 + }, + { + "ce_ib": 2.5126259326934814, + "ce_orig": 0.4402546286582947, + "epoch": 0.5392192105830758, + "kl_loss": 0.029375649988651276, + "loss_ib": 0.0005450190510600805, + "step": 1875 + }, + { + "ce_ib": 3.58014178276062, + "ce_orig": 0.7703568935394287, + "epoch": 0.5392192105830758, + "kl_loss": 0.06910865753889084, + "loss_ib": 0.0010491007706150413, + "step": 1875 + }, + { + "ce_ib": 4.8024821281433105, + "ce_orig": 0.717775285243988, + "epoch": 0.5395067941620534, + "kl_loss": 0.09310498833656311, + "loss_ib": 0.001411298057064414, + "step": 1876 + }, + { + "ce_ib": 6.631094455718994, + "ce_orig": 1.485589623451233, + "epoch": 0.5395067941620534, + "kl_loss": 0.055475715547800064, + "loss_ib": 0.001217866549268365, + "step": 1876 + }, + { + "ce_ib": 2.974945306777954, + "ce_orig": 0.6519951224327087, + "epoch": 0.5395067941620534, + "kl_loss": 0.05824623629450798, + "loss_ib": 0.0008799569332040846, + "step": 1876 + }, + { + "ce_ib": 6.146327972412109, + "ce_orig": 1.34998619556427, + "epoch": 0.5395067941620534, + "kl_loss": 0.0800832062959671, + "loss_ib": 0.0014154647942632437, + "step": 1876 + }, + { + "ce_ib": 4.794435977935791, + "ce_orig": 0.5025479197502136, + "epoch": 0.539794377741031, + "kl_loss": 0.1766454428434372, + "loss_ib": 0.0022458978928625584, + "step": 1877 + }, + { + "ce_ib": 6.66762113571167, + "ce_orig": 1.119901418685913, + "epoch": 0.539794377741031, + "kl_loss": 0.07807546108961105, + "loss_ib": 0.0014475166099146008, + "step": 1877 + }, + { + "ce_ib": 5.839198589324951, + "ce_orig": 0.7469760775566101, + "epoch": 0.539794377741031, + "kl_loss": 0.12092413008213043, + "loss_ib": 0.0017931611509993672, + "step": 1877 + }, + { + "ce_ib": 5.422294616699219, + "ce_orig": 1.0639286041259766, + "epoch": 0.539794377741031, + "kl_loss": 0.06867212802171707, + "loss_ib": 0.001228950684890151, + "step": 1877 + }, + { + "ce_ib": 3.2782089710235596, + "ce_orig": 0.4706994593143463, + "epoch": 0.5400819613200086, + "kl_loss": 0.11956185102462769, + "loss_ib": 0.0015234394231811166, + "step": 1878 + }, + { + "ce_ib": 3.7124524116516113, + "ce_orig": 0.5277577042579651, + "epoch": 0.5400819613200086, + "kl_loss": 0.06934241950511932, + "loss_ib": 0.0010646693408489227, + "step": 1878 + }, + { + "ce_ib": 6.270019054412842, + "ce_orig": 1.6265575885772705, + "epoch": 0.5400819613200086, + "kl_loss": 0.07714739441871643, + "loss_ib": 0.00139847572427243, + "step": 1878 + }, + { + "ce_ib": 4.782007694244385, + "ce_orig": 1.0015133619308472, + "epoch": 0.5400819613200086, + "kl_loss": 0.08120578527450562, + "loss_ib": 0.0012902586022391915, + "step": 1878 + }, + { + "ce_ib": 4.344252586364746, + "ce_orig": 0.872351884841919, + "epoch": 0.5403695448989863, + "kl_loss": 0.0856708437204361, + "loss_ib": 0.0012911336962133646, + "step": 1879 + }, + { + "ce_ib": 6.844085216522217, + "ce_orig": 1.5220190286636353, + "epoch": 0.5403695448989863, + "kl_loss": 0.11233113706111908, + "loss_ib": 0.0018077197019010782, + "step": 1879 + }, + { + "ce_ib": 5.931623935699463, + "ce_orig": 1.4490350484848022, + "epoch": 0.5403695448989863, + "kl_loss": 0.07694714516401291, + "loss_ib": 0.0013626337749883533, + "step": 1879 + }, + { + "ce_ib": 4.209210395812988, + "ce_orig": 0.6330265402793884, + "epoch": 0.5403695448989863, + "kl_loss": 0.10785370320081711, + "loss_ib": 0.0014994580997154117, + "step": 1879 + }, + { + "epoch": 0.5406571284779639, + "grad_norm": 0.09346377104520798, + "learning_rate": 4.7125293167814345e-05, + "loss": 0.862, + "step": 1880 + }, + { + "ce_ib": 3.753009796142578, + "ce_orig": 0.6798171997070312, + "epoch": 0.5406571284779639, + "kl_loss": 0.09947092086076736, + "loss_ib": 0.0013700101990252733, + "step": 1880 + }, + { + "ce_ib": 3.867032527923584, + "ce_orig": 0.6207717061042786, + "epoch": 0.5406571284779639, + "kl_loss": 0.08714562654495239, + "loss_ib": 0.0012581595219671726, + "step": 1880 + }, + { + "ce_ib": 5.156338691711426, + "ce_orig": 1.098903775215149, + "epoch": 0.5406571284779639, + "kl_loss": 0.06555266678333282, + "loss_ib": 0.001171160489320755, + "step": 1880 + }, + { + "ce_ib": 5.191904544830322, + "ce_orig": 0.6229297518730164, + "epoch": 0.5406571284779639, + "kl_loss": 0.11149217188358307, + "loss_ib": 0.0016341120935976505, + "step": 1880 + }, + { + "ce_ib": 6.216848373413086, + "ce_orig": 1.219089150428772, + "epoch": 0.5409447120569415, + "kl_loss": 0.15237785875797272, + "loss_ib": 0.002145463367924094, + "step": 1881 + }, + { + "ce_ib": 3.3246984481811523, + "ce_orig": 0.7209340929985046, + "epoch": 0.5409447120569415, + "kl_loss": 0.10308524966239929, + "loss_ib": 0.0013633222552016377, + "step": 1881 + }, + { + "ce_ib": 3.8802926540374756, + "ce_orig": 0.6300032138824463, + "epoch": 0.5409447120569415, + "kl_loss": 0.04307711869478226, + "loss_ib": 0.000818800472188741, + "step": 1881 + }, + { + "ce_ib": 7.105929851531982, + "ce_orig": 1.4070239067077637, + "epoch": 0.5409447120569415, + "kl_loss": 0.19312109053134918, + "loss_ib": 0.0026418038178235292, + "step": 1881 + }, + { + "ce_ib": 3.810304641723633, + "ce_orig": 0.6895675659179688, + "epoch": 0.5412322956359192, + "kl_loss": 0.08753614127635956, + "loss_ib": 0.0012563918717205524, + "step": 1882 + }, + { + "ce_ib": 6.364545822143555, + "ce_orig": 1.1733458042144775, + "epoch": 0.5412322956359192, + "kl_loss": 0.12536737322807312, + "loss_ib": 0.0018901282455772161, + "step": 1882 + }, + { + "ce_ib": 2.928663969039917, + "ce_orig": 0.43544265627861023, + "epoch": 0.5412322956359192, + "kl_loss": 0.03458476439118385, + "loss_ib": 0.0006387140601873398, + "step": 1882 + }, + { + "ce_ib": 5.622189998626709, + "ce_orig": 1.0925984382629395, + "epoch": 0.5412322956359192, + "kl_loss": 0.11252744495868683, + "loss_ib": 0.0016874934080988169, + "step": 1882 + }, + { + "ce_ib": 5.2332611083984375, + "ce_orig": 0.8060828447341919, + "epoch": 0.5415198792148969, + "kl_loss": 0.18054616451263428, + "loss_ib": 0.002328787697479129, + "step": 1883 + }, + { + "ce_ib": 4.134537220001221, + "ce_orig": 0.6565957069396973, + "epoch": 0.5415198792148969, + "kl_loss": 0.10169609636068344, + "loss_ib": 0.001430414617061615, + "step": 1883 + }, + { + "ce_ib": 5.400188446044922, + "ce_orig": 0.9141398072242737, + "epoch": 0.5415198792148969, + "kl_loss": 0.12270620465278625, + "loss_ib": 0.0017670808592811227, + "step": 1883 + }, + { + "ce_ib": 3.9813194274902344, + "ce_orig": 0.37552177906036377, + "epoch": 0.5415198792148969, + "kl_loss": 0.08992289006710052, + "loss_ib": 0.0012973607517778873, + "step": 1883 + }, + { + "ce_ib": 3.476588726043701, + "ce_orig": 0.8150554895401001, + "epoch": 0.5418074627938745, + "kl_loss": 0.08288824558258057, + "loss_ib": 0.0011765413219109178, + "step": 1884 + }, + { + "ce_ib": 5.488793849945068, + "ce_orig": 1.2628737688064575, + "epoch": 0.5418074627938745, + "kl_loss": 0.08476603031158447, + "loss_ib": 0.0013965396210551262, + "step": 1884 + }, + { + "ce_ib": 3.709301710128784, + "ce_orig": 0.8635600805282593, + "epoch": 0.5418074627938745, + "kl_loss": 0.054674986749887466, + "loss_ib": 0.0009176800376735628, + "step": 1884 + }, + { + "ce_ib": 4.6660475730896, + "ce_orig": 0.7240663766860962, + "epoch": 0.5418074627938745, + "kl_loss": 0.048985399305820465, + "loss_ib": 0.0009564587380737066, + "step": 1884 + }, + { + "epoch": 0.5420950463728521, + "grad_norm": 0.09899216145277023, + "learning_rate": 4.710720047384451e-05, + "loss": 0.8234, + "step": 1885 + }, + { + "ce_ib": 3.9311368465423584, + "ce_orig": 0.649273157119751, + "epoch": 0.5420950463728521, + "kl_loss": 0.07055657356977463, + "loss_ib": 0.0010986793786287308, + "step": 1885 + }, + { + "ce_ib": 4.835707187652588, + "ce_orig": 0.820050835609436, + "epoch": 0.5420950463728521, + "kl_loss": 0.07729177922010422, + "loss_ib": 0.0012564884964376688, + "step": 1885 + }, + { + "ce_ib": 2.9703049659729004, + "ce_orig": 0.5429113507270813, + "epoch": 0.5420950463728521, + "kl_loss": 0.06394404172897339, + "loss_ib": 0.0009364709258079529, + "step": 1885 + }, + { + "ce_ib": 4.416995048522949, + "ce_orig": 0.7352343797683716, + "epoch": 0.5420950463728521, + "kl_loss": 0.1675829440355301, + "loss_ib": 0.0021175288129597902, + "step": 1885 + }, + { + "ce_ib": 4.931896209716797, + "ce_orig": 1.0551700592041016, + "epoch": 0.5423826299518297, + "kl_loss": 0.10906436294317245, + "loss_ib": 0.001583833247423172, + "step": 1886 + }, + { + "ce_ib": 3.7773826122283936, + "ce_orig": 0.6575794816017151, + "epoch": 0.5423826299518297, + "kl_loss": 0.06918908655643463, + "loss_ib": 0.0010696290992200375, + "step": 1886 + }, + { + "ce_ib": 4.158738613128662, + "ce_orig": 0.8615984320640564, + "epoch": 0.5423826299518297, + "kl_loss": 0.04870394617319107, + "loss_ib": 0.0009029133361764252, + "step": 1886 + }, + { + "ce_ib": 5.506878852844238, + "ce_orig": 1.0230817794799805, + "epoch": 0.5423826299518297, + "kl_loss": 0.10155496001243591, + "loss_ib": 0.0015662374207749963, + "step": 1886 + }, + { + "ce_ib": 3.300973653793335, + "ce_orig": 0.8024538159370422, + "epoch": 0.5426702135308074, + "kl_loss": 0.060236573219299316, + "loss_ib": 0.000932463095523417, + "step": 1887 + }, + { + "ce_ib": 3.858247756958008, + "ce_orig": 0.7646993398666382, + "epoch": 0.5426702135308074, + "kl_loss": 0.06595297902822495, + "loss_ib": 0.0010453545255586505, + "step": 1887 + }, + { + "ce_ib": 7.572666645050049, + "ce_orig": 1.5162445306777954, + "epoch": 0.5426702135308074, + "kl_loss": 0.07627195119857788, + "loss_ib": 0.0015199860790744424, + "step": 1887 + }, + { + "ce_ib": 2.6665029525756836, + "ce_orig": 0.5964523553848267, + "epoch": 0.5426702135308074, + "kl_loss": 0.09715908765792847, + "loss_ib": 0.0012382412096485496, + "step": 1887 + }, + { + "ce_ib": 5.631707668304443, + "ce_orig": 1.161486029624939, + "epoch": 0.542957797109785, + "kl_loss": 0.0744452178478241, + "loss_ib": 0.0013076228788122535, + "step": 1888 + }, + { + "ce_ib": 3.5369491577148438, + "ce_orig": 0.503812313079834, + "epoch": 0.542957797109785, + "kl_loss": 0.06044292449951172, + "loss_ib": 0.0009581240592524409, + "step": 1888 + }, + { + "ce_ib": 7.328972339630127, + "ce_orig": 1.6762977838516235, + "epoch": 0.542957797109785, + "kl_loss": 0.08736298978328705, + "loss_ib": 0.0016065271338447928, + "step": 1888 + }, + { + "ce_ib": 4.475137233734131, + "ce_orig": 0.7289717793464661, + "epoch": 0.542957797109785, + "kl_loss": 0.12014168500900269, + "loss_ib": 0.001648930599913001, + "step": 1888 + }, + { + "ce_ib": 3.1602540016174316, + "ce_orig": 0.6076099872589111, + "epoch": 0.5432453806887627, + "kl_loss": 0.06145313009619713, + "loss_ib": 0.0009305566200055182, + "step": 1889 + }, + { + "ce_ib": 3.1713321208953857, + "ce_orig": 0.5249027609825134, + "epoch": 0.5432453806887627, + "kl_loss": 0.10683239996433258, + "loss_ib": 0.0013854572316631675, + "step": 1889 + }, + { + "ce_ib": 5.930431842803955, + "ce_orig": 1.0936001539230347, + "epoch": 0.5432453806887627, + "kl_loss": 0.04907810688018799, + "loss_ib": 0.0010838242014870048, + "step": 1889 + }, + { + "ce_ib": 4.424904823303223, + "ce_orig": 0.4856807589530945, + "epoch": 0.5432453806887627, + "kl_loss": 0.12904152274131775, + "loss_ib": 0.0017329056281596422, + "step": 1889 + }, + { + "epoch": 0.5435329642677403, + "grad_norm": 0.09431487321853638, + "learning_rate": 4.708905451849754e-05, + "loss": 0.8598, + "step": 1890 + }, + { + "ce_ib": 3.321425199508667, + "ce_orig": 0.6521619558334351, + "epoch": 0.5435329642677403, + "kl_loss": 0.05718793720006943, + "loss_ib": 0.0009040218428708613, + "step": 1890 + }, + { + "ce_ib": 6.172669887542725, + "ce_orig": 1.1968848705291748, + "epoch": 0.5435329642677403, + "kl_loss": 0.10189983248710632, + "loss_ib": 0.0016362651949748397, + "step": 1890 + }, + { + "ce_ib": 3.9000966548919678, + "ce_orig": 0.8681198954582214, + "epoch": 0.5435329642677403, + "kl_loss": 0.0637349933385849, + "loss_ib": 0.0010273595107719302, + "step": 1890 + }, + { + "ce_ib": 2.2704081535339355, + "ce_orig": 0.24217291176319122, + "epoch": 0.5435329642677403, + "kl_loss": 0.18675987422466278, + "loss_ib": 0.002094639465212822, + "step": 1890 + }, + { + "ce_ib": 5.390542507171631, + "ce_orig": 1.143835186958313, + "epoch": 0.543820547846718, + "kl_loss": 0.07080741226673126, + "loss_ib": 0.0012471283553168178, + "step": 1891 + }, + { + "ce_ib": 4.884975433349609, + "ce_orig": 0.6801275014877319, + "epoch": 0.543820547846718, + "kl_loss": 0.0762237012386322, + "loss_ib": 0.0012507345527410507, + "step": 1891 + }, + { + "ce_ib": 4.23158073425293, + "ce_orig": 0.6321654915809631, + "epoch": 0.543820547846718, + "kl_loss": 0.09597187489271164, + "loss_ib": 0.0013828767696395516, + "step": 1891 + }, + { + "ce_ib": 5.039252758026123, + "ce_orig": 0.9282062649726868, + "epoch": 0.543820547846718, + "kl_loss": 0.09189581871032715, + "loss_ib": 0.001422883360646665, + "step": 1891 + }, + { + "ce_ib": 4.583867073059082, + "ce_orig": 0.66720050573349, + "epoch": 0.5441081314256956, + "kl_loss": 0.1338731348514557, + "loss_ib": 0.001797117991372943, + "step": 1892 + }, + { + "ce_ib": 3.6417152881622314, + "ce_orig": 0.516059398651123, + "epoch": 0.5441081314256956, + "kl_loss": 0.0642818957567215, + "loss_ib": 0.001006990554742515, + "step": 1892 + }, + { + "ce_ib": 4.23419189453125, + "ce_orig": 0.7409697771072388, + "epoch": 0.5441081314256956, + "kl_loss": 0.07750946283340454, + "loss_ib": 0.001198513782583177, + "step": 1892 + }, + { + "ce_ib": 5.378389358520508, + "ce_orig": 0.981338620185852, + "epoch": 0.5441081314256956, + "kl_loss": 0.10631321370601654, + "loss_ib": 0.0016009709797799587, + "step": 1892 + }, + { + "ce_ib": 5.4572529792785645, + "ce_orig": 0.9062219262123108, + "epoch": 0.5443957150046732, + "kl_loss": 0.10660901665687561, + "loss_ib": 0.0016118152998387814, + "step": 1893 + }, + { + "ce_ib": 5.669580936431885, + "ce_orig": 1.052855134010315, + "epoch": 0.5443957150046732, + "kl_loss": 0.14106683433055878, + "loss_ib": 0.001977626234292984, + "step": 1893 + }, + { + "ce_ib": 3.016963481903076, + "ce_orig": 0.4418914020061493, + "epoch": 0.5443957150046732, + "kl_loss": 0.0614200234413147, + "loss_ib": 0.0009158965549431741, + "step": 1893 + }, + { + "ce_ib": 5.598696708679199, + "ce_orig": 1.3665869235992432, + "epoch": 0.5443957150046732, + "kl_loss": 0.08963833749294281, + "loss_ib": 0.0014562529977411032, + "step": 1893 + }, + { + "ce_ib": 3.722309112548828, + "ce_orig": 0.6094381809234619, + "epoch": 0.5446832985836508, + "kl_loss": 0.08273034542798996, + "loss_ib": 0.0011995343957096338, + "step": 1894 + }, + { + "ce_ib": 4.523601531982422, + "ce_orig": 0.5780817866325378, + "epoch": 0.5446832985836508, + "kl_loss": 0.11053586006164551, + "loss_ib": 0.0015577187296003103, + "step": 1894 + }, + { + "ce_ib": 4.384653568267822, + "ce_orig": 0.8522878289222717, + "epoch": 0.5446832985836508, + "kl_loss": 0.0761469304561615, + "loss_ib": 0.0011999346315860748, + "step": 1894 + }, + { + "ce_ib": 3.9355502128601074, + "ce_orig": 0.6783236861228943, + "epoch": 0.5446832985836508, + "kl_loss": 0.10731876641511917, + "loss_ib": 0.001466742716729641, + "step": 1894 + }, + { + "epoch": 0.5449708821626286, + "grad_norm": 0.08689374476671219, + "learning_rate": 4.7070855345491255e-05, + "loss": 0.87, + "step": 1895 + }, + { + "ce_ib": 5.652263641357422, + "ce_orig": 0.9107344746589661, + "epoch": 0.5449708821626286, + "kl_loss": 0.11160555481910706, + "loss_ib": 0.0016812818357720971, + "step": 1895 + }, + { + "ce_ib": 3.823521614074707, + "ce_orig": 0.4738576412200928, + "epoch": 0.5449708821626286, + "kl_loss": 0.05789677053689957, + "loss_ib": 0.0009613197762519121, + "step": 1895 + }, + { + "ce_ib": 6.0164690017700195, + "ce_orig": 0.8367154598236084, + "epoch": 0.5449708821626286, + "kl_loss": 0.05606399476528168, + "loss_ib": 0.0011622868478298187, + "step": 1895 + }, + { + "ce_ib": 4.970792293548584, + "ce_orig": 0.6383576393127441, + "epoch": 0.5449708821626286, + "kl_loss": 0.11592382192611694, + "loss_ib": 0.0016563173849135637, + "step": 1895 + }, + { + "ce_ib": 3.4311530590057373, + "ce_orig": 0.5449477434158325, + "epoch": 0.5452584657416062, + "kl_loss": 0.07220610976219177, + "loss_ib": 0.0010651764459908009, + "step": 1896 + }, + { + "ce_ib": 6.0411601066589355, + "ce_orig": 0.6378240585327148, + "epoch": 0.5452584657416062, + "kl_loss": 0.11894844472408295, + "loss_ib": 0.0017936003860086203, + "step": 1896 + }, + { + "ce_ib": 7.708635330200195, + "ce_orig": 1.4525448083877563, + "epoch": 0.5452584657416062, + "kl_loss": 0.08826719224452972, + "loss_ib": 0.0016535352915525436, + "step": 1896 + }, + { + "ce_ib": 3.537482738494873, + "ce_orig": 0.7116951942443848, + "epoch": 0.5452584657416062, + "kl_loss": 0.07469046115875244, + "loss_ib": 0.001100652851164341, + "step": 1896 + }, + { + "ce_ib": 7.163231372833252, + "ce_orig": 1.3363291025161743, + "epoch": 0.5455460493205838, + "kl_loss": 0.09963632375001907, + "loss_ib": 0.0017126863822340965, + "step": 1897 + }, + { + "ce_ib": 3.694423198699951, + "ce_orig": 0.3684045374393463, + "epoch": 0.5455460493205838, + "kl_loss": 0.12045515328645706, + "loss_ib": 0.0015739938244223595, + "step": 1897 + }, + { + "ce_ib": 4.94851541519165, + "ce_orig": 1.125305414199829, + "epoch": 0.5455460493205838, + "kl_loss": 0.06253884732723236, + "loss_ib": 0.0011202399618923664, + "step": 1897 + }, + { + "ce_ib": 5.014147758483887, + "ce_orig": 0.9984070062637329, + "epoch": 0.5455460493205838, + "kl_loss": 0.07208918780088425, + "loss_ib": 0.0012223066296428442, + "step": 1897 + }, + { + "ce_ib": 4.193711757659912, + "ce_orig": 0.4987183213233948, + "epoch": 0.5458336328995614, + "kl_loss": 0.11857590079307556, + "loss_ib": 0.0016051301499828696, + "step": 1898 + }, + { + "ce_ib": 4.53301477432251, + "ce_orig": 0.8807665705680847, + "epoch": 0.5458336328995614, + "kl_loss": 0.06774713099002838, + "loss_ib": 0.0011307727545499802, + "step": 1898 + }, + { + "ce_ib": 2.9195384979248047, + "ce_orig": 0.3922559916973114, + "epoch": 0.5458336328995614, + "kl_loss": 0.0540936253964901, + "loss_ib": 0.0008328901021741331, + "step": 1898 + }, + { + "ce_ib": 5.41218900680542, + "ce_orig": 1.186556339263916, + "epoch": 0.5458336328995614, + "kl_loss": 0.09347891062498093, + "loss_ib": 0.001476007979363203, + "step": 1898 + }, + { + "ce_ib": 5.613734245300293, + "ce_orig": 0.4306528866291046, + "epoch": 0.5461212164785391, + "kl_loss": 0.10716602206230164, + "loss_ib": 0.0016330336220562458, + "step": 1899 + }, + { + "ce_ib": 3.353059768676758, + "ce_orig": 0.6383551955223083, + "epoch": 0.5461212164785391, + "kl_loss": 0.0669705867767334, + "loss_ib": 0.0010050117271021008, + "step": 1899 + }, + { + "ce_ib": 2.564528465270996, + "ce_orig": 0.4995785057544708, + "epoch": 0.5461212164785391, + "kl_loss": 0.055945415049791336, + "loss_ib": 0.0008159069693647325, + "step": 1899 + }, + { + "ce_ib": 4.447467803955078, + "ce_orig": 0.5093992352485657, + "epoch": 0.5461212164785391, + "kl_loss": 0.09166787564754486, + "loss_ib": 0.0013614255003631115, + "step": 1899 + }, + { + "epoch": 0.5464088000575167, + "grad_norm": 0.08654184639453888, + "learning_rate": 4.705260299867169e-05, + "loss": 0.8007, + "step": 1900 + }, + { + "ce_ib": 5.929286479949951, + "ce_orig": 0.7561840415000916, + "epoch": 0.5464088000575167, + "kl_loss": 0.05346221476793289, + "loss_ib": 0.001127550727687776, + "step": 1900 + }, + { + "ce_ib": 5.255467891693115, + "ce_orig": 0.4388079345226288, + "epoch": 0.5464088000575167, + "kl_loss": 0.1818041205406189, + "loss_ib": 0.0023435880430042744, + "step": 1900 + }, + { + "ce_ib": 3.502336025238037, + "ce_orig": 0.34196195006370544, + "epoch": 0.5464088000575167, + "kl_loss": 0.09808197617530823, + "loss_ib": 0.0013310533249750733, + "step": 1900 + }, + { + "ce_ib": 4.738243579864502, + "ce_orig": 1.0260612964630127, + "epoch": 0.5464088000575167, + "kl_loss": 0.09234722703695297, + "loss_ib": 0.001397296553477645, + "step": 1900 + }, + { + "ce_ib": 7.46832275390625, + "ce_orig": 1.6311382055282593, + "epoch": 0.5466963836364943, + "kl_loss": 0.10140446573495865, + "loss_ib": 0.0017608768539503217, + "step": 1901 + }, + { + "ce_ib": 6.357165336608887, + "ce_orig": 1.3606288433074951, + "epoch": 0.5466963836364943, + "kl_loss": 0.0798080638051033, + "loss_ib": 0.0014337971806526184, + "step": 1901 + }, + { + "ce_ib": 4.5675435066223145, + "ce_orig": 0.7304897904396057, + "epoch": 0.5466963836364943, + "kl_loss": 0.049390096217393875, + "loss_ib": 0.0009506553178653121, + "step": 1901 + }, + { + "ce_ib": 7.334258079528809, + "ce_orig": 1.5845714807510376, + "epoch": 0.5466963836364943, + "kl_loss": 0.068457692861557, + "loss_ib": 0.0014180026482790709, + "step": 1901 + }, + { + "ce_ib": 4.226787567138672, + "ce_orig": 0.540566086769104, + "epoch": 0.546983967215472, + "kl_loss": 0.07422268390655518, + "loss_ib": 0.0011649054940789938, + "step": 1902 + }, + { + "ce_ib": 2.6807408332824707, + "ce_orig": 0.4484787881374359, + "epoch": 0.546983967215472, + "kl_loss": 0.06156689673662186, + "loss_ib": 0.0008837429923005402, + "step": 1902 + }, + { + "ce_ib": 3.203948736190796, + "ce_orig": 0.6330675482749939, + "epoch": 0.546983967215472, + "kl_loss": 0.07740206271409988, + "loss_ib": 0.0010944154346361756, + "step": 1902 + }, + { + "ce_ib": 4.682159900665283, + "ce_orig": 0.842790961265564, + "epoch": 0.546983967215472, + "kl_loss": 0.07572542876005173, + "loss_ib": 0.001225470332428813, + "step": 1902 + }, + { + "ce_ib": 3.724529266357422, + "ce_orig": 0.37009721994400024, + "epoch": 0.5472715507944497, + "kl_loss": 0.0488913431763649, + "loss_ib": 0.0008613663958385587, + "step": 1903 + }, + { + "ce_ib": 5.576284408569336, + "ce_orig": 1.1009299755096436, + "epoch": 0.5472715507944497, + "kl_loss": 0.1622486114501953, + "loss_ib": 0.0021801143884658813, + "step": 1903 + }, + { + "ce_ib": 4.354438304901123, + "ce_orig": 0.8746090531349182, + "epoch": 0.5472715507944497, + "kl_loss": 0.06267772614955902, + "loss_ib": 0.0010622210102155805, + "step": 1903 + }, + { + "ce_ib": 5.2439446449279785, + "ce_orig": 0.9784339666366577, + "epoch": 0.5472715507944497, + "kl_loss": 0.0722421258687973, + "loss_ib": 0.0012468156637623906, + "step": 1903 + }, + { + "ce_ib": 5.449314594268799, + "ce_orig": 0.8789129257202148, + "epoch": 0.5475591343734273, + "kl_loss": 0.15755021572113037, + "loss_ib": 0.002120433608070016, + "step": 1904 + }, + { + "ce_ib": 6.919859886169434, + "ce_orig": 1.2989445924758911, + "epoch": 0.5475591343734273, + "kl_loss": 0.10482358932495117, + "loss_ib": 0.0017402219818904996, + "step": 1904 + }, + { + "ce_ib": 4.877593040466309, + "ce_orig": 0.9994472861289978, + "epoch": 0.5475591343734273, + "kl_loss": 0.07842819392681122, + "loss_ib": 0.00127204111777246, + "step": 1904 + }, + { + "ce_ib": 7.717144012451172, + "ce_orig": 1.4875824451446533, + "epoch": 0.5475591343734273, + "kl_loss": 0.11266853660345078, + "loss_ib": 0.0018983996706083417, + "step": 1904 + }, + { + "epoch": 0.5478467179524049, + "grad_norm": 0.09399117529392242, + "learning_rate": 4.7034297522012985e-05, + "loss": 0.8449, + "step": 1905 + }, + { + "ce_ib": 4.882028579711914, + "ce_orig": 1.0704759359359741, + "epoch": 0.5478467179524049, + "kl_loss": 0.06251262128353119, + "loss_ib": 0.0011133290827274323, + "step": 1905 + }, + { + "ce_ib": 3.5257132053375244, + "ce_orig": 0.676456868648529, + "epoch": 0.5478467179524049, + "kl_loss": 0.059761568903923035, + "loss_ib": 0.0009501869790256023, + "step": 1905 + }, + { + "ce_ib": 3.287277936935425, + "ce_orig": 0.8549056649208069, + "epoch": 0.5478467179524049, + "kl_loss": 0.07119278609752655, + "loss_ib": 0.00104065565392375, + "step": 1905 + }, + { + "ce_ib": 6.9569411277771, + "ce_orig": 1.0141600370407104, + "epoch": 0.5478467179524049, + "kl_loss": 0.1009128987789154, + "loss_ib": 0.0017048229929059744, + "step": 1905 + }, + { + "ce_ib": 5.464492321014404, + "ce_orig": 1.099178671836853, + "epoch": 0.5481343015313825, + "kl_loss": 0.05141352489590645, + "loss_ib": 0.0010605844436213374, + "step": 1906 + }, + { + "ce_ib": 3.9791250228881836, + "ce_orig": 0.6338537931442261, + "epoch": 0.5481343015313825, + "kl_loss": 0.08780016005039215, + "loss_ib": 0.0012759140226989985, + "step": 1906 + }, + { + "ce_ib": 5.811576843261719, + "ce_orig": 0.9505659937858582, + "epoch": 0.5481343015313825, + "kl_loss": 0.050640009343624115, + "loss_ib": 0.0010875577572733164, + "step": 1906 + }, + { + "ce_ib": 4.243020534515381, + "ce_orig": 0.7735797762870789, + "epoch": 0.5481343015313825, + "kl_loss": 0.09302234649658203, + "loss_ib": 0.0013545254478231072, + "step": 1906 + }, + { + "ce_ib": 3.3651955127716064, + "ce_orig": 0.3983408212661743, + "epoch": 0.5484218851103602, + "kl_loss": 0.03156634420156479, + "loss_ib": 0.0006521829636767507, + "step": 1907 + }, + { + "ce_ib": 3.7052488327026367, + "ce_orig": 0.8053063154220581, + "epoch": 0.5484218851103602, + "kl_loss": 0.07197430729866028, + "loss_ib": 0.0010902679059654474, + "step": 1907 + }, + { + "ce_ib": 3.3235042095184326, + "ce_orig": 0.7191283106803894, + "epoch": 0.5484218851103602, + "kl_loss": 0.10832494497299194, + "loss_ib": 0.001415599836036563, + "step": 1907 + }, + { + "ce_ib": 5.322559356689453, + "ce_orig": 1.0199711322784424, + "epoch": 0.5484218851103602, + "kl_loss": 0.079947330057621, + "loss_ib": 0.0013317292323336005, + "step": 1907 + }, + { + "ce_ib": 5.667689800262451, + "ce_orig": 0.7989073395729065, + "epoch": 0.5487094686893378, + "kl_loss": 0.11527878046035767, + "loss_ib": 0.001719556748867035, + "step": 1908 + }, + { + "ce_ib": 5.859560489654541, + "ce_orig": 0.9409865736961365, + "epoch": 0.5487094686893378, + "kl_loss": 0.110213503241539, + "loss_ib": 0.0016880910843610764, + "step": 1908 + }, + { + "ce_ib": 3.259211778640747, + "ce_orig": 0.7469629645347595, + "epoch": 0.5487094686893378, + "kl_loss": 0.0885789766907692, + "loss_ib": 0.001211710972711444, + "step": 1908 + }, + { + "ce_ib": 8.52100944519043, + "ce_orig": 0.8975664973258972, + "epoch": 0.5487094686893378, + "kl_loss": 0.06649929285049438, + "loss_ib": 0.0015170937404036522, + "step": 1908 + }, + { + "ce_ib": 5.684776306152344, + "ce_orig": 0.7283655405044556, + "epoch": 0.5489970522683155, + "kl_loss": 0.06858637928962708, + "loss_ib": 0.0012543414486572146, + "step": 1909 + }, + { + "ce_ib": 4.887577533721924, + "ce_orig": 1.0878584384918213, + "epoch": 0.5489970522683155, + "kl_loss": 0.0765177309513092, + "loss_ib": 0.0012539350427687168, + "step": 1909 + }, + { + "ce_ib": 5.060974597930908, + "ce_orig": 1.0285288095474243, + "epoch": 0.5489970522683155, + "kl_loss": 0.10135877877473831, + "loss_ib": 0.0015196852618828416, + "step": 1909 + }, + { + "ce_ib": 2.7073185443878174, + "ce_orig": 0.40073713660240173, + "epoch": 0.5489970522683155, + "kl_loss": 0.056665606796741486, + "loss_ib": 0.0008373878663405776, + "step": 1909 + }, + { + "epoch": 0.5492846358472931, + "grad_norm": 0.09760917723178864, + "learning_rate": 4.7015938959617276e-05, + "loss": 0.9135, + "step": 1910 + }, + { + "ce_ib": 2.7977044582366943, + "ce_orig": 0.569459855556488, + "epoch": 0.5492846358472931, + "kl_loss": 0.058827660977840424, + "loss_ib": 0.0008680470054969192, + "step": 1910 + }, + { + "ce_ib": 3.5508434772491455, + "ce_orig": 0.32057836651802063, + "epoch": 0.5492846358472931, + "kl_loss": 0.12434736639261246, + "loss_ib": 0.00159855792298913, + "step": 1910 + }, + { + "ce_ib": 3.7011046409606934, + "ce_orig": 0.805534839630127, + "epoch": 0.5492846358472931, + "kl_loss": 0.05860723927617073, + "loss_ib": 0.0009561828337609768, + "step": 1910 + }, + { + "ce_ib": 4.946987152099609, + "ce_orig": 0.8262712359428406, + "epoch": 0.5492846358472931, + "kl_loss": 0.11220541596412659, + "loss_ib": 0.0016167528228834271, + "step": 1910 + }, + { + "ce_ib": 4.484790325164795, + "ce_orig": 0.24797745048999786, + "epoch": 0.5495722194262708, + "kl_loss": 0.09415937960147858, + "loss_ib": 0.0013900728663429618, + "step": 1911 + }, + { + "ce_ib": 4.580901145935059, + "ce_orig": 0.5520976781845093, + "epoch": 0.5495722194262708, + "kl_loss": 0.11482474952936172, + "loss_ib": 0.0016063374932855368, + "step": 1911 + }, + { + "ce_ib": 3.8937273025512695, + "ce_orig": 0.8984609842300415, + "epoch": 0.5495722194262708, + "kl_loss": 0.03738212585449219, + "loss_ib": 0.0007631939370185137, + "step": 1911 + }, + { + "ce_ib": 2.9355034828186035, + "ce_orig": 0.3463990092277527, + "epoch": 0.5495722194262708, + "kl_loss": 0.08178159594535828, + "loss_ib": 0.0011113663204014301, + "step": 1911 + }, + { + "ce_ib": 3.4348607063293457, + "ce_orig": 0.6053741574287415, + "epoch": 0.5498598030052484, + "kl_loss": 0.07217971980571747, + "loss_ib": 0.0010652831988409162, + "step": 1912 + }, + { + "ce_ib": 3.1916728019714355, + "ce_orig": 0.5900030136108398, + "epoch": 0.5498598030052484, + "kl_loss": 0.0549001470208168, + "loss_ib": 0.0008681687759235501, + "step": 1912 + }, + { + "ce_ib": 6.212212562561035, + "ce_orig": 1.1028697490692139, + "epoch": 0.5498598030052484, + "kl_loss": 0.1011008620262146, + "loss_ib": 0.0016322297742590308, + "step": 1912 + }, + { + "ce_ib": 4.543127059936523, + "ce_orig": 0.9818519353866577, + "epoch": 0.5498598030052484, + "kl_loss": 0.05543201044201851, + "loss_ib": 0.0010086327092722058, + "step": 1912 + }, + { + "ce_ib": 5.143528938293457, + "ce_orig": 0.8511130213737488, + "epoch": 0.550147386584226, + "kl_loss": 0.07649235427379608, + "loss_ib": 0.0012792764464393258, + "step": 1913 + }, + { + "ce_ib": 6.344452857971191, + "ce_orig": 1.3267079591751099, + "epoch": 0.550147386584226, + "kl_loss": 0.09149109572172165, + "loss_ib": 0.0015493562677875161, + "step": 1913 + }, + { + "ce_ib": 4.662521839141846, + "ce_orig": 0.5107982158660889, + "epoch": 0.550147386584226, + "kl_loss": 0.08433954417705536, + "loss_ib": 0.0013096475740894675, + "step": 1913 + }, + { + "ce_ib": 5.144981861114502, + "ce_orig": 1.3137059211730957, + "epoch": 0.550147386584226, + "kl_loss": 0.08310288190841675, + "loss_ib": 0.0013455270091071725, + "step": 1913 + }, + { + "ce_ib": 6.523970603942871, + "ce_orig": 0.8307527303695679, + "epoch": 0.5504349701632036, + "kl_loss": 0.05977492779493332, + "loss_ib": 0.0012501463061198592, + "step": 1914 + }, + { + "ce_ib": 3.879868984222412, + "ce_orig": 0.49463674426078796, + "epoch": 0.5504349701632036, + "kl_loss": 0.0922413170337677, + "loss_ib": 0.0013104000827297568, + "step": 1914 + }, + { + "ce_ib": 3.6984682083129883, + "ce_orig": 0.6778438091278076, + "epoch": 0.5504349701632036, + "kl_loss": 0.08309254795312881, + "loss_ib": 0.0012007722398266196, + "step": 1914 + }, + { + "ce_ib": 3.680474042892456, + "ce_orig": 0.6179260015487671, + "epoch": 0.5504349701632036, + "kl_loss": 0.08862176537513733, + "loss_ib": 0.0012542649637907743, + "step": 1914 + }, + { + "epoch": 0.5507225537421814, + "grad_norm": 0.09404906630516052, + "learning_rate": 4.69975273557146e-05, + "loss": 0.8967, + "step": 1915 + }, + { + "ce_ib": 2.0851705074310303, + "ce_orig": 0.30706992745399475, + "epoch": 0.5507225537421814, + "kl_loss": 0.0868929773569107, + "loss_ib": 0.0010774467373266816, + "step": 1915 + }, + { + "ce_ib": 3.3939921855926514, + "ce_orig": 0.8344793319702148, + "epoch": 0.5507225537421814, + "kl_loss": 0.10104146599769592, + "loss_ib": 0.0013498138869181275, + "step": 1915 + }, + { + "ce_ib": 4.581013202667236, + "ce_orig": 0.9317339658737183, + "epoch": 0.5507225537421814, + "kl_loss": 0.08333141356706619, + "loss_ib": 0.0012914154212921858, + "step": 1915 + }, + { + "ce_ib": 2.8807990550994873, + "ce_orig": 0.6044163703918457, + "epoch": 0.5507225537421814, + "kl_loss": 0.10802438855171204, + "loss_ib": 0.0013683238066732883, + "step": 1915 + }, + { + "ce_ib": 3.4718735218048096, + "ce_orig": 0.7397521138191223, + "epoch": 0.551010137321159, + "kl_loss": 0.07197237014770508, + "loss_ib": 0.0010669110342860222, + "step": 1916 + }, + { + "ce_ib": 3.9319915771484375, + "ce_orig": 0.5353158712387085, + "epoch": 0.551010137321159, + "kl_loss": 0.07845504581928253, + "loss_ib": 0.0011777495965361595, + "step": 1916 + }, + { + "ce_ib": 5.801970958709717, + "ce_orig": 1.1798752546310425, + "epoch": 0.551010137321159, + "kl_loss": 0.0665987879037857, + "loss_ib": 0.0012461849255487323, + "step": 1916 + }, + { + "ce_ib": 5.825736045837402, + "ce_orig": 1.084876298904419, + "epoch": 0.551010137321159, + "kl_loss": 0.11635487526655197, + "loss_ib": 0.001746122376061976, + "step": 1916 + }, + { + "ce_ib": 4.956907272338867, + "ce_orig": 1.1592769622802734, + "epoch": 0.5512977209001366, + "kl_loss": 0.0826018676161766, + "loss_ib": 0.001321709481999278, + "step": 1917 + }, + { + "ce_ib": 7.0795207023620605, + "ce_orig": 1.0599364042282104, + "epoch": 0.5512977209001366, + "kl_loss": 0.09705859422683716, + "loss_ib": 0.001678538043051958, + "step": 1917 + }, + { + "ce_ib": 5.238564491271973, + "ce_orig": 0.7713952660560608, + "epoch": 0.5512977209001366, + "kl_loss": 0.07971039414405823, + "loss_ib": 0.0013209603494033217, + "step": 1917 + }, + { + "ce_ib": 5.751657485961914, + "ce_orig": 0.6261478662490845, + "epoch": 0.5512977209001366, + "kl_loss": 0.09142602235078812, + "loss_ib": 0.0014894258929416537, + "step": 1917 + }, + { + "ce_ib": 2.5457875728607178, + "ce_orig": 0.4133254587650299, + "epoch": 0.5515853044791142, + "kl_loss": 0.06194275990128517, + "loss_ib": 0.0008740063640289009, + "step": 1918 + }, + { + "ce_ib": 3.733719825744629, + "ce_orig": 0.3669700622558594, + "epoch": 0.5515853044791142, + "kl_loss": 0.12208331376314163, + "loss_ib": 0.001594205154106021, + "step": 1918 + }, + { + "ce_ib": 3.8233559131622314, + "ce_orig": 0.627654492855072, + "epoch": 0.5515853044791142, + "kl_loss": 0.09067053347826004, + "loss_ib": 0.0012890408979728818, + "step": 1918 + }, + { + "ce_ib": 4.722920894622803, + "ce_orig": 0.8331701159477234, + "epoch": 0.5515853044791142, + "kl_loss": 0.08284607529640198, + "loss_ib": 0.0013007527450099587, + "step": 1918 + }, + { + "ce_ib": 6.8741326332092285, + "ce_orig": 1.2548303604125977, + "epoch": 0.5518728880580919, + "kl_loss": 0.08055051416158676, + "loss_ib": 0.0014929183525964618, + "step": 1919 + }, + { + "ce_ib": 3.511838436126709, + "ce_orig": 0.7445605993270874, + "epoch": 0.5518728880580919, + "kl_loss": 0.07347454130649567, + "loss_ib": 0.0010859292233362794, + "step": 1919 + }, + { + "ce_ib": 4.535142421722412, + "ce_orig": 0.7420067191123962, + "epoch": 0.5518728880580919, + "kl_loss": 0.07915159314870834, + "loss_ib": 0.001245030085556209, + "step": 1919 + }, + { + "ce_ib": 4.825951099395752, + "ce_orig": 0.848966121673584, + "epoch": 0.5518728880580919, + "kl_loss": 0.05617580562829971, + "loss_ib": 0.0010443531209602952, + "step": 1919 + }, + { + "epoch": 0.5521604716370695, + "grad_norm": 0.09959237277507782, + "learning_rate": 4.697906275466279e-05, + "loss": 0.8456, + "step": 1920 + }, + { + "ce_ib": 4.672802448272705, + "ce_orig": 0.9393095970153809, + "epoch": 0.5521604716370695, + "kl_loss": 0.09551084041595459, + "loss_ib": 0.0014223884791135788, + "step": 1920 + }, + { + "ce_ib": 4.508411884307861, + "ce_orig": 0.6267635226249695, + "epoch": 0.5521604716370695, + "kl_loss": 0.11981897801160812, + "loss_ib": 0.0016490309499204159, + "step": 1920 + }, + { + "ce_ib": 5.813779830932617, + "ce_orig": 0.8487799167633057, + "epoch": 0.5521604716370695, + "kl_loss": 0.07831750810146332, + "loss_ib": 0.001364552997983992, + "step": 1920 + }, + { + "ce_ib": 3.199180841445923, + "ce_orig": 0.6433333158493042, + "epoch": 0.5521604716370695, + "kl_loss": 0.05436542630195618, + "loss_ib": 0.0008635723497718573, + "step": 1920 + }, + { + "ce_ib": 3.751150608062744, + "ce_orig": 0.4748503267765045, + "epoch": 0.5524480552160471, + "kl_loss": 0.09960165619850159, + "loss_ib": 0.0013711315114051104, + "step": 1921 + }, + { + "ce_ib": 5.455814361572266, + "ce_orig": 1.1211718320846558, + "epoch": 0.5524480552160471, + "kl_loss": 0.10638467222452164, + "loss_ib": 0.00160942820366472, + "step": 1921 + }, + { + "ce_ib": 5.780877590179443, + "ce_orig": 1.0550447702407837, + "epoch": 0.5524480552160471, + "kl_loss": 0.1115802749991417, + "loss_ib": 0.0016938905464485288, + "step": 1921 + }, + { + "ce_ib": 6.630991458892822, + "ce_orig": 1.0307990312576294, + "epoch": 0.5524480552160471, + "kl_loss": 0.1155117005109787, + "loss_ib": 0.0018182160565629601, + "step": 1921 + }, + { + "ce_ib": 4.387683391571045, + "ce_orig": 0.7767910957336426, + "epoch": 0.5527356387950249, + "kl_loss": 0.09510751068592072, + "loss_ib": 0.0013898435281589627, + "step": 1922 + }, + { + "ce_ib": 7.768795967102051, + "ce_orig": 1.77301824092865, + "epoch": 0.5527356387950249, + "kl_loss": 0.13410422205924988, + "loss_ib": 0.0021179215982556343, + "step": 1922 + }, + { + "ce_ib": 3.3980367183685303, + "ce_orig": 0.6729345321655273, + "epoch": 0.5527356387950249, + "kl_loss": 0.06067945063114166, + "loss_ib": 0.0009465981856919825, + "step": 1922 + }, + { + "ce_ib": 4.024316787719727, + "ce_orig": 0.559777557849884, + "epoch": 0.5527356387950249, + "kl_loss": 0.08573693037033081, + "loss_ib": 0.0012598008615896106, + "step": 1922 + }, + { + "ce_ib": 3.932161808013916, + "ce_orig": 0.7146768569946289, + "epoch": 0.5530232223740025, + "kl_loss": 0.06095574051141739, + "loss_ib": 0.0010027735261246562, + "step": 1923 + }, + { + "ce_ib": 7.295309543609619, + "ce_orig": 1.7684872150421143, + "epoch": 0.5530232223740025, + "kl_loss": 0.12955114245414734, + "loss_ib": 0.002025042427703738, + "step": 1923 + }, + { + "ce_ib": 3.9632012844085693, + "ce_orig": 0.6761289834976196, + "epoch": 0.5530232223740025, + "kl_loss": 0.052575308829545975, + "loss_ib": 0.0009220732026733458, + "step": 1923 + }, + { + "ce_ib": 3.9555931091308594, + "ce_orig": 0.4912514388561249, + "epoch": 0.5530232223740025, + "kl_loss": 0.10711593925952911, + "loss_ib": 0.0014667186187580228, + "step": 1923 + }, + { + "ce_ib": 4.5316314697265625, + "ce_orig": 1.1726627349853516, + "epoch": 0.5533108059529801, + "kl_loss": 0.051527321338653564, + "loss_ib": 0.0009684363030828536, + "step": 1924 + }, + { + "ce_ib": 1.2184661626815796, + "ce_orig": 0.13913534581661224, + "epoch": 0.5533108059529801, + "kl_loss": 0.18971668183803558, + "loss_ib": 0.002019013511016965, + "step": 1924 + }, + { + "ce_ib": 4.116618633270264, + "ce_orig": 0.6647286415100098, + "epoch": 0.5533108059529801, + "kl_loss": 0.07209490239620209, + "loss_ib": 0.0011326108360663056, + "step": 1924 + }, + { + "ce_ib": 3.457303524017334, + "ce_orig": 0.7009277939796448, + "epoch": 0.5533108059529801, + "kl_loss": 0.07253727316856384, + "loss_ib": 0.0010711030336096883, + "step": 1924 + }, + { + "epoch": 0.5535983895319577, + "grad_norm": 0.09650114923715591, + "learning_rate": 4.696054520094737e-05, + "loss": 0.8343, + "step": 1925 + }, + { + "ce_ib": 5.450641632080078, + "ce_orig": 0.8013394474983215, + "epoch": 0.5535983895319577, + "kl_loss": 0.09588983654975891, + "loss_ib": 0.0015039625577628613, + "step": 1925 + }, + { + "ce_ib": 3.620069742202759, + "ce_orig": 0.6995202898979187, + "epoch": 0.5535983895319577, + "kl_loss": 0.12757082283496857, + "loss_ib": 0.001637715264223516, + "step": 1925 + }, + { + "ce_ib": 7.098886489868164, + "ce_orig": 1.1889004707336426, + "epoch": 0.5535983895319577, + "kl_loss": 0.08775443583726883, + "loss_ib": 0.0015874329255893826, + "step": 1925 + }, + { + "ce_ib": 6.680222511291504, + "ce_orig": 1.4492888450622559, + "epoch": 0.5535983895319577, + "kl_loss": 0.054971788078546524, + "loss_ib": 0.0012177401222288609, + "step": 1925 + }, + { + "ce_ib": 3.0028889179229736, + "ce_orig": 0.464687317609787, + "epoch": 0.5538859731109353, + "kl_loss": 0.10299734026193619, + "loss_ib": 0.0013302622828632593, + "step": 1926 + }, + { + "ce_ib": 2.07023286819458, + "ce_orig": 0.5395156145095825, + "epoch": 0.5538859731109353, + "kl_loss": 0.04243440926074982, + "loss_ib": 0.0006313673802651465, + "step": 1926 + }, + { + "ce_ib": 3.768923282623291, + "ce_orig": 0.6606646180152893, + "epoch": 0.5538859731109353, + "kl_loss": 0.0877990797162056, + "loss_ib": 0.0012548831291496754, + "step": 1926 + }, + { + "ce_ib": 3.051236391067505, + "ce_orig": 0.5949603915214539, + "epoch": 0.5538859731109353, + "kl_loss": 0.07870738953351974, + "loss_ib": 0.0010921974899247289, + "step": 1926 + }, + { + "ce_ib": 4.608800888061523, + "ce_orig": 0.6883280873298645, + "epoch": 0.554173556689913, + "kl_loss": 0.07362256944179535, + "loss_ib": 0.0011971057392656803, + "step": 1927 + }, + { + "ce_ib": 3.193166971206665, + "ce_orig": 0.6154248714447021, + "epoch": 0.554173556689913, + "kl_loss": 0.06280265748500824, + "loss_ib": 0.0009473432437516749, + "step": 1927 + }, + { + "ce_ib": 5.442756652832031, + "ce_orig": 0.975753664970398, + "epoch": 0.554173556689913, + "kl_loss": 0.08660571277141571, + "loss_ib": 0.001410332741215825, + "step": 1927 + }, + { + "ce_ib": 6.922241687774658, + "ce_orig": 1.344877004623413, + "epoch": 0.554173556689913, + "kl_loss": 0.1226564571261406, + "loss_ib": 0.001918788650073111, + "step": 1927 + }, + { + "ce_ib": 4.614167213439941, + "ce_orig": 0.7154021859169006, + "epoch": 0.5544611402688906, + "kl_loss": 0.07747409492731094, + "loss_ib": 0.001236157724633813, + "step": 1928 + }, + { + "ce_ib": 3.877755880355835, + "ce_orig": 0.6276264190673828, + "epoch": 0.5544611402688906, + "kl_loss": 0.0632265955209732, + "loss_ib": 0.0010200415272265673, + "step": 1928 + }, + { + "ce_ib": 5.4692182540893555, + "ce_orig": 1.0055973529815674, + "epoch": 0.5544611402688906, + "kl_loss": 0.10576103627681732, + "loss_ib": 0.0016045321244746447, + "step": 1928 + }, + { + "ce_ib": 5.167081832885742, + "ce_orig": 0.9041604995727539, + "epoch": 0.5544611402688906, + "kl_loss": 0.08564558625221252, + "loss_ib": 0.0013731640065088868, + "step": 1928 + }, + { + "ce_ib": 3.184565305709839, + "ce_orig": 0.5815608501434326, + "epoch": 0.5547487238478683, + "kl_loss": 0.04710540175437927, + "loss_ib": 0.000789510493632406, + "step": 1929 + }, + { + "ce_ib": 6.104895114898682, + "ce_orig": 1.2935887575149536, + "epoch": 0.5547487238478683, + "kl_loss": 0.07742717862129211, + "loss_ib": 0.001384761300869286, + "step": 1929 + }, + { + "ce_ib": 5.025946617126465, + "ce_orig": 1.0913149118423462, + "epoch": 0.5547487238478683, + "kl_loss": 0.04909174144268036, + "loss_ib": 0.0009935119887813926, + "step": 1929 + }, + { + "ce_ib": 3.054976224899292, + "ce_orig": 0.5806114077568054, + "epoch": 0.5547487238478683, + "kl_loss": 0.06961973756551743, + "loss_ib": 0.0010016949381679296, + "step": 1929 + }, + { + "epoch": 0.555036307426846, + "grad_norm": 0.09321849793195724, + "learning_rate": 4.6941974739181395e-05, + "loss": 0.8656, + "step": 1930 + }, + { + "ce_ib": 5.966780185699463, + "ce_orig": 0.8329640030860901, + "epoch": 0.555036307426846, + "kl_loss": 0.10147197544574738, + "loss_ib": 0.0016113977180793881, + "step": 1930 + }, + { + "ce_ib": 4.516094207763672, + "ce_orig": 1.0570826530456543, + "epoch": 0.555036307426846, + "kl_loss": 0.08193090558052063, + "loss_ib": 0.0012709185248240829, + "step": 1930 + }, + { + "ce_ib": 3.262152910232544, + "ce_orig": 0.5078640580177307, + "epoch": 0.555036307426846, + "kl_loss": 0.05605974793434143, + "loss_ib": 0.0008868128061294556, + "step": 1930 + }, + { + "ce_ib": 4.160703659057617, + "ce_orig": 0.826073944568634, + "epoch": 0.555036307426846, + "kl_loss": 0.07509216666221619, + "loss_ib": 0.001166992005892098, + "step": 1930 + }, + { + "ce_ib": 3.419538736343384, + "ce_orig": 0.6153329014778137, + "epoch": 0.5553238910058236, + "kl_loss": 0.08440309017896652, + "loss_ib": 0.001185984699986875, + "step": 1931 + }, + { + "ce_ib": 3.1744120121002197, + "ce_orig": 0.7169322967529297, + "epoch": 0.5553238910058236, + "kl_loss": 0.04897625371813774, + "loss_ib": 0.0008072037016972899, + "step": 1931 + }, + { + "ce_ib": 5.373103618621826, + "ce_orig": 0.9085671305656433, + "epoch": 0.5553238910058236, + "kl_loss": 0.08393597602844238, + "loss_ib": 0.0013766700867563486, + "step": 1931 + }, + { + "ce_ib": 4.715808391571045, + "ce_orig": 0.6827978491783142, + "epoch": 0.5553238910058236, + "kl_loss": 0.08275207877159119, + "loss_ib": 0.0012991015100851655, + "step": 1931 + }, + { + "ce_ib": 5.665656566619873, + "ce_orig": 1.1956825256347656, + "epoch": 0.5556114745848012, + "kl_loss": 0.07183442264795303, + "loss_ib": 0.0012849097838625312, + "step": 1932 + }, + { + "ce_ib": 5.016260623931885, + "ce_orig": 0.7663189172744751, + "epoch": 0.5556114745848012, + "kl_loss": 0.13232015073299408, + "loss_ib": 0.0018248273991048336, + "step": 1932 + }, + { + "ce_ib": 7.9488115310668945, + "ce_orig": 1.8298797607421875, + "epoch": 0.5556114745848012, + "kl_loss": 0.09742999821901321, + "loss_ib": 0.0017691811081022024, + "step": 1932 + }, + { + "ce_ib": 6.2991204261779785, + "ce_orig": 1.115250587463379, + "epoch": 0.5556114745848012, + "kl_loss": 0.08892250806093216, + "loss_ib": 0.0015191369457170367, + "step": 1932 + }, + { + "ce_ib": 5.175693988800049, + "ce_orig": 0.8735966086387634, + "epoch": 0.5558990581637788, + "kl_loss": 0.08031416684389114, + "loss_ib": 0.0013207111041992903, + "step": 1933 + }, + { + "ce_ib": 2.453418254852295, + "ce_orig": 0.46159401535987854, + "epoch": 0.5558990581637788, + "kl_loss": 0.0539497509598732, + "loss_ib": 0.0007848392706364393, + "step": 1933 + }, + { + "ce_ib": 5.831915378570557, + "ce_orig": 0.7443711161613464, + "epoch": 0.5558990581637788, + "kl_loss": 0.10215287655591965, + "loss_ib": 0.0016047203680500388, + "step": 1933 + }, + { + "ce_ib": 5.453242301940918, + "ce_orig": 0.807707667350769, + "epoch": 0.5558990581637788, + "kl_loss": 0.07887540757656097, + "loss_ib": 0.0013340782606974244, + "step": 1933 + }, + { + "ce_ib": 6.2472920417785645, + "ce_orig": 1.1077812910079956, + "epoch": 0.5561866417427564, + "kl_loss": 0.09016699343919754, + "loss_ib": 0.0015263990499079227, + "step": 1934 + }, + { + "ce_ib": 4.347500801086426, + "ce_orig": 0.7160792946815491, + "epoch": 0.5561866417427564, + "kl_loss": 0.10213708877563477, + "loss_ib": 0.0014561208663508296, + "step": 1934 + }, + { + "ce_ib": 4.706624984741211, + "ce_orig": 0.8632693886756897, + "epoch": 0.5561866417427564, + "kl_loss": 0.0795624628663063, + "loss_ib": 0.00126628705766052, + "step": 1934 + }, + { + "ce_ib": 6.7972235679626465, + "ce_orig": 1.445935845375061, + "epoch": 0.5561866417427564, + "kl_loss": 0.08092060685157776, + "loss_ib": 0.0014889284502714872, + "step": 1934 + }, + { + "epoch": 0.5564742253217342, + "grad_norm": 0.10082092136144638, + "learning_rate": 4.6923351414105434e-05, + "loss": 0.8718, + "step": 1935 + }, + { + "ce_ib": 3.172208547592163, + "ce_orig": 0.5339158177375793, + "epoch": 0.5564742253217342, + "kl_loss": 0.08489201962947845, + "loss_ib": 0.001166141009889543, + "step": 1935 + }, + { + "ce_ib": 3.8487794399261475, + "ce_orig": 0.32496124505996704, + "epoch": 0.5564742253217342, + "kl_loss": 0.08489054441452026, + "loss_ib": 0.0012337833177298307, + "step": 1935 + }, + { + "ce_ib": 3.637190341949463, + "ce_orig": 0.6446729302406311, + "epoch": 0.5564742253217342, + "kl_loss": 0.060493528842926025, + "loss_ib": 0.0009686542907729745, + "step": 1935 + }, + { + "ce_ib": 2.376370429992676, + "ce_orig": 0.32883602380752563, + "epoch": 0.5564742253217342, + "kl_loss": 0.19712747633457184, + "loss_ib": 0.0022089118137955666, + "step": 1935 + }, + { + "ce_ib": 4.310609340667725, + "ce_orig": 0.6608479619026184, + "epoch": 0.5567618089007118, + "kl_loss": 0.07856308668851852, + "loss_ib": 0.0012166918022558093, + "step": 1936 + }, + { + "ce_ib": 6.378981590270996, + "ce_orig": 1.3210370540618896, + "epoch": 0.5567618089007118, + "kl_loss": 0.09683927893638611, + "loss_ib": 0.001606290927156806, + "step": 1936 + }, + { + "ce_ib": 5.218775749206543, + "ce_orig": 1.039247751235962, + "epoch": 0.5567618089007118, + "kl_loss": 0.07904958724975586, + "loss_ib": 0.0013123734388500452, + "step": 1936 + }, + { + "ce_ib": 3.299030065536499, + "ce_orig": 0.5713255405426025, + "epoch": 0.5567618089007118, + "kl_loss": 0.06562425196170807, + "loss_ib": 0.0009861454600468278, + "step": 1936 + }, + { + "ce_ib": 4.047051906585693, + "ce_orig": 1.1236011981964111, + "epoch": 0.5570493924796894, + "kl_loss": 0.0696166604757309, + "loss_ib": 0.0011008717119693756, + "step": 1937 + }, + { + "ce_ib": 3.500868082046509, + "ce_orig": 0.5961604118347168, + "epoch": 0.5570493924796894, + "kl_loss": 0.08188607543706894, + "loss_ib": 0.001168947434052825, + "step": 1937 + }, + { + "ce_ib": 4.17113733291626, + "ce_orig": 0.7005755305290222, + "epoch": 0.5570493924796894, + "kl_loss": 0.062321439385414124, + "loss_ib": 0.0010403281776234508, + "step": 1937 + }, + { + "ce_ib": 3.4896955490112305, + "ce_orig": 0.7553475499153137, + "epoch": 0.5570493924796894, + "kl_loss": 0.13924303650856018, + "loss_ib": 0.0017413998721167445, + "step": 1937 + }, + { + "ce_ib": 3.2346439361572266, + "ce_orig": 0.8824635744094849, + "epoch": 0.557336976058667, + "kl_loss": 0.047813281416893005, + "loss_ib": 0.0008015971980057657, + "step": 1938 + }, + { + "ce_ib": 6.895779132843018, + "ce_orig": 1.641066551208496, + "epoch": 0.557336976058667, + "kl_loss": 0.05537009984254837, + "loss_ib": 0.0012432788498699665, + "step": 1938 + }, + { + "ce_ib": 3.2976508140563965, + "ce_orig": 0.7816270589828491, + "epoch": 0.557336976058667, + "kl_loss": 0.08391252160072327, + "loss_ib": 0.001168890274129808, + "step": 1938 + }, + { + "ce_ib": 5.213710784912109, + "ce_orig": 1.0773584842681885, + "epoch": 0.557336976058667, + "kl_loss": 0.07279780507087708, + "loss_ib": 0.0012493490939959884, + "step": 1938 + }, + { + "ce_ib": 3.542112350463867, + "ce_orig": 0.7803473472595215, + "epoch": 0.5576245596376447, + "kl_loss": 0.05923328548669815, + "loss_ib": 0.0009465441107749939, + "step": 1939 + }, + { + "ce_ib": 3.030064582824707, + "ce_orig": 0.4030344486236572, + "epoch": 0.5576245596376447, + "kl_loss": 0.06228256598114967, + "loss_ib": 0.0009258320787921548, + "step": 1939 + }, + { + "ce_ib": 2.945636034011841, + "ce_orig": 0.40936049818992615, + "epoch": 0.5576245596376447, + "kl_loss": 0.09282398223876953, + "loss_ib": 0.0012228033738210797, + "step": 1939 + }, + { + "ce_ib": 4.228244781494141, + "ce_orig": 0.5401631593704224, + "epoch": 0.5576245596376447, + "kl_loss": 0.09863375872373581, + "loss_ib": 0.0014091619523242116, + "step": 1939 + }, + { + "epoch": 0.5579121432166223, + "grad_norm": 0.08983512222766876, + "learning_rate": 4.69046752705874e-05, + "loss": 0.8133, + "step": 1940 + }, + { + "ce_ib": 4.310100078582764, + "ce_orig": 0.8854047656059265, + "epoch": 0.5579121432166223, + "kl_loss": 0.08301101624965668, + "loss_ib": 0.0012611200800165534, + "step": 1940 + }, + { + "ce_ib": 2.9034817218780518, + "ce_orig": 0.6497253775596619, + "epoch": 0.5579121432166223, + "kl_loss": 0.054164137691259384, + "loss_ib": 0.000831989455036819, + "step": 1940 + }, + { + "ce_ib": 4.72149658203125, + "ce_orig": 0.6987029314041138, + "epoch": 0.5579121432166223, + "kl_loss": 0.13687220215797424, + "loss_ib": 0.0018408716423437, + "step": 1940 + }, + { + "ce_ib": 9.148179054260254, + "ce_orig": 1.2564067840576172, + "epoch": 0.5579121432166223, + "kl_loss": 0.10189461708068848, + "loss_ib": 0.0019337640842422843, + "step": 1940 + }, + { + "ce_ib": 6.246368885040283, + "ce_orig": 1.050748348236084, + "epoch": 0.5581997267955999, + "kl_loss": 0.07856760919094086, + "loss_ib": 0.0014103129506111145, + "step": 1941 + }, + { + "ce_ib": 3.215911626815796, + "ce_orig": 0.696168839931488, + "epoch": 0.5581997267955999, + "kl_loss": 0.06066550686955452, + "loss_ib": 0.0009282461833208799, + "step": 1941 + }, + { + "ce_ib": 5.427129745483398, + "ce_orig": 1.028993010520935, + "epoch": 0.5581997267955999, + "kl_loss": 0.10902377963066101, + "loss_ib": 0.001632950734347105, + "step": 1941 + }, + { + "ce_ib": 3.8248772621154785, + "ce_orig": 0.7308775186538696, + "epoch": 0.5581997267955999, + "kl_loss": 0.06246565282344818, + "loss_ib": 0.0010071442229673266, + "step": 1941 + }, + { + "ce_ib": 4.1581597328186035, + "ce_orig": 0.9127385020256042, + "epoch": 0.5584873103745777, + "kl_loss": 0.06476981192827225, + "loss_ib": 0.0010635140351951122, + "step": 1942 + }, + { + "ce_ib": 2.2253198623657227, + "ce_orig": 0.4216524362564087, + "epoch": 0.5584873103745777, + "kl_loss": 0.09294724464416504, + "loss_ib": 0.0011520044645294547, + "step": 1942 + }, + { + "ce_ib": 5.510898113250732, + "ce_orig": 1.1277520656585693, + "epoch": 0.5584873103745777, + "kl_loss": 0.08884826302528381, + "loss_ib": 0.0014395724283531308, + "step": 1942 + }, + { + "ce_ib": 3.793038845062256, + "ce_orig": 1.0310249328613281, + "epoch": 0.5584873103745777, + "kl_loss": 0.049077607691287994, + "loss_ib": 0.0008700799662619829, + "step": 1942 + }, + { + "ce_ib": 3.201446294784546, + "ce_orig": 0.42110079526901245, + "epoch": 0.5587748939535553, + "kl_loss": 0.15740466117858887, + "loss_ib": 0.0018941911403089762, + "step": 1943 + }, + { + "ce_ib": 3.5603582859039307, + "ce_orig": 0.6455524563789368, + "epoch": 0.5587748939535553, + "kl_loss": 0.076188825070858, + "loss_ib": 0.0011179241118952632, + "step": 1943 + }, + { + "ce_ib": 3.7455034255981445, + "ce_orig": 0.9351016879081726, + "epoch": 0.5587748939535553, + "kl_loss": 0.05626329034566879, + "loss_ib": 0.0009371831547468901, + "step": 1943 + }, + { + "ce_ib": 4.2337260246276855, + "ce_orig": 0.7112996578216553, + "epoch": 0.5587748939535553, + "kl_loss": 0.12551730871200562, + "loss_ib": 0.0016785457264631987, + "step": 1943 + }, + { + "ce_ib": 4.482950687408447, + "ce_orig": 0.743457019329071, + "epoch": 0.5590624775325329, + "kl_loss": 0.08077970147132874, + "loss_ib": 0.0012560919858515263, + "step": 1944 + }, + { + "ce_ib": 3.1684834957122803, + "ce_orig": 0.605969250202179, + "epoch": 0.5590624775325329, + "kl_loss": 0.07346437871456146, + "loss_ib": 0.0010514920577406883, + "step": 1944 + }, + { + "ce_ib": 3.279144525527954, + "ce_orig": 0.6021302938461304, + "epoch": 0.5590624775325329, + "kl_loss": 0.07395978271961212, + "loss_ib": 0.0010675122030079365, + "step": 1944 + }, + { + "ce_ib": 4.144535064697266, + "ce_orig": 0.9160466194152832, + "epoch": 0.5590624775325329, + "kl_loss": 0.05833573639392853, + "loss_ib": 0.000997810740955174, + "step": 1944 + }, + { + "epoch": 0.5593500611115105, + "grad_norm": 0.08799563348293304, + "learning_rate": 4.6885946353622456e-05, + "loss": 0.8787, + "step": 1945 + }, + { + "ce_ib": 5.794684410095215, + "ce_orig": 0.641095757484436, + "epoch": 0.5593500611115105, + "kl_loss": 0.11798180639743805, + "loss_ib": 0.001759286504238844, + "step": 1945 + }, + { + "ce_ib": 6.698928356170654, + "ce_orig": 1.618621587753296, + "epoch": 0.5593500611115105, + "kl_loss": 0.07779666781425476, + "loss_ib": 0.0014478593366220593, + "step": 1945 + }, + { + "ce_ib": 7.501846790313721, + "ce_orig": 1.6306451559066772, + "epoch": 0.5593500611115105, + "kl_loss": 0.09789915382862091, + "loss_ib": 0.0017291761469095945, + "step": 1945 + }, + { + "ce_ib": 5.438978672027588, + "ce_orig": 1.0133185386657715, + "epoch": 0.5593500611115105, + "kl_loss": 0.06232821196317673, + "loss_ib": 0.0011671799002215266, + "step": 1945 + }, + { + "ce_ib": 4.670761585235596, + "ce_orig": 1.0205674171447754, + "epoch": 0.5596376446904882, + "kl_loss": 0.0780145674943924, + "loss_ib": 0.0012472218368202448, + "step": 1946 + }, + { + "ce_ib": 5.302339553833008, + "ce_orig": 0.8421439528465271, + "epoch": 0.5596376446904882, + "kl_loss": 0.0664285197854042, + "loss_ib": 0.0011945191072300076, + "step": 1946 + }, + { + "ce_ib": 3.399172067642212, + "ce_orig": 0.6667072772979736, + "epoch": 0.5596376446904882, + "kl_loss": 0.07170562446117401, + "loss_ib": 0.0010569734731689095, + "step": 1946 + }, + { + "ce_ib": 4.930813312530518, + "ce_orig": 0.977545976638794, + "epoch": 0.5596376446904882, + "kl_loss": 0.07767369598150253, + "loss_ib": 0.0012698181672021747, + "step": 1946 + }, + { + "ce_ib": 5.311861515045166, + "ce_orig": 0.49567368626594543, + "epoch": 0.5599252282694658, + "kl_loss": 0.07563919574022293, + "loss_ib": 0.0012875780230388045, + "step": 1947 + }, + { + "ce_ib": 0.9847033619880676, + "ce_orig": 0.13023775815963745, + "epoch": 0.5599252282694658, + "kl_loss": 0.18346261978149414, + "loss_ib": 0.0019330964423716068, + "step": 1947 + }, + { + "ce_ib": 7.960301876068115, + "ce_orig": 1.3822221755981445, + "epoch": 0.5599252282694658, + "kl_loss": 0.11171118915081024, + "loss_ib": 0.0019131420413032174, + "step": 1947 + }, + { + "ce_ib": 3.2189996242523193, + "ce_orig": 0.5385721921920776, + "epoch": 0.5599252282694658, + "kl_loss": 0.0895732045173645, + "loss_ib": 0.0012176319723948836, + "step": 1947 + }, + { + "ce_ib": 4.923396110534668, + "ce_orig": 0.7708672881126404, + "epoch": 0.5602128118484434, + "kl_loss": 0.08079712092876434, + "loss_ib": 0.0013003107160329819, + "step": 1948 + }, + { + "ce_ib": 5.407022953033447, + "ce_orig": 0.99074387550354, + "epoch": 0.5602128118484434, + "kl_loss": 0.08255760371685028, + "loss_ib": 0.0013662781566381454, + "step": 1948 + }, + { + "ce_ib": 2.074165105819702, + "ce_orig": 0.43613600730895996, + "epoch": 0.5602128118484434, + "kl_loss": 0.1709517389535904, + "loss_ib": 0.0019169339211657643, + "step": 1948 + }, + { + "ce_ib": 1.5896848440170288, + "ce_orig": 0.2595054507255554, + "epoch": 0.5602128118484434, + "kl_loss": 0.17473191022872925, + "loss_ib": 0.0019062875071540475, + "step": 1948 + }, + { + "ce_ib": 8.224786758422852, + "ce_orig": 1.9359705448150635, + "epoch": 0.5605003954274211, + "kl_loss": 0.08173473179340363, + "loss_ib": 0.0016398259904235601, + "step": 1949 + }, + { + "ce_ib": 3.1440415382385254, + "ce_orig": 0.5309240221977234, + "epoch": 0.5605003954274211, + "kl_loss": 0.06852979212999344, + "loss_ib": 0.0009997020242735744, + "step": 1949 + }, + { + "ce_ib": 8.560279846191406, + "ce_orig": 1.6044319868087769, + "epoch": 0.5605003954274211, + "kl_loss": 0.09348025918006897, + "loss_ib": 0.0017908303998410702, + "step": 1949 + }, + { + "ce_ib": 3.4381375312805176, + "ce_orig": 0.7286140322685242, + "epoch": 0.5605003954274211, + "kl_loss": 0.061030313372612, + "loss_ib": 0.0009541168692521751, + "step": 1949 + }, + { + "epoch": 0.5607879790063988, + "grad_norm": 0.10401875525712967, + "learning_rate": 4.6867164708332914e-05, + "loss": 0.8569, + "step": 1950 + }, + { + "ce_ib": 5.248720169067383, + "ce_orig": 1.0409910678863525, + "epoch": 0.5607879790063988, + "kl_loss": 0.08351179212331772, + "loss_ib": 0.0013599898666143417, + "step": 1950 + }, + { + "ce_ib": 4.3359270095825195, + "ce_orig": 0.7283666729927063, + "epoch": 0.5607879790063988, + "kl_loss": 0.10401954501867294, + "loss_ib": 0.001473788172006607, + "step": 1950 + }, + { + "ce_ib": 6.330207824707031, + "ce_orig": 1.5334374904632568, + "epoch": 0.5607879790063988, + "kl_loss": 0.08978278934955597, + "loss_ib": 0.0015308486763387918, + "step": 1950 + }, + { + "ce_ib": 3.993730068206787, + "ce_orig": 0.5515294671058655, + "epoch": 0.5607879790063988, + "kl_loss": 0.11537996679544449, + "loss_ib": 0.0015531725948676467, + "step": 1950 + }, + { + "ce_ib": 5.145559787750244, + "ce_orig": 1.0982015132904053, + "epoch": 0.5610755625853764, + "kl_loss": 0.062048327177762985, + "loss_ib": 0.0011350392596796155, + "step": 1951 + }, + { + "ce_ib": 2.9302337169647217, + "ce_orig": 0.5866143107414246, + "epoch": 0.5610755625853764, + "kl_loss": 0.07474147528409958, + "loss_ib": 0.0010404380736872554, + "step": 1951 + }, + { + "ce_ib": 4.184976577758789, + "ce_orig": 1.278201699256897, + "epoch": 0.5610755625853764, + "kl_loss": 0.05899571254849434, + "loss_ib": 0.0010084547102451324, + "step": 1951 + }, + { + "ce_ib": 4.320436000823975, + "ce_orig": 0.7271730899810791, + "epoch": 0.5610755625853764, + "kl_loss": 0.087269127368927, + "loss_ib": 0.0013047349639236927, + "step": 1951 + }, + { + "ce_ib": 5.748152256011963, + "ce_orig": 0.7181047201156616, + "epoch": 0.561363146164354, + "kl_loss": 0.11937468498945236, + "loss_ib": 0.0017685620114207268, + "step": 1952 + }, + { + "ce_ib": 4.4428391456604, + "ce_orig": 0.6132622957229614, + "epoch": 0.561363146164354, + "kl_loss": 0.10413666069507599, + "loss_ib": 0.0014856505440548062, + "step": 1952 + }, + { + "ce_ib": 4.181711673736572, + "ce_orig": 0.5815970301628113, + "epoch": 0.561363146164354, + "kl_loss": 0.10145939886569977, + "loss_ib": 0.0014327650424093008, + "step": 1952 + }, + { + "ce_ib": 6.715764999389648, + "ce_orig": 1.7080940008163452, + "epoch": 0.561363146164354, + "kl_loss": 0.301811546087265, + "loss_ib": 0.0036896918900310993, + "step": 1952 + }, + { + "ce_ib": 3.219310760498047, + "ce_orig": 0.6457242965698242, + "epoch": 0.5616507297433316, + "kl_loss": 0.10958519577980042, + "loss_ib": 0.0014177829725667834, + "step": 1953 + }, + { + "ce_ib": 8.113302230834961, + "ce_orig": 0.6846543550491333, + "epoch": 0.5616507297433316, + "kl_loss": 0.07521779835224152, + "loss_ib": 0.0015635081799700856, + "step": 1953 + }, + { + "ce_ib": 4.748743534088135, + "ce_orig": 0.4016384780406952, + "epoch": 0.5616507297433316, + "kl_loss": 0.10218265652656555, + "loss_ib": 0.0014967009192332625, + "step": 1953 + }, + { + "ce_ib": 4.491012096405029, + "ce_orig": 0.6748306155204773, + "epoch": 0.5616507297433316, + "kl_loss": 0.11963670700788498, + "loss_ib": 0.0016454682918265462, + "step": 1953 + }, + { + "ce_ib": 6.667892932891846, + "ce_orig": 1.2705307006835938, + "epoch": 0.5619383133223093, + "kl_loss": 0.09615553915500641, + "loss_ib": 0.0016283446457237005, + "step": 1954 + }, + { + "ce_ib": 6.289905548095703, + "ce_orig": 1.0998377799987793, + "epoch": 0.5619383133223093, + "kl_loss": 0.06471849977970123, + "loss_ib": 0.0012761754915118217, + "step": 1954 + }, + { + "ce_ib": 2.961129665374756, + "ce_orig": 0.5690889954566956, + "epoch": 0.5619383133223093, + "kl_loss": 0.07119568437337875, + "loss_ib": 0.0010080698411911726, + "step": 1954 + }, + { + "ce_ib": 3.370374917984009, + "ce_orig": 0.5404449105262756, + "epoch": 0.5619383133223093, + "kl_loss": 0.11712504178285599, + "loss_ib": 0.0015082878526300192, + "step": 1954 + }, + { + "epoch": 0.5622258969012869, + "grad_norm": 0.10906948149204254, + "learning_rate": 4.68483303799681e-05, + "loss": 0.8118, + "step": 1955 + }, + { + "ce_ib": 4.224491596221924, + "ce_orig": 0.5777459740638733, + "epoch": 0.5622258969012869, + "kl_loss": 0.0793982446193695, + "loss_ib": 0.0012164314975962043, + "step": 1955 + }, + { + "ce_ib": 3.8268649578094482, + "ce_orig": 0.7513483762741089, + "epoch": 0.5622258969012869, + "kl_loss": 0.08438890427350998, + "loss_ib": 0.001226575463078916, + "step": 1955 + }, + { + "ce_ib": 6.1211838722229, + "ce_orig": 0.5500869750976562, + "epoch": 0.5622258969012869, + "kl_loss": 0.3260646164417267, + "loss_ib": 0.0038727642968297005, + "step": 1955 + }, + { + "ce_ib": 4.77570915222168, + "ce_orig": 1.0761535167694092, + "epoch": 0.5622258969012869, + "kl_loss": 0.0990295335650444, + "loss_ib": 0.0014678662410005927, + "step": 1955 + }, + { + "ce_ib": 4.561951160430908, + "ce_orig": 0.7675835490226746, + "epoch": 0.5625134804802646, + "kl_loss": 0.13056915998458862, + "loss_ib": 0.0017618867568671703, + "step": 1956 + }, + { + "ce_ib": 4.019299030303955, + "ce_orig": 0.5983139276504517, + "epoch": 0.5625134804802646, + "kl_loss": 0.11726750433444977, + "loss_ib": 0.001574604888446629, + "step": 1956 + }, + { + "ce_ib": 3.3375840187072754, + "ce_orig": 0.576325535774231, + "epoch": 0.5625134804802646, + "kl_loss": 0.07492341846227646, + "loss_ib": 0.0010829925304278731, + "step": 1956 + }, + { + "ce_ib": 3.049140691757202, + "ce_orig": 0.5011207461357117, + "epoch": 0.5625134804802646, + "kl_loss": 0.06307435780763626, + "loss_ib": 0.000935657590162009, + "step": 1956 + }, + { + "ce_ib": 5.095494270324707, + "ce_orig": 1.0936931371688843, + "epoch": 0.5628010640592422, + "kl_loss": 0.06574530899524689, + "loss_ib": 0.0011670024832710624, + "step": 1957 + }, + { + "ce_ib": 4.407210826873779, + "ce_orig": 0.8866761326789856, + "epoch": 0.5628010640592422, + "kl_loss": 0.07675804197788239, + "loss_ib": 0.0012083014007657766, + "step": 1957 + }, + { + "ce_ib": 5.302249431610107, + "ce_orig": 1.1811383962631226, + "epoch": 0.5628010640592422, + "kl_loss": 0.08683241903781891, + "loss_ib": 0.001398549065925181, + "step": 1957 + }, + { + "ce_ib": 5.239888668060303, + "ce_orig": 1.0076631307601929, + "epoch": 0.5628010640592422, + "kl_loss": 0.06444663554430008, + "loss_ib": 0.0011684551136568189, + "step": 1957 + }, + { + "ce_ib": 2.8636131286621094, + "ce_orig": 0.7453442215919495, + "epoch": 0.5630886476382199, + "kl_loss": 0.059520356357097626, + "loss_ib": 0.0008815649198368192, + "step": 1958 + }, + { + "ce_ib": 3.5893921852111816, + "ce_orig": 0.9165306687355042, + "epoch": 0.5630886476382199, + "kl_loss": 0.05453399196267128, + "loss_ib": 0.0009042791207320988, + "step": 1958 + }, + { + "ce_ib": 4.074574947357178, + "ce_orig": 0.6629688143730164, + "epoch": 0.5630886476382199, + "kl_loss": 0.07503208518028259, + "loss_ib": 0.0011577783152461052, + "step": 1958 + }, + { + "ce_ib": 4.465240001678467, + "ce_orig": 0.7166985273361206, + "epoch": 0.5630886476382199, + "kl_loss": 0.07121461629867554, + "loss_ib": 0.0011586701730266213, + "step": 1958 + }, + { + "ce_ib": 4.428246974945068, + "ce_orig": 0.595910906791687, + "epoch": 0.5633762312171975, + "kl_loss": 0.0863913893699646, + "loss_ib": 0.0013067384716123343, + "step": 1959 + }, + { + "ce_ib": 2.502281427383423, + "ce_orig": 0.5623095035552979, + "epoch": 0.5633762312171975, + "kl_loss": 0.03660721331834793, + "loss_ib": 0.0006163002690300345, + "step": 1959 + }, + { + "ce_ib": 2.8155159950256348, + "ce_orig": 0.48509472608566284, + "epoch": 0.5633762312171975, + "kl_loss": 0.0637698620557785, + "loss_ib": 0.0009192502475343645, + "step": 1959 + }, + { + "ce_ib": 4.694539546966553, + "ce_orig": 0.9675668478012085, + "epoch": 0.5633762312171975, + "kl_loss": 0.07476603239774704, + "loss_ib": 0.0012171142734587193, + "step": 1959 + }, + { + "epoch": 0.5636638147961751, + "grad_norm": 0.0918775200843811, + "learning_rate": 4.68294434139043e-05, + "loss": 0.8554, + "step": 1960 + }, + { + "ce_ib": 1.7745625972747803, + "ce_orig": 0.18564695119857788, + "epoch": 0.5636638147961751, + "kl_loss": 0.1614246368408203, + "loss_ib": 0.0017917025834321976, + "step": 1960 + }, + { + "ce_ib": 4.97669792175293, + "ce_orig": 0.5785374641418457, + "epoch": 0.5636638147961751, + "kl_loss": 0.11750780045986176, + "loss_ib": 0.0016727476613596082, + "step": 1960 + }, + { + "ce_ib": 5.817353248596191, + "ce_orig": 0.5623673796653748, + "epoch": 0.5636638147961751, + "kl_loss": 0.12709468603134155, + "loss_ib": 0.0018526822095736861, + "step": 1960 + }, + { + "ce_ib": 5.944303512573242, + "ce_orig": 1.0559977293014526, + "epoch": 0.5636638147961751, + "kl_loss": 0.06137792766094208, + "loss_ib": 0.0012082095490768552, + "step": 1960 + }, + { + "ce_ib": 5.0465407371521, + "ce_orig": 0.6973176598548889, + "epoch": 0.5639513983751527, + "kl_loss": 0.0863470584154129, + "loss_ib": 0.0013681246200576425, + "step": 1961 + }, + { + "ce_ib": 3.4272005558013916, + "ce_orig": 0.7509713768959045, + "epoch": 0.5639513983751527, + "kl_loss": 0.05020086094737053, + "loss_ib": 0.0008447286090813577, + "step": 1961 + }, + { + "ce_ib": 3.4931864738464355, + "ce_orig": 0.6557930707931519, + "epoch": 0.5639513983751527, + "kl_loss": 0.0642247200012207, + "loss_ib": 0.0009915658738464117, + "step": 1961 + }, + { + "ce_ib": 4.61355447769165, + "ce_orig": 0.8143686056137085, + "epoch": 0.5639513983751527, + "kl_loss": 0.09734024107456207, + "loss_ib": 0.0014347578398883343, + "step": 1961 + }, + { + "ce_ib": 6.335759162902832, + "ce_orig": 0.996334969997406, + "epoch": 0.5642389819541305, + "kl_loss": 0.09346070140600204, + "loss_ib": 0.001568182953633368, + "step": 1962 + }, + { + "ce_ib": 3.491563558578491, + "ce_orig": 0.8398751020431519, + "epoch": 0.5642389819541305, + "kl_loss": 0.038994964212179184, + "loss_ib": 0.0007391059189103544, + "step": 1962 + }, + { + "ce_ib": 5.2361345291137695, + "ce_orig": 1.2043551206588745, + "epoch": 0.5642389819541305, + "kl_loss": 0.07922018319368362, + "loss_ib": 0.0013158152578398585, + "step": 1962 + }, + { + "ce_ib": 3.5640170574188232, + "ce_orig": 0.849390983581543, + "epoch": 0.5642389819541305, + "kl_loss": 0.049172140657901764, + "loss_ib": 0.0008481231052428484, + "step": 1962 + }, + { + "ce_ib": 3.2854270935058594, + "ce_orig": 0.8007976412773132, + "epoch": 0.5645265655331081, + "kl_loss": 0.06403769552707672, + "loss_ib": 0.000968919659499079, + "step": 1963 + }, + { + "ce_ib": 4.224299907684326, + "ce_orig": 0.7560908794403076, + "epoch": 0.5645265655331081, + "kl_loss": 0.07308094203472137, + "loss_ib": 0.0011532393982633948, + "step": 1963 + }, + { + "ce_ib": 4.194509506225586, + "ce_orig": 0.764187753200531, + "epoch": 0.5645265655331081, + "kl_loss": 0.07656742632389069, + "loss_ib": 0.0011851252056658268, + "step": 1963 + }, + { + "ce_ib": 4.258493423461914, + "ce_orig": 0.9742867350578308, + "epoch": 0.5645265655331081, + "kl_loss": 0.06675132364034653, + "loss_ib": 0.0010933625744655728, + "step": 1963 + }, + { + "ce_ib": 6.327304363250732, + "ce_orig": 0.667304277420044, + "epoch": 0.5648141491120857, + "kl_loss": 0.12408095598220825, + "loss_ib": 0.0018735399935394526, + "step": 1964 + }, + { + "ce_ib": 2.702357292175293, + "ce_orig": 0.26950058341026306, + "epoch": 0.5648141491120857, + "kl_loss": 0.0684877410531044, + "loss_ib": 0.00095511315157637, + "step": 1964 + }, + { + "ce_ib": 3.2462103366851807, + "ce_orig": 0.603496789932251, + "epoch": 0.5648141491120857, + "kl_loss": 0.13054127991199493, + "loss_ib": 0.0016300338320434093, + "step": 1964 + }, + { + "ce_ib": 5.35595178604126, + "ce_orig": 0.9912991523742676, + "epoch": 0.5648141491120857, + "kl_loss": 0.08628630638122559, + "loss_ib": 0.001398458145558834, + "step": 1964 + }, + { + "epoch": 0.5651017326910633, + "grad_norm": 0.09601251780986786, + "learning_rate": 4.68105038556446e-05, + "loss": 0.7858, + "step": 1965 + }, + { + "ce_ib": 2.280003070831299, + "ce_orig": 0.40050143003463745, + "epoch": 0.5651017326910633, + "kl_loss": 0.08137905597686768, + "loss_ib": 0.0010417908197268844, + "step": 1965 + }, + { + "ce_ib": 5.864077568054199, + "ce_orig": 1.1242923736572266, + "epoch": 0.5651017326910633, + "kl_loss": 0.09855780750513077, + "loss_ib": 0.0015719857765361667, + "step": 1965 + }, + { + "ce_ib": 3.7183265686035156, + "ce_orig": 0.5747319459915161, + "epoch": 0.5651017326910633, + "kl_loss": 0.09104914963245392, + "loss_ib": 0.001282324199564755, + "step": 1965 + }, + { + "ce_ib": 4.4792866706848145, + "ce_orig": 0.6815457940101624, + "epoch": 0.5651017326910633, + "kl_loss": 0.08306923508644104, + "loss_ib": 0.0012786209117621183, + "step": 1965 + }, + { + "ce_ib": 5.180027484893799, + "ce_orig": 0.7645912170410156, + "epoch": 0.565389316270041, + "kl_loss": 0.08781914412975311, + "loss_ib": 0.0013961941003799438, + "step": 1966 + }, + { + "ce_ib": 4.010493755340576, + "ce_orig": 0.7190892100334167, + "epoch": 0.565389316270041, + "kl_loss": 0.10989493131637573, + "loss_ib": 0.001499998732469976, + "step": 1966 + }, + { + "ce_ib": 5.792258262634277, + "ce_orig": 0.8136773705482483, + "epoch": 0.565389316270041, + "kl_loss": 0.0637887716293335, + "loss_ib": 0.0012171135749667883, + "step": 1966 + }, + { + "ce_ib": 4.7784423828125, + "ce_orig": 0.8124526739120483, + "epoch": 0.565389316270041, + "kl_loss": 0.05549684166908264, + "loss_ib": 0.0010328125208616257, + "step": 1966 + }, + { + "ce_ib": 4.873855113983154, + "ce_orig": 0.8233740329742432, + "epoch": 0.5656768998490186, + "kl_loss": 0.08795235306024551, + "loss_ib": 0.0013669090112671256, + "step": 1967 + }, + { + "ce_ib": 3.168221950531006, + "ce_orig": 0.6436535120010376, + "epoch": 0.5656768998490186, + "kl_loss": 0.057536277920007706, + "loss_ib": 0.0008921849657781422, + "step": 1967 + }, + { + "ce_ib": 6.49403190612793, + "ce_orig": 1.3492056131362915, + "epoch": 0.5656768998490186, + "kl_loss": 0.05696912109851837, + "loss_ib": 0.0012190943816676736, + "step": 1967 + }, + { + "ce_ib": 4.227766036987305, + "ce_orig": 0.7239964008331299, + "epoch": 0.5656768998490186, + "kl_loss": 0.09366890788078308, + "loss_ib": 0.001359465648420155, + "step": 1967 + }, + { + "ce_ib": 5.592937469482422, + "ce_orig": 0.6012019515037537, + "epoch": 0.5659644834279962, + "kl_loss": 0.10134854912757874, + "loss_ib": 0.001572779263369739, + "step": 1968 + }, + { + "ce_ib": 7.333575248718262, + "ce_orig": 1.2268191576004028, + "epoch": 0.5659644834279962, + "kl_loss": 0.10260283946990967, + "loss_ib": 0.0017593859229236841, + "step": 1968 + }, + { + "ce_ib": 2.503349542617798, + "ce_orig": 0.36347636580467224, + "epoch": 0.5659644834279962, + "kl_loss": 0.21179398894309998, + "loss_ib": 0.002368274610489607, + "step": 1968 + }, + { + "ce_ib": 5.286550045013428, + "ce_orig": 0.9204768538475037, + "epoch": 0.5659644834279962, + "kl_loss": 0.06197813153266907, + "loss_ib": 0.0011484362185001373, + "step": 1968 + }, + { + "ce_ib": 5.122837543487549, + "ce_orig": 0.9221827983856201, + "epoch": 0.566252067006974, + "kl_loss": 0.07732348889112473, + "loss_ib": 0.001285518635995686, + "step": 1969 + }, + { + "ce_ib": 3.427137613296509, + "ce_orig": 0.397264689207077, + "epoch": 0.566252067006974, + "kl_loss": 0.08980880677700043, + "loss_ib": 0.001240801764652133, + "step": 1969 + }, + { + "ce_ib": 3.6866345405578613, + "ce_orig": 0.6615527868270874, + "epoch": 0.566252067006974, + "kl_loss": 0.08322572708129883, + "loss_ib": 0.001200920669361949, + "step": 1969 + }, + { + "ce_ib": 4.68393611907959, + "ce_orig": 0.5818220376968384, + "epoch": 0.566252067006974, + "kl_loss": 0.11027654260396957, + "loss_ib": 0.0015711589949205518, + "step": 1969 + }, + { + "epoch": 0.5665396505859516, + "grad_norm": 0.09489289671182632, + "learning_rate": 4.6791511750818784e-05, + "loss": 0.8435, + "step": 1970 + }, + { + "ce_ib": 4.739940166473389, + "ce_orig": 0.955788791179657, + "epoch": 0.5665396505859516, + "kl_loss": 0.154568612575531, + "loss_ib": 0.002019680105149746, + "step": 1970 + }, + { + "ce_ib": 4.194762229919434, + "ce_orig": 0.7959542870521545, + "epoch": 0.5665396505859516, + "kl_loss": 0.06918502599000931, + "loss_ib": 0.0011113265063613653, + "step": 1970 + }, + { + "ce_ib": 6.11726188659668, + "ce_orig": 1.1126803159713745, + "epoch": 0.5665396505859516, + "kl_loss": 0.08686895668506622, + "loss_ib": 0.0014804156962782145, + "step": 1970 + }, + { + "ce_ib": 4.941152572631836, + "ce_orig": 0.7510586977005005, + "epoch": 0.5665396505859516, + "kl_loss": 0.0788385346531868, + "loss_ib": 0.0012825005687773228, + "step": 1970 + }, + { + "ce_ib": 3.0111844539642334, + "ce_orig": 0.7899438142776489, + "epoch": 0.5668272341649292, + "kl_loss": 0.07195956259965897, + "loss_ib": 0.0010207140585407615, + "step": 1971 + }, + { + "ce_ib": 3.418217658996582, + "ce_orig": 0.553936779499054, + "epoch": 0.5668272341649292, + "kl_loss": 0.07853636145591736, + "loss_ib": 0.0011271852999925613, + "step": 1971 + }, + { + "ce_ib": 5.742977142333984, + "ce_orig": 1.207983374595642, + "epoch": 0.5668272341649292, + "kl_loss": 0.08691638708114624, + "loss_ib": 0.001443461631424725, + "step": 1971 + }, + { + "ce_ib": 4.938532829284668, + "ce_orig": 0.905624508857727, + "epoch": 0.5668272341649292, + "kl_loss": 0.1167895719408989, + "loss_ib": 0.001661748974584043, + "step": 1971 + }, + { + "ce_ib": 5.547784805297852, + "ce_orig": 0.680888831615448, + "epoch": 0.5671148177439068, + "kl_loss": 0.08124236762523651, + "loss_ib": 0.001367202028632164, + "step": 1972 + }, + { + "ce_ib": 2.5462639331817627, + "ce_orig": 0.6534244418144226, + "epoch": 0.5671148177439068, + "kl_loss": 0.04674097150564194, + "loss_ib": 0.0007220360566861928, + "step": 1972 + }, + { + "ce_ib": 5.300449371337891, + "ce_orig": 1.3504968881607056, + "epoch": 0.5671148177439068, + "kl_loss": 0.1055799275636673, + "loss_ib": 0.0015858440892770886, + "step": 1972 + }, + { + "ce_ib": 7.681647777557373, + "ce_orig": 1.1922661066055298, + "epoch": 0.5671148177439068, + "kl_loss": 0.08959387242794037, + "loss_ib": 0.0016641034744679928, + "step": 1972 + }, + { + "ce_ib": 4.003046989440918, + "ce_orig": 0.725699782371521, + "epoch": 0.5674024013228844, + "kl_loss": 0.05127523094415665, + "loss_ib": 0.0009130570106208324, + "step": 1973 + }, + { + "ce_ib": 5.829302787780762, + "ce_orig": 0.9941497445106506, + "epoch": 0.5674024013228844, + "kl_loss": 0.07511179149150848, + "loss_ib": 0.001334048225544393, + "step": 1973 + }, + { + "ce_ib": 5.182688236236572, + "ce_orig": 1.0781404972076416, + "epoch": 0.5674024013228844, + "kl_loss": 0.08208617568016052, + "loss_ib": 0.0013391305692493916, + "step": 1973 + }, + { + "ce_ib": 6.269689083099365, + "ce_orig": 1.2348562479019165, + "epoch": 0.5674024013228844, + "kl_loss": 0.07636990398168564, + "loss_ib": 0.0013906678650528193, + "step": 1973 + }, + { + "ce_ib": 4.280139446258545, + "ce_orig": 0.7548874020576477, + "epoch": 0.5676899849018621, + "kl_loss": 0.05363544821739197, + "loss_ib": 0.000964368402492255, + "step": 1974 + }, + { + "ce_ib": 3.9896092414855957, + "ce_orig": 0.8541274666786194, + "epoch": 0.5676899849018621, + "kl_loss": 0.0865066647529602, + "loss_ib": 0.001264027669094503, + "step": 1974 + }, + { + "ce_ib": 7.109053611755371, + "ce_orig": 1.3502659797668457, + "epoch": 0.5676899849018621, + "kl_loss": 0.0709662213921547, + "loss_ib": 0.0014205676270648837, + "step": 1974 + }, + { + "ce_ib": 4.213406085968018, + "ce_orig": 0.9690558314323425, + "epoch": 0.5676899849018621, + "kl_loss": 0.07550661265850067, + "loss_ib": 0.0011764067457988858, + "step": 1974 + }, + { + "epoch": 0.5679775684808397, + "grad_norm": 0.09653083980083466, + "learning_rate": 4.677246714518324e-05, + "loss": 0.966, + "step": 1975 + }, + { + "ce_ib": 7.043652534484863, + "ce_orig": 0.7337558269500732, + "epoch": 0.5679775684808397, + "kl_loss": 0.11198560893535614, + "loss_ib": 0.0018242212245240808, + "step": 1975 + }, + { + "ce_ib": 5.914465427398682, + "ce_orig": 1.1125526428222656, + "epoch": 0.5679775684808397, + "kl_loss": 0.1147436872124672, + "loss_ib": 0.0017388833221048117, + "step": 1975 + }, + { + "ce_ib": 6.91348123550415, + "ce_orig": 1.1799991130828857, + "epoch": 0.5679775684808397, + "kl_loss": 0.10196605324745178, + "loss_ib": 0.001711008488200605, + "step": 1975 + }, + { + "ce_ib": 4.890042304992676, + "ce_orig": 0.37071356177330017, + "epoch": 0.5679775684808397, + "kl_loss": 0.2704794406890869, + "loss_ib": 0.0031937984749674797, + "step": 1975 + }, + { + "ce_ib": 7.680137634277344, + "ce_orig": 1.8655290603637695, + "epoch": 0.5682651520598174, + "kl_loss": 0.07680245488882065, + "loss_ib": 0.001536038238555193, + "step": 1976 + }, + { + "ce_ib": 6.90033483505249, + "ce_orig": 1.0135672092437744, + "epoch": 0.5682651520598174, + "kl_loss": 0.11059199273586273, + "loss_ib": 0.0017959533724933863, + "step": 1976 + }, + { + "ce_ib": 4.5213942527771, + "ce_orig": 0.7308627963066101, + "epoch": 0.5682651520598174, + "kl_loss": 0.12329922616481781, + "loss_ib": 0.001685131574049592, + "step": 1976 + }, + { + "ce_ib": 3.1446456909179688, + "ce_orig": 0.4436934292316437, + "epoch": 0.5682651520598174, + "kl_loss": 0.11416661739349365, + "loss_ib": 0.0014561307616531849, + "step": 1976 + }, + { + "ce_ib": 3.9550869464874268, + "ce_orig": 0.7813758850097656, + "epoch": 0.568552735638795, + "kl_loss": 0.0972772091627121, + "loss_ib": 0.0013682807330042124, + "step": 1977 + }, + { + "ce_ib": 4.0675177574157715, + "ce_orig": 0.7945940494537354, + "epoch": 0.568552735638795, + "kl_loss": 0.07163093984127045, + "loss_ib": 0.0011230611708015203, + "step": 1977 + }, + { + "ce_ib": 3.822026014328003, + "ce_orig": 0.602843701839447, + "epoch": 0.568552735638795, + "kl_loss": 0.10643879324197769, + "loss_ib": 0.0014465905260294676, + "step": 1977 + }, + { + "ce_ib": 5.2364115715026855, + "ce_orig": 0.9610447287559509, + "epoch": 0.568552735638795, + "kl_loss": 0.10946646332740784, + "loss_ib": 0.0016183056868612766, + "step": 1977 + }, + { + "ce_ib": 4.0665764808654785, + "ce_orig": 0.8960332274436951, + "epoch": 0.5688403192177727, + "kl_loss": 0.06639538705348969, + "loss_ib": 0.0010706114117056131, + "step": 1978 + }, + { + "ce_ib": 3.983731269836426, + "ce_orig": 0.8340597152709961, + "epoch": 0.5688403192177727, + "kl_loss": 0.10358984768390656, + "loss_ib": 0.0014342715730890632, + "step": 1978 + }, + { + "ce_ib": 7.083614826202393, + "ce_orig": 1.204768180847168, + "epoch": 0.5688403192177727, + "kl_loss": 0.11273694038391113, + "loss_ib": 0.0018357309745624661, + "step": 1978 + }, + { + "ce_ib": 4.363039016723633, + "ce_orig": 0.7933036684989929, + "epoch": 0.5688403192177727, + "kl_loss": 0.11547520011663437, + "loss_ib": 0.0015910557704046369, + "step": 1978 + }, + { + "ce_ib": 5.3679022789001465, + "ce_orig": 1.198567509651184, + "epoch": 0.5691279027967503, + "kl_loss": 0.1003759354352951, + "loss_ib": 0.0015405495651066303, + "step": 1979 + }, + { + "ce_ib": 3.325381278991699, + "ce_orig": 0.5789466500282288, + "epoch": 0.5691279027967503, + "kl_loss": 0.04999261349439621, + "loss_ib": 0.0008324642549268901, + "step": 1979 + }, + { + "ce_ib": 5.320559024810791, + "ce_orig": 1.0441560745239258, + "epoch": 0.5691279027967503, + "kl_loss": 0.12872713804244995, + "loss_ib": 0.0018193272408097982, + "step": 1979 + }, + { + "ce_ib": 2.591191291809082, + "ce_orig": 0.5840861201286316, + "epoch": 0.5691279027967503, + "kl_loss": 0.06888680160045624, + "loss_ib": 0.0009479871368966997, + "step": 1979 + }, + { + "epoch": 0.5694154863757279, + "grad_norm": 0.10657630115747452, + "learning_rate": 4.6753370084620844e-05, + "loss": 0.918, + "step": 1980 + }, + { + "ce_ib": 5.57462739944458, + "ce_orig": 1.1059004068374634, + "epoch": 0.5694154863757279, + "kl_loss": 0.08679717034101486, + "loss_ib": 0.0014254343695938587, + "step": 1980 + }, + { + "ce_ib": 3.9722366333007812, + "ce_orig": 0.47185155749320984, + "epoch": 0.5694154863757279, + "kl_loss": 0.0725012868642807, + "loss_ib": 0.0011222364846616983, + "step": 1980 + }, + { + "ce_ib": 3.515568256378174, + "ce_orig": 0.46012741327285767, + "epoch": 0.5694154863757279, + "kl_loss": 0.1952338069677353, + "loss_ib": 0.002303894842043519, + "step": 1980 + }, + { + "ce_ib": 4.204768180847168, + "ce_orig": 0.6022050976753235, + "epoch": 0.5694154863757279, + "kl_loss": 0.08826540410518646, + "loss_ib": 0.0013031307607889175, + "step": 1980 + }, + { + "ce_ib": 4.620275020599365, + "ce_orig": 0.360134094953537, + "epoch": 0.5697030699547055, + "kl_loss": 0.13378003239631653, + "loss_ib": 0.0017998277908191085, + "step": 1981 + }, + { + "ce_ib": 5.376315116882324, + "ce_orig": 0.8145768642425537, + "epoch": 0.5697030699547055, + "kl_loss": 0.11072784662246704, + "loss_ib": 0.0016449099639430642, + "step": 1981 + }, + { + "ce_ib": 4.40403938293457, + "ce_orig": 0.5124900341033936, + "epoch": 0.5697030699547055, + "kl_loss": 0.09817782044410706, + "loss_ib": 0.0014221821911633015, + "step": 1981 + }, + { + "ce_ib": 3.8054332733154297, + "ce_orig": 0.4441893696784973, + "epoch": 0.5697030699547055, + "kl_loss": 0.10582395642995834, + "loss_ib": 0.0014387827832251787, + "step": 1981 + }, + { + "ce_ib": 2.544375419616699, + "ce_orig": 0.5236086249351501, + "epoch": 0.5699906535336833, + "kl_loss": 0.0433952733874321, + "loss_ib": 0.0006883902824483812, + "step": 1982 + }, + { + "ce_ib": 4.477120876312256, + "ce_orig": 0.9069566130638123, + "epoch": 0.5699906535336833, + "kl_loss": 0.0873197466135025, + "loss_ib": 0.0013209094759076834, + "step": 1982 + }, + { + "ce_ib": 5.4027252197265625, + "ce_orig": 1.2926335334777832, + "epoch": 0.5699906535336833, + "kl_loss": 0.060752466320991516, + "loss_ib": 0.0011477970983833075, + "step": 1982 + }, + { + "ce_ib": 3.2431740760803223, + "ce_orig": 0.48677805066108704, + "epoch": 0.5699906535336833, + "kl_loss": 0.1204669326543808, + "loss_ib": 0.00152898661326617, + "step": 1982 + }, + { + "ce_ib": 4.236042499542236, + "ce_orig": 1.256900429725647, + "epoch": 0.5702782371126609, + "kl_loss": 0.061071231961250305, + "loss_ib": 0.0010343164904043078, + "step": 1983 + }, + { + "ce_ib": 4.670779705047607, + "ce_orig": 0.9403422474861145, + "epoch": 0.5702782371126609, + "kl_loss": 0.13809645175933838, + "loss_ib": 0.0018480423605069518, + "step": 1983 + }, + { + "ce_ib": 4.080404281616211, + "ce_orig": 0.7556179761886597, + "epoch": 0.5702782371126609, + "kl_loss": 0.14495061337947845, + "loss_ib": 0.0018575465073809028, + "step": 1983 + }, + { + "ce_ib": 3.6395750045776367, + "ce_orig": 0.4911423623561859, + "epoch": 0.5702782371126609, + "kl_loss": 0.06678073108196259, + "loss_ib": 0.0010317647829651833, + "step": 1983 + }, + { + "ce_ib": 3.3056328296661377, + "ce_orig": 0.4679483473300934, + "epoch": 0.5705658206916385, + "kl_loss": 0.06985975801944733, + "loss_ib": 0.0010291608050465584, + "step": 1984 + }, + { + "ce_ib": 4.763602256774902, + "ce_orig": 0.8504299521446228, + "epoch": 0.5705658206916385, + "kl_loss": 0.07627188414335251, + "loss_ib": 0.001239079050719738, + "step": 1984 + }, + { + "ce_ib": 5.771358489990234, + "ce_orig": 1.096095323562622, + "epoch": 0.5705658206916385, + "kl_loss": 0.09705349057912827, + "loss_ib": 0.001547670573927462, + "step": 1984 + }, + { + "ce_ib": 5.267760753631592, + "ce_orig": 0.9385091066360474, + "epoch": 0.5705658206916385, + "kl_loss": 0.08681712299585342, + "loss_ib": 0.0013949472922831774, + "step": 1984 + }, + { + "epoch": 0.5708534042706161, + "grad_norm": 0.09662514179944992, + "learning_rate": 4.673422061514086e-05, + "loss": 0.8365, + "step": 1985 + }, + { + "ce_ib": 4.934109687805176, + "ce_orig": 0.8600794672966003, + "epoch": 0.5708534042706161, + "kl_loss": 0.07623735070228577, + "loss_ib": 0.0012557844165712595, + "step": 1985 + }, + { + "ce_ib": 3.987933874130249, + "ce_orig": 0.8990747928619385, + "epoch": 0.5708534042706161, + "kl_loss": 0.11162467300891876, + "loss_ib": 0.0015150400577113032, + "step": 1985 + }, + { + "ce_ib": 6.800766944885254, + "ce_orig": 1.0961169004440308, + "epoch": 0.5708534042706161, + "kl_loss": 0.08355967700481415, + "loss_ib": 0.0015156733570620418, + "step": 1985 + }, + { + "ce_ib": 5.622281074523926, + "ce_orig": 1.010031819343567, + "epoch": 0.5708534042706161, + "kl_loss": 0.08740022778511047, + "loss_ib": 0.0014362303772941232, + "step": 1985 + }, + { + "ce_ib": 4.272572994232178, + "ce_orig": 0.686816394329071, + "epoch": 0.5711409878495938, + "kl_loss": 0.0727725699543953, + "loss_ib": 0.0011549830669537187, + "step": 1986 + }, + { + "ce_ib": 4.9965643882751465, + "ce_orig": 0.573198139667511, + "epoch": 0.5711409878495938, + "kl_loss": 0.06519833207130432, + "loss_ib": 0.001151639735326171, + "step": 1986 + }, + { + "ce_ib": 5.983783721923828, + "ce_orig": 0.6499956846237183, + "epoch": 0.5711409878495938, + "kl_loss": 0.20731639862060547, + "loss_ib": 0.0026715423446148634, + "step": 1986 + }, + { + "ce_ib": 4.596940517425537, + "ce_orig": 0.8198102712631226, + "epoch": 0.5711409878495938, + "kl_loss": 0.08719105273485184, + "loss_ib": 0.0013316045515239239, + "step": 1986 + }, + { + "ce_ib": 3.7305142879486084, + "ce_orig": 0.5820647478103638, + "epoch": 0.5714285714285714, + "kl_loss": 0.048475757241249084, + "loss_ib": 0.0008578090346418321, + "step": 1987 + }, + { + "ce_ib": 5.179656982421875, + "ce_orig": 0.9031145572662354, + "epoch": 0.5714285714285714, + "kl_loss": 0.06891524791717529, + "loss_ib": 0.0012071181554347277, + "step": 1987 + }, + { + "ce_ib": 4.588144779205322, + "ce_orig": 1.0381220579147339, + "epoch": 0.5714285714285714, + "kl_loss": 0.06946767866611481, + "loss_ib": 0.0011534912046045065, + "step": 1987 + }, + { + "ce_ib": 3.2191145420074463, + "ce_orig": 0.6345332264900208, + "epoch": 0.5714285714285714, + "kl_loss": 0.0907372385263443, + "loss_ib": 0.0012292837491258979, + "step": 1987 + }, + { + "ce_ib": 3.9721553325653076, + "ce_orig": 0.5545108914375305, + "epoch": 0.571716155007549, + "kl_loss": 0.05512217804789543, + "loss_ib": 0.0009484372567385435, + "step": 1988 + }, + { + "ce_ib": 3.3781046867370605, + "ce_orig": 0.3004768192768097, + "epoch": 0.571716155007549, + "kl_loss": 0.20599517226219177, + "loss_ib": 0.002397762145847082, + "step": 1988 + }, + { + "ce_ib": 3.137331008911133, + "ce_orig": 0.7369969487190247, + "epoch": 0.571716155007549, + "kl_loss": 0.06038379669189453, + "loss_ib": 0.0009175710147246718, + "step": 1988 + }, + { + "ce_ib": 4.148172378540039, + "ce_orig": 0.8267027735710144, + "epoch": 0.571716155007549, + "kl_loss": 0.11484304070472717, + "loss_ib": 0.0015632475260645151, + "step": 1988 + }, + { + "ce_ib": 3.6094810962677, + "ce_orig": 0.723459780216217, + "epoch": 0.5720037385865268, + "kl_loss": 0.07016587257385254, + "loss_ib": 0.001062606810592115, + "step": 1989 + }, + { + "ce_ib": 3.1901426315307617, + "ce_orig": 0.5057554244995117, + "epoch": 0.5720037385865268, + "kl_loss": 0.10008519887924194, + "loss_ib": 0.0013198661617934704, + "step": 1989 + }, + { + "ce_ib": 3.0300464630126953, + "ce_orig": 0.6580599546432495, + "epoch": 0.5720037385865268, + "kl_loss": 0.04249326512217522, + "loss_ib": 0.0007279372657649219, + "step": 1989 + }, + { + "ce_ib": 3.8843185901641846, + "ce_orig": 0.8888494372367859, + "epoch": 0.5720037385865268, + "kl_loss": 0.061535559594631195, + "loss_ib": 0.001003787387162447, + "step": 1989 + }, + { + "epoch": 0.5722913221655044, + "grad_norm": 0.11193746328353882, + "learning_rate": 4.6715018782878785e-05, + "loss": 0.8503, + "step": 1990 + }, + { + "ce_ib": 3.29616379737854, + "ce_orig": 0.410920649766922, + "epoch": 0.5722913221655044, + "kl_loss": 0.09137226641178131, + "loss_ib": 0.001243339036591351, + "step": 1990 + }, + { + "ce_ib": 4.457232475280762, + "ce_orig": 1.0111052989959717, + "epoch": 0.5722913221655044, + "kl_loss": 0.06779945641756058, + "loss_ib": 0.0011237177532166243, + "step": 1990 + }, + { + "ce_ib": 5.985278129577637, + "ce_orig": 1.446540117263794, + "epoch": 0.5722913221655044, + "kl_loss": 0.04329767823219299, + "loss_ib": 0.0010315045947209, + "step": 1990 + }, + { + "ce_ib": 4.280043125152588, + "ce_orig": 0.6531007289886475, + "epoch": 0.5722913221655044, + "kl_loss": 0.12978705763816833, + "loss_ib": 0.0017258748412132263, + "step": 1990 + }, + { + "ce_ib": 4.452704429626465, + "ce_orig": 0.7714152336120605, + "epoch": 0.572578905744482, + "kl_loss": 0.08596138656139374, + "loss_ib": 0.0013048842083662748, + "step": 1991 + }, + { + "ce_ib": 4.669787406921387, + "ce_orig": 1.1048576831817627, + "epoch": 0.572578905744482, + "kl_loss": 0.06219197064638138, + "loss_ib": 0.0010888985125347972, + "step": 1991 + }, + { + "ce_ib": 7.6194167137146, + "ce_orig": 1.5835448503494263, + "epoch": 0.572578905744482, + "kl_loss": 0.09535622596740723, + "loss_ib": 0.001715503865852952, + "step": 1991 + }, + { + "ce_ib": 4.180673599243164, + "ce_orig": 0.7945668697357178, + "epoch": 0.572578905744482, + "kl_loss": 0.13110807538032532, + "loss_ib": 0.0017291479744017124, + "step": 1991 + }, + { + "ce_ib": 6.103562831878662, + "ce_orig": 0.5811029076576233, + "epoch": 0.5728664893234596, + "kl_loss": 0.1411518007516861, + "loss_ib": 0.0020218740683048964, + "step": 1992 + }, + { + "ce_ib": 3.6509385108947754, + "ce_orig": 0.4945192039012909, + "epoch": 0.5728664893234596, + "kl_loss": 0.08782428503036499, + "loss_ib": 0.0012433365918695927, + "step": 1992 + }, + { + "ce_ib": 4.089202880859375, + "ce_orig": 0.7927929162979126, + "epoch": 0.5728664893234596, + "kl_loss": 0.04977094754576683, + "loss_ib": 0.0009066296624951065, + "step": 1992 + }, + { + "ce_ib": 3.219717025756836, + "ce_orig": 0.6153027415275574, + "epoch": 0.5728664893234596, + "kl_loss": 0.046362075954675674, + "loss_ib": 0.0007855924777686596, + "step": 1992 + }, + { + "ce_ib": 3.3021416664123535, + "ce_orig": 0.6016639471054077, + "epoch": 0.5731540729024373, + "kl_loss": 0.07935211062431335, + "loss_ib": 0.0011237353319302201, + "step": 1993 + }, + { + "ce_ib": 5.139393329620361, + "ce_orig": 1.167960286140442, + "epoch": 0.5731540729024373, + "kl_loss": 0.0820508599281311, + "loss_ib": 0.001334447879344225, + "step": 1993 + }, + { + "ce_ib": 3.486297607421875, + "ce_orig": 0.6869091987609863, + "epoch": 0.5731540729024373, + "kl_loss": 0.06699474155902863, + "loss_ib": 0.0010185771388933063, + "step": 1993 + }, + { + "ce_ib": 3.998258113861084, + "ce_orig": 0.7381168603897095, + "epoch": 0.5731540729024373, + "kl_loss": 0.11470642685890198, + "loss_ib": 0.0015468901256099343, + "step": 1993 + }, + { + "ce_ib": 3.046184778213501, + "ce_orig": 0.5479092597961426, + "epoch": 0.5734416564814149, + "kl_loss": 0.08408159017562866, + "loss_ib": 0.001145434333011508, + "step": 1994 + }, + { + "ce_ib": 7.051313400268555, + "ce_orig": 1.1906174421310425, + "epoch": 0.5734416564814149, + "kl_loss": 0.07277187705039978, + "loss_ib": 0.0014328500255942345, + "step": 1994 + }, + { + "ce_ib": 2.862262487411499, + "ce_orig": 0.6278262734413147, + "epoch": 0.5734416564814149, + "kl_loss": 0.07842084020376205, + "loss_ib": 0.001070434576831758, + "step": 1994 + }, + { + "ce_ib": 6.027700424194336, + "ce_orig": 0.8440670371055603, + "epoch": 0.5734416564814149, + "kl_loss": 0.12356296181678772, + "loss_ib": 0.001838399562984705, + "step": 1994 + }, + { + "epoch": 0.5737292400603925, + "grad_norm": 0.09038259088993073, + "learning_rate": 4.6695764634096294e-05, + "loss": 0.8229, + "step": 1995 + }, + { + "ce_ib": 5.6056227684021, + "ce_orig": 1.1663364171981812, + "epoch": 0.5737292400603925, + "kl_loss": 0.08868061006069183, + "loss_ib": 0.0014473684132099152, + "step": 1995 + }, + { + "ce_ib": 3.6886541843414307, + "ce_orig": 0.558696448802948, + "epoch": 0.5737292400603925, + "kl_loss": 0.06630785763263702, + "loss_ib": 0.001031943946145475, + "step": 1995 + }, + { + "ce_ib": 6.682548999786377, + "ce_orig": 1.0367622375488281, + "epoch": 0.5737292400603925, + "kl_loss": 0.07508498430252075, + "loss_ib": 0.0014191046357154846, + "step": 1995 + }, + { + "ce_ib": 3.703700304031372, + "ce_orig": 0.4435713291168213, + "epoch": 0.5737292400603925, + "kl_loss": 0.1197780966758728, + "loss_ib": 0.0015681509394198656, + "step": 1995 + }, + { + "ce_ib": 3.6129496097564697, + "ce_orig": 0.6848329305648804, + "epoch": 0.5740168236393702, + "kl_loss": 0.0800422877073288, + "loss_ib": 0.0011617178097367287, + "step": 1996 + }, + { + "ce_ib": 3.02913761138916, + "ce_orig": 0.7430704832077026, + "epoch": 0.5740168236393702, + "kl_loss": 0.05102024972438812, + "loss_ib": 0.0008131162612698972, + "step": 1996 + }, + { + "ce_ib": 2.5370116233825684, + "ce_orig": 0.4718396067619324, + "epoch": 0.5740168236393702, + "kl_loss": 0.05911542475223541, + "loss_ib": 0.0008448553853668272, + "step": 1996 + }, + { + "ce_ib": 5.842480659484863, + "ce_orig": 1.4181674718856812, + "epoch": 0.5740168236393702, + "kl_loss": 0.13227145373821259, + "loss_ib": 0.00190696248319, + "step": 1996 + }, + { + "ce_ib": 4.69348669052124, + "ce_orig": 0.8653129935264587, + "epoch": 0.5743044072183479, + "kl_loss": 0.07009907066822052, + "loss_ib": 0.0011703392956405878, + "step": 1997 + }, + { + "ce_ib": 2.8843765258789062, + "ce_orig": 0.4995043873786926, + "epoch": 0.5743044072183479, + "kl_loss": 0.0416344553232193, + "loss_ib": 0.0007047821418382227, + "step": 1997 + }, + { + "ce_ib": 3.541111707687378, + "ce_orig": 0.763451337814331, + "epoch": 0.5743044072183479, + "kl_loss": 0.08401051163673401, + "loss_ib": 0.0011942163109779358, + "step": 1997 + }, + { + "ce_ib": 4.429714679718018, + "ce_orig": 0.7937890887260437, + "epoch": 0.5743044072183479, + "kl_loss": 0.07476060092449188, + "loss_ib": 0.0011905774008482695, + "step": 1997 + }, + { + "ce_ib": 2.660907030105591, + "ce_orig": 0.5282990336418152, + "epoch": 0.5745919907973255, + "kl_loss": 0.0629044845700264, + "loss_ib": 0.0008951355121098459, + "step": 1998 + }, + { + "ce_ib": 3.842715263366699, + "ce_orig": 0.8554955124855042, + "epoch": 0.5745919907973255, + "kl_loss": 0.08724615722894669, + "loss_ib": 0.0012567330850288272, + "step": 1998 + }, + { + "ce_ib": 6.745482444763184, + "ce_orig": 1.3008232116699219, + "epoch": 0.5745919907973255, + "kl_loss": 0.06568608433008194, + "loss_ib": 0.0013314090901985765, + "step": 1998 + }, + { + "ce_ib": 3.1266820430755615, + "ce_orig": 0.6292819976806641, + "epoch": 0.5745919907973255, + "kl_loss": 0.06733492016792297, + "loss_ib": 0.0009860174031928182, + "step": 1998 + }, + { + "ce_ib": 2.933603525161743, + "ce_orig": 0.5497424602508545, + "epoch": 0.5748795743763031, + "kl_loss": 0.05501857399940491, + "loss_ib": 0.0008435460622422397, + "step": 1999 + }, + { + "ce_ib": 4.88081169128418, + "ce_orig": 0.819663941860199, + "epoch": 0.5748795743763031, + "kl_loss": 0.06542935967445374, + "loss_ib": 0.0011423747055232525, + "step": 1999 + }, + { + "ce_ib": 6.069686412811279, + "ce_orig": 0.8352373242378235, + "epoch": 0.5748795743763031, + "kl_loss": 0.08044139295816422, + "loss_ib": 0.0014113825745880604, + "step": 1999 + }, + { + "ce_ib": 4.726129055023193, + "ce_orig": 0.7042459845542908, + "epoch": 0.5748795743763031, + "kl_loss": 0.08418527245521545, + "loss_ib": 0.0013144656550139189, + "step": 1999 + }, + { + "epoch": 0.5751671579552807, + "grad_norm": 0.09278979152441025, + "learning_rate": 4.667645821518111e-05, + "loss": 0.8686, + "step": 2000 + }, + { + "ce_ib": 4.7381720542907715, + "ce_orig": 0.7667548060417175, + "epoch": 0.5751671579552807, + "kl_loss": 0.06924396753311157, + "loss_ib": 0.0011662568431347609, + "step": 2000 + }, + { + "ce_ib": 6.185446739196777, + "ce_orig": 0.9914663434028625, + "epoch": 0.5751671579552807, + "kl_loss": 0.10125076770782471, + "loss_ib": 0.0016310523496940732, + "step": 2000 + }, + { + "ce_ib": 3.9529600143432617, + "ce_orig": 0.7959910035133362, + "epoch": 0.5751671579552807, + "kl_loss": 0.08596288412809372, + "loss_ib": 0.0012549248058348894, + "step": 2000 + }, + { + "ce_ib": 4.807860851287842, + "ce_orig": 0.8189019560813904, + "epoch": 0.5751671579552807, + "kl_loss": 0.056936271488666534, + "loss_ib": 0.0010501487413421273, + "step": 2000 + }, + { + "ce_ib": 3.4483773708343506, + "ce_orig": 0.7824946045875549, + "epoch": 0.5754547415342584, + "kl_loss": 0.09102329611778259, + "loss_ib": 0.0012550706742331386, + "step": 2001 + }, + { + "ce_ib": 2.5068585872650146, + "ce_orig": 0.5709012150764465, + "epoch": 0.5754547415342584, + "kl_loss": 0.056783635169267654, + "loss_ib": 0.0008185221813619137, + "step": 2001 + }, + { + "ce_ib": 3.3684260845184326, + "ce_orig": 0.7557722926139832, + "epoch": 0.5754547415342584, + "kl_loss": 0.07010000944137573, + "loss_ib": 0.0010378427105024457, + "step": 2001 + }, + { + "ce_ib": 3.346388578414917, + "ce_orig": 0.6786103248596191, + "epoch": 0.5754547415342584, + "kl_loss": 0.06772549450397491, + "loss_ib": 0.001011893735267222, + "step": 2001 + }, + { + "ce_ib": 4.719963073730469, + "ce_orig": 0.5611307621002197, + "epoch": 0.5757423251132361, + "kl_loss": 0.11800660192966461, + "loss_ib": 0.0016520621720701456, + "step": 2002 + }, + { + "ce_ib": 4.451277732849121, + "ce_orig": 0.8972640633583069, + "epoch": 0.5757423251132361, + "kl_loss": 0.0832831859588623, + "loss_ib": 0.0012779596727341413, + "step": 2002 + }, + { + "ce_ib": 3.8485934734344482, + "ce_orig": 0.5940215587615967, + "epoch": 0.5757423251132361, + "kl_loss": 0.08714320510625839, + "loss_ib": 0.0012562914052978158, + "step": 2002 + }, + { + "ce_ib": 4.713588237762451, + "ce_orig": 0.9998626112937927, + "epoch": 0.5757423251132361, + "kl_loss": 0.08216868340969086, + "loss_ib": 0.0012930455850437284, + "step": 2002 + }, + { + "ce_ib": 3.1058030128479004, + "ce_orig": 0.4528571367263794, + "epoch": 0.5760299086922137, + "kl_loss": 0.0769834816455841, + "loss_ib": 0.0010804150952026248, + "step": 2003 + }, + { + "ce_ib": 5.102503776550293, + "ce_orig": 0.5935077667236328, + "epoch": 0.5760299086922137, + "kl_loss": 0.1083073541522026, + "loss_ib": 0.001593323890119791, + "step": 2003 + }, + { + "ce_ib": 5.178896903991699, + "ce_orig": 0.8716755509376526, + "epoch": 0.5760299086922137, + "kl_loss": 0.12696272134780884, + "loss_ib": 0.0017875168705359101, + "step": 2003 + }, + { + "ce_ib": 7.3684210777282715, + "ce_orig": 1.459415316581726, + "epoch": 0.5760299086922137, + "kl_loss": 0.08616820722818375, + "loss_ib": 0.0015985241625458002, + "step": 2003 + }, + { + "ce_ib": 4.102677345275879, + "ce_orig": 0.7349299192428589, + "epoch": 0.5763174922711913, + "kl_loss": 0.082815021276474, + "loss_ib": 0.0012384179281070828, + "step": 2004 + }, + { + "ce_ib": 3.7351036071777344, + "ce_orig": 0.8264210820198059, + "epoch": 0.5763174922711913, + "kl_loss": 0.04580068588256836, + "loss_ib": 0.0008315171580761671, + "step": 2004 + }, + { + "ce_ib": 6.56650972366333, + "ce_orig": 1.334507703781128, + "epoch": 0.5763174922711913, + "kl_loss": 0.06093139946460724, + "loss_ib": 0.001265964936465025, + "step": 2004 + }, + { + "ce_ib": 3.622185230255127, + "ce_orig": 0.7810819745063782, + "epoch": 0.5763174922711913, + "kl_loss": 0.05320556089282036, + "loss_ib": 0.0008942740969359875, + "step": 2004 + }, + { + "epoch": 0.576605075850169, + "grad_norm": 0.09934309124946594, + "learning_rate": 4.665709957264687e-05, + "loss": 0.8719, + "step": 2005 + }, + { + "ce_ib": 8.384037971496582, + "ce_orig": 1.6915264129638672, + "epoch": 0.576605075850169, + "kl_loss": 0.10707975924015045, + "loss_ib": 0.0019092013826593757, + "step": 2005 + }, + { + "ce_ib": 5.389034271240234, + "ce_orig": 1.1430290937423706, + "epoch": 0.576605075850169, + "kl_loss": 0.12188690900802612, + "loss_ib": 0.0017577725229784846, + "step": 2005 + }, + { + "ce_ib": 4.10136604309082, + "ce_orig": 0.6652698516845703, + "epoch": 0.576605075850169, + "kl_loss": 0.07405024766921997, + "loss_ib": 0.0011506390292197466, + "step": 2005 + }, + { + "ce_ib": 4.155540943145752, + "ce_orig": 0.597294807434082, + "epoch": 0.576605075850169, + "kl_loss": 0.0758456140756607, + "loss_ib": 0.0011740101035684347, + "step": 2005 + }, + { + "ce_ib": 4.113492488861084, + "ce_orig": 0.889549195766449, + "epoch": 0.5768926594291466, + "kl_loss": 0.07134812325239182, + "loss_ib": 0.001124830450862646, + "step": 2006 + }, + { + "ce_ib": 4.259766578674316, + "ce_orig": 0.7977469563484192, + "epoch": 0.5768926594291466, + "kl_loss": 0.06592670828104019, + "loss_ib": 0.0010852437699213624, + "step": 2006 + }, + { + "ce_ib": 3.6658823490142822, + "ce_orig": 0.784963071346283, + "epoch": 0.5768926594291466, + "kl_loss": 0.08337907493114471, + "loss_ib": 0.0012003789888694882, + "step": 2006 + }, + { + "ce_ib": 2.6721603870391846, + "ce_orig": 0.3435841202735901, + "epoch": 0.5768926594291466, + "kl_loss": 0.07179874926805496, + "loss_ib": 0.000985203543677926, + "step": 2006 + }, + { + "ce_ib": 3.818535804748535, + "ce_orig": 0.8686417937278748, + "epoch": 0.5771802430081242, + "kl_loss": 0.03878583014011383, + "loss_ib": 0.000769711856264621, + "step": 2007 + }, + { + "ce_ib": 6.2454376220703125, + "ce_orig": 1.5839707851409912, + "epoch": 0.5771802430081242, + "kl_loss": 0.07947902381420135, + "loss_ib": 0.0014193339738994837, + "step": 2007 + }, + { + "ce_ib": 7.073714733123779, + "ce_orig": 1.3507516384124756, + "epoch": 0.5771802430081242, + "kl_loss": 0.11942370235919952, + "loss_ib": 0.0019016083097085357, + "step": 2007 + }, + { + "ce_ib": 4.317225933074951, + "ce_orig": 1.040898084640503, + "epoch": 0.5771802430081242, + "kl_loss": 0.06442729383707047, + "loss_ib": 0.001075995503924787, + "step": 2007 + }, + { + "ce_ib": 4.867289066314697, + "ce_orig": 1.0482796430587769, + "epoch": 0.5774678265871018, + "kl_loss": 0.10058677196502686, + "loss_ib": 0.0014925965806469321, + "step": 2008 + }, + { + "ce_ib": 2.8674023151397705, + "ce_orig": 0.4176560342311859, + "epoch": 0.5774678265871018, + "kl_loss": 0.09753378480672836, + "loss_ib": 0.0012620780616998672, + "step": 2008 + }, + { + "ce_ib": 2.9439525604248047, + "ce_orig": 0.7321798801422119, + "epoch": 0.5774678265871018, + "kl_loss": 0.054294951260089874, + "loss_ib": 0.0008373447344638407, + "step": 2008 + }, + { + "ce_ib": 6.30354642868042, + "ce_orig": 1.1354666948318481, + "epoch": 0.5774678265871018, + "kl_loss": 0.08569367229938507, + "loss_ib": 0.0014872914180159569, + "step": 2008 + }, + { + "ce_ib": 4.372932434082031, + "ce_orig": 0.7446667551994324, + "epoch": 0.5777554101660796, + "kl_loss": 0.10344913601875305, + "loss_ib": 0.0014717845479026437, + "step": 2009 + }, + { + "ce_ib": 3.802776575088501, + "ce_orig": 0.6199583411216736, + "epoch": 0.5777554101660796, + "kl_loss": 0.08620652556419373, + "loss_ib": 0.001242342870682478, + "step": 2009 + }, + { + "ce_ib": 2.49391770362854, + "ce_orig": 0.38733407855033875, + "epoch": 0.5777554101660796, + "kl_loss": 0.08221118152141571, + "loss_ib": 0.0010715036187320948, + "step": 2009 + }, + { + "ce_ib": 2.961163282394409, + "ce_orig": 0.4928281903266907, + "epoch": 0.5777554101660796, + "kl_loss": 0.06056179106235504, + "loss_ib": 0.0009017342235893011, + "step": 2009 + }, + { + "epoch": 0.5780429937450572, + "grad_norm": 0.08660446852445602, + "learning_rate": 4.6637688753133046e-05, + "loss": 0.8341, + "step": 2010 + }, + { + "ce_ib": 4.014667510986328, + "ce_orig": 0.6289469599723816, + "epoch": 0.5780429937450572, + "kl_loss": 0.10371512174606323, + "loss_ib": 0.0014386179391294718, + "step": 2010 + }, + { + "ce_ib": 6.542060375213623, + "ce_orig": 1.3589767217636108, + "epoch": 0.5780429937450572, + "kl_loss": 0.08636632561683655, + "loss_ib": 0.0015178691828623414, + "step": 2010 + }, + { + "ce_ib": 3.575890064239502, + "ce_orig": 0.41844943165779114, + "epoch": 0.5780429937450572, + "kl_loss": 0.08413472771644592, + "loss_ib": 0.001198936253786087, + "step": 2010 + }, + { + "ce_ib": 4.403506755828857, + "ce_orig": 0.7376459240913391, + "epoch": 0.5780429937450572, + "kl_loss": 0.0726398229598999, + "loss_ib": 0.0011667488142848015, + "step": 2010 + }, + { + "ce_ib": 3.1175119876861572, + "ce_orig": 0.3719865679740906, + "epoch": 0.5783305773240348, + "kl_loss": 0.2877184748649597, + "loss_ib": 0.0031889358069747686, + "step": 2011 + }, + { + "ce_ib": 3.1239097118377686, + "ce_orig": 0.8375557065010071, + "epoch": 0.5783305773240348, + "kl_loss": 0.07694227248430252, + "loss_ib": 0.0010818137088790536, + "step": 2011 + }, + { + "ce_ib": 4.8361005783081055, + "ce_orig": 0.6818785071372986, + "epoch": 0.5783305773240348, + "kl_loss": 0.07164740562438965, + "loss_ib": 0.0012000841088593006, + "step": 2011 + }, + { + "ce_ib": 6.42293119430542, + "ce_orig": 1.4506372213363647, + "epoch": 0.5783305773240348, + "kl_loss": 0.07023526728153229, + "loss_ib": 0.0013446457451209426, + "step": 2011 + }, + { + "ce_ib": 3.155590295791626, + "ce_orig": 0.7457772493362427, + "epoch": 0.5786181609030124, + "kl_loss": 0.09450055658817291, + "loss_ib": 0.0012605645461007953, + "step": 2012 + }, + { + "ce_ib": 3.1419599056243896, + "ce_orig": 0.5923829674720764, + "epoch": 0.5786181609030124, + "kl_loss": 0.0846894159913063, + "loss_ib": 0.0011610900983214378, + "step": 2012 + }, + { + "ce_ib": 4.221803665161133, + "ce_orig": 0.9383298754692078, + "epoch": 0.5786181609030124, + "kl_loss": 0.0627596527338028, + "loss_ib": 0.001049776910804212, + "step": 2012 + }, + { + "ce_ib": 6.445415019989014, + "ce_orig": 1.212923526763916, + "epoch": 0.5786181609030124, + "kl_loss": 0.12742683291435242, + "loss_ib": 0.0019188097212463617, + "step": 2012 + }, + { + "ce_ib": 3.687980890274048, + "ce_orig": 0.6746757626533508, + "epoch": 0.5789057444819901, + "kl_loss": 0.11938036978244781, + "loss_ib": 0.001562601770274341, + "step": 2013 + }, + { + "ce_ib": 7.678395748138428, + "ce_orig": 1.2167807817459106, + "epoch": 0.5789057444819901, + "kl_loss": 0.055825963616371155, + "loss_ib": 0.0013260991545394063, + "step": 2013 + }, + { + "ce_ib": 5.9067888259887695, + "ce_orig": 1.187322735786438, + "epoch": 0.5789057444819901, + "kl_loss": 0.08296777307987213, + "loss_ib": 0.0014203565660864115, + "step": 2013 + }, + { + "ce_ib": 5.316205978393555, + "ce_orig": 1.132082223892212, + "epoch": 0.5789057444819901, + "kl_loss": 0.09856992959976196, + "loss_ib": 0.00151731981895864, + "step": 2013 + }, + { + "ce_ib": 3.603752613067627, + "ce_orig": 0.5496395826339722, + "epoch": 0.5791933280609677, + "kl_loss": 0.051588837057352066, + "loss_ib": 0.0008762636571191251, + "step": 2014 + }, + { + "ce_ib": 2.378552198410034, + "ce_orig": 0.5140752792358398, + "epoch": 0.5791933280609677, + "kl_loss": 0.04453272372484207, + "loss_ib": 0.0006831824430264533, + "step": 2014 + }, + { + "ce_ib": 5.9691877365112305, + "ce_orig": 1.2371056079864502, + "epoch": 0.5791933280609677, + "kl_loss": 0.06708849966526031, + "loss_ib": 0.0012678037164732814, + "step": 2014 + }, + { + "ce_ib": 5.3364973068237305, + "ce_orig": 0.9513978958129883, + "epoch": 0.5791933280609677, + "kl_loss": 0.0646238923072815, + "loss_ib": 0.0011798886116594076, + "step": 2014 + }, + { + "epoch": 0.5794809116399453, + "grad_norm": 0.09768121689558029, + "learning_rate": 4.6618225803404796e-05, + "loss": 0.8053, + "step": 2015 + }, + { + "ce_ib": 9.729337692260742, + "ce_orig": 1.6138731241226196, + "epoch": 0.5794809116399453, + "kl_loss": 0.08846911787986755, + "loss_ib": 0.0018576248548924923, + "step": 2015 + }, + { + "ce_ib": 5.705235481262207, + "ce_orig": 1.3202130794525146, + "epoch": 0.5794809116399453, + "kl_loss": 0.1180456280708313, + "loss_ib": 0.0017509798053652048, + "step": 2015 + }, + { + "ce_ib": 4.098585605621338, + "ce_orig": 0.9137470126152039, + "epoch": 0.5794809116399453, + "kl_loss": 0.05790020525455475, + "loss_ib": 0.0009888604981824756, + "step": 2015 + }, + { + "ce_ib": 3.162682056427002, + "ce_orig": 0.6687626242637634, + "epoch": 0.5794809116399453, + "kl_loss": 0.0613526850938797, + "loss_ib": 0.0009297950309701264, + "step": 2015 + }, + { + "ce_ib": 4.755229473114014, + "ce_orig": 0.7178450226783752, + "epoch": 0.579768495218923, + "kl_loss": 0.0816982164978981, + "loss_ib": 0.001292505068704486, + "step": 2016 + }, + { + "ce_ib": 2.4334046840667725, + "ce_orig": 0.29849573969841003, + "epoch": 0.579768495218923, + "kl_loss": 0.07011058926582336, + "loss_ib": 0.0009444463066756725, + "step": 2016 + }, + { + "ce_ib": 2.276764154434204, + "ce_orig": 0.46977829933166504, + "epoch": 0.579768495218923, + "kl_loss": 0.05720705911517143, + "loss_ib": 0.0007997469510883093, + "step": 2016 + }, + { + "ce_ib": 5.106555938720703, + "ce_orig": 0.8176964521408081, + "epoch": 0.579768495218923, + "kl_loss": 0.07577840238809586, + "loss_ib": 0.0012684395769611, + "step": 2016 + }, + { + "ce_ib": 4.477478981018066, + "ce_orig": 0.6582205295562744, + "epoch": 0.5800560787979007, + "kl_loss": 0.10030704736709595, + "loss_ib": 0.0014508182648569345, + "step": 2017 + }, + { + "ce_ib": 3.2256271839141846, + "ce_orig": 0.49293291568756104, + "epoch": 0.5800560787979007, + "kl_loss": 0.06736671179533005, + "loss_ib": 0.0009962298208847642, + "step": 2017 + }, + { + "ce_ib": 4.24847412109375, + "ce_orig": 0.6557114720344543, + "epoch": 0.5800560787979007, + "kl_loss": 0.07698210328817368, + "loss_ib": 0.0011946683516725898, + "step": 2017 + }, + { + "ce_ib": 4.843341827392578, + "ce_orig": 0.8867438435554504, + "epoch": 0.5800560787979007, + "kl_loss": 0.06920981407165527, + "loss_ib": 0.001176432240754366, + "step": 2017 + }, + { + "ce_ib": 5.735297679901123, + "ce_orig": 1.055503249168396, + "epoch": 0.5803436623768783, + "kl_loss": 0.06676360219717026, + "loss_ib": 0.0012411657953634858, + "step": 2018 + }, + { + "ce_ib": 4.17169713973999, + "ce_orig": 0.9282371997833252, + "epoch": 0.5803436623768783, + "kl_loss": 0.08663184940814972, + "loss_ib": 0.0012834882363677025, + "step": 2018 + }, + { + "ce_ib": 4.648031234741211, + "ce_orig": 1.088525652885437, + "epoch": 0.5803436623768783, + "kl_loss": 0.07992123067378998, + "loss_ib": 0.001264015445485711, + "step": 2018 + }, + { + "ce_ib": 2.679481267929077, + "ce_orig": 0.5024892091751099, + "epoch": 0.5803436623768783, + "kl_loss": 0.08213428407907486, + "loss_ib": 0.0010892909485846758, + "step": 2018 + }, + { + "ce_ib": 4.266699314117432, + "ce_orig": 0.6895722150802612, + "epoch": 0.5806312459558559, + "kl_loss": 0.09308237582445145, + "loss_ib": 0.0013574936892837286, + "step": 2019 + }, + { + "ce_ib": 3.1279807090759277, + "ce_orig": 0.6650795936584473, + "epoch": 0.5806312459558559, + "kl_loss": 0.06668146699666977, + "loss_ib": 0.0009796126978471875, + "step": 2019 + }, + { + "ce_ib": 7.60420560836792, + "ce_orig": 1.4092960357666016, + "epoch": 0.5806312459558559, + "kl_loss": 0.08923632651567459, + "loss_ib": 0.001652783714234829, + "step": 2019 + }, + { + "ce_ib": 4.315398216247559, + "ce_orig": 0.9921033382415771, + "epoch": 0.5806312459558559, + "kl_loss": 0.07646635919809341, + "loss_ib": 0.0011962034041061997, + "step": 2019 + }, + { + "epoch": 0.5809188295348335, + "grad_norm": 0.08810504525899887, + "learning_rate": 4.6598710770352897e-05, + "loss": 0.833, + "step": 2020 + }, + { + "ce_ib": 3.3521859645843506, + "ce_orig": 0.47444915771484375, + "epoch": 0.5809188295348335, + "kl_loss": 0.04911565035581589, + "loss_ib": 0.0008263750351034105, + "step": 2020 + }, + { + "ce_ib": 4.658594608306885, + "ce_orig": 0.990524411201477, + "epoch": 0.5809188295348335, + "kl_loss": 0.08183325827121735, + "loss_ib": 0.0012841920834034681, + "step": 2020 + }, + { + "ce_ib": 6.106077194213867, + "ce_orig": 1.4382680654525757, + "epoch": 0.5809188295348335, + "kl_loss": 0.07016189396381378, + "loss_ib": 0.0013122266391292214, + "step": 2020 + }, + { + "ce_ib": 4.61679220199585, + "ce_orig": 0.8093340992927551, + "epoch": 0.5809188295348335, + "kl_loss": 0.0753345862030983, + "loss_ib": 0.0012150249676778913, + "step": 2020 + }, + { + "ce_ib": 4.034084796905518, + "ce_orig": 0.35140150785446167, + "epoch": 0.5812064131138112, + "kl_loss": 0.12296713143587112, + "loss_ib": 0.0016330797225236893, + "step": 2021 + }, + { + "ce_ib": 2.946342945098877, + "ce_orig": 0.5959498286247253, + "epoch": 0.5812064131138112, + "kl_loss": 0.05010417848825455, + "loss_ib": 0.0007956760819070041, + "step": 2021 + }, + { + "ce_ib": 4.119324684143066, + "ce_orig": 0.5134299397468567, + "epoch": 0.5812064131138112, + "kl_loss": 0.07578804343938828, + "loss_ib": 0.0011698128655552864, + "step": 2021 + }, + { + "ce_ib": 6.666695594787598, + "ce_orig": 1.4210636615753174, + "epoch": 0.5812064131138112, + "kl_loss": 0.07421207427978516, + "loss_ib": 0.00140879035461694, + "step": 2021 + }, + { + "ce_ib": 3.317093849182129, + "ce_orig": 0.6808748841285706, + "epoch": 0.5814939966927889, + "kl_loss": 0.04797123372554779, + "loss_ib": 0.0008114217198453844, + "step": 2022 + }, + { + "ce_ib": 5.401058197021484, + "ce_orig": 1.1836018562316895, + "epoch": 0.5814939966927889, + "kl_loss": 0.07236078381538391, + "loss_ib": 0.0012637136969715357, + "step": 2022 + }, + { + "ce_ib": 7.327861785888672, + "ce_orig": 1.2424945831298828, + "epoch": 0.5814939966927889, + "kl_loss": 0.10749910771846771, + "loss_ib": 0.0018077772110700607, + "step": 2022 + }, + { + "ce_ib": 11.270186424255371, + "ce_orig": 2.2991769313812256, + "epoch": 0.5814939966927889, + "kl_loss": 0.10713590681552887, + "loss_ib": 0.002198377624154091, + "step": 2022 + }, + { + "ce_ib": 6.073554515838623, + "ce_orig": 1.2062621116638184, + "epoch": 0.5817815802717665, + "kl_loss": 0.04944906383752823, + "loss_ib": 0.001101846108213067, + "step": 2023 + }, + { + "ce_ib": 3.1157004833221436, + "ce_orig": 0.7705442309379578, + "epoch": 0.5817815802717665, + "kl_loss": 0.0514216311275959, + "loss_ib": 0.0008257863228209317, + "step": 2023 + }, + { + "ce_ib": 5.744011402130127, + "ce_orig": 1.3239465951919556, + "epoch": 0.5817815802717665, + "kl_loss": 0.10335258394479752, + "loss_ib": 0.00160792691167444, + "step": 2023 + }, + { + "ce_ib": 6.266079902648926, + "ce_orig": 0.6476110219955444, + "epoch": 0.5817815802717665, + "kl_loss": 0.1909639537334442, + "loss_ib": 0.0025362472515553236, + "step": 2023 + }, + { + "ce_ib": 4.291951656341553, + "ce_orig": 0.6375108361244202, + "epoch": 0.5820691638507441, + "kl_loss": 0.08829502761363983, + "loss_ib": 0.0013121453812345862, + "step": 2024 + }, + { + "ce_ib": 4.464111804962158, + "ce_orig": 0.8467534184455872, + "epoch": 0.5820691638507441, + "kl_loss": 0.05542716383934021, + "loss_ib": 0.0010006828233599663, + "step": 2024 + }, + { + "ce_ib": 3.372269868850708, + "ce_orig": 0.5076906085014343, + "epoch": 0.5820691638507441, + "kl_loss": 0.06684467196464539, + "loss_ib": 0.0010056736646220088, + "step": 2024 + }, + { + "ce_ib": 3.2867679595947266, + "ce_orig": 0.5394257307052612, + "epoch": 0.5820691638507441, + "kl_loss": 0.07050927728414536, + "loss_ib": 0.001033769571222365, + "step": 2024 + }, + { + "epoch": 0.5823567474297218, + "grad_norm": 0.09361595660448074, + "learning_rate": 4.657914370099357e-05, + "loss": 0.8339, + "step": 2025 + }, + { + "ce_ib": 6.080501079559326, + "ce_orig": 1.1142053604125977, + "epoch": 0.5823567474297218, + "kl_loss": 0.0996551513671875, + "loss_ib": 0.0016046015080064535, + "step": 2025 + }, + { + "ce_ib": 2.7012405395507812, + "ce_orig": 0.5527401566505432, + "epoch": 0.5823567474297218, + "kl_loss": 0.0427560955286026, + "loss_ib": 0.0006976849981583655, + "step": 2025 + }, + { + "ce_ib": 3.603679895401001, + "ce_orig": 0.6303413510322571, + "epoch": 0.5823567474297218, + "kl_loss": 0.07848852127790451, + "loss_ib": 0.0011452531907707453, + "step": 2025 + }, + { + "ce_ib": 4.182196617126465, + "ce_orig": 0.5818547010421753, + "epoch": 0.5823567474297218, + "kl_loss": 0.06278377771377563, + "loss_ib": 0.0010460574412718415, + "step": 2025 + }, + { + "ce_ib": 3.317700147628784, + "ce_orig": 0.4418105185031891, + "epoch": 0.5826443310086994, + "kl_loss": 0.09371384978294373, + "loss_ib": 0.001268908497877419, + "step": 2026 + }, + { + "ce_ib": 3.925766944885254, + "ce_orig": 0.6571186184883118, + "epoch": 0.5826443310086994, + "kl_loss": 0.04111338406801224, + "loss_ib": 0.00080371048534289, + "step": 2026 + }, + { + "ce_ib": 2.5642752647399902, + "ce_orig": 0.5153859257698059, + "epoch": 0.5826443310086994, + "kl_loss": 0.049859605729579926, + "loss_ib": 0.0007550235604867339, + "step": 2026 + }, + { + "ce_ib": 3.228949785232544, + "ce_orig": 0.43013179302215576, + "epoch": 0.5826443310086994, + "kl_loss": 0.09801869839429855, + "loss_ib": 0.001303081982769072, + "step": 2026 + }, + { + "ce_ib": 5.282431602478027, + "ce_orig": 1.2418466806411743, + "epoch": 0.582931914587677, + "kl_loss": 0.09277214854955673, + "loss_ib": 0.001455964520573616, + "step": 2027 + }, + { + "ce_ib": 3.554309368133545, + "ce_orig": 0.8098000884056091, + "epoch": 0.582931914587677, + "kl_loss": 0.06565161794424057, + "loss_ib": 0.0010119470534846187, + "step": 2027 + }, + { + "ce_ib": 2.859760046005249, + "ce_orig": 0.574192225933075, + "epoch": 0.582931914587677, + "kl_loss": 0.06509241461753845, + "loss_ib": 0.0009369001490995288, + "step": 2027 + }, + { + "ce_ib": 3.1002490520477295, + "ce_orig": 0.799948513507843, + "epoch": 0.582931914587677, + "kl_loss": 0.044379591941833496, + "loss_ib": 0.0007538208155892789, + "step": 2027 + }, + { + "ce_ib": 2.553058385848999, + "ce_orig": 0.4929788112640381, + "epoch": 0.5832194981666546, + "kl_loss": 0.06133830547332764, + "loss_ib": 0.000868688861373812, + "step": 2028 + }, + { + "ce_ib": 4.22827672958374, + "ce_orig": 0.6271825432777405, + "epoch": 0.5832194981666546, + "kl_loss": 0.11522883921861649, + "loss_ib": 0.0015751160681247711, + "step": 2028 + }, + { + "ce_ib": 7.556985855102539, + "ce_orig": 1.5960707664489746, + "epoch": 0.5832194981666546, + "kl_loss": 0.10099978744983673, + "loss_ib": 0.001765696331858635, + "step": 2028 + }, + { + "ce_ib": 3.9897584915161133, + "ce_orig": 0.5135675072669983, + "epoch": 0.5832194981666546, + "kl_loss": 0.11925222724676132, + "loss_ib": 0.0015914980322122574, + "step": 2028 + }, + { + "ce_ib": 4.252439975738525, + "ce_orig": 0.7363337874412537, + "epoch": 0.5835070817456324, + "kl_loss": 0.08480304479598999, + "loss_ib": 0.0012732744216918945, + "step": 2029 + }, + { + "ce_ib": 4.188432693481445, + "ce_orig": 0.87666255235672, + "epoch": 0.5835070817456324, + "kl_loss": 0.1086680144071579, + "loss_ib": 0.001505523337982595, + "step": 2029 + }, + { + "ce_ib": 5.6722259521484375, + "ce_orig": 0.9371324181556702, + "epoch": 0.5835070817456324, + "kl_loss": 0.09682632237672806, + "loss_ib": 0.0015354858478531241, + "step": 2029 + }, + { + "ce_ib": 6.399697780609131, + "ce_orig": 1.3195101022720337, + "epoch": 0.5835070817456324, + "kl_loss": 0.0971706211566925, + "loss_ib": 0.0016116758342832327, + "step": 2029 + }, + { + "epoch": 0.58379466532461, + "grad_norm": 0.08494076877832413, + "learning_rate": 4.655952464246845e-05, + "loss": 0.8071, + "step": 2030 + }, + { + "ce_ib": 2.102947473526001, + "ce_orig": 0.4207648038864136, + "epoch": 0.58379466532461, + "kl_loss": 0.06320713460445404, + "loss_ib": 0.0008423660765402019, + "step": 2030 + }, + { + "ce_ib": 5.934902191162109, + "ce_orig": 1.1558259725570679, + "epoch": 0.58379466532461, + "kl_loss": 0.1001289039850235, + "loss_ib": 0.0015947791980579495, + "step": 2030 + }, + { + "ce_ib": 3.1391124725341797, + "ce_orig": 0.4194270372390747, + "epoch": 0.58379466532461, + "kl_loss": 0.07824330031871796, + "loss_ib": 0.0010963443201035261, + "step": 2030 + }, + { + "ce_ib": 5.974737167358398, + "ce_orig": 1.068595290184021, + "epoch": 0.58379466532461, + "kl_loss": 0.0886555165052414, + "loss_ib": 0.0014840287622064352, + "step": 2030 + }, + { + "ce_ib": 3.960604190826416, + "ce_orig": 0.7665067315101624, + "epoch": 0.5840822489035876, + "kl_loss": 0.12673716247081757, + "loss_ib": 0.0016634321073070168, + "step": 2031 + }, + { + "ce_ib": 4.032761573791504, + "ce_orig": 0.757562518119812, + "epoch": 0.5840822489035876, + "kl_loss": 0.17807495594024658, + "loss_ib": 0.0021840257104486227, + "step": 2031 + }, + { + "ce_ib": 3.1796092987060547, + "ce_orig": 0.632624626159668, + "epoch": 0.5840822489035876, + "kl_loss": 0.07726400345563889, + "loss_ib": 0.0010906009702011943, + "step": 2031 + }, + { + "ce_ib": 3.9003450870513916, + "ce_orig": 0.7355715036392212, + "epoch": 0.5840822489035876, + "kl_loss": 0.11673317849636078, + "loss_ib": 0.0015573662240058184, + "step": 2031 + }, + { + "ce_ib": 4.992671012878418, + "ce_orig": 0.9748689532279968, + "epoch": 0.5843698324825652, + "kl_loss": 0.10398383438587189, + "loss_ib": 0.0015391054330393672, + "step": 2032 + }, + { + "ce_ib": 5.459630489349365, + "ce_orig": 1.1683826446533203, + "epoch": 0.5843698324825652, + "kl_loss": 0.0678820013999939, + "loss_ib": 0.0012247830163687468, + "step": 2032 + }, + { + "ce_ib": 1.5908578634262085, + "ce_orig": 0.21944506466388702, + "epoch": 0.5843698324825652, + "kl_loss": 0.054189763963222504, + "loss_ib": 0.000700983393471688, + "step": 2032 + }, + { + "ce_ib": 6.320504665374756, + "ce_orig": 1.2554084062576294, + "epoch": 0.5843698324825652, + "kl_loss": 0.08931966125965118, + "loss_ib": 0.0015252471202984452, + "step": 2032 + }, + { + "ce_ib": 2.1938796043395996, + "ce_orig": 0.46823227405548096, + "epoch": 0.5846574160615429, + "kl_loss": 0.055079735815525055, + "loss_ib": 0.0007701852591708302, + "step": 2033 + }, + { + "ce_ib": 3.1461942195892334, + "ce_orig": 0.5529213547706604, + "epoch": 0.5846574160615429, + "kl_loss": 0.057206280529499054, + "loss_ib": 0.0008866822463460267, + "step": 2033 + }, + { + "ce_ib": 2.8030734062194824, + "ce_orig": 0.7001335024833679, + "epoch": 0.5846574160615429, + "kl_loss": 0.058364272117614746, + "loss_ib": 0.0008639500010758638, + "step": 2033 + }, + { + "ce_ib": 4.115478038787842, + "ce_orig": 0.7716702222824097, + "epoch": 0.5846574160615429, + "kl_loss": 0.08812357485294342, + "loss_ib": 0.001292783534154296, + "step": 2033 + }, + { + "ce_ib": 3.805929183959961, + "ce_orig": 0.7593316435813904, + "epoch": 0.5849449996405205, + "kl_loss": 0.05430496111512184, + "loss_ib": 0.0009236424812115729, + "step": 2034 + }, + { + "ce_ib": 3.5093514919281006, + "ce_orig": 0.8731566667556763, + "epoch": 0.5849449996405205, + "kl_loss": 0.030429992824792862, + "loss_ib": 0.0006552350241690874, + "step": 2034 + }, + { + "ce_ib": 6.79688835144043, + "ce_orig": 1.4837602376937866, + "epoch": 0.5849449996405205, + "kl_loss": 0.11590899527072906, + "loss_ib": 0.0018387788441032171, + "step": 2034 + }, + { + "ce_ib": 3.561189889907837, + "ce_orig": 0.6179026961326599, + "epoch": 0.5849449996405205, + "kl_loss": 0.08256538957357407, + "loss_ib": 0.0011817727936431766, + "step": 2034 + }, + { + "epoch": 0.5852325832194981, + "grad_norm": 0.11583594232797623, + "learning_rate": 4.65398536420444e-05, + "loss": 0.8893, + "step": 2035 + }, + { + "ce_ib": 3.4750640392303467, + "ce_orig": 0.31595537066459656, + "epoch": 0.5852325832194981, + "kl_loss": 0.10555543005466461, + "loss_ib": 0.001403060625307262, + "step": 2035 + }, + { + "ce_ib": 3.978022813796997, + "ce_orig": 0.5297307968139648, + "epoch": 0.5852325832194981, + "kl_loss": 0.07544425874948502, + "loss_ib": 0.0011522447457537055, + "step": 2035 + }, + { + "ce_ib": 4.4415507316589355, + "ce_orig": 0.7714311480522156, + "epoch": 0.5852325832194981, + "kl_loss": 0.0957164615392685, + "loss_ib": 0.0014013196341693401, + "step": 2035 + }, + { + "ce_ib": 5.807650566101074, + "ce_orig": 1.2554059028625488, + "epoch": 0.5852325832194981, + "kl_loss": 0.08336826413869858, + "loss_ib": 0.0014144476735964417, + "step": 2035 + }, + { + "ce_ib": 6.648195266723633, + "ce_orig": 1.2857075929641724, + "epoch": 0.5855201667984758, + "kl_loss": 0.1076415404677391, + "loss_ib": 0.0017412349116057158, + "step": 2036 + }, + { + "ce_ib": 3.6307997703552246, + "ce_orig": 0.7613444924354553, + "epoch": 0.5855201667984758, + "kl_loss": 0.05388565734028816, + "loss_ib": 0.0009019365534186363, + "step": 2036 + }, + { + "ce_ib": 4.625122547149658, + "ce_orig": 0.7264091372489929, + "epoch": 0.5855201667984758, + "kl_loss": 0.060444798320531845, + "loss_ib": 0.001066960277967155, + "step": 2036 + }, + { + "ce_ib": 4.56347131729126, + "ce_orig": 1.0091989040374756, + "epoch": 0.5855201667984758, + "kl_loss": 0.21853941679000854, + "loss_ib": 0.0026417411863803864, + "step": 2036 + }, + { + "ce_ib": 3.3193130493164062, + "ce_orig": 0.5675449967384338, + "epoch": 0.5858077503774535, + "kl_loss": 0.09524855017662048, + "loss_ib": 0.0012844167649745941, + "step": 2037 + }, + { + "ce_ib": 3.1635890007019043, + "ce_orig": 0.6505133509635925, + "epoch": 0.5858077503774535, + "kl_loss": 0.06704285740852356, + "loss_ib": 0.0009867873741313815, + "step": 2037 + }, + { + "ce_ib": 4.334797382354736, + "ce_orig": 0.9767372012138367, + "epoch": 0.5858077503774535, + "kl_loss": 0.07628752291202545, + "loss_ib": 0.0011963548604398966, + "step": 2037 + }, + { + "ce_ib": 2.4521825313568115, + "ce_orig": 0.6900293231010437, + "epoch": 0.5858077503774535, + "kl_loss": 0.041968874633312225, + "loss_ib": 0.000664906925521791, + "step": 2037 + }, + { + "ce_ib": 7.894253253936768, + "ce_orig": 1.628238558769226, + "epoch": 0.5860953339564311, + "kl_loss": 0.11309190839529037, + "loss_ib": 0.0019203443080186844, + "step": 2038 + }, + { + "ce_ib": 4.390299320220947, + "ce_orig": 0.8751962184906006, + "epoch": 0.5860953339564311, + "kl_loss": 0.12095611542463303, + "loss_ib": 0.0016485911328345537, + "step": 2038 + }, + { + "ce_ib": 6.873483657836914, + "ce_orig": 0.744406521320343, + "epoch": 0.5860953339564311, + "kl_loss": 0.3343762457370758, + "loss_ib": 0.004031110554933548, + "step": 2038 + }, + { + "ce_ib": 5.363774299621582, + "ce_orig": 0.5709425210952759, + "epoch": 0.5860953339564311, + "kl_loss": 0.09840154647827148, + "loss_ib": 0.0015203928342089057, + "step": 2038 + }, + { + "ce_ib": 6.324906349182129, + "ce_orig": 1.3165900707244873, + "epoch": 0.5863829175354087, + "kl_loss": 0.06637325137853622, + "loss_ib": 0.0012962230248376727, + "step": 2039 + }, + { + "ce_ib": 3.1559431552886963, + "ce_orig": 0.6607347726821899, + "epoch": 0.5863829175354087, + "kl_loss": 0.05676073580980301, + "loss_ib": 0.000883201661054045, + "step": 2039 + }, + { + "ce_ib": 4.906577110290527, + "ce_orig": 0.9158421754837036, + "epoch": 0.5863829175354087, + "kl_loss": 0.08166482299566269, + "loss_ib": 0.0013073059963062406, + "step": 2039 + }, + { + "ce_ib": 7.970769882202148, + "ce_orig": 1.7942582368850708, + "epoch": 0.5863829175354087, + "kl_loss": 0.3182886838912964, + "loss_ib": 0.003979963716119528, + "step": 2039 + }, + { + "epoch": 0.5866705011143863, + "grad_norm": 0.09424319118261337, + "learning_rate": 4.65201307471134e-05, + "loss": 0.8394, + "step": 2040 + }, + { + "ce_ib": 5.8340630531311035, + "ce_orig": 1.2353798151016235, + "epoch": 0.5866705011143863, + "kl_loss": 0.10919980704784393, + "loss_ib": 0.0016754042590036988, + "step": 2040 + }, + { + "ce_ib": 6.629415512084961, + "ce_orig": 1.1576297283172607, + "epoch": 0.5866705011143863, + "kl_loss": 0.10775483399629593, + "loss_ib": 0.0017404898535460234, + "step": 2040 + }, + { + "ce_ib": 3.968146800994873, + "ce_orig": 0.8507120609283447, + "epoch": 0.5866705011143863, + "kl_loss": 0.06898339092731476, + "loss_ib": 0.001086648553609848, + "step": 2040 + }, + { + "ce_ib": 4.752996921539307, + "ce_orig": 0.8242056965827942, + "epoch": 0.5866705011143863, + "kl_loss": 0.06952349841594696, + "loss_ib": 0.0011705346405506134, + "step": 2040 + }, + { + "ce_ib": 5.695987701416016, + "ce_orig": 1.4135266542434692, + "epoch": 0.586958084693364, + "kl_loss": 0.05267796665430069, + "loss_ib": 0.0010963784297928214, + "step": 2041 + }, + { + "ce_ib": 4.066835403442383, + "ce_orig": 0.7699145674705505, + "epoch": 0.586958084693364, + "kl_loss": 0.0443059504032135, + "loss_ib": 0.0008497430244460702, + "step": 2041 + }, + { + "ce_ib": 8.163403511047363, + "ce_orig": 1.6039273738861084, + "epoch": 0.586958084693364, + "kl_loss": 0.1438167244195938, + "loss_ib": 0.002254507504403591, + "step": 2041 + }, + { + "ce_ib": 6.09199333190918, + "ce_orig": 0.8647274374961853, + "epoch": 0.586958084693364, + "kl_loss": 0.09483753144741058, + "loss_ib": 0.0015575744910165668, + "step": 2041 + }, + { + "ce_ib": 3.0585029125213623, + "ce_orig": 0.6006951332092285, + "epoch": 0.5872456682723417, + "kl_loss": 0.06109433248639107, + "loss_ib": 0.0009167936514131725, + "step": 2042 + }, + { + "ce_ib": 5.772488594055176, + "ce_orig": 0.8846545219421387, + "epoch": 0.5872456682723417, + "kl_loss": 0.07155363261699677, + "loss_ib": 0.0012927850475534797, + "step": 2042 + }, + { + "ce_ib": 4.5323357582092285, + "ce_orig": 0.5432944297790527, + "epoch": 0.5872456682723417, + "kl_loss": 0.05695075914263725, + "loss_ib": 0.0010227411985397339, + "step": 2042 + }, + { + "ce_ib": 6.545085906982422, + "ce_orig": 1.4495124816894531, + "epoch": 0.5872456682723417, + "kl_loss": 0.06774143874645233, + "loss_ib": 0.0013319229474291205, + "step": 2042 + }, + { + "ce_ib": 5.375219821929932, + "ce_orig": 1.0865528583526611, + "epoch": 0.5875332518513193, + "kl_loss": 0.10563570261001587, + "loss_ib": 0.0015938789583742619, + "step": 2043 + }, + { + "ce_ib": 3.5467565059661865, + "ce_orig": 0.9108145236968994, + "epoch": 0.5875332518513193, + "kl_loss": 0.0429227352142334, + "loss_ib": 0.0007839030004106462, + "step": 2043 + }, + { + "ce_ib": 2.9593658447265625, + "ce_orig": 0.6286619901657104, + "epoch": 0.5875332518513193, + "kl_loss": 0.06000431254506111, + "loss_ib": 0.0008959796978160739, + "step": 2043 + }, + { + "ce_ib": 5.995804786682129, + "ce_orig": 1.2220312356948853, + "epoch": 0.5875332518513193, + "kl_loss": 0.13617894053459167, + "loss_ib": 0.001961369765922427, + "step": 2043 + }, + { + "ce_ib": 4.4640583992004395, + "ce_orig": 1.054542064666748, + "epoch": 0.587820835430297, + "kl_loss": 0.05077815055847168, + "loss_ib": 0.0009541873587295413, + "step": 2044 + }, + { + "ce_ib": 5.712314128875732, + "ce_orig": 1.3896074295043945, + "epoch": 0.587820835430297, + "kl_loss": 0.08283805102109909, + "loss_ib": 0.001399611821398139, + "step": 2044 + }, + { + "ce_ib": 3.2419872283935547, + "ce_orig": 0.6002771258354187, + "epoch": 0.587820835430297, + "kl_loss": 0.10271742194890976, + "loss_ib": 0.0013513729209080338, + "step": 2044 + }, + { + "ce_ib": 3.5436296463012695, + "ce_orig": 0.6841360330581665, + "epoch": 0.587820835430297, + "kl_loss": 0.06413637101650238, + "loss_ib": 0.0009957266738638282, + "step": 2044 + }, + { + "epoch": 0.5881084190092746, + "grad_norm": 0.12266967445611954, + "learning_rate": 4.6500356005192514e-05, + "loss": 0.9172, + "step": 2045 + }, + { + "ce_ib": 6.627530574798584, + "ce_orig": 0.8365546464920044, + "epoch": 0.5881084190092746, + "kl_loss": 0.12072031199932098, + "loss_ib": 0.0018699562642723322, + "step": 2045 + }, + { + "ce_ib": 2.412168264389038, + "ce_orig": 0.5904463529586792, + "epoch": 0.5881084190092746, + "kl_loss": 0.039270151406526566, + "loss_ib": 0.0006339183310046792, + "step": 2045 + }, + { + "ce_ib": 1.9586596488952637, + "ce_orig": 0.3443754017353058, + "epoch": 0.5881084190092746, + "kl_loss": 0.0715847760438919, + "loss_ib": 0.0009117136942222714, + "step": 2045 + }, + { + "ce_ib": 6.992125034332275, + "ce_orig": 0.9909374713897705, + "epoch": 0.5881084190092746, + "kl_loss": 0.13320910930633545, + "loss_ib": 0.002031303709372878, + "step": 2045 + }, + { + "ce_ib": 5.750677108764648, + "ce_orig": 1.0655937194824219, + "epoch": 0.5883960025882522, + "kl_loss": 0.07006116956472397, + "loss_ib": 0.0012756794458255172, + "step": 2046 + }, + { + "ce_ib": 4.87274694442749, + "ce_orig": 0.6196299195289612, + "epoch": 0.5883960025882522, + "kl_loss": 0.0925484225153923, + "loss_ib": 0.0014127588365226984, + "step": 2046 + }, + { + "ce_ib": 5.37498664855957, + "ce_orig": 1.1246055364608765, + "epoch": 0.5883960025882522, + "kl_loss": 0.1491096317768097, + "loss_ib": 0.002028594957664609, + "step": 2046 + }, + { + "ce_ib": 6.0852508544921875, + "ce_orig": 0.7786949276924133, + "epoch": 0.5883960025882522, + "kl_loss": 0.0808965340256691, + "loss_ib": 0.0014174903044477105, + "step": 2046 + }, + { + "ce_ib": 3.8106331825256348, + "ce_orig": 0.7314810752868652, + "epoch": 0.5886835861672298, + "kl_loss": 0.054646991193294525, + "loss_ib": 0.0009275331976823509, + "step": 2047 + }, + { + "ce_ib": 6.897280693054199, + "ce_orig": 1.3549307584762573, + "epoch": 0.5886835861672298, + "kl_loss": 0.08145168423652649, + "loss_ib": 0.0015042447485029697, + "step": 2047 + }, + { + "ce_ib": 5.091572284698486, + "ce_orig": 0.6132656931877136, + "epoch": 0.5886835861672298, + "kl_loss": 0.08205744624137878, + "loss_ib": 0.0013297316618263721, + "step": 2047 + }, + { + "ce_ib": 6.3929443359375, + "ce_orig": 1.0522645711898804, + "epoch": 0.5886835861672298, + "kl_loss": 0.06897477060556412, + "loss_ib": 0.0013290420174598694, + "step": 2047 + }, + { + "ce_ib": 4.215744972229004, + "ce_orig": 1.0365911722183228, + "epoch": 0.5889711697462074, + "kl_loss": 0.10262474417686462, + "loss_ib": 0.001447821850888431, + "step": 2048 + }, + { + "ce_ib": 5.7379069328308105, + "ce_orig": 1.0669254064559937, + "epoch": 0.5889711697462074, + "kl_loss": 0.1017623245716095, + "loss_ib": 0.0015914138639345765, + "step": 2048 + }, + { + "ce_ib": 5.4386444091796875, + "ce_orig": 0.9652189612388611, + "epoch": 0.5889711697462074, + "kl_loss": 0.10068536549806595, + "loss_ib": 0.0015507180942222476, + "step": 2048 + }, + { + "ce_ib": 6.332909107208252, + "ce_orig": 1.243117332458496, + "epoch": 0.5889711697462074, + "kl_loss": 0.052840497344732285, + "loss_ib": 0.0011616958072409034, + "step": 2048 + }, + { + "ce_ib": 6.765389919281006, + "ce_orig": 0.9466679096221924, + "epoch": 0.5892587533251852, + "kl_loss": 0.07780750095844269, + "loss_ib": 0.00145461387000978, + "step": 2049 + }, + { + "ce_ib": 3.981379747390747, + "ce_orig": 0.9079366326332092, + "epoch": 0.5892587533251852, + "kl_loss": 0.07055927067995071, + "loss_ib": 0.0011037306394428015, + "step": 2049 + }, + { + "ce_ib": 4.0784382820129395, + "ce_orig": 0.9248310327529907, + "epoch": 0.5892587533251852, + "kl_loss": 0.07809135317802429, + "loss_ib": 0.0011887573637068272, + "step": 2049 + }, + { + "ce_ib": 7.082020282745361, + "ce_orig": 1.0790510177612305, + "epoch": 0.5892587533251852, + "kl_loss": 0.05939589440822601, + "loss_ib": 0.0013021609047427773, + "step": 2049 + }, + { + "epoch": 0.5895463369041628, + "grad_norm": 0.1081758514046669, + "learning_rate": 4.6480529463923675e-05, + "loss": 0.8822, + "step": 2050 + }, + { + "ce_ib": 4.19923210144043, + "ce_orig": 0.5721045732498169, + "epoch": 0.5895463369041628, + "kl_loss": 0.06999799609184265, + "loss_ib": 0.001119903172366321, + "step": 2050 + }, + { + "ce_ib": 5.226706027984619, + "ce_orig": 1.243316888809204, + "epoch": 0.5895463369041628, + "kl_loss": 0.2894411087036133, + "loss_ib": 0.0034170816652476788, + "step": 2050 + }, + { + "ce_ib": 4.088243007659912, + "ce_orig": 0.8286119699478149, + "epoch": 0.5895463369041628, + "kl_loss": 0.1221693605184555, + "loss_ib": 0.001630517770536244, + "step": 2050 + }, + { + "ce_ib": 5.500934600830078, + "ce_orig": 1.034201979637146, + "epoch": 0.5895463369041628, + "kl_loss": 0.20154252648353577, + "loss_ib": 0.0025655184872448444, + "step": 2050 + }, + { + "ce_ib": 4.116730690002441, + "ce_orig": 0.46242547035217285, + "epoch": 0.5898339204831404, + "kl_loss": 0.10050550103187561, + "loss_ib": 0.0014167280169203877, + "step": 2051 + }, + { + "ce_ib": 5.735299587249756, + "ce_orig": 1.3255870342254639, + "epoch": 0.5898339204831404, + "kl_loss": 0.08452253043651581, + "loss_ib": 0.0014187551569193602, + "step": 2051 + }, + { + "ce_ib": 3.518056869506836, + "ce_orig": 0.7265956997871399, + "epoch": 0.5898339204831404, + "kl_loss": 0.04258617013692856, + "loss_ib": 0.0007776673883199692, + "step": 2051 + }, + { + "ce_ib": 5.878917217254639, + "ce_orig": 1.5182766914367676, + "epoch": 0.5898339204831404, + "kl_loss": 0.11835642158985138, + "loss_ib": 0.0017714559799060225, + "step": 2051 + }, + { + "ce_ib": 3.2067954540252686, + "ce_orig": 0.3664304316043854, + "epoch": 0.590121504062118, + "kl_loss": 0.1013995110988617, + "loss_ib": 0.0013346746563911438, + "step": 2052 + }, + { + "ce_ib": 4.539405345916748, + "ce_orig": 1.0713406801223755, + "epoch": 0.590121504062118, + "kl_loss": 0.06059395149350166, + "loss_ib": 0.0010598800145089626, + "step": 2052 + }, + { + "ce_ib": 3.3338184356689453, + "ce_orig": 0.6945024728775024, + "epoch": 0.590121504062118, + "kl_loss": 0.0493946298956871, + "loss_ib": 0.0008273280691355467, + "step": 2052 + }, + { + "ce_ib": 5.9124040603637695, + "ce_orig": 1.23175847530365, + "epoch": 0.590121504062118, + "kl_loss": 0.09957283735275269, + "loss_ib": 0.0015869686612859368, + "step": 2052 + }, + { + "ce_ib": 3.82425856590271, + "ce_orig": 0.5128980875015259, + "epoch": 0.5904090876410957, + "kl_loss": 0.06283324956893921, + "loss_ib": 0.0010107583366334438, + "step": 2053 + }, + { + "ce_ib": 5.354079246520996, + "ce_orig": 1.0630453824996948, + "epoch": 0.5904090876410957, + "kl_loss": 0.0587586984038353, + "loss_ib": 0.001122994814068079, + "step": 2053 + }, + { + "ce_ib": 3.552175998687744, + "ce_orig": 0.8089817762374878, + "epoch": 0.5904090876410957, + "kl_loss": 0.09100337326526642, + "loss_ib": 0.0012652513105422258, + "step": 2053 + }, + { + "ce_ib": 5.723545074462891, + "ce_orig": 1.0746796131134033, + "epoch": 0.5904090876410957, + "kl_loss": 0.09782656282186508, + "loss_ib": 0.0015506201889365911, + "step": 2053 + }, + { + "ce_ib": 4.935130596160889, + "ce_orig": 0.7773773074150085, + "epoch": 0.5906966712200733, + "kl_loss": 0.1119060218334198, + "loss_ib": 0.0016125732799991965, + "step": 2054 + }, + { + "ce_ib": 4.038084030151367, + "ce_orig": 0.8203911185264587, + "epoch": 0.5906966712200733, + "kl_loss": 0.06681142747402191, + "loss_ib": 0.00107192259747535, + "step": 2054 + }, + { + "ce_ib": 3.6259610652923584, + "ce_orig": 0.6518917679786682, + "epoch": 0.5906966712200733, + "kl_loss": 0.08289454877376556, + "loss_ib": 0.0011915415525436401, + "step": 2054 + }, + { + "ce_ib": 5.136064052581787, + "ce_orig": 0.8619645833969116, + "epoch": 0.5906966712200733, + "kl_loss": 0.1003623753786087, + "loss_ib": 0.0015172300627455115, + "step": 2054 + }, + { + "epoch": 0.5909842547990509, + "grad_norm": 0.08082108199596405, + "learning_rate": 4.646065117107361e-05, + "loss": 0.8315, + "step": 2055 + }, + { + "ce_ib": 2.40557861328125, + "ce_orig": 0.4118955731391907, + "epoch": 0.5909842547990509, + "kl_loss": 0.08587822318077087, + "loss_ib": 0.0010993400355800986, + "step": 2055 + }, + { + "ce_ib": 5.753388404846191, + "ce_orig": 0.785480260848999, + "epoch": 0.5909842547990509, + "kl_loss": 0.06827150285243988, + "loss_ib": 0.001258053700439632, + "step": 2055 + }, + { + "ce_ib": 4.476674556732178, + "ce_orig": 0.5982604622840881, + "epoch": 0.5909842547990509, + "kl_loss": 0.06031617522239685, + "loss_ib": 0.001050829072482884, + "step": 2055 + }, + { + "ce_ib": 4.422375679016113, + "ce_orig": 0.732926607131958, + "epoch": 0.5909842547990509, + "kl_loss": 0.11808363348245621, + "loss_ib": 0.0016230738256126642, + "step": 2055 + }, + { + "ce_ib": 5.077948570251465, + "ce_orig": 0.8266111016273499, + "epoch": 0.5912718383780287, + "kl_loss": 0.08961308747529984, + "loss_ib": 0.001403925707563758, + "step": 2056 + }, + { + "ce_ib": 2.699659824371338, + "ce_orig": 0.677245020866394, + "epoch": 0.5912718383780287, + "kl_loss": 0.03275396302342415, + "loss_ib": 0.000597505597397685, + "step": 2056 + }, + { + "ce_ib": 5.497993469238281, + "ce_orig": 0.667269229888916, + "epoch": 0.5912718383780287, + "kl_loss": 0.09359412640333176, + "loss_ib": 0.0014857405330985785, + "step": 2056 + }, + { + "ce_ib": 4.043445587158203, + "ce_orig": 0.5731774568557739, + "epoch": 0.5912718383780287, + "kl_loss": 0.07998596131801605, + "loss_ib": 0.0012042040470987558, + "step": 2056 + }, + { + "ce_ib": 3.2453598976135254, + "ce_orig": 0.596089780330658, + "epoch": 0.5915594219570063, + "kl_loss": 0.062302395701408386, + "loss_ib": 0.0009475598926655948, + "step": 2057 + }, + { + "ce_ib": 3.8999950885772705, + "ce_orig": 0.75999915599823, + "epoch": 0.5915594219570063, + "kl_loss": 0.12473268061876297, + "loss_ib": 0.0016373263206332922, + "step": 2057 + }, + { + "ce_ib": 7.115580081939697, + "ce_orig": 1.4431239366531372, + "epoch": 0.5915594219570063, + "kl_loss": 0.09124178439378738, + "loss_ib": 0.0016239759279415011, + "step": 2057 + }, + { + "ce_ib": 6.023095607757568, + "ce_orig": 1.0459864139556885, + "epoch": 0.5915594219570063, + "kl_loss": 0.07542262971401215, + "loss_ib": 0.0013565358240157366, + "step": 2057 + }, + { + "ce_ib": 3.0938870906829834, + "ce_orig": 0.5621851086616516, + "epoch": 0.5918470055359839, + "kl_loss": 0.0523073673248291, + "loss_ib": 0.0008324623922817409, + "step": 2058 + }, + { + "ce_ib": 4.281264305114746, + "ce_orig": 0.7708843946456909, + "epoch": 0.5918470055359839, + "kl_loss": 0.05666860193014145, + "loss_ib": 0.0009948123479261994, + "step": 2058 + }, + { + "ce_ib": 2.8394787311553955, + "ce_orig": 0.5900179147720337, + "epoch": 0.5918470055359839, + "kl_loss": 0.07317844033241272, + "loss_ib": 0.0010157322976738214, + "step": 2058 + }, + { + "ce_ib": 5.09627103805542, + "ce_orig": 0.9207716584205627, + "epoch": 0.5918470055359839, + "kl_loss": 0.08200590312480927, + "loss_ib": 0.001329686027020216, + "step": 2058 + }, + { + "ce_ib": 3.536482334136963, + "ce_orig": 0.5585939884185791, + "epoch": 0.5921345891149615, + "kl_loss": 0.16077418625354767, + "loss_ib": 0.001961390022188425, + "step": 2059 + }, + { + "ce_ib": 3.8069915771484375, + "ce_orig": 0.8199010491371155, + "epoch": 0.5921345891149615, + "kl_loss": 0.08136554062366486, + "loss_ib": 0.0011943546123802662, + "step": 2059 + }, + { + "ce_ib": 5.493878364562988, + "ce_orig": 1.1195169687271118, + "epoch": 0.5921345891149615, + "kl_loss": 0.06674382090568542, + "loss_ib": 0.001216825912706554, + "step": 2059 + }, + { + "ce_ib": 3.5955402851104736, + "ce_orig": 0.7451704144477844, + "epoch": 0.5921345891149615, + "kl_loss": 0.0499025397002697, + "loss_ib": 0.0008585794130340219, + "step": 2059 + }, + { + "epoch": 0.5924221726939392, + "grad_norm": 0.09343920648097992, + "learning_rate": 4.644072117453376e-05, + "loss": 0.9232, + "step": 2060 + }, + { + "ce_ib": 4.204752445220947, + "ce_orig": 0.6054661273956299, + "epoch": 0.5924221726939392, + "kl_loss": 0.055034298449754715, + "loss_ib": 0.0009708182187750936, + "step": 2060 + }, + { + "ce_ib": 4.31412410736084, + "ce_orig": 0.9928908944129944, + "epoch": 0.5924221726939392, + "kl_loss": 0.07821621745824814, + "loss_ib": 0.0012135745491832495, + "step": 2060 + }, + { + "ce_ib": 5.5565409660339355, + "ce_orig": 1.168175458908081, + "epoch": 0.5924221726939392, + "kl_loss": 0.07055503129959106, + "loss_ib": 0.001261204481124878, + "step": 2060 + }, + { + "ce_ib": 3.2178139686584473, + "ce_orig": 0.8180133104324341, + "epoch": 0.5924221726939392, + "kl_loss": 0.05941479653120041, + "loss_ib": 0.0009159293840639293, + "step": 2060 + }, + { + "ce_ib": 4.455956935882568, + "ce_orig": 0.6298547983169556, + "epoch": 0.5927097562729168, + "kl_loss": 0.10731804370880127, + "loss_ib": 0.0015187760582193732, + "step": 2061 + }, + { + "ce_ib": 7.090084552764893, + "ce_orig": 1.0713565349578857, + "epoch": 0.5927097562729168, + "kl_loss": 0.067634716629982, + "loss_ib": 0.0013853556010872126, + "step": 2061 + }, + { + "ce_ib": 5.147576808929443, + "ce_orig": 0.8333153128623962, + "epoch": 0.5927097562729168, + "kl_loss": 0.07789528369903564, + "loss_ib": 0.001293710432946682, + "step": 2061 + }, + { + "ce_ib": 2.548065423965454, + "ce_orig": 0.6109502911567688, + "epoch": 0.5927097562729168, + "kl_loss": 0.04820297658443451, + "loss_ib": 0.0007368363440036774, + "step": 2061 + }, + { + "ce_ib": 7.6440935134887695, + "ce_orig": 1.6081788539886475, + "epoch": 0.5929973398518945, + "kl_loss": 0.11264361441135406, + "loss_ib": 0.00189084536395967, + "step": 2062 + }, + { + "ce_ib": 4.742297172546387, + "ce_orig": 0.8929616212844849, + "epoch": 0.5929973398518945, + "kl_loss": 0.09952028095722198, + "loss_ib": 0.0014694324927404523, + "step": 2062 + }, + { + "ce_ib": 3.300107717514038, + "ce_orig": 0.6712356209754944, + "epoch": 0.5929973398518945, + "kl_loss": 0.051182106137275696, + "loss_ib": 0.0008418318466283381, + "step": 2062 + }, + { + "ce_ib": 4.406223297119141, + "ce_orig": 0.8410440683364868, + "epoch": 0.5929973398518945, + "kl_loss": 0.05530940741300583, + "loss_ib": 0.0009937164140865207, + "step": 2062 + }, + { + "ce_ib": 5.304649829864502, + "ce_orig": 0.8441688418388367, + "epoch": 0.5932849234308721, + "kl_loss": 0.07898960262537003, + "loss_ib": 0.0013203610433265567, + "step": 2063 + }, + { + "ce_ib": 5.503871917724609, + "ce_orig": 1.0656681060791016, + "epoch": 0.5932849234308721, + "kl_loss": 0.08057437092065811, + "loss_ib": 0.0013561308151111007, + "step": 2063 + }, + { + "ce_ib": 4.339456081390381, + "ce_orig": 0.8475828766822815, + "epoch": 0.5932849234308721, + "kl_loss": 0.09946542978286743, + "loss_ib": 0.0014285999350249767, + "step": 2063 + }, + { + "ce_ib": 3.0389437675476074, + "ce_orig": 0.6237771511077881, + "epoch": 0.5932849234308721, + "kl_loss": 0.054678723216056824, + "loss_ib": 0.0008506815647706389, + "step": 2063 + }, + { + "ce_ib": 4.895567893981934, + "ce_orig": 0.5457957983016968, + "epoch": 0.5935725070098498, + "kl_loss": 0.06195592135190964, + "loss_ib": 0.0011091160122305155, + "step": 2064 + }, + { + "ce_ib": 7.533478736877441, + "ce_orig": 1.3536343574523926, + "epoch": 0.5935725070098498, + "kl_loss": 0.07016502320766449, + "loss_ib": 0.0014549980405718088, + "step": 2064 + }, + { + "ce_ib": 4.340816974639893, + "ce_orig": 0.8144227862358093, + "epoch": 0.5935725070098498, + "kl_loss": 0.1810043752193451, + "loss_ib": 0.0022441253531724215, + "step": 2064 + }, + { + "ce_ib": 5.256992340087891, + "ce_orig": 0.777481734752655, + "epoch": 0.5935725070098498, + "kl_loss": 0.30031710863113403, + "loss_ib": 0.003528870176523924, + "step": 2064 + }, + { + "epoch": 0.5938600905888274, + "grad_norm": 0.08773989975452423, + "learning_rate": 4.64207395223201e-05, + "loss": 0.838, + "step": 2065 + }, + { + "ce_ib": 3.6695809364318848, + "ce_orig": 0.6021111011505127, + "epoch": 0.5938600905888274, + "kl_loss": 0.058787599205970764, + "loss_ib": 0.0009548340458422899, + "step": 2065 + }, + { + "ce_ib": 3.8775150775909424, + "ce_orig": 0.4667925536632538, + "epoch": 0.5938600905888274, + "kl_loss": 0.1042977124452591, + "loss_ib": 0.0014307285891845822, + "step": 2065 + }, + { + "ce_ib": 5.5592360496521, + "ce_orig": 1.1223832368850708, + "epoch": 0.5938600905888274, + "kl_loss": 0.05669412761926651, + "loss_ib": 0.0011228647781535983, + "step": 2065 + }, + { + "ce_ib": 3.4668588638305664, + "ce_orig": 0.646662712097168, + "epoch": 0.5938600905888274, + "kl_loss": 0.047085195779800415, + "loss_ib": 0.000817537831608206, + "step": 2065 + }, + { + "ce_ib": 5.526556968688965, + "ce_orig": 1.0005755424499512, + "epoch": 0.594147674167805, + "kl_loss": 0.06490753591060638, + "loss_ib": 0.0012017310364171863, + "step": 2066 + }, + { + "ce_ib": 5.461888790130615, + "ce_orig": 0.6652884483337402, + "epoch": 0.594147674167805, + "kl_loss": 0.06769086420536041, + "loss_ib": 0.0012230974389240146, + "step": 2066 + }, + { + "ce_ib": 2.8657302856445312, + "ce_orig": 0.6103954315185547, + "epoch": 0.594147674167805, + "kl_loss": 0.04990319907665253, + "loss_ib": 0.000785604992415756, + "step": 2066 + }, + { + "ce_ib": 5.835948944091797, + "ce_orig": 1.144286036491394, + "epoch": 0.594147674167805, + "kl_loss": 0.07933592051267624, + "loss_ib": 0.0013769540237262845, + "step": 2066 + }, + { + "ce_ib": 2.6371524333953857, + "ce_orig": 0.4652028977870941, + "epoch": 0.5944352577467826, + "kl_loss": 0.06705740094184875, + "loss_ib": 0.0009342892444692552, + "step": 2067 + }, + { + "ce_ib": 5.327298641204834, + "ce_orig": 1.1518586874008179, + "epoch": 0.5944352577467826, + "kl_loss": 0.07748109847307205, + "loss_ib": 0.0013075408060103655, + "step": 2067 + }, + { + "ce_ib": 3.618540048599243, + "ce_orig": 0.6872230172157288, + "epoch": 0.5944352577467826, + "kl_loss": 0.04563106968998909, + "loss_ib": 0.0008181646117009223, + "step": 2067 + }, + { + "ce_ib": 2.9946000576019287, + "ce_orig": 0.6202853322029114, + "epoch": 0.5944352577467826, + "kl_loss": 0.08429291099309921, + "loss_ib": 0.0011423890246078372, + "step": 2067 + }, + { + "ce_ib": 5.91436767578125, + "ce_orig": 1.219032883644104, + "epoch": 0.5947228413257603, + "kl_loss": 0.07837577164173126, + "loss_ib": 0.0013751944061368704, + "step": 2068 + }, + { + "ce_ib": 3.552194356918335, + "ce_orig": 0.5270843505859375, + "epoch": 0.5947228413257603, + "kl_loss": 0.07128369808197021, + "loss_ib": 0.0010680563282221556, + "step": 2068 + }, + { + "ce_ib": 2.782329797744751, + "ce_orig": 0.47876259684562683, + "epoch": 0.5947228413257603, + "kl_loss": 0.04466177523136139, + "loss_ib": 0.0007248507463373244, + "step": 2068 + }, + { + "ce_ib": 2.8958351612091064, + "ce_orig": 0.6140813231468201, + "epoch": 0.5947228413257603, + "kl_loss": 0.04070543125271797, + "loss_ib": 0.0006966377841308713, + "step": 2068 + }, + { + "ce_ib": 3.000549793243408, + "ce_orig": 0.5682402849197388, + "epoch": 0.595010424904738, + "kl_loss": 0.08354972302913666, + "loss_ib": 0.001135552185587585, + "step": 2069 + }, + { + "ce_ib": 3.4190237522125244, + "ce_orig": 0.5711938142776489, + "epoch": 0.595010424904738, + "kl_loss": 0.061060160398483276, + "loss_ib": 0.0009525039349682629, + "step": 2069 + }, + { + "ce_ib": 3.69130539894104, + "ce_orig": 0.9374002814292908, + "epoch": 0.595010424904738, + "kl_loss": 0.0619327574968338, + "loss_ib": 0.0009884580504149199, + "step": 2069 + }, + { + "ce_ib": 4.0469865798950195, + "ce_orig": 0.5500388145446777, + "epoch": 0.595010424904738, + "kl_loss": 0.08261410892009735, + "loss_ib": 0.0012308397563174367, + "step": 2069 + }, + { + "epoch": 0.5952980084837156, + "grad_norm": 0.10215536504983902, + "learning_rate": 4.640070626257307e-05, + "loss": 0.7862, + "step": 2070 + }, + { + "ce_ib": 5.379420280456543, + "ce_orig": 0.5844455361366272, + "epoch": 0.5952980084837156, + "kl_loss": 0.12153248488903046, + "loss_ib": 0.0017532669007778168, + "step": 2070 + }, + { + "ce_ib": 6.176889896392822, + "ce_orig": 1.0730990171432495, + "epoch": 0.5952980084837156, + "kl_loss": 0.08182847499847412, + "loss_ib": 0.001435973565094173, + "step": 2070 + }, + { + "ce_ib": 3.254359006881714, + "ce_orig": 0.46784764528274536, + "epoch": 0.5952980084837156, + "kl_loss": 0.0848698690533638, + "loss_ib": 0.0011741345515474677, + "step": 2070 + }, + { + "ce_ib": 5.4775190353393555, + "ce_orig": 0.7717992663383484, + "epoch": 0.5952980084837156, + "kl_loss": 0.10611025989055634, + "loss_ib": 0.001608854508958757, + "step": 2070 + }, + { + "ce_ib": 4.691743850708008, + "ce_orig": 0.6788686513900757, + "epoch": 0.5955855920626932, + "kl_loss": 0.05634431913495064, + "loss_ib": 0.0010326175251975656, + "step": 2071 + }, + { + "ce_ib": 2.841134548187256, + "ce_orig": 0.5684543251991272, + "epoch": 0.5955855920626932, + "kl_loss": 0.0625995546579361, + "loss_ib": 0.0009101089672185481, + "step": 2071 + }, + { + "ce_ib": 3.745478868484497, + "ce_orig": 0.6492653489112854, + "epoch": 0.5955855920626932, + "kl_loss": 0.08486060798168182, + "loss_ib": 0.0012231539003551006, + "step": 2071 + }, + { + "ce_ib": 5.78892183303833, + "ce_orig": 0.7276739478111267, + "epoch": 0.5955855920626932, + "kl_loss": 0.1131758987903595, + "loss_ib": 0.001710651209577918, + "step": 2071 + }, + { + "ce_ib": 3.158191442489624, + "ce_orig": 0.340239018201828, + "epoch": 0.5958731756416709, + "kl_loss": 0.06669876724481583, + "loss_ib": 0.0009828067850321531, + "step": 2072 + }, + { + "ce_ib": 4.847036838531494, + "ce_orig": 0.850360631942749, + "epoch": 0.5958731756416709, + "kl_loss": 0.10564351826906204, + "loss_ib": 0.0015411388594657183, + "step": 2072 + }, + { + "ce_ib": 5.559203147888184, + "ce_orig": 0.7950502038002014, + "epoch": 0.5958731756416709, + "kl_loss": 0.07581854611635208, + "loss_ib": 0.00131410569883883, + "step": 2072 + }, + { + "ce_ib": 4.818588733673096, + "ce_orig": 0.8367884755134583, + "epoch": 0.5958731756416709, + "kl_loss": 0.07469891756772995, + "loss_ib": 0.0012288480065762997, + "step": 2072 + }, + { + "ce_ib": 5.3987603187561035, + "ce_orig": 0.7248184680938721, + "epoch": 0.5961607592206485, + "kl_loss": 0.10326874256134033, + "loss_ib": 0.001572563429363072, + "step": 2073 + }, + { + "ce_ib": 6.712170600891113, + "ce_orig": 1.3333362340927124, + "epoch": 0.5961607592206485, + "kl_loss": 0.09438936412334442, + "loss_ib": 0.0016151105519384146, + "step": 2073 + }, + { + "ce_ib": 4.688754081726074, + "ce_orig": 0.6698209643363953, + "epoch": 0.5961607592206485, + "kl_loss": 0.0757724940776825, + "loss_ib": 0.0012266002595424652, + "step": 2073 + }, + { + "ce_ib": 5.888779163360596, + "ce_orig": 0.6099490523338318, + "epoch": 0.5961607592206485, + "kl_loss": 0.07685381919145584, + "loss_ib": 0.00135741604026407, + "step": 2073 + }, + { + "ce_ib": 5.56715726852417, + "ce_orig": 1.1920900344848633, + "epoch": 0.5964483427996261, + "kl_loss": 0.0877915769815445, + "loss_ib": 0.0014346314128488302, + "step": 2074 + }, + { + "ce_ib": 4.014995098114014, + "ce_orig": 0.9569207429885864, + "epoch": 0.5964483427996261, + "kl_loss": 0.0815640538930893, + "loss_ib": 0.001217140001244843, + "step": 2074 + }, + { + "ce_ib": 5.602878570556641, + "ce_orig": 1.1999964714050293, + "epoch": 0.5964483427996261, + "kl_loss": 0.07075855135917664, + "loss_ib": 0.0012678733328357339, + "step": 2074 + }, + { + "ce_ib": 4.6295013427734375, + "ce_orig": 0.9096139669418335, + "epoch": 0.5964483427996261, + "kl_loss": 0.07487296313047409, + "loss_ib": 0.0012116796569898725, + "step": 2074 + }, + { + "epoch": 0.5967359263786037, + "grad_norm": 0.10837686061859131, + "learning_rate": 4.638062144355745e-05, + "loss": 0.8241, + "step": 2075 + }, + { + "ce_ib": 3.2095203399658203, + "ce_orig": 0.4709847569465637, + "epoch": 0.5967359263786037, + "kl_loss": 0.06025013327598572, + "loss_ib": 0.0009234533063136041, + "step": 2075 + }, + { + "ce_ib": 1.55014169216156, + "ce_orig": 0.17787082493305206, + "epoch": 0.5967359263786037, + "kl_loss": 0.12394766509532928, + "loss_ib": 0.0013944907113909721, + "step": 2075 + }, + { + "ce_ib": 4.04144287109375, + "ce_orig": 0.8642741441726685, + "epoch": 0.5967359263786037, + "kl_loss": 0.08751553297042847, + "loss_ib": 0.0012792994966730475, + "step": 2075 + }, + { + "ce_ib": 3.9950637817382812, + "ce_orig": 0.7230364680290222, + "epoch": 0.5967359263786037, + "kl_loss": 0.07402998208999634, + "loss_ib": 0.0011398062342777848, + "step": 2075 + }, + { + "ce_ib": 3.3049919605255127, + "ce_orig": 0.4729066789150238, + "epoch": 0.5970235099575815, + "kl_loss": 0.08689044415950775, + "loss_ib": 0.0011994035448879004, + "step": 2076 + }, + { + "ce_ib": 4.3504133224487305, + "ce_orig": 0.7108391523361206, + "epoch": 0.5970235099575815, + "kl_loss": 0.08049355447292328, + "loss_ib": 0.0012399768456816673, + "step": 2076 + }, + { + "ce_ib": 5.506763458251953, + "ce_orig": 0.954072892665863, + "epoch": 0.5970235099575815, + "kl_loss": 0.11084399372339249, + "loss_ib": 0.001659116242080927, + "step": 2076 + }, + { + "ce_ib": 3.3170828819274902, + "ce_orig": 0.2528087794780731, + "epoch": 0.5970235099575815, + "kl_loss": 0.06096717715263367, + "loss_ib": 0.0009413800435140729, + "step": 2076 + }, + { + "ce_ib": 3.334319829940796, + "ce_orig": 0.6052706241607666, + "epoch": 0.5973110935365591, + "kl_loss": 0.07117627561092377, + "loss_ib": 0.0010451946873217821, + "step": 2077 + }, + { + "ce_ib": 5.092136383056641, + "ce_orig": 0.7540692687034607, + "epoch": 0.5973110935365591, + "kl_loss": 0.06461460143327713, + "loss_ib": 0.001155359554104507, + "step": 2077 + }, + { + "ce_ib": 7.818737983703613, + "ce_orig": 1.0914714336395264, + "epoch": 0.5973110935365591, + "kl_loss": 0.09560540318489075, + "loss_ib": 0.0017379277851432562, + "step": 2077 + }, + { + "ce_ib": 6.016637325286865, + "ce_orig": 0.9305354952812195, + "epoch": 0.5973110935365591, + "kl_loss": 0.07199804484844208, + "loss_ib": 0.0013216441730037332, + "step": 2077 + }, + { + "ce_ib": 3.6876585483551025, + "ce_orig": 0.4878508746623993, + "epoch": 0.5975986771155367, + "kl_loss": 0.11475399136543274, + "loss_ib": 0.0015163057250902057, + "step": 2078 + }, + { + "ce_ib": 6.245387077331543, + "ce_orig": 1.2329305410385132, + "epoch": 0.5975986771155367, + "kl_loss": 0.08983008563518524, + "loss_ib": 0.0015228395350277424, + "step": 2078 + }, + { + "ce_ib": 2.8920717239379883, + "ce_orig": 0.8226154446601868, + "epoch": 0.5975986771155367, + "kl_loss": 0.03781715780496597, + "loss_ib": 0.0006673787138424814, + "step": 2078 + }, + { + "ce_ib": 4.954875469207764, + "ce_orig": 0.8334072232246399, + "epoch": 0.5975986771155367, + "kl_loss": 0.10071307420730591, + "loss_ib": 0.0015026181936264038, + "step": 2078 + }, + { + "ce_ib": 4.471312522888184, + "ce_orig": 0.7321937680244446, + "epoch": 0.5978862606945143, + "kl_loss": 0.07011980563402176, + "loss_ib": 0.0011483292328193784, + "step": 2079 + }, + { + "ce_ib": 5.54780387878418, + "ce_orig": 0.7626389861106873, + "epoch": 0.5978862606945143, + "kl_loss": 0.1450086236000061, + "loss_ib": 0.002004866488277912, + "step": 2079 + }, + { + "ce_ib": 4.522610187530518, + "ce_orig": 0.7549071907997131, + "epoch": 0.5978862606945143, + "kl_loss": 0.055982112884521484, + "loss_ib": 0.001012082095257938, + "step": 2079 + }, + { + "ce_ib": 4.017058372497559, + "ce_orig": 0.5780177116394043, + "epoch": 0.5978862606945143, + "kl_loss": 0.09856615960597992, + "loss_ib": 0.0013873673742637038, + "step": 2079 + }, + { + "epoch": 0.598173844273492, + "grad_norm": 0.11253448575735092, + "learning_rate": 4.6360485113662216e-05, + "loss": 0.8449, + "step": 2080 + }, + { + "ce_ib": 4.134436130523682, + "ce_orig": 0.8664209246635437, + "epoch": 0.598173844273492, + "kl_loss": 0.06711418181657791, + "loss_ib": 0.001084585441276431, + "step": 2080 + }, + { + "ce_ib": 4.156423568725586, + "ce_orig": 0.6882872581481934, + "epoch": 0.598173844273492, + "kl_loss": 0.06302356719970703, + "loss_ib": 0.0010458779288455844, + "step": 2080 + }, + { + "ce_ib": 3.6680073738098145, + "ce_orig": 0.940829873085022, + "epoch": 0.598173844273492, + "kl_loss": 0.06245287507772446, + "loss_ib": 0.0009913294343277812, + "step": 2080 + }, + { + "ce_ib": 3.5014290809631348, + "ce_orig": 0.6589921712875366, + "epoch": 0.598173844273492, + "kl_loss": 0.06951694935560226, + "loss_ib": 0.0010453123832121491, + "step": 2080 + }, + { + "ce_ib": 2.78205943107605, + "ce_orig": 0.5813868641853333, + "epoch": 0.5984614278524696, + "kl_loss": 0.07668274641036987, + "loss_ib": 0.00104503333568573, + "step": 2081 + }, + { + "ce_ib": 2.699397087097168, + "ce_orig": 0.36945998668670654, + "epoch": 0.5984614278524696, + "kl_loss": 0.08271525055170059, + "loss_ib": 0.0010970921721309423, + "step": 2081 + }, + { + "ce_ib": 4.577265739440918, + "ce_orig": 0.7430624961853027, + "epoch": 0.5984614278524696, + "kl_loss": 0.04880734160542488, + "loss_ib": 0.0009457999258302152, + "step": 2081 + }, + { + "ce_ib": 4.9296369552612305, + "ce_orig": 1.0248816013336182, + "epoch": 0.5984614278524696, + "kl_loss": 0.07763966917991638, + "loss_ib": 0.0012693603057414293, + "step": 2081 + }, + { + "ce_ib": 3.648362636566162, + "ce_orig": 0.7424232959747314, + "epoch": 0.5987490114314472, + "kl_loss": 0.05293530970811844, + "loss_ib": 0.0008941892883740366, + "step": 2082 + }, + { + "ce_ib": 3.175560712814331, + "ce_orig": 0.736873209476471, + "epoch": 0.5987490114314472, + "kl_loss": 0.052593983709812164, + "loss_ib": 0.0008434958290308714, + "step": 2082 + }, + { + "ce_ib": 3.1550662517547607, + "ce_orig": 0.13815343379974365, + "epoch": 0.5987490114314472, + "kl_loss": 0.040712859481573105, + "loss_ib": 0.000722635246347636, + "step": 2082 + }, + { + "ce_ib": 5.496281147003174, + "ce_orig": 1.0098540782928467, + "epoch": 0.5987490114314472, + "kl_loss": 0.07159233093261719, + "loss_ib": 0.0012655514292418957, + "step": 2082 + }, + { + "ce_ib": 4.588836193084717, + "ce_orig": 0.505266547203064, + "epoch": 0.5990365950104249, + "kl_loss": 0.11280664056539536, + "loss_ib": 0.0015869499184191227, + "step": 2083 + }, + { + "ce_ib": 2.713188648223877, + "ce_orig": 0.35786911845207214, + "epoch": 0.5990365950104249, + "kl_loss": 0.039982397109270096, + "loss_ib": 0.0006711427704431117, + "step": 2083 + }, + { + "ce_ib": 1.479994297027588, + "ce_orig": 0.19070060551166534, + "epoch": 0.5990365950104249, + "kl_loss": 0.13411271572113037, + "loss_ib": 0.0014891264727339149, + "step": 2083 + }, + { + "ce_ib": 6.927892208099365, + "ce_orig": 1.25346839427948, + "epoch": 0.5990365950104249, + "kl_loss": 0.07030518352985382, + "loss_ib": 0.001395840896293521, + "step": 2083 + }, + { + "ce_ib": 4.141465663909912, + "ce_orig": 0.8182768225669861, + "epoch": 0.5993241785894026, + "kl_loss": 0.06246529147028923, + "loss_ib": 0.0010387994116172194, + "step": 2084 + }, + { + "ce_ib": 4.957434177398682, + "ce_orig": 0.43235883116722107, + "epoch": 0.5993241785894026, + "kl_loss": 0.05625959113240242, + "loss_ib": 0.0010583392577245831, + "step": 2084 + }, + { + "ce_ib": 6.143950462341309, + "ce_orig": 1.1102148294448853, + "epoch": 0.5993241785894026, + "kl_loss": 0.05589437484741211, + "loss_ib": 0.0011733387364074588, + "step": 2084 + }, + { + "ce_ib": 6.579360485076904, + "ce_orig": 1.46086585521698, + "epoch": 0.5993241785894026, + "kl_loss": 0.05461021512746811, + "loss_ib": 0.0012040381552651525, + "step": 2084 + }, + { + "epoch": 0.5996117621683802, + "grad_norm": 0.10154346376657486, + "learning_rate": 4.634029732140047e-05, + "loss": 0.8047, + "step": 2085 + }, + { + "ce_ib": 7.064938068389893, + "ce_orig": 1.7814552783966064, + "epoch": 0.5996117621683802, + "kl_loss": 0.09985016286373138, + "loss_ib": 0.0017049952875822783, + "step": 2085 + }, + { + "ce_ib": 3.140153646469116, + "ce_orig": 0.6607750654220581, + "epoch": 0.5996117621683802, + "kl_loss": 0.06638900935649872, + "loss_ib": 0.0009779054671525955, + "step": 2085 + }, + { + "ce_ib": 3.0057175159454346, + "ce_orig": 0.5867830514907837, + "epoch": 0.5996117621683802, + "kl_loss": 0.04931400343775749, + "loss_ib": 0.0007937117479741573, + "step": 2085 + }, + { + "ce_ib": 5.576006889343262, + "ce_orig": 0.8134492635726929, + "epoch": 0.5996117621683802, + "kl_loss": 0.11208108067512512, + "loss_ib": 0.0016784114995971322, + "step": 2085 + }, + { + "ce_ib": 4.188833236694336, + "ce_orig": 0.7269355058670044, + "epoch": 0.5998993457473578, + "kl_loss": 0.04908500611782074, + "loss_ib": 0.0009097332949750125, + "step": 2086 + }, + { + "ce_ib": 3.675173044204712, + "ce_orig": 0.5030489563941956, + "epoch": 0.5998993457473578, + "kl_loss": 0.11733119189739227, + "loss_ib": 0.0015408291947096586, + "step": 2086 + }, + { + "ce_ib": 5.291172504425049, + "ce_orig": 0.7020983099937439, + "epoch": 0.5998993457473578, + "kl_loss": 0.09048501402139664, + "loss_ib": 0.0014339672634378076, + "step": 2086 + }, + { + "ce_ib": 4.290249347686768, + "ce_orig": 0.6305073499679565, + "epoch": 0.5998993457473578, + "kl_loss": 0.11802785843610764, + "loss_ib": 0.0016093035228550434, + "step": 2086 + }, + { + "ce_ib": 2.922851800918579, + "ce_orig": 0.6575307250022888, + "epoch": 0.6001869293263354, + "kl_loss": 0.044972263276576996, + "loss_ib": 0.0007420078036375344, + "step": 2087 + }, + { + "ce_ib": 2.4510555267333984, + "ce_orig": 0.5721491575241089, + "epoch": 0.6001869293263354, + "kl_loss": 0.0492713488638401, + "loss_ib": 0.0007378190639428794, + "step": 2087 + }, + { + "ce_ib": 2.977273941040039, + "ce_orig": 0.4918285608291626, + "epoch": 0.6001869293263354, + "kl_loss": 0.08045618236064911, + "loss_ib": 0.0011022891849279404, + "step": 2087 + }, + { + "ce_ib": 2.9311697483062744, + "ce_orig": 0.4098494350910187, + "epoch": 0.6001869293263354, + "kl_loss": 0.058041058480739594, + "loss_ib": 0.0008735274896025658, + "step": 2087 + }, + { + "ce_ib": 4.193668365478516, + "ce_orig": 0.7143855690956116, + "epoch": 0.6004745129053131, + "kl_loss": 0.07815922796726227, + "loss_ib": 0.0012009590864181519, + "step": 2088 + }, + { + "ce_ib": 4.414350509643555, + "ce_orig": 0.9603128433227539, + "epoch": 0.6004745129053131, + "kl_loss": 0.06138592213392258, + "loss_ib": 0.0010552942985668778, + "step": 2088 + }, + { + "ce_ib": 4.055280685424805, + "ce_orig": 0.6393210887908936, + "epoch": 0.6004745129053131, + "kl_loss": 0.08152930438518524, + "loss_ib": 0.0012208211701363325, + "step": 2088 + }, + { + "ce_ib": 4.369076728820801, + "ce_orig": 1.0526084899902344, + "epoch": 0.6004745129053131, + "kl_loss": 0.0800827294588089, + "loss_ib": 0.0012377349194139242, + "step": 2088 + }, + { + "ce_ib": 5.9587836265563965, + "ce_orig": 0.9938870072364807, + "epoch": 0.6007620964842908, + "kl_loss": 0.09581607580184937, + "loss_ib": 0.0015540390741080046, + "step": 2089 + }, + { + "ce_ib": 4.813379764556885, + "ce_orig": 1.070404291152954, + "epoch": 0.6007620964842908, + "kl_loss": 0.17942826449871063, + "loss_ib": 0.002275620587170124, + "step": 2089 + }, + { + "ce_ib": 3.5038113594055176, + "ce_orig": 0.6876385807991028, + "epoch": 0.6007620964842908, + "kl_loss": 0.07104212790727615, + "loss_ib": 0.0010608023731037974, + "step": 2089 + }, + { + "ce_ib": 5.571468353271484, + "ce_orig": 1.0281459093093872, + "epoch": 0.6007620964842908, + "kl_loss": 0.08737383037805557, + "loss_ib": 0.0014308851677924395, + "step": 2089 + }, + { + "epoch": 0.6010496800632684, + "grad_norm": 0.0805075466632843, + "learning_rate": 4.632005811540929e-05, + "loss": 0.8124, + "step": 2090 + }, + { + "ce_ib": 4.273382186889648, + "ce_orig": 0.6330124139785767, + "epoch": 0.6010496800632684, + "kl_loss": 0.05465317890048027, + "loss_ib": 0.0009738699882291257, + "step": 2090 + }, + { + "ce_ib": 3.7746946811676025, + "ce_orig": 0.7944877743721008, + "epoch": 0.6010496800632684, + "kl_loss": 0.07060226798057556, + "loss_ib": 0.0010834921849891543, + "step": 2090 + }, + { + "ce_ib": 4.2204155921936035, + "ce_orig": 1.1091099977493286, + "epoch": 0.6010496800632684, + "kl_loss": 0.07148639857769012, + "loss_ib": 0.001136905513703823, + "step": 2090 + }, + { + "ce_ib": 3.5369465351104736, + "ce_orig": 0.6430749297142029, + "epoch": 0.6010496800632684, + "kl_loss": 0.10132335126399994, + "loss_ib": 0.001366928219795227, + "step": 2090 + }, + { + "ce_ib": 3.3819878101348877, + "ce_orig": 0.7084623575210571, + "epoch": 0.601337263642246, + "kl_loss": 0.06496082991361618, + "loss_ib": 0.0009878070559352636, + "step": 2091 + }, + { + "ce_ib": 3.5955123901367188, + "ce_orig": 0.6423996090888977, + "epoch": 0.601337263642246, + "kl_loss": 0.04727928340435028, + "loss_ib": 0.0008323440561071038, + "step": 2091 + }, + { + "ce_ib": 4.621158599853516, + "ce_orig": 0.763398289680481, + "epoch": 0.601337263642246, + "kl_loss": 0.07024911791086197, + "loss_ib": 0.0011646070051938295, + "step": 2091 + }, + { + "ce_ib": 4.137864589691162, + "ce_orig": 0.9241937398910522, + "epoch": 0.601337263642246, + "kl_loss": 0.05778088420629501, + "loss_ib": 0.000991595210507512, + "step": 2091 + }, + { + "ce_ib": 4.023037433624268, + "ce_orig": 0.8944536447525024, + "epoch": 0.6016248472212237, + "kl_loss": 0.17321926355361938, + "loss_ib": 0.002134496346116066, + "step": 2092 + }, + { + "ce_ib": 5.952149868011475, + "ce_orig": 1.3722182512283325, + "epoch": 0.6016248472212237, + "kl_loss": 0.0630234107375145, + "loss_ib": 0.0012254490284249187, + "step": 2092 + }, + { + "ce_ib": 5.916097640991211, + "ce_orig": 1.4014886617660522, + "epoch": 0.6016248472212237, + "kl_loss": 0.07763876020908356, + "loss_ib": 0.0013679973781108856, + "step": 2092 + }, + { + "ce_ib": 5.214680194854736, + "ce_orig": 1.2824018001556396, + "epoch": 0.6016248472212237, + "kl_loss": 0.14720818400382996, + "loss_ib": 0.001993549754843116, + "step": 2092 + }, + { + "ce_ib": 3.949597120285034, + "ce_orig": 0.8021050095558167, + "epoch": 0.6019124308002013, + "kl_loss": 0.07519830763339996, + "loss_ib": 0.0011469428427517414, + "step": 2093 + }, + { + "ce_ib": 7.2143144607543945, + "ce_orig": 1.3919119834899902, + "epoch": 0.6019124308002013, + "kl_loss": 0.07183560729026794, + "loss_ib": 0.0014397874474525452, + "step": 2093 + }, + { + "ce_ib": 6.066972255706787, + "ce_orig": 1.0502837896347046, + "epoch": 0.6019124308002013, + "kl_loss": 0.1115509644150734, + "loss_ib": 0.0017222067108377814, + "step": 2093 + }, + { + "ce_ib": 4.3752264976501465, + "ce_orig": 0.5817583799362183, + "epoch": 0.6019124308002013, + "kl_loss": 0.12368129193782806, + "loss_ib": 0.0016743355663493276, + "step": 2093 + }, + { + "ce_ib": 3.596672296524048, + "ce_orig": 0.5850139260292053, + "epoch": 0.6022000143791789, + "kl_loss": 0.07193173468112946, + "loss_ib": 0.0010789845837280154, + "step": 2094 + }, + { + "ce_ib": 2.4870073795318604, + "ce_orig": 0.4089277982711792, + "epoch": 0.6022000143791789, + "kl_loss": 0.08463941514492035, + "loss_ib": 0.0010950948344543576, + "step": 2094 + }, + { + "ce_ib": 3.7254185676574707, + "ce_orig": 1.0527628660202026, + "epoch": 0.6022000143791789, + "kl_loss": 0.05408704653382301, + "loss_ib": 0.0009134122519753873, + "step": 2094 + }, + { + "ce_ib": 2.5877742767333984, + "ce_orig": 0.37362349033355713, + "epoch": 0.6022000143791789, + "kl_loss": 0.1393352448940277, + "loss_ib": 0.001652129809372127, + "step": 2094 + }, + { + "epoch": 0.6024875979581565, + "grad_norm": 0.0906853973865509, + "learning_rate": 4.629976754444962e-05, + "loss": 0.8819, + "step": 2095 + }, + { + "ce_ib": 3.024550199508667, + "ce_orig": 0.8227662444114685, + "epoch": 0.6024875979581565, + "kl_loss": 0.06042005866765976, + "loss_ib": 0.0009066556231118739, + "step": 2095 + }, + { + "ce_ib": 2.4847307205200195, + "ce_orig": 0.6544086337089539, + "epoch": 0.6024875979581565, + "kl_loss": 0.047141753137111664, + "loss_ib": 0.0007198905805125833, + "step": 2095 + }, + { + "ce_ib": 5.185691833496094, + "ce_orig": 1.0781711339950562, + "epoch": 0.6024875979581565, + "kl_loss": 0.06270484626293182, + "loss_ib": 0.0011456176871433854, + "step": 2095 + }, + { + "ce_ib": 5.090315818786621, + "ce_orig": 1.0655585527420044, + "epoch": 0.6024875979581565, + "kl_loss": 0.05357814580202103, + "loss_ib": 0.0010448129614815116, + "step": 2095 + }, + { + "ce_ib": 4.575950622558594, + "ce_orig": 0.7886934876441956, + "epoch": 0.6027751815371343, + "kl_loss": 0.13329452276229858, + "loss_ib": 0.0017905401764437556, + "step": 2096 + }, + { + "ce_ib": 4.882061004638672, + "ce_orig": 0.6528818011283875, + "epoch": 0.6027751815371343, + "kl_loss": 0.10093575716018677, + "loss_ib": 0.001497563673183322, + "step": 2096 + }, + { + "ce_ib": 3.8041934967041016, + "ce_orig": 0.7179158329963684, + "epoch": 0.6027751815371343, + "kl_loss": 0.08763104677200317, + "loss_ib": 0.001256729825399816, + "step": 2096 + }, + { + "ce_ib": 4.358056545257568, + "ce_orig": 0.9228951930999756, + "epoch": 0.6027751815371343, + "kl_loss": 0.047177013009786606, + "loss_ib": 0.000907575769815594, + "step": 2096 + }, + { + "ce_ib": 6.95786190032959, + "ce_orig": 1.8144116401672363, + "epoch": 0.6030627651161119, + "kl_loss": 0.05249132215976715, + "loss_ib": 0.0012206993997097015, + "step": 2097 + }, + { + "ce_ib": 4.221665382385254, + "ce_orig": 1.0254933834075928, + "epoch": 0.6030627651161119, + "kl_loss": 0.11511662602424622, + "loss_ib": 0.0015733328182250261, + "step": 2097 + }, + { + "ce_ib": 6.25553035736084, + "ce_orig": 0.7407022714614868, + "epoch": 0.6030627651161119, + "kl_loss": 0.11253818869590759, + "loss_ib": 0.0017509349854663014, + "step": 2097 + }, + { + "ce_ib": 4.652104377746582, + "ce_orig": 0.9551563262939453, + "epoch": 0.6030627651161119, + "kl_loss": 0.08007493615150452, + "loss_ib": 0.0012659596977755427, + "step": 2097 + }, + { + "ce_ib": 5.760951995849609, + "ce_orig": 1.3133198022842407, + "epoch": 0.6033503486950895, + "kl_loss": 0.11237670481204987, + "loss_ib": 0.0016998621867969632, + "step": 2098 + }, + { + "ce_ib": 4.015384197235107, + "ce_orig": 0.7997077107429504, + "epoch": 0.6033503486950895, + "kl_loss": 0.06969357281923294, + "loss_ib": 0.0010984741384163499, + "step": 2098 + }, + { + "ce_ib": 5.648448944091797, + "ce_orig": 1.1794495582580566, + "epoch": 0.6033503486950895, + "kl_loss": 0.07929180562496185, + "loss_ib": 0.0013577629579231143, + "step": 2098 + }, + { + "ce_ib": 4.772024631500244, + "ce_orig": 1.0110080242156982, + "epoch": 0.6033503486950895, + "kl_loss": 0.08070142567157745, + "loss_ib": 0.0012842166470363736, + "step": 2098 + }, + { + "ce_ib": 4.712130069732666, + "ce_orig": 0.908007800579071, + "epoch": 0.6036379322740671, + "kl_loss": 0.1198878288269043, + "loss_ib": 0.0016700912965461612, + "step": 2099 + }, + { + "ce_ib": 4.057485580444336, + "ce_orig": 0.949890673160553, + "epoch": 0.6036379322740671, + "kl_loss": 0.04618522524833679, + "loss_ib": 0.0008676007855683565, + "step": 2099 + }, + { + "ce_ib": 4.9820170402526855, + "ce_orig": 0.784426748752594, + "epoch": 0.6036379322740671, + "kl_loss": 0.06336407363414764, + "loss_ib": 0.001131842378526926, + "step": 2099 + }, + { + "ce_ib": 5.131165504455566, + "ce_orig": 1.1834725141525269, + "epoch": 0.6036379322740671, + "kl_loss": 0.0842083990573883, + "loss_ib": 0.0013552005402743816, + "step": 2099 + }, + { + "epoch": 0.6039255158530448, + "grad_norm": 0.09092824906110764, + "learning_rate": 4.627942565740615e-05, + "loss": 0.8701, + "step": 2100 + }, + { + "ce_ib": 5.375036239624023, + "ce_orig": 1.1978404521942139, + "epoch": 0.6039255158530448, + "kl_loss": 0.051506102085113525, + "loss_ib": 0.0010525647085160017, + "step": 2100 + }, + { + "ce_ib": 3.72866153717041, + "ce_orig": 0.5934877395629883, + "epoch": 0.6039255158530448, + "kl_loss": 0.09789986908435822, + "loss_ib": 0.0013518647756427526, + "step": 2100 + }, + { + "ce_ib": 6.080323696136475, + "ce_orig": 0.8683134317398071, + "epoch": 0.6039255158530448, + "kl_loss": 0.08430097997188568, + "loss_ib": 0.0014510421315208077, + "step": 2100 + }, + { + "ce_ib": 2.022094249725342, + "ce_orig": 0.19769932329654694, + "epoch": 0.6039255158530448, + "kl_loss": 0.17412647604942322, + "loss_ib": 0.0019434740534052253, + "step": 2100 + }, + { + "ce_ib": 6.6803483963012695, + "ce_orig": 1.0400179624557495, + "epoch": 0.6042130994320224, + "kl_loss": 0.09957490861415863, + "loss_ib": 0.0016637839144095778, + "step": 2101 + }, + { + "ce_ib": 3.1458163261413574, + "ce_orig": 0.25337520241737366, + "epoch": 0.6042130994320224, + "kl_loss": 0.07369203865528107, + "loss_ib": 0.0010515019530430436, + "step": 2101 + }, + { + "ce_ib": 4.353493690490723, + "ce_orig": 0.7449761033058167, + "epoch": 0.6042130994320224, + "kl_loss": 0.11585259437561035, + "loss_ib": 0.0015938752330839634, + "step": 2101 + }, + { + "ce_ib": 4.035175800323486, + "ce_orig": 0.9653478264808655, + "epoch": 0.6042130994320224, + "kl_loss": 0.07370004057884216, + "loss_ib": 0.0011405179975554347, + "step": 2101 + }, + { + "ce_ib": 3.639697313308716, + "ce_orig": 0.462158203125, + "epoch": 0.604500683011, + "kl_loss": 0.06513676047325134, + "loss_ib": 0.0010153373004868627, + "step": 2102 + }, + { + "ce_ib": 4.505585670471191, + "ce_orig": 0.7815728783607483, + "epoch": 0.604500683011, + "kl_loss": 0.08241977542638779, + "loss_ib": 0.0012747562723234296, + "step": 2102 + }, + { + "ce_ib": 3.0293803215026855, + "ce_orig": 0.7031589150428772, + "epoch": 0.604500683011, + "kl_loss": 0.08319664001464844, + "loss_ib": 0.001134904334321618, + "step": 2102 + }, + { + "ce_ib": 4.739193916320801, + "ce_orig": 1.0691999197006226, + "epoch": 0.604500683011, + "kl_loss": 0.10004912316799164, + "loss_ib": 0.0014744105283170938, + "step": 2102 + }, + { + "ce_ib": 3.228957176208496, + "ce_orig": 0.6590166687965393, + "epoch": 0.6047882665899778, + "kl_loss": 0.03296642005443573, + "loss_ib": 0.0006525599164888263, + "step": 2103 + }, + { + "ce_ib": 4.4636945724487305, + "ce_orig": 0.7095764875411987, + "epoch": 0.6047882665899778, + "kl_loss": 0.07328576594591141, + "loss_ib": 0.0011792270233854651, + "step": 2103 + }, + { + "ce_ib": 7.297725677490234, + "ce_orig": 1.5814653635025024, + "epoch": 0.6047882665899778, + "kl_loss": 0.0825095921754837, + "loss_ib": 0.0015548685332760215, + "step": 2103 + }, + { + "ce_ib": 5.243929386138916, + "ce_orig": 1.2692538499832153, + "epoch": 0.6047882665899778, + "kl_loss": 0.10582824051380157, + "loss_ib": 0.0015826752642169595, + "step": 2103 + }, + { + "ce_ib": 3.3679261207580566, + "ce_orig": 0.5503583550453186, + "epoch": 0.6050758501689554, + "kl_loss": 0.09954661130905151, + "loss_ib": 0.0013322586892172694, + "step": 2104 + }, + { + "ce_ib": 2.447443723678589, + "ce_orig": 0.5644069314002991, + "epoch": 0.6050758501689554, + "kl_loss": 0.04997321963310242, + "loss_ib": 0.0007444765069521964, + "step": 2104 + }, + { + "ce_ib": 3.665867567062378, + "ce_orig": 0.944517970085144, + "epoch": 0.6050758501689554, + "kl_loss": 0.04292048513889313, + "loss_ib": 0.0007957915659062564, + "step": 2104 + }, + { + "ce_ib": 3.756237030029297, + "ce_orig": 0.7990572452545166, + "epoch": 0.6050758501689554, + "kl_loss": 0.08926662057638168, + "loss_ib": 0.0012682899832725525, + "step": 2104 + }, + { + "epoch": 0.605363433747933, + "grad_norm": 0.10049489140510559, + "learning_rate": 4.625903250328722e-05, + "loss": 0.8233, + "step": 2105 + }, + { + "ce_ib": 5.858414173126221, + "ce_orig": 0.745522677898407, + "epoch": 0.605363433747933, + "kl_loss": 0.07734955847263336, + "loss_ib": 0.0013593368930742145, + "step": 2105 + }, + { + "ce_ib": 4.32288122177124, + "ce_orig": 0.598530650138855, + "epoch": 0.605363433747933, + "kl_loss": 0.054299335926771164, + "loss_ib": 0.0009752814075909555, + "step": 2105 + }, + { + "ce_ib": 3.787050485610962, + "ce_orig": 0.8906273245811462, + "epoch": 0.605363433747933, + "kl_loss": 0.05556686967611313, + "loss_ib": 0.0009343737037852407, + "step": 2105 + }, + { + "ce_ib": 3.701219081878662, + "ce_orig": 0.8011507391929626, + "epoch": 0.605363433747933, + "kl_loss": 0.05124020576477051, + "loss_ib": 0.0008825239492580295, + "step": 2105 + }, + { + "ce_ib": 4.434439659118652, + "ce_orig": 0.5035112500190735, + "epoch": 0.6056510173269106, + "kl_loss": 0.07970254123210907, + "loss_ib": 0.0012404692824929953, + "step": 2106 + }, + { + "ce_ib": 3.7979648113250732, + "ce_orig": 0.8568388223648071, + "epoch": 0.6056510173269106, + "kl_loss": 0.044780340045690536, + "loss_ib": 0.0008275998989120126, + "step": 2106 + }, + { + "ce_ib": 3.9661481380462646, + "ce_orig": 0.9782201051712036, + "epoch": 0.6056510173269106, + "kl_loss": 0.08031740039587021, + "loss_ib": 0.0011997887631878257, + "step": 2106 + }, + { + "ce_ib": 4.0200700759887695, + "ce_orig": 0.6767290830612183, + "epoch": 0.6056510173269106, + "kl_loss": 0.06555796414613724, + "loss_ib": 0.001057586632668972, + "step": 2106 + }, + { + "ce_ib": 6.110088348388672, + "ce_orig": 1.2779786586761475, + "epoch": 0.6059386009058882, + "kl_loss": 0.12352372705936432, + "loss_ib": 0.0018462460720911622, + "step": 2107 + }, + { + "ce_ib": 4.238732814788818, + "ce_orig": 0.6250486969947815, + "epoch": 0.6059386009058882, + "kl_loss": 0.075376957654953, + "loss_ib": 0.0011776428436860442, + "step": 2107 + }, + { + "ce_ib": 3.8421528339385986, + "ce_orig": 0.749046802520752, + "epoch": 0.6059386009058882, + "kl_loss": 0.07274497300386429, + "loss_ib": 0.0011116649257019162, + "step": 2107 + }, + { + "ce_ib": 5.472637176513672, + "ce_orig": 0.8838510513305664, + "epoch": 0.6059386009058882, + "kl_loss": 0.10430526733398438, + "loss_ib": 0.0015903163002803922, + "step": 2107 + }, + { + "ce_ib": 3.3995673656463623, + "ce_orig": 0.581117570400238, + "epoch": 0.6062261844848659, + "kl_loss": 0.053407222032547, + "loss_ib": 0.0008740289486013353, + "step": 2108 + }, + { + "ce_ib": 3.923219680786133, + "ce_orig": 0.8381117582321167, + "epoch": 0.6062261844848659, + "kl_loss": 0.0918244868516922, + "loss_ib": 0.001310566789470613, + "step": 2108 + }, + { + "ce_ib": 6.260982036590576, + "ce_orig": 1.1389058828353882, + "epoch": 0.6062261844848659, + "kl_loss": 0.1293182075023651, + "loss_ib": 0.0019192802719771862, + "step": 2108 + }, + { + "ce_ib": 4.611570835113525, + "ce_orig": 0.8482216596603394, + "epoch": 0.6062261844848659, + "kl_loss": 0.08880294859409332, + "loss_ib": 0.0013491865247488022, + "step": 2108 + }, + { + "ce_ib": 2.9517178535461426, + "ce_orig": 0.6799688935279846, + "epoch": 0.6065137680638436, + "kl_loss": 0.04097023606300354, + "loss_ib": 0.000704874109942466, + "step": 2109 + }, + { + "ce_ib": 2.564368724822998, + "ce_orig": 0.46525925397872925, + "epoch": 0.6065137680638436, + "kl_loss": 0.13525329530239105, + "loss_ib": 0.0016089698765426874, + "step": 2109 + }, + { + "ce_ib": 4.8692827224731445, + "ce_orig": 1.0328447818756104, + "epoch": 0.6065137680638436, + "kl_loss": 0.07837416231632233, + "loss_ib": 0.0012706698616966605, + "step": 2109 + }, + { + "ce_ib": 5.070730209350586, + "ce_orig": 1.0898451805114746, + "epoch": 0.6065137680638436, + "kl_loss": 0.060781609266996384, + "loss_ib": 0.0011148890480399132, + "step": 2109 + }, + { + "epoch": 0.6068013516428212, + "grad_norm": 0.11829555034637451, + "learning_rate": 4.623858813122465e-05, + "loss": 0.8136, + "step": 2110 + }, + { + "ce_ib": 5.07642936706543, + "ce_orig": 0.8896445631980896, + "epoch": 0.6068013516428212, + "kl_loss": 0.07124791294336319, + "loss_ib": 0.00122012197971344, + "step": 2110 + }, + { + "ce_ib": 6.578906059265137, + "ce_orig": 1.2526313066482544, + "epoch": 0.6068013516428212, + "kl_loss": 0.11567571759223938, + "loss_ib": 0.001814647694118321, + "step": 2110 + }, + { + "ce_ib": 5.7854719161987305, + "ce_orig": 0.9175752401351929, + "epoch": 0.6068013516428212, + "kl_loss": 0.10602528601884842, + "loss_ib": 0.0016388000221922994, + "step": 2110 + }, + { + "ce_ib": 5.328070163726807, + "ce_orig": 0.9272412657737732, + "epoch": 0.6068013516428212, + "kl_loss": 0.08977164328098297, + "loss_ib": 0.0014305233489722013, + "step": 2110 + }, + { + "ce_ib": 3.9089906215667725, + "ce_orig": 0.7759481072425842, + "epoch": 0.6070889352217989, + "kl_loss": 0.06008484214544296, + "loss_ib": 0.0009917474817484617, + "step": 2111 + }, + { + "ce_ib": 4.98436164855957, + "ce_orig": 0.6122896671295166, + "epoch": 0.6070889352217989, + "kl_loss": 0.11229458451271057, + "loss_ib": 0.0016213818453252316, + "step": 2111 + }, + { + "ce_ib": 5.675203800201416, + "ce_orig": 1.3551528453826904, + "epoch": 0.6070889352217989, + "kl_loss": 0.059727489948272705, + "loss_ib": 0.0011647952487692237, + "step": 2111 + }, + { + "ce_ib": 3.5226855278015137, + "ce_orig": 0.47625109553337097, + "epoch": 0.6070889352217989, + "kl_loss": 0.07345856726169586, + "loss_ib": 0.0010868541430681944, + "step": 2111 + }, + { + "ce_ib": 4.1217427253723145, + "ce_orig": 1.0125263929367065, + "epoch": 0.6073765188007765, + "kl_loss": 0.04767127335071564, + "loss_ib": 0.000888887036126107, + "step": 2112 + }, + { + "ce_ib": 7.309972763061523, + "ce_orig": 1.6429550647735596, + "epoch": 0.6073765188007765, + "kl_loss": 0.11242333799600601, + "loss_ib": 0.0018552305409684777, + "step": 2112 + }, + { + "ce_ib": 5.421329021453857, + "ce_orig": 1.2163773775100708, + "epoch": 0.6073765188007765, + "kl_loss": 0.08804646134376526, + "loss_ib": 0.0014225974446162581, + "step": 2112 + }, + { + "ce_ib": 7.917539119720459, + "ce_orig": 1.544349193572998, + "epoch": 0.6073765188007765, + "kl_loss": 0.06748948246240616, + "loss_ib": 0.0014666486531496048, + "step": 2112 + }, + { + "ce_ib": 4.271539688110352, + "ce_orig": 0.8847162127494812, + "epoch": 0.6076641023797541, + "kl_loss": 0.06685605645179749, + "loss_ib": 0.0010957145132124424, + "step": 2113 + }, + { + "ce_ib": 3.2614548206329346, + "ce_orig": 0.6797826886177063, + "epoch": 0.6076641023797541, + "kl_loss": 0.06210213527083397, + "loss_ib": 0.0009471668163314462, + "step": 2113 + }, + { + "ce_ib": 4.889660835266113, + "ce_orig": 0.7387590408325195, + "epoch": 0.6076641023797541, + "kl_loss": 0.10135084390640259, + "loss_ib": 0.0015024745371192694, + "step": 2113 + }, + { + "ce_ib": 5.078985214233398, + "ce_orig": 1.1637930870056152, + "epoch": 0.6076641023797541, + "kl_loss": 0.05732191726565361, + "loss_ib": 0.0010811176616698503, + "step": 2113 + }, + { + "ce_ib": 7.236518383026123, + "ce_orig": 1.7157841920852661, + "epoch": 0.6079516859587317, + "kl_loss": 0.2923043668270111, + "loss_ib": 0.0036466955207288265, + "step": 2114 + }, + { + "ce_ib": 4.124276638031006, + "ce_orig": 0.8495044112205505, + "epoch": 0.6079516859587317, + "kl_loss": 0.0421450212597847, + "loss_ib": 0.0008338778861798346, + "step": 2114 + }, + { + "ce_ib": 4.322336196899414, + "ce_orig": 0.6328456997871399, + "epoch": 0.6079516859587317, + "kl_loss": 0.06399020552635193, + "loss_ib": 0.0010721356375142932, + "step": 2114 + }, + { + "ce_ib": 3.3560211658477783, + "ce_orig": 0.7119355201721191, + "epoch": 0.6079516859587317, + "kl_loss": 0.05065736174583435, + "loss_ib": 0.0008421757374890149, + "step": 2114 + }, + { + "epoch": 0.6082392695377093, + "grad_norm": 0.1274888813495636, + "learning_rate": 4.6218092590473697e-05, + "loss": 0.9697, + "step": 2115 + }, + { + "ce_ib": 5.6808390617370605, + "ce_orig": 0.8836041688919067, + "epoch": 0.6082392695377093, + "kl_loss": 0.08002272993326187, + "loss_ib": 0.0013683111174032092, + "step": 2115 + }, + { + "ce_ib": 2.99267578125, + "ce_orig": 0.8709316253662109, + "epoch": 0.6082392695377093, + "kl_loss": 0.07463672757148743, + "loss_ib": 0.0010456348536536098, + "step": 2115 + }, + { + "ce_ib": 9.182353973388672, + "ce_orig": 1.745758295059204, + "epoch": 0.6082392695377093, + "kl_loss": 0.11045174300670624, + "loss_ib": 0.002022752771154046, + "step": 2115 + }, + { + "ce_ib": 3.061645746231079, + "ce_orig": 0.4548913538455963, + "epoch": 0.6082392695377093, + "kl_loss": 0.030425990000367165, + "loss_ib": 0.0006104244384914637, + "step": 2115 + }, + { + "ce_ib": 3.076526641845703, + "ce_orig": 0.4652811288833618, + "epoch": 0.6085268531166871, + "kl_loss": 0.07132110744714737, + "loss_ib": 0.001020863652229309, + "step": 2116 + }, + { + "ce_ib": 3.1666269302368164, + "ce_orig": 0.6259980797767639, + "epoch": 0.6085268531166871, + "kl_loss": 0.04862047731876373, + "loss_ib": 0.0008028674055822194, + "step": 2116 + }, + { + "ce_ib": 6.907692909240723, + "ce_orig": 1.3125783205032349, + "epoch": 0.6085268531166871, + "kl_loss": 0.051560595631599426, + "loss_ib": 0.0012063751928508282, + "step": 2116 + }, + { + "ce_ib": 4.086557865142822, + "ce_orig": 0.7462382316589355, + "epoch": 0.6085268531166871, + "kl_loss": 0.09993855655193329, + "loss_ib": 0.0014080413384363055, + "step": 2116 + }, + { + "ce_ib": 4.250082015991211, + "ce_orig": 0.8379251956939697, + "epoch": 0.6088144366956647, + "kl_loss": 0.11557206511497498, + "loss_ib": 0.0015807288000360131, + "step": 2117 + }, + { + "ce_ib": 2.9417543411254883, + "ce_orig": 0.7925905585289001, + "epoch": 0.6088144366956647, + "kl_loss": 0.0618843212723732, + "loss_ib": 0.0009130186517722905, + "step": 2117 + }, + { + "ce_ib": 3.0246241092681885, + "ce_orig": 0.7028128504753113, + "epoch": 0.6088144366956647, + "kl_loss": 0.08815879374742508, + "loss_ib": 0.0011840503429993987, + "step": 2117 + }, + { + "ce_ib": 3.677220106124878, + "ce_orig": 0.4575631022453308, + "epoch": 0.6088144366956647, + "kl_loss": 0.04170417785644531, + "loss_ib": 0.0007847637753002346, + "step": 2117 + }, + { + "ce_ib": 3.4329771995544434, + "ce_orig": 0.4832436144351959, + "epoch": 0.6091020202746423, + "kl_loss": 0.08547630161046982, + "loss_ib": 0.0011980606941506267, + "step": 2118 + }, + { + "ce_ib": 5.9969282150268555, + "ce_orig": 1.4144861698150635, + "epoch": 0.6091020202746423, + "kl_loss": 0.06675498932600021, + "loss_ib": 0.0012672427110373974, + "step": 2118 + }, + { + "ce_ib": 3.8617002964019775, + "ce_orig": 0.49814870953559875, + "epoch": 0.6091020202746423, + "kl_loss": 0.1250779628753662, + "loss_ib": 0.0016369496006518602, + "step": 2118 + }, + { + "ce_ib": 3.9126012325286865, + "ce_orig": 0.4855698347091675, + "epoch": 0.6091020202746423, + "kl_loss": 0.05646023154258728, + "loss_ib": 0.0009558624005876482, + "step": 2118 + }, + { + "ce_ib": 4.367434501647949, + "ce_orig": 0.9236563444137573, + "epoch": 0.60938960385362, + "kl_loss": 0.046216100454330444, + "loss_ib": 0.000898904399946332, + "step": 2119 + }, + { + "ce_ib": 4.5313873291015625, + "ce_orig": 0.6322845816612244, + "epoch": 0.60938960385362, + "kl_loss": 0.09628628194332123, + "loss_ib": 0.0014160015853121877, + "step": 2119 + }, + { + "ce_ib": 3.280775785446167, + "ce_orig": 0.885408878326416, + "epoch": 0.60938960385362, + "kl_loss": 0.03929038345813751, + "loss_ib": 0.0007209813338704407, + "step": 2119 + }, + { + "ce_ib": 8.252899169921875, + "ce_orig": 1.8456898927688599, + "epoch": 0.60938960385362, + "kl_loss": 0.09774784743785858, + "loss_ib": 0.001802768325433135, + "step": 2119 + }, + { + "epoch": 0.6096771874325976, + "grad_norm": 0.10403478145599365, + "learning_rate": 4.6197545930412874e-05, + "loss": 0.8953, + "step": 2120 + }, + { + "ce_ib": 2.44612717628479, + "ce_orig": 0.7247377038002014, + "epoch": 0.6096771874325976, + "kl_loss": 0.04192016273736954, + "loss_ib": 0.0006638143095187843, + "step": 2120 + }, + { + "ce_ib": 2.6771647930145264, + "ce_orig": 0.5953008532524109, + "epoch": 0.6096771874325976, + "kl_loss": 0.03320113569498062, + "loss_ib": 0.0005997278494760394, + "step": 2120 + }, + { + "ce_ib": 4.401892185211182, + "ce_orig": 0.72336745262146, + "epoch": 0.6096771874325976, + "kl_loss": 0.05527305603027344, + "loss_ib": 0.000992919784039259, + "step": 2120 + }, + { + "ce_ib": 4.515476703643799, + "ce_orig": 0.817031741142273, + "epoch": 0.6096771874325976, + "kl_loss": 0.09027643501758575, + "loss_ib": 0.0013543119421228766, + "step": 2120 + }, + { + "ce_ib": 3.083125591278076, + "ce_orig": 0.6485257148742676, + "epoch": 0.6099647710115752, + "kl_loss": 0.08700120449066162, + "loss_ib": 0.0011783245718106627, + "step": 2121 + }, + { + "ce_ib": 2.2886581420898438, + "ce_orig": 0.44620031118392944, + "epoch": 0.6099647710115752, + "kl_loss": 0.06950455904006958, + "loss_ib": 0.0009239114006049931, + "step": 2121 + }, + { + "ce_ib": 6.562998294830322, + "ce_orig": 1.1422693729400635, + "epoch": 0.6099647710115752, + "kl_loss": 0.06436900794506073, + "loss_ib": 0.001299989758990705, + "step": 2121 + }, + { + "ce_ib": 2.5764622688293457, + "ce_orig": 0.5953276753425598, + "epoch": 0.6099647710115752, + "kl_loss": 0.06554518640041351, + "loss_ib": 0.0009130980470217764, + "step": 2121 + }, + { + "ce_ib": 5.8467936515808105, + "ce_orig": 1.1176669597625732, + "epoch": 0.6102523545905528, + "kl_loss": 0.06876656413078308, + "loss_ib": 0.0012723449617624283, + "step": 2122 + }, + { + "ce_ib": 4.210081577301025, + "ce_orig": 1.0805062055587769, + "epoch": 0.6102523545905528, + "kl_loss": 0.11189743131399155, + "loss_ib": 0.0015399823896586895, + "step": 2122 + }, + { + "ce_ib": 7.388996124267578, + "ce_orig": 1.6680258512496948, + "epoch": 0.6102523545905528, + "kl_loss": 0.1011250913143158, + "loss_ib": 0.0017501504626125097, + "step": 2122 + }, + { + "ce_ib": 5.616105556488037, + "ce_orig": 1.067963719367981, + "epoch": 0.6102523545905528, + "kl_loss": 0.05666200816631317, + "loss_ib": 0.0011282305931672454, + "step": 2122 + }, + { + "ce_ib": 7.300571441650391, + "ce_orig": 1.6014631986618042, + "epoch": 0.6105399381695306, + "kl_loss": 0.08380818367004395, + "loss_ib": 0.0015681388322263956, + "step": 2123 + }, + { + "ce_ib": 5.39241361618042, + "ce_orig": 1.0481703281402588, + "epoch": 0.6105399381695306, + "kl_loss": 0.08279883861541748, + "loss_ib": 0.0013672297354787588, + "step": 2123 + }, + { + "ce_ib": 3.2914793491363525, + "ce_orig": 0.5914108753204346, + "epoch": 0.6105399381695306, + "kl_loss": 0.06013810262084007, + "loss_ib": 0.0009305289131589234, + "step": 2123 + }, + { + "ce_ib": 7.5370588302612305, + "ce_orig": 1.1312572956085205, + "epoch": 0.6105399381695306, + "kl_loss": 0.11214877665042877, + "loss_ib": 0.0018751936731860042, + "step": 2123 + }, + { + "ce_ib": 3.0349385738372803, + "ce_orig": 0.7078620791435242, + "epoch": 0.6108275217485082, + "kl_loss": 0.04794413596391678, + "loss_ib": 0.000782935181632638, + "step": 2124 + }, + { + "ce_ib": 3.190608024597168, + "ce_orig": 0.5264415144920349, + "epoch": 0.6108275217485082, + "kl_loss": 0.029690319672226906, + "loss_ib": 0.0006159640033729374, + "step": 2124 + }, + { + "ce_ib": 6.008327484130859, + "ce_orig": 1.161428451538086, + "epoch": 0.6108275217485082, + "kl_loss": 0.11699721962213516, + "loss_ib": 0.0017708049854263663, + "step": 2124 + }, + { + "ce_ib": 5.628240585327148, + "ce_orig": 1.2998149394989014, + "epoch": 0.6108275217485082, + "kl_loss": 0.057433292269706726, + "loss_ib": 0.0011371568543836474, + "step": 2124 + }, + { + "epoch": 0.6111151053274858, + "grad_norm": 0.10445261746644974, + "learning_rate": 4.6176948200543845e-05, + "loss": 0.8844, + "step": 2125 + }, + { + "ce_ib": 4.42794942855835, + "ce_orig": 1.0341403484344482, + "epoch": 0.6111151053274858, + "kl_loss": 0.043096140027046204, + "loss_ib": 0.0008737563039176166, + "step": 2125 + }, + { + "ce_ib": 6.390162467956543, + "ce_orig": 1.2739611864089966, + "epoch": 0.6111151053274858, + "kl_loss": 0.07830707728862762, + "loss_ib": 0.001422086963430047, + "step": 2125 + }, + { + "ce_ib": 6.357510566711426, + "ce_orig": 1.4907798767089844, + "epoch": 0.6111151053274858, + "kl_loss": 0.08754830807447433, + "loss_ib": 0.0015112339751794934, + "step": 2125 + }, + { + "ce_ib": 3.071319341659546, + "ce_orig": 0.3882489800453186, + "epoch": 0.6111151053274858, + "kl_loss": 0.08888719975948334, + "loss_ib": 0.0011960038682445884, + "step": 2125 + }, + { + "ce_ib": 2.705981492996216, + "ce_orig": 0.5075544118881226, + "epoch": 0.6114026889064634, + "kl_loss": 0.052047304809093475, + "loss_ib": 0.0007910712156444788, + "step": 2126 + }, + { + "ce_ib": 2.963066816329956, + "ce_orig": 0.4317620098590851, + "epoch": 0.6114026889064634, + "kl_loss": 0.08263621479272842, + "loss_ib": 0.0011226688511669636, + "step": 2126 + }, + { + "ce_ib": 6.129615306854248, + "ce_orig": 1.1080386638641357, + "epoch": 0.6114026889064634, + "kl_loss": 0.1652947962284088, + "loss_ib": 0.002265909453853965, + "step": 2126 + }, + { + "ce_ib": 2.0316572189331055, + "ce_orig": 0.3762401342391968, + "epoch": 0.6114026889064634, + "kl_loss": 0.04712727665901184, + "loss_ib": 0.0006744384882040322, + "step": 2126 + }, + { + "ce_ib": 3.0170717239379883, + "ce_orig": 0.7342363595962524, + "epoch": 0.6116902724854411, + "kl_loss": 0.05859731137752533, + "loss_ib": 0.000887680274900049, + "step": 2127 + }, + { + "ce_ib": 3.424060821533203, + "ce_orig": 0.6819241642951965, + "epoch": 0.6116902724854411, + "kl_loss": 0.040339428931474686, + "loss_ib": 0.0007458003237843513, + "step": 2127 + }, + { + "ce_ib": 0.9950816631317139, + "ce_orig": 0.11236406117677689, + "epoch": 0.6116902724854411, + "kl_loss": 0.1402096450328827, + "loss_ib": 0.0015016045654192567, + "step": 2127 + }, + { + "ce_ib": 2.5789530277252197, + "ce_orig": 0.5227527618408203, + "epoch": 0.6116902724854411, + "kl_loss": 0.06415748596191406, + "loss_ib": 0.00089947012020275, + "step": 2127 + }, + { + "ce_ib": 5.212370872497559, + "ce_orig": 0.7960782647132874, + "epoch": 0.6119778560644187, + "kl_loss": 0.08583296835422516, + "loss_ib": 0.0013795667327940464, + "step": 2128 + }, + { + "ce_ib": 2.603602170944214, + "ce_orig": 0.6239344477653503, + "epoch": 0.6119778560644187, + "kl_loss": 0.04866599291563034, + "loss_ib": 0.0007470200653187931, + "step": 2128 + }, + { + "ce_ib": 6.994669437408447, + "ce_orig": 1.3643969297409058, + "epoch": 0.6119778560644187, + "kl_loss": 0.07424238324165344, + "loss_ib": 0.0014418907230719924, + "step": 2128 + }, + { + "ce_ib": 5.533613204956055, + "ce_orig": 1.097877025604248, + "epoch": 0.6119778560644187, + "kl_loss": 0.07483794540166855, + "loss_ib": 0.0013017406454309821, + "step": 2128 + }, + { + "ce_ib": 6.8846516609191895, + "ce_orig": 1.5899391174316406, + "epoch": 0.6122654396433964, + "kl_loss": 0.058303024619817734, + "loss_ib": 0.0012714953627437353, + "step": 2129 + }, + { + "ce_ib": 6.287890434265137, + "ce_orig": 1.2851872444152832, + "epoch": 0.6122654396433964, + "kl_loss": 0.17879167199134827, + "loss_ib": 0.0024167057126760483, + "step": 2129 + }, + { + "ce_ib": 5.91441011428833, + "ce_orig": 1.0932822227478027, + "epoch": 0.6122654396433964, + "kl_loss": 0.054182566702365875, + "loss_ib": 0.0011332667199894786, + "step": 2129 + }, + { + "ce_ib": 2.9441380500793457, + "ce_orig": 0.38599660992622375, + "epoch": 0.6122654396433964, + "kl_loss": 0.1251201629638672, + "loss_ib": 0.0015456154942512512, + "step": 2129 + }, + { + "epoch": 0.612553023222374, + "grad_norm": 0.11367835104465485, + "learning_rate": 4.615629945049132e-05, + "loss": 0.866, + "step": 2130 + }, + { + "ce_ib": 4.520206451416016, + "ce_orig": 1.0577067136764526, + "epoch": 0.612553023222374, + "kl_loss": 0.08850064873695374, + "loss_ib": 0.0013370270607993007, + "step": 2130 + }, + { + "ce_ib": 4.681437969207764, + "ce_orig": 0.6803049445152283, + "epoch": 0.612553023222374, + "kl_loss": 0.0795048400759697, + "loss_ib": 0.001263192156329751, + "step": 2130 + }, + { + "ce_ib": 4.578001022338867, + "ce_orig": 0.8737592697143555, + "epoch": 0.612553023222374, + "kl_loss": 0.09316752851009369, + "loss_ib": 0.001389475422911346, + "step": 2130 + }, + { + "ce_ib": 5.4172492027282715, + "ce_orig": 1.0582387447357178, + "epoch": 0.612553023222374, + "kl_loss": 0.12600106000900269, + "loss_ib": 0.0018017353722825646, + "step": 2130 + }, + { + "ce_ib": 3.604768753051758, + "ce_orig": 0.8891803026199341, + "epoch": 0.6128406068013517, + "kl_loss": 0.0661703497171402, + "loss_ib": 0.0010221803095191717, + "step": 2131 + }, + { + "ce_ib": 5.880786418914795, + "ce_orig": 1.2775758504867554, + "epoch": 0.6128406068013517, + "kl_loss": 0.09148233383893967, + "loss_ib": 0.001502901897765696, + "step": 2131 + }, + { + "ce_ib": 6.899085998535156, + "ce_orig": 1.5766624212265015, + "epoch": 0.6128406068013517, + "kl_loss": 0.08876755088567734, + "loss_ib": 0.0015775840729475021, + "step": 2131 + }, + { + "ce_ib": 5.662644863128662, + "ce_orig": 1.1254585981369019, + "epoch": 0.6128406068013517, + "kl_loss": 0.1023118793964386, + "loss_ib": 0.0015893831150606275, + "step": 2131 + }, + { + "ce_ib": 5.6457839012146, + "ce_orig": 0.7764841914176941, + "epoch": 0.6131281903803293, + "kl_loss": 0.08738420903682709, + "loss_ib": 0.0014384203823283315, + "step": 2132 + }, + { + "ce_ib": 3.079623222351074, + "ce_orig": 0.6898913979530334, + "epoch": 0.6131281903803293, + "kl_loss": 0.04960900917649269, + "loss_ib": 0.0008040523971430957, + "step": 2132 + }, + { + "ce_ib": 5.796226978302002, + "ce_orig": 0.7572131752967834, + "epoch": 0.6131281903803293, + "kl_loss": 0.17134834825992584, + "loss_ib": 0.0022931061685085297, + "step": 2132 + }, + { + "ce_ib": 6.666922569274902, + "ce_orig": 1.089788556098938, + "epoch": 0.6131281903803293, + "kl_loss": 0.07554057985544205, + "loss_ib": 0.0014220979064702988, + "step": 2132 + }, + { + "ce_ib": 3.6575767993927, + "ce_orig": 0.9281088709831238, + "epoch": 0.6134157739593069, + "kl_loss": 0.05932061746716499, + "loss_ib": 0.0009589638211764395, + "step": 2133 + }, + { + "ce_ib": 5.385511875152588, + "ce_orig": 1.0796208381652832, + "epoch": 0.6134157739593069, + "kl_loss": 0.06787852197885513, + "loss_ib": 0.0012173362774774432, + "step": 2133 + }, + { + "ce_ib": 5.949214935302734, + "ce_orig": 1.2517486810684204, + "epoch": 0.6134157739593069, + "kl_loss": 0.0974278450012207, + "loss_ib": 0.0015691998414695263, + "step": 2133 + }, + { + "ce_ib": 4.9255475997924805, + "ce_orig": 1.0752049684524536, + "epoch": 0.6134157739593069, + "kl_loss": 0.07150843739509583, + "loss_ib": 0.0012076391139999032, + "step": 2133 + }, + { + "ce_ib": 4.903815746307373, + "ce_orig": 0.8029479384422302, + "epoch": 0.6137033575382845, + "kl_loss": 0.13923510909080505, + "loss_ib": 0.0018827326130121946, + "step": 2134 + }, + { + "ce_ib": 3.721654176712036, + "ce_orig": 0.7431964874267578, + "epoch": 0.6137033575382845, + "kl_loss": 0.09125018864870071, + "loss_ib": 0.0012846671743318439, + "step": 2134 + }, + { + "ce_ib": 4.437850475311279, + "ce_orig": 0.7063111066818237, + "epoch": 0.6137033575382845, + "kl_loss": 0.11492069065570831, + "loss_ib": 0.0015929918736219406, + "step": 2134 + }, + { + "ce_ib": 3.2016563415527344, + "ce_orig": 0.7576561570167542, + "epoch": 0.6137033575382845, + "kl_loss": 0.09448878467082977, + "loss_ib": 0.00126505340449512, + "step": 2134 + }, + { + "epoch": 0.6139909411172622, + "grad_norm": 0.0897144079208374, + "learning_rate": 4.613559973000295e-05, + "loss": 0.8733, + "step": 2135 + }, + { + "ce_ib": 4.216825485229492, + "ce_orig": 0.759712278842926, + "epoch": 0.6139909411172622, + "kl_loss": 0.09884629398584366, + "loss_ib": 0.0014101454289630055, + "step": 2135 + }, + { + "ce_ib": 3.363421678543091, + "ce_orig": 0.8810320496559143, + "epoch": 0.6139909411172622, + "kl_loss": 0.06063312664628029, + "loss_ib": 0.0009426733595319092, + "step": 2135 + }, + { + "ce_ib": 5.384389400482178, + "ce_orig": 1.176129698753357, + "epoch": 0.6139909411172622, + "kl_loss": 0.0556916743516922, + "loss_ib": 0.00109535560477525, + "step": 2135 + }, + { + "ce_ib": 5.341346740722656, + "ce_orig": 0.9712060689926147, + "epoch": 0.6139909411172622, + "kl_loss": 0.0945320725440979, + "loss_ib": 0.001479455386288464, + "step": 2135 + }, + { + "ce_ib": 5.3867974281311035, + "ce_orig": 1.276092290878296, + "epoch": 0.6142785246962399, + "kl_loss": 0.10315363109111786, + "loss_ib": 0.0015702160308137536, + "step": 2136 + }, + { + "ce_ib": 4.610352039337158, + "ce_orig": 0.7471790909767151, + "epoch": 0.6142785246962399, + "kl_loss": 0.13643750548362732, + "loss_ib": 0.001825410290621221, + "step": 2136 + }, + { + "ce_ib": 5.5619306564331055, + "ce_orig": 1.1309258937835693, + "epoch": 0.6142785246962399, + "kl_loss": 0.09040091186761856, + "loss_ib": 0.0014602021547034383, + "step": 2136 + }, + { + "ce_ib": 2.5646138191223145, + "ce_orig": 0.5799875855445862, + "epoch": 0.6142785246962399, + "kl_loss": 0.05626867711544037, + "loss_ib": 0.0008191480883397162, + "step": 2136 + }, + { + "ce_ib": 3.340177297592163, + "ce_orig": 0.6244290471076965, + "epoch": 0.6145661082752175, + "kl_loss": 0.04843217507004738, + "loss_ib": 0.0008183394093066454, + "step": 2137 + }, + { + "ce_ib": 3.293400287628174, + "ce_orig": 0.5490813851356506, + "epoch": 0.6145661082752175, + "kl_loss": 0.057213734835386276, + "loss_ib": 0.0009014772949740291, + "step": 2137 + }, + { + "ce_ib": 1.8210623264312744, + "ce_orig": 0.2941298484802246, + "epoch": 0.6145661082752175, + "kl_loss": 0.18456658720970154, + "loss_ib": 0.002027772134169936, + "step": 2137 + }, + { + "ce_ib": 6.596519947052002, + "ce_orig": 1.070206880569458, + "epoch": 0.6145661082752175, + "kl_loss": 0.10145094990730286, + "loss_ib": 0.0016741615254431963, + "step": 2137 + }, + { + "ce_ib": 4.51741361618042, + "ce_orig": 0.7501292824745178, + "epoch": 0.6148536918541951, + "kl_loss": 0.095456562936306, + "loss_ib": 0.001406306866556406, + "step": 2138 + }, + { + "ce_ib": 6.008347988128662, + "ce_orig": 1.2383707761764526, + "epoch": 0.6148536918541951, + "kl_loss": 0.06337891519069672, + "loss_ib": 0.001234623952768743, + "step": 2138 + }, + { + "ce_ib": 3.556936502456665, + "ce_orig": 0.640124499797821, + "epoch": 0.6148536918541951, + "kl_loss": 0.10116206854581833, + "loss_ib": 0.0013673142530024052, + "step": 2138 + }, + { + "ce_ib": 5.051974296569824, + "ce_orig": 1.3137478828430176, + "epoch": 0.6148536918541951, + "kl_loss": 0.07010713219642639, + "loss_ib": 0.0012062686728313565, + "step": 2138 + }, + { + "ce_ib": 4.427859783172607, + "ce_orig": 0.7249847054481506, + "epoch": 0.6151412754331728, + "kl_loss": 0.04720549285411835, + "loss_ib": 0.0009148408425971866, + "step": 2139 + }, + { + "ce_ib": 3.8565664291381836, + "ce_orig": 0.8553259968757629, + "epoch": 0.6151412754331728, + "kl_loss": 0.05592528358101845, + "loss_ib": 0.0009449094068259001, + "step": 2139 + }, + { + "ce_ib": 6.818292617797852, + "ce_orig": 1.3467859029769897, + "epoch": 0.6151412754331728, + "kl_loss": 0.05404631048440933, + "loss_ib": 0.0012222923105582595, + "step": 2139 + }, + { + "ce_ib": 6.990148544311523, + "ce_orig": 1.9079153537750244, + "epoch": 0.6151412754331728, + "kl_loss": 0.09146611392498016, + "loss_ib": 0.0016136759659275413, + "step": 2139 + }, + { + "epoch": 0.6154288590121504, + "grad_norm": 0.10002702474594116, + "learning_rate": 4.611484908894914e-05, + "loss": 0.891, + "step": 2140 + }, + { + "ce_ib": 3.7983789443969727, + "ce_orig": 0.7454319000244141, + "epoch": 0.6154288590121504, + "kl_loss": 0.09669461846351624, + "loss_ib": 0.0013467840617522597, + "step": 2140 + }, + { + "ce_ib": 4.583083629608154, + "ce_orig": 0.8186443448066711, + "epoch": 0.6154288590121504, + "kl_loss": 0.1289961338043213, + "loss_ib": 0.0017482696566730738, + "step": 2140 + }, + { + "ce_ib": 2.699578046798706, + "ce_orig": 0.2750304341316223, + "epoch": 0.6154288590121504, + "kl_loss": 0.06942585110664368, + "loss_ib": 0.0009642162476666272, + "step": 2140 + }, + { + "ce_ib": 4.414261341094971, + "ce_orig": 0.7525402307510376, + "epoch": 0.6154288590121504, + "kl_loss": 0.07212242484092712, + "loss_ib": 0.0011626502964645624, + "step": 2140 + }, + { + "ce_ib": 5.275213241577148, + "ce_orig": 1.0873106718063354, + "epoch": 0.615716442591128, + "kl_loss": 0.06643961369991302, + "loss_ib": 0.0011919174576178193, + "step": 2141 + }, + { + "ce_ib": 4.318671703338623, + "ce_orig": 1.035599708557129, + "epoch": 0.615716442591128, + "kl_loss": 0.057751815766096115, + "loss_ib": 0.0010093852179124951, + "step": 2141 + }, + { + "ce_ib": 3.8172285556793213, + "ce_orig": 0.8194168210029602, + "epoch": 0.615716442591128, + "kl_loss": 0.051933061331510544, + "loss_ib": 0.0009010534267872572, + "step": 2141 + }, + { + "ce_ib": 4.932580471038818, + "ce_orig": 1.1076687574386597, + "epoch": 0.615716442591128, + "kl_loss": 0.05505797639489174, + "loss_ib": 0.0010438377503305674, + "step": 2141 + }, + { + "ce_ib": 5.849124908447266, + "ce_orig": 1.0995845794677734, + "epoch": 0.6160040261701056, + "kl_loss": 0.08460432291030884, + "loss_ib": 0.0014309555990621448, + "step": 2142 + }, + { + "ce_ib": 3.015653610229492, + "ce_orig": 0.7459291815757751, + "epoch": 0.6160040261701056, + "kl_loss": 0.0969051793217659, + "loss_ib": 0.001270617125555873, + "step": 2142 + }, + { + "ce_ib": 6.148834228515625, + "ce_orig": 1.281098484992981, + "epoch": 0.6160040261701056, + "kl_loss": 0.08108934760093689, + "loss_ib": 0.0014257768634706736, + "step": 2142 + }, + { + "ce_ib": 3.4297776222229004, + "ce_orig": 0.7064605951309204, + "epoch": 0.6160040261701056, + "kl_loss": 0.043543990701436996, + "loss_ib": 0.0007784176268614829, + "step": 2142 + }, + { + "ce_ib": 3.097769260406494, + "ce_orig": 0.38375556468963623, + "epoch": 0.6162916097490834, + "kl_loss": 0.1094382256269455, + "loss_ib": 0.0014041592366993427, + "step": 2143 + }, + { + "ce_ib": 4.552896022796631, + "ce_orig": 0.7539324760437012, + "epoch": 0.6162916097490834, + "kl_loss": 0.060967814177274704, + "loss_ib": 0.0010649677133187652, + "step": 2143 + }, + { + "ce_ib": 5.880504131317139, + "ce_orig": 0.8042673468589783, + "epoch": 0.6162916097490834, + "kl_loss": 0.07979841530323029, + "loss_ib": 0.0013860344188287854, + "step": 2143 + }, + { + "ce_ib": 4.307216644287109, + "ce_orig": 0.5395128130912781, + "epoch": 0.6162916097490834, + "kl_loss": 0.1008296087384224, + "loss_ib": 0.0014390175929293036, + "step": 2143 + }, + { + "ce_ib": 5.969088554382324, + "ce_orig": 1.3242963552474976, + "epoch": 0.616579193328061, + "kl_loss": 0.0860796794295311, + "loss_ib": 0.0014577056281268597, + "step": 2144 + }, + { + "ce_ib": 8.382493019104004, + "ce_orig": 0.9745354652404785, + "epoch": 0.616579193328061, + "kl_loss": 0.08921171724796295, + "loss_ib": 0.0017303662607446313, + "step": 2144 + }, + { + "ce_ib": 4.27830171585083, + "ce_orig": 0.6430113911628723, + "epoch": 0.616579193328061, + "kl_loss": 0.08340782672166824, + "loss_ib": 0.0012619083281606436, + "step": 2144 + }, + { + "ce_ib": 3.4903361797332764, + "ce_orig": 0.6540915369987488, + "epoch": 0.616579193328061, + "kl_loss": 0.06698796898126602, + "loss_ib": 0.0010189133463427424, + "step": 2144 + }, + { + "epoch": 0.6168667769070386, + "grad_norm": 0.08684661984443665, + "learning_rate": 4.6094047577323025e-05, + "loss": 0.7728, + "step": 2145 + }, + { + "ce_ib": 5.215157508850098, + "ce_orig": 1.3990168571472168, + "epoch": 0.6168667769070386, + "kl_loss": 0.06902924180030823, + "loss_ib": 0.0012118081795051694, + "step": 2145 + }, + { + "ce_ib": 6.399679660797119, + "ce_orig": 1.5016319751739502, + "epoch": 0.6168667769070386, + "kl_loss": 0.07472557574510574, + "loss_ib": 0.0013872236013412476, + "step": 2145 + }, + { + "ce_ib": 3.2721874713897705, + "ce_orig": 0.4779513478279114, + "epoch": 0.6168667769070386, + "kl_loss": 0.04786447435617447, + "loss_ib": 0.0008058634120970964, + "step": 2145 + }, + { + "ce_ib": 6.675848484039307, + "ce_orig": 0.9066933393478394, + "epoch": 0.6168667769070386, + "kl_loss": 0.0984276607632637, + "loss_ib": 0.0016518613556399941, + "step": 2145 + }, + { + "ce_ib": 5.474479675292969, + "ce_orig": 1.0937905311584473, + "epoch": 0.6171543604860162, + "kl_loss": 0.10391192138195038, + "loss_ib": 0.0015865671448409557, + "step": 2146 + }, + { + "ce_ib": 4.575948238372803, + "ce_orig": 1.1403383016586304, + "epoch": 0.6171543604860162, + "kl_loss": 0.08320329338312149, + "loss_ib": 0.0012896276311948895, + "step": 2146 + }, + { + "ce_ib": 6.497585773468018, + "ce_orig": 1.3095849752426147, + "epoch": 0.6171543604860162, + "kl_loss": 0.08020438253879547, + "loss_ib": 0.0014518023235723376, + "step": 2146 + }, + { + "ce_ib": 6.586029529571533, + "ce_orig": 1.324447751045227, + "epoch": 0.6171543604860162, + "kl_loss": 0.0861404538154602, + "loss_ib": 0.0015200074994936585, + "step": 2146 + }, + { + "ce_ib": 2.6205761432647705, + "ce_orig": 0.404054194688797, + "epoch": 0.6174419440649939, + "kl_loss": 0.08311313390731812, + "loss_ib": 0.0010931889992207289, + "step": 2147 + }, + { + "ce_ib": 6.877864360809326, + "ce_orig": 1.4787178039550781, + "epoch": 0.6174419440649939, + "kl_loss": 0.07055573165416718, + "loss_ib": 0.0013933437876403332, + "step": 2147 + }, + { + "ce_ib": 2.9871866703033447, + "ce_orig": 0.4839642643928528, + "epoch": 0.6174419440649939, + "kl_loss": 0.07119922339916229, + "loss_ib": 0.0010107108391821384, + "step": 2147 + }, + { + "ce_ib": 3.5313520431518555, + "ce_orig": 0.5339369177818298, + "epoch": 0.6174419440649939, + "kl_loss": 0.061465151607990265, + "loss_ib": 0.0009677867637947202, + "step": 2147 + }, + { + "ce_ib": 4.4624457359313965, + "ce_orig": 1.0715278387069702, + "epoch": 0.6177295276439715, + "kl_loss": 0.07233014702796936, + "loss_ib": 0.001169546041637659, + "step": 2148 + }, + { + "ce_ib": 4.48839807510376, + "ce_orig": 0.9709970355033875, + "epoch": 0.6177295276439715, + "kl_loss": 0.11251771450042725, + "loss_ib": 0.0015740168746560812, + "step": 2148 + }, + { + "ce_ib": 6.13091516494751, + "ce_orig": 1.0412473678588867, + "epoch": 0.6177295276439715, + "kl_loss": 0.18154552578926086, + "loss_ib": 0.002428546780720353, + "step": 2148 + }, + { + "ce_ib": 4.601873874664307, + "ce_orig": 0.8932811617851257, + "epoch": 0.6177295276439715, + "kl_loss": 0.0685279369354248, + "loss_ib": 0.0011454666964709759, + "step": 2148 + }, + { + "ce_ib": 4.450700759887695, + "ce_orig": 1.1220837831497192, + "epoch": 0.6180171112229492, + "kl_loss": 0.07617814093828201, + "loss_ib": 0.0012068514479324222, + "step": 2149 + }, + { + "ce_ib": 4.1052446365356445, + "ce_orig": 0.8697106242179871, + "epoch": 0.6180171112229492, + "kl_loss": 0.1462956666946411, + "loss_ib": 0.0018734810873866081, + "step": 2149 + }, + { + "ce_ib": 5.138076305389404, + "ce_orig": 0.8915497064590454, + "epoch": 0.6180171112229492, + "kl_loss": 0.0670715719461441, + "loss_ib": 0.0011845233384519815, + "step": 2149 + }, + { + "ce_ib": 4.452025890350342, + "ce_orig": 0.8820591568946838, + "epoch": 0.6180171112229492, + "kl_loss": 0.08045956492424011, + "loss_ib": 0.0012497982243075967, + "step": 2149 + }, + { + "epoch": 0.6183046948019268, + "grad_norm": 0.09184721857309341, + "learning_rate": 4.6073195245240254e-05, + "loss": 0.8461, + "step": 2150 + }, + { + "ce_ib": 8.027897834777832, + "ce_orig": 1.5544525384902954, + "epoch": 0.6183046948019268, + "kl_loss": 0.1270279884338379, + "loss_ib": 0.00207306956872344, + "step": 2150 + }, + { + "ce_ib": 7.210544586181641, + "ce_orig": 1.339048147201538, + "epoch": 0.6183046948019268, + "kl_loss": 0.08849625289440155, + "loss_ib": 0.0016060170019045472, + "step": 2150 + }, + { + "ce_ib": 6.11804723739624, + "ce_orig": 0.714860737323761, + "epoch": 0.6183046948019268, + "kl_loss": 0.1576751470565796, + "loss_ib": 0.0021885561291128397, + "step": 2150 + }, + { + "ce_ib": 2.9954307079315186, + "ce_orig": 0.7778920531272888, + "epoch": 0.6183046948019268, + "kl_loss": 0.050678666681051254, + "loss_ib": 0.0008063296554610133, + "step": 2150 + }, + { + "ce_ib": 4.540492057800293, + "ce_orig": 0.7177664637565613, + "epoch": 0.6185922783809045, + "kl_loss": 0.09440696239471436, + "loss_ib": 0.0013981186784803867, + "step": 2151 + }, + { + "ce_ib": 3.142174482345581, + "ce_orig": 0.6334543228149414, + "epoch": 0.6185922783809045, + "kl_loss": 0.05201367288827896, + "loss_ib": 0.0008343541412614286, + "step": 2151 + }, + { + "ce_ib": 5.2286696434021, + "ce_orig": 1.3041399717330933, + "epoch": 0.6185922783809045, + "kl_loss": 0.1073487177491188, + "loss_ib": 0.0015963540645316243, + "step": 2151 + }, + { + "ce_ib": 3.2217679023742676, + "ce_orig": 0.5591689944267273, + "epoch": 0.6185922783809045, + "kl_loss": 0.09469662606716156, + "loss_ib": 0.0012691430747509003, + "step": 2151 + }, + { + "ce_ib": 3.5581893920898438, + "ce_orig": 0.9557476043701172, + "epoch": 0.6188798619598821, + "kl_loss": 0.030786175280809402, + "loss_ib": 0.0006636807229369879, + "step": 2152 + }, + { + "ce_ib": 5.672779560089111, + "ce_orig": 1.2852243185043335, + "epoch": 0.6188798619598821, + "kl_loss": 0.11488641053438187, + "loss_ib": 0.0017161419382318854, + "step": 2152 + }, + { + "ce_ib": 5.523462772369385, + "ce_orig": 0.6969853043556213, + "epoch": 0.6188798619598821, + "kl_loss": 0.1202317550778389, + "loss_ib": 0.00175466388463974, + "step": 2152 + }, + { + "ce_ib": 3.478274345397949, + "ce_orig": 0.692083477973938, + "epoch": 0.6188798619598821, + "kl_loss": 0.09080985933542252, + "loss_ib": 0.001255925977602601, + "step": 2152 + }, + { + "ce_ib": 4.274655818939209, + "ce_orig": 0.4424313008785248, + "epoch": 0.6191674455388597, + "kl_loss": 0.08494143187999725, + "loss_ib": 0.0012768799206241965, + "step": 2153 + }, + { + "ce_ib": 3.2239654064178467, + "ce_orig": 0.6948463916778564, + "epoch": 0.6191674455388597, + "kl_loss": 0.05577589571475983, + "loss_ib": 0.0008801554795354605, + "step": 2153 + }, + { + "ce_ib": 6.635892391204834, + "ce_orig": 1.554545283317566, + "epoch": 0.6191674455388597, + "kl_loss": 0.06224273890256882, + "loss_ib": 0.0012860166607424617, + "step": 2153 + }, + { + "ce_ib": 3.2314252853393555, + "ce_orig": 0.7428099513053894, + "epoch": 0.6191674455388597, + "kl_loss": 0.044484786689281464, + "loss_ib": 0.0007679903064854443, + "step": 2153 + }, + { + "ce_ib": 2.426420211791992, + "ce_orig": 0.48065540194511414, + "epoch": 0.6194550291178373, + "kl_loss": 0.06002812832593918, + "loss_ib": 0.0008429232984781265, + "step": 2154 + }, + { + "ce_ib": 6.123720169067383, + "ce_orig": 1.162538766860962, + "epoch": 0.6194550291178373, + "kl_loss": 0.0928068608045578, + "loss_ib": 0.0015404406003654003, + "step": 2154 + }, + { + "ce_ib": 4.861904621124268, + "ce_orig": 0.9812806844711304, + "epoch": 0.6194550291178373, + "kl_loss": 0.062216948717832565, + "loss_ib": 0.0011083600111305714, + "step": 2154 + }, + { + "ce_ib": 3.8453478813171387, + "ce_orig": 0.7694811224937439, + "epoch": 0.6194550291178373, + "kl_loss": 0.06407826393842697, + "loss_ib": 0.001025317469611764, + "step": 2154 + }, + { + "epoch": 0.619742612696815, + "grad_norm": 0.10556904971599579, + "learning_rate": 4.605229214293895e-05, + "loss": 0.8064, + "step": 2155 + }, + { + "ce_ib": 4.290290355682373, + "ce_orig": 0.8508915901184082, + "epoch": 0.619742612696815, + "kl_loss": 0.32119059562683105, + "loss_ib": 0.0036409348249435425, + "step": 2155 + }, + { + "ce_ib": 5.103554725646973, + "ce_orig": 0.8541312217712402, + "epoch": 0.619742612696815, + "kl_loss": 0.09943894296884537, + "loss_ib": 0.0015047448687255383, + "step": 2155 + }, + { + "ce_ib": 4.345101356506348, + "ce_orig": 0.9767761826515198, + "epoch": 0.619742612696815, + "kl_loss": 0.10897430777549744, + "loss_ib": 0.0015242531662806869, + "step": 2155 + }, + { + "ce_ib": 2.7094480991363525, + "ce_orig": 0.6191776990890503, + "epoch": 0.619742612696815, + "kl_loss": 0.06893707811832428, + "loss_ib": 0.000960315577685833, + "step": 2155 + }, + { + "ce_ib": 5.318765163421631, + "ce_orig": 1.2784050703048706, + "epoch": 0.6200301962757927, + "kl_loss": 0.07385098934173584, + "loss_ib": 0.001270386390388012, + "step": 2156 + }, + { + "ce_ib": 2.619965076446533, + "ce_orig": 0.5242246389389038, + "epoch": 0.6200301962757927, + "kl_loss": 0.03947953134775162, + "loss_ib": 0.0006567917880602181, + "step": 2156 + }, + { + "ce_ib": 4.35165548324585, + "ce_orig": 0.7783385515213013, + "epoch": 0.6200301962757927, + "kl_loss": 0.150834321975708, + "loss_ib": 0.0019435086287558079, + "step": 2156 + }, + { + "ce_ib": 0.9877685308456421, + "ce_orig": 0.1006137877702713, + "epoch": 0.6200301962757927, + "kl_loss": 0.16297301650047302, + "loss_ib": 0.0017285069916397333, + "step": 2156 + }, + { + "ce_ib": 2.8639392852783203, + "ce_orig": 0.5943777561187744, + "epoch": 0.6203177798547703, + "kl_loss": 0.05195143073797226, + "loss_ib": 0.0008059081737883389, + "step": 2157 + }, + { + "ce_ib": 5.144629955291748, + "ce_orig": 1.110735297203064, + "epoch": 0.6203177798547703, + "kl_loss": 0.0561366081237793, + "loss_ib": 0.0010758291464298964, + "step": 2157 + }, + { + "ce_ib": 7.471113681793213, + "ce_orig": 1.2922343015670776, + "epoch": 0.6203177798547703, + "kl_loss": 0.08937765657901764, + "loss_ib": 0.0016408878145739436, + "step": 2157 + }, + { + "ce_ib": 3.2883365154266357, + "ce_orig": 0.9505813717842102, + "epoch": 0.6203177798547703, + "kl_loss": 0.06237562373280525, + "loss_ib": 0.0009525898494757712, + "step": 2157 + }, + { + "ce_ib": 2.6224851608276367, + "ce_orig": 0.556162416934967, + "epoch": 0.620605363433748, + "kl_loss": 0.06939177960157394, + "loss_ib": 0.0009561663027852774, + "step": 2158 + }, + { + "ce_ib": 5.332659721374512, + "ce_orig": 1.1618690490722656, + "epoch": 0.620605363433748, + "kl_loss": 0.09641939401626587, + "loss_ib": 0.0014974598307162523, + "step": 2158 + }, + { + "ce_ib": 4.198543548583984, + "ce_orig": 0.7771295309066772, + "epoch": 0.620605363433748, + "kl_loss": 0.045916080474853516, + "loss_ib": 0.0008790151332505047, + "step": 2158 + }, + { + "ce_ib": 4.2310566902160645, + "ce_orig": 0.8469802141189575, + "epoch": 0.620605363433748, + "kl_loss": 0.09143031388521194, + "loss_ib": 0.0013374086702242494, + "step": 2158 + }, + { + "ce_ib": 3.6888716220855713, + "ce_orig": 0.5471396446228027, + "epoch": 0.6208929470127256, + "kl_loss": 0.09214089810848236, + "loss_ib": 0.0012902960879728198, + "step": 2159 + }, + { + "ce_ib": 3.8735196590423584, + "ce_orig": 0.605591356754303, + "epoch": 0.6208929470127256, + "kl_loss": 0.08963274210691452, + "loss_ib": 0.0012836792739108205, + "step": 2159 + }, + { + "ce_ib": 4.995337009429932, + "ce_orig": 0.7900415062904358, + "epoch": 0.6208929470127256, + "kl_loss": 0.0927543118596077, + "loss_ib": 0.0014270767569541931, + "step": 2159 + }, + { + "ce_ib": 3.757711887359619, + "ce_orig": 0.7637308835983276, + "epoch": 0.6208929470127256, + "kl_loss": 0.04946363717317581, + "loss_ib": 0.000870407500769943, + "step": 2159 + }, + { + "epoch": 0.6211805305917032, + "grad_norm": 0.08966559171676636, + "learning_rate": 4.6031338320779534e-05, + "loss": 0.8501, + "step": 2160 + }, + { + "ce_ib": 3.0929079055786133, + "ce_orig": 0.6655248403549194, + "epoch": 0.6211805305917032, + "kl_loss": 0.07331294566392899, + "loss_ib": 0.0010424202773720026, + "step": 2160 + }, + { + "ce_ib": 5.469526767730713, + "ce_orig": 1.1530534029006958, + "epoch": 0.6211805305917032, + "kl_loss": 0.07538903504610062, + "loss_ib": 0.0013008430832996964, + "step": 2160 + }, + { + "ce_ib": 3.7441134452819824, + "ce_orig": 0.7024093866348267, + "epoch": 0.6211805305917032, + "kl_loss": 0.06576579809188843, + "loss_ib": 0.0010320693254470825, + "step": 2160 + }, + { + "ce_ib": 2.862966775894165, + "ce_orig": 0.5835277438163757, + "epoch": 0.6211805305917032, + "kl_loss": 0.04859703779220581, + "loss_ib": 0.0007722670561634004, + "step": 2160 + }, + { + "ce_ib": 4.072968006134033, + "ce_orig": 0.7007986307144165, + "epoch": 0.6214681141706808, + "kl_loss": 0.11041681468486786, + "loss_ib": 0.001511464943177998, + "step": 2161 + }, + { + "ce_ib": 5.695746421813965, + "ce_orig": 1.3297935724258423, + "epoch": 0.6214681141706808, + "kl_loss": 0.05639047175645828, + "loss_ib": 0.0011334792943671346, + "step": 2161 + }, + { + "ce_ib": 4.600011825561523, + "ce_orig": 0.606274425983429, + "epoch": 0.6214681141706808, + "kl_loss": 0.06817669421434402, + "loss_ib": 0.0011417680652812123, + "step": 2161 + }, + { + "ce_ib": 7.137728691101074, + "ce_orig": 1.5089761018753052, + "epoch": 0.6214681141706808, + "kl_loss": 0.06570373475551605, + "loss_ib": 0.001370810205116868, + "step": 2161 + }, + { + "ce_ib": 5.192899703979492, + "ce_orig": 1.245900273323059, + "epoch": 0.6217556977496584, + "kl_loss": 0.07913186401128769, + "loss_ib": 0.0013106086989864707, + "step": 2162 + }, + { + "ce_ib": 4.083761215209961, + "ce_orig": 0.8201823830604553, + "epoch": 0.6217556977496584, + "kl_loss": 0.05433163046836853, + "loss_ib": 0.0009516924619674683, + "step": 2162 + }, + { + "ce_ib": 3.3676211833953857, + "ce_orig": 0.6773144006729126, + "epoch": 0.6217556977496584, + "kl_loss": 0.2890886664390564, + "loss_ib": 0.0032276485580950975, + "step": 2162 + }, + { + "ce_ib": 4.741292476654053, + "ce_orig": 1.0616066455841064, + "epoch": 0.6217556977496584, + "kl_loss": 0.08529266715049744, + "loss_ib": 0.0013270559720695019, + "step": 2162 + }, + { + "ce_ib": 3.163848638534546, + "ce_orig": 0.7215872406959534, + "epoch": 0.6220432813286362, + "kl_loss": 0.047107353806495667, + "loss_ib": 0.0007874583825469017, + "step": 2163 + }, + { + "ce_ib": 2.950733184814453, + "ce_orig": 0.39207494258880615, + "epoch": 0.6220432813286362, + "kl_loss": 0.112467922270298, + "loss_ib": 0.0014197524869814515, + "step": 2163 + }, + { + "ce_ib": 4.190966606140137, + "ce_orig": 0.4511050581932068, + "epoch": 0.6220432813286362, + "kl_loss": 0.12142747640609741, + "loss_ib": 0.0016333713429048657, + "step": 2163 + }, + { + "ce_ib": 6.074443340301514, + "ce_orig": 1.1792010068893433, + "epoch": 0.6220432813286362, + "kl_loss": 0.07631777226924896, + "loss_ib": 0.0013706220779567957, + "step": 2163 + }, + { + "ce_ib": 5.3955397605896, + "ce_orig": 1.2076873779296875, + "epoch": 0.6223308649076138, + "kl_loss": 0.08377861976623535, + "loss_ib": 0.0013773400569334626, + "step": 2164 + }, + { + "ce_ib": 6.045674800872803, + "ce_orig": 0.7504338026046753, + "epoch": 0.6223308649076138, + "kl_loss": 0.17176978290081024, + "loss_ib": 0.0023222651798278093, + "step": 2164 + }, + { + "ce_ib": 2.5627613067626953, + "ce_orig": 0.39971715211868286, + "epoch": 0.6223308649076138, + "kl_loss": 0.08833823353052139, + "loss_ib": 0.0011396583868190646, + "step": 2164 + }, + { + "ce_ib": 3.378539562225342, + "ce_orig": 0.5626003742218018, + "epoch": 0.6223308649076138, + "kl_loss": 0.12205922603607178, + "loss_ib": 0.0015584462089464068, + "step": 2164 + }, + { + "epoch": 0.6226184484865914, + "grad_norm": 0.08750616014003754, + "learning_rate": 4.6010333829244624e-05, + "loss": 0.8911, + "step": 2165 + }, + { + "ce_ib": 5.361645698547363, + "ce_orig": 0.7173381447792053, + "epoch": 0.6226184484865914, + "kl_loss": 0.06962748616933823, + "loss_ib": 0.0012324394192546606, + "step": 2165 + }, + { + "ce_ib": 3.6365842819213867, + "ce_orig": 0.8078867197036743, + "epoch": 0.6226184484865914, + "kl_loss": 0.05983446538448334, + "loss_ib": 0.0009620030177757144, + "step": 2165 + }, + { + "ce_ib": 4.049839496612549, + "ce_orig": 0.6610264182090759, + "epoch": 0.6226184484865914, + "kl_loss": 0.09368491172790527, + "loss_ib": 0.001341833034530282, + "step": 2165 + }, + { + "ce_ib": 4.676388263702393, + "ce_orig": 0.8894670009613037, + "epoch": 0.6226184484865914, + "kl_loss": 0.06664589047431946, + "loss_ib": 0.0011340976925566792, + "step": 2165 + }, + { + "ce_ib": 4.660468101501465, + "ce_orig": 0.9578759670257568, + "epoch": 0.622906032065569, + "kl_loss": 0.07005058228969574, + "loss_ib": 0.0011665525380522013, + "step": 2166 + }, + { + "ce_ib": 3.425196886062622, + "ce_orig": 0.4291315972805023, + "epoch": 0.622906032065569, + "kl_loss": 0.08391250669956207, + "loss_ib": 0.001181644736789167, + "step": 2166 + }, + { + "ce_ib": 3.469102382659912, + "ce_orig": 0.39507120847702026, + "epoch": 0.622906032065569, + "kl_loss": 0.23752811551094055, + "loss_ib": 0.0027221913915127516, + "step": 2166 + }, + { + "ce_ib": 5.639920711517334, + "ce_orig": 1.089990258216858, + "epoch": 0.622906032065569, + "kl_loss": 0.09855040907859802, + "loss_ib": 0.001549496199004352, + "step": 2166 + }, + { + "ce_ib": 6.138688087463379, + "ce_orig": 1.3139363527297974, + "epoch": 0.6231936156445467, + "kl_loss": 0.06868664175271988, + "loss_ib": 0.0013007351662963629, + "step": 2167 + }, + { + "ce_ib": 4.341277122497559, + "ce_orig": 0.6431174874305725, + "epoch": 0.6231936156445467, + "kl_loss": 0.060545407235622406, + "loss_ib": 0.0010395817225798965, + "step": 2167 + }, + { + "ce_ib": 3.745562791824341, + "ce_orig": 0.7563671469688416, + "epoch": 0.6231936156445467, + "kl_loss": 0.07759904861450195, + "loss_ib": 0.0011505467118695378, + "step": 2167 + }, + { + "ce_ib": 3.7491650581359863, + "ce_orig": 0.7485407590866089, + "epoch": 0.6231936156445467, + "kl_loss": 0.08520488440990448, + "loss_ib": 0.0012269653379917145, + "step": 2167 + }, + { + "ce_ib": 4.06697940826416, + "ce_orig": 0.843721330165863, + "epoch": 0.6234811992235243, + "kl_loss": 0.09800135344266891, + "loss_ib": 0.001386711373925209, + "step": 2168 + }, + { + "ce_ib": 3.596447229385376, + "ce_orig": 0.6388657093048096, + "epoch": 0.6234811992235243, + "kl_loss": 0.031227130442857742, + "loss_ib": 0.0006719160010106862, + "step": 2168 + }, + { + "ce_ib": 4.706135272979736, + "ce_orig": 0.6308959722518921, + "epoch": 0.6234811992235243, + "kl_loss": 0.0846552699804306, + "loss_ib": 0.0013171662576496601, + "step": 2168 + }, + { + "ce_ib": 2.6781883239746094, + "ce_orig": 0.32284706830978394, + "epoch": 0.6234811992235243, + "kl_loss": 0.09747272729873657, + "loss_ib": 0.0012425461318343878, + "step": 2168 + }, + { + "ce_ib": 0.9653261303901672, + "ce_orig": 0.08791682124137878, + "epoch": 0.623768782802502, + "kl_loss": 0.16834142804145813, + "loss_ib": 0.0017799468478187919, + "step": 2169 + }, + { + "ce_ib": 4.301488876342773, + "ce_orig": 0.5392917394638062, + "epoch": 0.623768782802502, + "kl_loss": 0.060038141906261444, + "loss_ib": 0.0010305303148925304, + "step": 2169 + }, + { + "ce_ib": 3.383807420730591, + "ce_orig": 0.6738284230232239, + "epoch": 0.623768782802502, + "kl_loss": 0.07238990068435669, + "loss_ib": 0.001062279799953103, + "step": 2169 + }, + { + "ce_ib": 5.665176868438721, + "ce_orig": 1.2289042472839355, + "epoch": 0.623768782802502, + "kl_loss": 0.051335036754608154, + "loss_ib": 0.00107986805960536, + "step": 2169 + }, + { + "epoch": 0.6240563663814797, + "grad_norm": 0.08932362496852875, + "learning_rate": 4.598927871893891e-05, + "loss": 0.7941, + "step": 2170 + }, + { + "ce_ib": 3.626512050628662, + "ce_orig": 0.5624256730079651, + "epoch": 0.6240563663814797, + "kl_loss": 0.06104190647602081, + "loss_ib": 0.0009730702149681747, + "step": 2170 + }, + { + "ce_ib": 2.738222360610962, + "ce_orig": 0.6250942349433899, + "epoch": 0.6240563663814797, + "kl_loss": 0.044976286590099335, + "loss_ib": 0.0007235850789584219, + "step": 2170 + }, + { + "ce_ib": 4.555876731872559, + "ce_orig": 0.8252726197242737, + "epoch": 0.6240563663814797, + "kl_loss": 0.10089778900146484, + "loss_ib": 0.0014645655173808336, + "step": 2170 + }, + { + "ce_ib": 3.765709161758423, + "ce_orig": 0.6403543949127197, + "epoch": 0.6240563663814797, + "kl_loss": 0.08130620419979095, + "loss_ib": 0.0011896329233422875, + "step": 2170 + }, + { + "ce_ib": 4.464447975158691, + "ce_orig": 1.180139422416687, + "epoch": 0.6243439499604573, + "kl_loss": 0.07330193370580673, + "loss_ib": 0.0011794641613960266, + "step": 2171 + }, + { + "ce_ib": 3.8802969455718994, + "ce_orig": 0.8269608020782471, + "epoch": 0.6243439499604573, + "kl_loss": 0.08157356083393097, + "loss_ib": 0.0012037652777507901, + "step": 2171 + }, + { + "ce_ib": 4.941407203674316, + "ce_orig": 1.0607072114944458, + "epoch": 0.6243439499604573, + "kl_loss": 0.04914043843746185, + "loss_ib": 0.0009855449898168445, + "step": 2171 + }, + { + "ce_ib": 4.275618553161621, + "ce_orig": 0.8395652174949646, + "epoch": 0.6243439499604573, + "kl_loss": 0.06845224648714066, + "loss_ib": 0.0011120842536911368, + "step": 2171 + }, + { + "ce_ib": 5.11061429977417, + "ce_orig": 0.7817720174789429, + "epoch": 0.6246315335394349, + "kl_loss": 0.11886554211378098, + "loss_ib": 0.0016997166676446795, + "step": 2172 + }, + { + "ce_ib": 4.633273601531982, + "ce_orig": 0.5988442301750183, + "epoch": 0.6246315335394349, + "kl_loss": 0.07619314640760422, + "loss_ib": 0.0012252588057890534, + "step": 2172 + }, + { + "ce_ib": 3.6233630180358887, + "ce_orig": 0.8105876445770264, + "epoch": 0.6246315335394349, + "kl_loss": 0.07065343111753464, + "loss_ib": 0.0010688705369830132, + "step": 2172 + }, + { + "ce_ib": 3.944640874862671, + "ce_orig": 0.5795543193817139, + "epoch": 0.6246315335394349, + "kl_loss": 0.04521939903497696, + "loss_ib": 0.0008466580184176564, + "step": 2172 + }, + { + "ce_ib": 4.917751312255859, + "ce_orig": 0.7871399521827698, + "epoch": 0.6249191171184125, + "kl_loss": 0.12764036655426025, + "loss_ib": 0.0017681787721812725, + "step": 2173 + }, + { + "ce_ib": 4.455690383911133, + "ce_orig": 0.8599062561988831, + "epoch": 0.6249191171184125, + "kl_loss": 0.06346234679222107, + "loss_ib": 0.0010801925091072917, + "step": 2173 + }, + { + "ce_ib": 4.459980010986328, + "ce_orig": 1.0315552949905396, + "epoch": 0.6249191171184125, + "kl_loss": 0.07744257897138596, + "loss_ib": 0.0012204237282276154, + "step": 2173 + }, + { + "ce_ib": 4.279569149017334, + "ce_orig": 0.9835892915725708, + "epoch": 0.6249191171184125, + "kl_loss": 0.05376824736595154, + "loss_ib": 0.0009656393085606396, + "step": 2173 + }, + { + "ce_ib": 5.968739986419678, + "ce_orig": 1.1140315532684326, + "epoch": 0.6252067006973901, + "kl_loss": 0.04961919039487839, + "loss_ib": 0.001093065831810236, + "step": 2174 + }, + { + "ce_ib": 3.9045941829681396, + "ce_orig": 0.7652754783630371, + "epoch": 0.6252067006973901, + "kl_loss": 0.07161853462457657, + "loss_ib": 0.0011066447477787733, + "step": 2174 + }, + { + "ce_ib": 4.143120765686035, + "ce_orig": 0.47693297266960144, + "epoch": 0.6252067006973901, + "kl_loss": 0.09719762206077576, + "loss_ib": 0.0013862882042303681, + "step": 2174 + }, + { + "ce_ib": 7.124604225158691, + "ce_orig": 1.6143684387207031, + "epoch": 0.6252067006973901, + "kl_loss": 0.11310448497533798, + "loss_ib": 0.0018435051897540689, + "step": 2174 + }, + { + "epoch": 0.6254942842763678, + "grad_norm": 0.11055126041173935, + "learning_rate": 4.596817304058905e-05, + "loss": 0.8887, + "step": 2175 + }, + { + "ce_ib": 3.921645164489746, + "ce_orig": 0.6068258881568909, + "epoch": 0.6254942842763678, + "kl_loss": 0.07804898172616959, + "loss_ib": 0.0011726543307304382, + "step": 2175 + }, + { + "ce_ib": 4.793003082275391, + "ce_orig": 0.9391719102859497, + "epoch": 0.6254942842763678, + "kl_loss": 0.0925200879573822, + "loss_ib": 0.0014045011484995484, + "step": 2175 + }, + { + "ce_ib": 2.9856832027435303, + "ce_orig": 0.3660471737384796, + "epoch": 0.6254942842763678, + "kl_loss": 0.0689852386713028, + "loss_ib": 0.0009884206810966134, + "step": 2175 + }, + { + "ce_ib": 3.577877998352051, + "ce_orig": 0.6999541521072388, + "epoch": 0.6254942842763678, + "kl_loss": 0.07972133159637451, + "loss_ib": 0.00115500099491328, + "step": 2175 + }, + { + "ce_ib": 3.3260438442230225, + "ce_orig": 0.8272963762283325, + "epoch": 0.6257818678553455, + "kl_loss": 0.06475529074668884, + "loss_ib": 0.000980157288722694, + "step": 2176 + }, + { + "ce_ib": 2.6071712970733643, + "ce_orig": 0.519180953502655, + "epoch": 0.6257818678553455, + "kl_loss": 0.08254865556955338, + "loss_ib": 0.0010862036142498255, + "step": 2176 + }, + { + "ce_ib": 6.399956226348877, + "ce_orig": 1.0289915800094604, + "epoch": 0.6257818678553455, + "kl_loss": 0.0799022689461708, + "loss_ib": 0.0014390181750059128, + "step": 2176 + }, + { + "ce_ib": 3.723053455352783, + "ce_orig": 0.7740863561630249, + "epoch": 0.6257818678553455, + "kl_loss": 0.08253106474876404, + "loss_ib": 0.0011976159876212478, + "step": 2176 + }, + { + "ce_ib": 4.011207103729248, + "ce_orig": 1.0049313306808472, + "epoch": 0.6260694514343231, + "kl_loss": 0.04100434482097626, + "loss_ib": 0.0008111641509458423, + "step": 2177 + }, + { + "ce_ib": 6.587381839752197, + "ce_orig": 1.5271114110946655, + "epoch": 0.6260694514343231, + "kl_loss": 0.11604306101799011, + "loss_ib": 0.00181916868314147, + "step": 2177 + }, + { + "ce_ib": 3.3944311141967773, + "ce_orig": 0.6468304991722107, + "epoch": 0.6260694514343231, + "kl_loss": 0.04048474505543709, + "loss_ib": 0.0007442904752679169, + "step": 2177 + }, + { + "ce_ib": 3.9534380435943604, + "ce_orig": 0.6061633825302124, + "epoch": 0.6260694514343231, + "kl_loss": 0.07244448363780975, + "loss_ib": 0.0011197886196896434, + "step": 2177 + }, + { + "ce_ib": 5.054883003234863, + "ce_orig": 0.8014604449272156, + "epoch": 0.6263570350133008, + "kl_loss": 0.09476368129253387, + "loss_ib": 0.001453125150874257, + "step": 2178 + }, + { + "ce_ib": 5.389901638031006, + "ce_orig": 1.022364616394043, + "epoch": 0.6263570350133008, + "kl_loss": 0.11710761487483978, + "loss_ib": 0.0017100663390010595, + "step": 2178 + }, + { + "ce_ib": 4.174549102783203, + "ce_orig": 0.6153713464736938, + "epoch": 0.6263570350133008, + "kl_loss": 0.09128081053495407, + "loss_ib": 0.0013302629813551903, + "step": 2178 + }, + { + "ce_ib": 2.9860661029815674, + "ce_orig": 0.4559691846370697, + "epoch": 0.6263570350133008, + "kl_loss": 0.07407945394515991, + "loss_ib": 0.0010394011624157429, + "step": 2178 + }, + { + "ce_ib": 4.475777626037598, + "ce_orig": 0.577476441860199, + "epoch": 0.6266446185922784, + "kl_loss": 0.07389193028211594, + "loss_ib": 0.0011864970438182354, + "step": 2179 + }, + { + "ce_ib": 4.852311611175537, + "ce_orig": 0.7872619032859802, + "epoch": 0.6266446185922784, + "kl_loss": 0.07747691869735718, + "loss_ib": 0.0012600002810359001, + "step": 2179 + }, + { + "ce_ib": 4.793821334838867, + "ce_orig": 0.5859935879707336, + "epoch": 0.6266446185922784, + "kl_loss": 0.06979022920131683, + "loss_ib": 0.001177284400910139, + "step": 2179 + }, + { + "ce_ib": 7.346603870391846, + "ce_orig": 1.375746726989746, + "epoch": 0.6266446185922784, + "kl_loss": 0.094086654484272, + "loss_ib": 0.0016755269607529044, + "step": 2179 + }, + { + "epoch": 0.626932202171256, + "grad_norm": 0.10044164955615997, + "learning_rate": 4.594701684504352e-05, + "loss": 0.8405, + "step": 2180 + }, + { + "ce_ib": 4.528838634490967, + "ce_orig": 0.8989998698234558, + "epoch": 0.626932202171256, + "kl_loss": 0.11561260372400284, + "loss_ib": 0.001609009806998074, + "step": 2180 + }, + { + "ce_ib": 3.968939781188965, + "ce_orig": 0.6812228560447693, + "epoch": 0.626932202171256, + "kl_loss": 0.0814574658870697, + "loss_ib": 0.0012114685960114002, + "step": 2180 + }, + { + "ce_ib": 4.81767463684082, + "ce_orig": 0.6193702220916748, + "epoch": 0.626932202171256, + "kl_loss": 0.0844789445400238, + "loss_ib": 0.0013265568995848298, + "step": 2180 + }, + { + "ce_ib": 5.096348285675049, + "ce_orig": 0.23683135211467743, + "epoch": 0.626932202171256, + "kl_loss": 0.1706298589706421, + "loss_ib": 0.0022159332875162363, + "step": 2180 + }, + { + "ce_ib": 5.097021102905273, + "ce_orig": 0.9980034828186035, + "epoch": 0.6272197857502336, + "kl_loss": 0.08766165375709534, + "loss_ib": 0.001386318588629365, + "step": 2181 + }, + { + "ce_ib": 3.7050561904907227, + "ce_orig": 0.7302767634391785, + "epoch": 0.6272197857502336, + "kl_loss": 0.06826108694076538, + "loss_ib": 0.0010531164007261395, + "step": 2181 + }, + { + "ce_ib": 4.648011207580566, + "ce_orig": 1.0168439149856567, + "epoch": 0.6272197857502336, + "kl_loss": 0.0704076811671257, + "loss_ib": 0.0011688779341056943, + "step": 2181 + }, + { + "ce_ib": 4.480157375335693, + "ce_orig": 0.9158455729484558, + "epoch": 0.6272197857502336, + "kl_loss": 0.045630425214767456, + "loss_ib": 0.0009043199825100601, + "step": 2181 + }, + { + "ce_ib": 2.864414691925049, + "ce_orig": 0.4127558171749115, + "epoch": 0.6275073693292113, + "kl_loss": 0.05606982111930847, + "loss_ib": 0.0008471396286040545, + "step": 2182 + }, + { + "ce_ib": 3.925870180130005, + "ce_orig": 0.8509836196899414, + "epoch": 0.6275073693292113, + "kl_loss": 0.07882564514875412, + "loss_ib": 0.0011808433337137103, + "step": 2182 + }, + { + "ce_ib": 3.9060072898864746, + "ce_orig": 0.6305397748947144, + "epoch": 0.6275073693292113, + "kl_loss": 0.12542277574539185, + "loss_ib": 0.0016448284732177854, + "step": 2182 + }, + { + "ce_ib": 4.521088600158691, + "ce_orig": 1.0980554819107056, + "epoch": 0.6275073693292113, + "kl_loss": 0.09182662516832352, + "loss_ib": 0.0013703751610592008, + "step": 2182 + }, + { + "ce_ib": 3.2773139476776123, + "ce_orig": 0.46494677662849426, + "epoch": 0.627794952908189, + "kl_loss": 0.08026741445064545, + "loss_ib": 0.0011304054642096162, + "step": 2183 + }, + { + "ce_ib": 3.0205323696136475, + "ce_orig": 0.5567916631698608, + "epoch": 0.627794952908189, + "kl_loss": 0.07900263369083405, + "loss_ib": 0.0010920795612037182, + "step": 2183 + }, + { + "ce_ib": 5.03877067565918, + "ce_orig": 0.9755697846412659, + "epoch": 0.627794952908189, + "kl_loss": 0.10454459488391876, + "loss_ib": 0.0015493229730054736, + "step": 2183 + }, + { + "ce_ib": 4.479219436645508, + "ce_orig": 0.9992311000823975, + "epoch": 0.627794952908189, + "kl_loss": 0.0719410628080368, + "loss_ib": 0.0011673325207084417, + "step": 2183 + }, + { + "ce_ib": 3.956545829772949, + "ce_orig": 0.44521740078926086, + "epoch": 0.6280825364871666, + "kl_loss": 0.12030567973852158, + "loss_ib": 0.001598711241967976, + "step": 2184 + }, + { + "ce_ib": 6.638095855712891, + "ce_orig": 1.1588108539581299, + "epoch": 0.6280825364871666, + "kl_loss": 0.05944342538714409, + "loss_ib": 0.0012582436902448535, + "step": 2184 + }, + { + "ce_ib": 4.629542827606201, + "ce_orig": 0.9723213315010071, + "epoch": 0.6280825364871666, + "kl_loss": 0.08204073458909988, + "loss_ib": 0.0012833615764975548, + "step": 2184 + }, + { + "ce_ib": 2.5331785678863525, + "ce_orig": 0.2634347379207611, + "epoch": 0.6280825364871666, + "kl_loss": 0.08663541078567505, + "loss_ib": 0.0011196719715371728, + "step": 2184 + }, + { + "epoch": 0.6283701200661442, + "grad_norm": 0.09597148001194, + "learning_rate": 4.5925810183272506e-05, + "loss": 0.7991, + "step": 2185 + }, + { + "ce_ib": 2.7087647914886475, + "ce_orig": 0.6606652736663818, + "epoch": 0.6283701200661442, + "kl_loss": 0.058086905628442764, + "loss_ib": 0.0008517454843968153, + "step": 2185 + }, + { + "ce_ib": 5.040375232696533, + "ce_orig": 0.7336907386779785, + "epoch": 0.6283701200661442, + "kl_loss": 0.10371921956539154, + "loss_ib": 0.001541229779832065, + "step": 2185 + }, + { + "ce_ib": 5.12093448638916, + "ce_orig": 0.8212912082672119, + "epoch": 0.6283701200661442, + "kl_loss": 0.11741408705711365, + "loss_ib": 0.0016862342599779367, + "step": 2185 + }, + { + "ce_ib": 3.858508586883545, + "ce_orig": 0.5607463717460632, + "epoch": 0.6283701200661442, + "kl_loss": 0.1323002278804779, + "loss_ib": 0.0017088530585169792, + "step": 2185 + }, + { + "ce_ib": 3.0011181831359863, + "ce_orig": 0.7913538813591003, + "epoch": 0.6286577036451219, + "kl_loss": 0.07172340154647827, + "loss_ib": 0.0010173458140343428, + "step": 2186 + }, + { + "ce_ib": 6.2632927894592285, + "ce_orig": 1.4254472255706787, + "epoch": 0.6286577036451219, + "kl_loss": 0.07040427625179291, + "loss_ib": 0.0013303720625117421, + "step": 2186 + }, + { + "ce_ib": 3.0733065605163574, + "ce_orig": 0.23755109310150146, + "epoch": 0.6286577036451219, + "kl_loss": 0.2772117257118225, + "loss_ib": 0.0030794478952884674, + "step": 2186 + }, + { + "ce_ib": 4.291209697723389, + "ce_orig": 0.8969475626945496, + "epoch": 0.6286577036451219, + "kl_loss": 0.07809294015169144, + "loss_ib": 0.0012100503081455827, + "step": 2186 + }, + { + "ce_ib": 3.7420248985290527, + "ce_orig": 0.8019711375236511, + "epoch": 0.6289452872240995, + "kl_loss": 0.09311630576848984, + "loss_ib": 0.0013053655857220292, + "step": 2187 + }, + { + "ce_ib": 3.330974578857422, + "ce_orig": 0.5768353939056396, + "epoch": 0.6289452872240995, + "kl_loss": 0.06751730293035507, + "loss_ib": 0.0010082705412060022, + "step": 2187 + }, + { + "ce_ib": 4.432240009307861, + "ce_orig": 0.9077537655830383, + "epoch": 0.6289452872240995, + "kl_loss": 0.0585620142519474, + "loss_ib": 0.0010288440389558673, + "step": 2187 + }, + { + "ce_ib": 6.604727268218994, + "ce_orig": 1.1661957502365112, + "epoch": 0.6289452872240995, + "kl_loss": 0.13249501585960388, + "loss_ib": 0.001985423034057021, + "step": 2187 + }, + { + "ce_ib": 4.690701961517334, + "ce_orig": 0.9439345002174377, + "epoch": 0.6292328708030771, + "kl_loss": 0.09544114768505096, + "loss_ib": 0.0014234816189855337, + "step": 2188 + }, + { + "ce_ib": 4.104753017425537, + "ce_orig": 0.7216899991035461, + "epoch": 0.6292328708030771, + "kl_loss": 0.08665056526660919, + "loss_ib": 0.0012769808527082205, + "step": 2188 + }, + { + "ce_ib": 4.205255031585693, + "ce_orig": 1.0310866832733154, + "epoch": 0.6292328708030771, + "kl_loss": 0.07312801480293274, + "loss_ib": 0.0011518056271597743, + "step": 2188 + }, + { + "ce_ib": 3.9064111709594727, + "ce_orig": 0.8021534085273743, + "epoch": 0.6292328708030771, + "kl_loss": 0.044542666524648666, + "loss_ib": 0.0008360677747987211, + "step": 2188 + }, + { + "ce_ib": 5.749292850494385, + "ce_orig": 1.1124426126480103, + "epoch": 0.6295204543820548, + "kl_loss": 0.07918888330459595, + "loss_ib": 0.0013668180909007788, + "step": 2189 + }, + { + "ce_ib": 5.023887634277344, + "ce_orig": 1.1617141962051392, + "epoch": 0.6295204543820548, + "kl_loss": 0.0827932134270668, + "loss_ib": 0.0013303208397701383, + "step": 2189 + }, + { + "ce_ib": 7.073423862457275, + "ce_orig": 1.4421056509017944, + "epoch": 0.6295204543820548, + "kl_loss": 0.086072638630867, + "loss_ib": 0.0015680688666179776, + "step": 2189 + }, + { + "ce_ib": 3.863220453262329, + "ce_orig": 0.6743496060371399, + "epoch": 0.6295204543820548, + "kl_loss": 0.06481044739484787, + "loss_ib": 0.0010344265028834343, + "step": 2189 + }, + { + "epoch": 0.6298080379610325, + "grad_norm": 0.09892277419567108, + "learning_rate": 4.5904553106367774e-05, + "loss": 0.8593, + "step": 2190 + }, + { + "ce_ib": 4.855657577514648, + "ce_orig": 1.0929301977157593, + "epoch": 0.6298080379610325, + "kl_loss": 0.17205342650413513, + "loss_ib": 0.0022060999181121588, + "step": 2190 + }, + { + "ce_ib": 4.7645649909973145, + "ce_orig": 0.8957669734954834, + "epoch": 0.6298080379610325, + "kl_loss": 0.0855335146188736, + "loss_ib": 0.001331791514530778, + "step": 2190 + }, + { + "ce_ib": 4.649975776672363, + "ce_orig": 0.951286256313324, + "epoch": 0.6298080379610325, + "kl_loss": 0.06602868437767029, + "loss_ib": 0.0011252843542024493, + "step": 2190 + }, + { + "ce_ib": 2.6478052139282227, + "ce_orig": 0.5048783421516418, + "epoch": 0.6298080379610325, + "kl_loss": 0.055862195789813995, + "loss_ib": 0.0008234024862758815, + "step": 2190 + }, + { + "ce_ib": 5.055224418640137, + "ce_orig": 1.2310832738876343, + "epoch": 0.6300956215400101, + "kl_loss": 0.0870046615600586, + "loss_ib": 0.0013755690306425095, + "step": 2191 + }, + { + "ce_ib": 6.393496513366699, + "ce_orig": 1.5740268230438232, + "epoch": 0.6300956215400101, + "kl_loss": 0.07612772285938263, + "loss_ib": 0.001400626846589148, + "step": 2191 + }, + { + "ce_ib": 2.4489150047302246, + "ce_orig": 0.45459723472595215, + "epoch": 0.6300956215400101, + "kl_loss": 0.07072404026985168, + "loss_ib": 0.000952131871599704, + "step": 2191 + }, + { + "ce_ib": 3.23158860206604, + "ce_orig": 0.7013914585113525, + "epoch": 0.6300956215400101, + "kl_loss": 0.07936008274555206, + "loss_ib": 0.0011167596094310284, + "step": 2191 + }, + { + "ce_ib": 4.490612030029297, + "ce_orig": 0.9541355967521667, + "epoch": 0.6303832051189877, + "kl_loss": 0.061400506645441055, + "loss_ib": 0.0010630663018673658, + "step": 2192 + }, + { + "ce_ib": 7.082334995269775, + "ce_orig": 1.2886918783187866, + "epoch": 0.6303832051189877, + "kl_loss": 0.05073480308055878, + "loss_ib": 0.0012155815493315458, + "step": 2192 + }, + { + "ce_ib": 4.025549411773682, + "ce_orig": 0.7045360207557678, + "epoch": 0.6303832051189877, + "kl_loss": 0.08550432324409485, + "loss_ib": 0.0012575980508700013, + "step": 2192 + }, + { + "ce_ib": 2.7941102981567383, + "ce_orig": 0.5039654970169067, + "epoch": 0.6303832051189877, + "kl_loss": 0.049417901784181595, + "loss_ib": 0.0007735900580883026, + "step": 2192 + }, + { + "ce_ib": 6.205007553100586, + "ce_orig": 1.1924623250961304, + "epoch": 0.6306707886979653, + "kl_loss": 0.09327878057956696, + "loss_ib": 0.0015532885445281863, + "step": 2193 + }, + { + "ce_ib": 4.732553005218506, + "ce_orig": 1.0089296102523804, + "epoch": 0.6306707886979653, + "kl_loss": 0.05288401246070862, + "loss_ib": 0.0010020954068750143, + "step": 2193 + }, + { + "ce_ib": 5.643589019775391, + "ce_orig": 1.3004121780395508, + "epoch": 0.6306707886979653, + "kl_loss": 0.08605747669935226, + "loss_ib": 0.001424933667294681, + "step": 2193 + }, + { + "ce_ib": 3.3192856311798096, + "ce_orig": 0.5082627534866333, + "epoch": 0.6306707886979653, + "kl_loss": 0.062465377151966095, + "loss_ib": 0.000956582254730165, + "step": 2193 + }, + { + "ce_ib": 4.064671516418457, + "ce_orig": 0.5789667963981628, + "epoch": 0.630958372276943, + "kl_loss": 0.13500751554965973, + "loss_ib": 0.0017565422458574176, + "step": 2194 + }, + { + "ce_ib": 1.883681058883667, + "ce_orig": 0.3100520968437195, + "epoch": 0.630958372276943, + "kl_loss": 0.04140905290842056, + "loss_ib": 0.0006024586036801338, + "step": 2194 + }, + { + "ce_ib": 3.681366443634033, + "ce_orig": 0.5101064443588257, + "epoch": 0.630958372276943, + "kl_loss": 0.0782397985458374, + "loss_ib": 0.0011505346046760678, + "step": 2194 + }, + { + "ce_ib": 5.029409885406494, + "ce_orig": 0.5936457514762878, + "epoch": 0.630958372276943, + "kl_loss": 0.09125344455242157, + "loss_ib": 0.00141547538805753, + "step": 2194 + }, + { + "epoch": 0.6312459558559206, + "grad_norm": 0.09312457591295242, + "learning_rate": 4.5883245665542556e-05, + "loss": 0.8512, + "step": 2195 + }, + { + "ce_ib": 5.941760540008545, + "ce_orig": 1.0202019214630127, + "epoch": 0.6312459558559206, + "kl_loss": 0.07122132182121277, + "loss_ib": 0.0013063892256468534, + "step": 2195 + }, + { + "ce_ib": 6.0774827003479, + "ce_orig": 1.2122687101364136, + "epoch": 0.6312459558559206, + "kl_loss": 0.08263225853443146, + "loss_ib": 0.0014340707566589117, + "step": 2195 + }, + { + "ce_ib": 4.255052089691162, + "ce_orig": 0.7629231810569763, + "epoch": 0.6312459558559206, + "kl_loss": 0.08857700228691101, + "loss_ib": 0.00131127517670393, + "step": 2195 + }, + { + "ce_ib": 4.358501434326172, + "ce_orig": 0.8557421565055847, + "epoch": 0.6312459558559206, + "kl_loss": 0.06788581609725952, + "loss_ib": 0.001114708255045116, + "step": 2195 + }, + { + "ce_ib": 3.30340838432312, + "ce_orig": 0.741714596748352, + "epoch": 0.6315335394348983, + "kl_loss": 0.03941718488931656, + "loss_ib": 0.000724512618035078, + "step": 2196 + }, + { + "ce_ib": 4.324622631072998, + "ce_orig": 0.6917997598648071, + "epoch": 0.6315335394348983, + "kl_loss": 0.06995829194784164, + "loss_ib": 0.0011320451740175486, + "step": 2196 + }, + { + "ce_ib": 3.8473739624023438, + "ce_orig": 0.7618699669837952, + "epoch": 0.6315335394348983, + "kl_loss": 0.07204753905534744, + "loss_ib": 0.0011052127229049802, + "step": 2196 + }, + { + "ce_ib": 5.115306377410889, + "ce_orig": 0.8108137249946594, + "epoch": 0.6315335394348983, + "kl_loss": 0.09223130345344543, + "loss_ib": 0.0014338436303660274, + "step": 2196 + }, + { + "ce_ib": 2.532693386077881, + "ce_orig": 0.3039875328540802, + "epoch": 0.6318211230138759, + "kl_loss": 0.03867659717798233, + "loss_ib": 0.0006400352576747537, + "step": 2197 + }, + { + "ce_ib": 5.969080448150635, + "ce_orig": 1.0221152305603027, + "epoch": 0.6318211230138759, + "kl_loss": 0.09246671199798584, + "loss_ib": 0.00152157514821738, + "step": 2197 + }, + { + "ce_ib": 4.256906509399414, + "ce_orig": 1.000579595565796, + "epoch": 0.6318211230138759, + "kl_loss": 0.06396994739770889, + "loss_ib": 0.0010653900681063533, + "step": 2197 + }, + { + "ce_ib": 4.262307643890381, + "ce_orig": 0.5290756821632385, + "epoch": 0.6318211230138759, + "kl_loss": 0.08238643407821655, + "loss_ib": 0.0012500950833782554, + "step": 2197 + }, + { + "ce_ib": 3.546448230743408, + "ce_orig": 0.520527720451355, + "epoch": 0.6321087065928536, + "kl_loss": 0.07504843175411224, + "loss_ib": 0.0011051291367039084, + "step": 2198 + }, + { + "ce_ib": 2.675328016281128, + "ce_orig": 0.43565449118614197, + "epoch": 0.6321087065928536, + "kl_loss": 0.13643388450145721, + "loss_ib": 0.0016318715643137693, + "step": 2198 + }, + { + "ce_ib": 1.6272244453430176, + "ce_orig": 0.17264170944690704, + "epoch": 0.6321087065928536, + "kl_loss": 0.15976591408252716, + "loss_ib": 0.00176038162317127, + "step": 2198 + }, + { + "ce_ib": 6.216552734375, + "ce_orig": 0.8286522030830383, + "epoch": 0.6321087065928536, + "kl_loss": 0.08996470272541046, + "loss_ib": 0.0015213022707030177, + "step": 2198 + }, + { + "ce_ib": 5.365137100219727, + "ce_orig": 1.1819941997528076, + "epoch": 0.6323962901718312, + "kl_loss": 0.08350858092308044, + "loss_ib": 0.0013715993845835328, + "step": 2199 + }, + { + "ce_ib": 6.609500885009766, + "ce_orig": 1.1536753177642822, + "epoch": 0.6323962901718312, + "kl_loss": 0.06872300058603287, + "loss_ib": 0.0013481801142916083, + "step": 2199 + }, + { + "ce_ib": 3.4604902267456055, + "ce_orig": 0.5205264091491699, + "epoch": 0.6323962901718312, + "kl_loss": 0.0998750701546669, + "loss_ib": 0.0013447996461763978, + "step": 2199 + }, + { + "ce_ib": 4.72666072845459, + "ce_orig": 1.1134867668151855, + "epoch": 0.6323962901718312, + "kl_loss": 0.05506756156682968, + "loss_ib": 0.0010233416687697172, + "step": 2199 + }, + { + "epoch": 0.6326838737508088, + "grad_norm": 0.09052322059869766, + "learning_rate": 4.586188791213143e-05, + "loss": 0.8144, + "step": 2200 + }, + { + "ce_ib": 3.0264885425567627, + "ce_orig": 0.7760957479476929, + "epoch": 0.6326838737508088, + "kl_loss": 0.06454719603061676, + "loss_ib": 0.000948120781686157, + "step": 2200 + }, + { + "ce_ib": 6.102992057800293, + "ce_orig": 1.11785888671875, + "epoch": 0.6326838737508088, + "kl_loss": 0.06367290019989014, + "loss_ib": 0.0012470281217247248, + "step": 2200 + }, + { + "ce_ib": 7.942079544067383, + "ce_orig": 1.7284612655639648, + "epoch": 0.6326838737508088, + "kl_loss": 0.070191890001297, + "loss_ib": 0.001496126758866012, + "step": 2200 + }, + { + "ce_ib": 2.099186658859253, + "ce_orig": 0.3759691119194031, + "epoch": 0.6326838737508088, + "kl_loss": 0.03324112668633461, + "loss_ib": 0.0005423299153335392, + "step": 2200 + }, + { + "ce_ib": 2.8422930240631104, + "ce_orig": 0.522254228591919, + "epoch": 0.6329714573297864, + "kl_loss": 0.035426005721092224, + "loss_ib": 0.0006384893204085529, + "step": 2201 + }, + { + "ce_ib": 4.349799633026123, + "ce_orig": 0.8152020573616028, + "epoch": 0.6329714573297864, + "kl_loss": 0.09699738770723343, + "loss_ib": 0.0014049538876861334, + "step": 2201 + }, + { + "ce_ib": 6.086206912994385, + "ce_orig": 1.036637783050537, + "epoch": 0.6329714573297864, + "kl_loss": 0.05676624923944473, + "loss_ib": 0.0011762832291424274, + "step": 2201 + }, + { + "ce_ib": 8.100164413452148, + "ce_orig": 1.636580228805542, + "epoch": 0.6329714573297864, + "kl_loss": 0.0866941437125206, + "loss_ib": 0.0016769578214734793, + "step": 2201 + }, + { + "ce_ib": 2.002901315689087, + "ce_orig": 0.2726563811302185, + "epoch": 0.6332590409087641, + "kl_loss": 0.06399551033973694, + "loss_ib": 0.0008402451639994979, + "step": 2202 + }, + { + "ce_ib": 5.351673126220703, + "ce_orig": 1.2876359224319458, + "epoch": 0.6332590409087641, + "kl_loss": 0.08179377764463425, + "loss_ib": 0.001353104948066175, + "step": 2202 + }, + { + "ce_ib": 5.4435906410217285, + "ce_orig": 1.108215570449829, + "epoch": 0.6332590409087641, + "kl_loss": 0.03455186262726784, + "loss_ib": 0.0008898776723071933, + "step": 2202 + }, + { + "ce_ib": 3.0663082599639893, + "ce_orig": 0.5513838529586792, + "epoch": 0.6332590409087641, + "kl_loss": 0.05576777458190918, + "loss_ib": 0.0008643085602670908, + "step": 2202 + }, + { + "ce_ib": 3.741661310195923, + "ce_orig": 0.897255539894104, + "epoch": 0.6335466244877418, + "kl_loss": 0.05316409096121788, + "loss_ib": 0.0009058070136234164, + "step": 2203 + }, + { + "ce_ib": 5.000251293182373, + "ce_orig": 0.8223066329956055, + "epoch": 0.6335466244877418, + "kl_loss": 0.1491294801235199, + "loss_ib": 0.0019913199357688427, + "step": 2203 + }, + { + "ce_ib": 3.985142230987549, + "ce_orig": 0.5736758708953857, + "epoch": 0.6335466244877418, + "kl_loss": 0.07821709662675858, + "loss_ib": 0.0011806851252913475, + "step": 2203 + }, + { + "ce_ib": 2.8208720684051514, + "ce_orig": 0.6204871535301208, + "epoch": 0.6335466244877418, + "kl_loss": 0.057375308126211166, + "loss_ib": 0.0008558402769267559, + "step": 2203 + }, + { + "ce_ib": 4.7277326583862305, + "ce_orig": 0.983251690864563, + "epoch": 0.6338342080667194, + "kl_loss": 0.045807015150785446, + "loss_ib": 0.0009308433509431779, + "step": 2204 + }, + { + "ce_ib": 3.178363561630249, + "ce_orig": 0.45104366540908813, + "epoch": 0.6338342080667194, + "kl_loss": 0.045399509370326996, + "loss_ib": 0.000771831430029124, + "step": 2204 + }, + { + "ce_ib": 4.510796070098877, + "ce_orig": 0.8687307238578796, + "epoch": 0.6338342080667194, + "kl_loss": 0.10794487595558167, + "loss_ib": 0.0015305283013731241, + "step": 2204 + }, + { + "ce_ib": 2.8072726726531982, + "ce_orig": 0.661266565322876, + "epoch": 0.6338342080667194, + "kl_loss": 0.04488531872630119, + "loss_ib": 0.0007295804098248482, + "step": 2204 + }, + { + "epoch": 0.634121791645697, + "grad_norm": 0.10754065215587616, + "learning_rate": 4.5840479897590174e-05, + "loss": 0.8233, + "step": 2205 + }, + { + "ce_ib": 4.281156539916992, + "ce_orig": 1.1132689714431763, + "epoch": 0.634121791645697, + "kl_loss": 0.03344622999429703, + "loss_ib": 0.0007625779253430665, + "step": 2205 + }, + { + "ce_ib": 3.484602689743042, + "ce_orig": 0.2644318640232086, + "epoch": 0.634121791645697, + "kl_loss": 0.08056162297725677, + "loss_ib": 0.0011540764244273305, + "step": 2205 + }, + { + "ce_ib": 6.032160758972168, + "ce_orig": 1.2110521793365479, + "epoch": 0.634121791645697, + "kl_loss": 0.1012934148311615, + "loss_ib": 0.001616150140762329, + "step": 2205 + }, + { + "ce_ib": 4.115828990936279, + "ce_orig": 0.6147916913032532, + "epoch": 0.634121791645697, + "kl_loss": 0.11557337641716003, + "loss_ib": 0.001567316590808332, + "step": 2205 + }, + { + "ce_ib": 4.997530460357666, + "ce_orig": 0.9089854955673218, + "epoch": 0.6344093752246747, + "kl_loss": 0.07427649945020676, + "loss_ib": 0.0012425179593265057, + "step": 2206 + }, + { + "ce_ib": 3.555556297302246, + "ce_orig": 0.8975948095321655, + "epoch": 0.6344093752246747, + "kl_loss": 0.055444322526454926, + "loss_ib": 0.0009099988383240998, + "step": 2206 + }, + { + "ce_ib": 2.838547468185425, + "ce_orig": 0.4438817501068115, + "epoch": 0.6344093752246747, + "kl_loss": 0.05088882893323898, + "loss_ib": 0.0007927429978735745, + "step": 2206 + }, + { + "ce_ib": 2.5359697341918945, + "ce_orig": 0.3497656285762787, + "epoch": 0.6344093752246747, + "kl_loss": 0.1673005074262619, + "loss_ib": 0.0019266020972281694, + "step": 2206 + }, + { + "ce_ib": 2.816683053970337, + "ce_orig": 0.5306253433227539, + "epoch": 0.6346969588036523, + "kl_loss": 0.10164858400821686, + "loss_ib": 0.001298154122196138, + "step": 2207 + }, + { + "ce_ib": 3.6599175930023193, + "ce_orig": 0.41638028621673584, + "epoch": 0.6346969588036523, + "kl_loss": 0.07525961101055145, + "loss_ib": 0.0011185877956449986, + "step": 2207 + }, + { + "ce_ib": 5.292668342590332, + "ce_orig": 0.8602123260498047, + "epoch": 0.6346969588036523, + "kl_loss": 0.0790577232837677, + "loss_ib": 0.0013198440428823233, + "step": 2207 + }, + { + "ce_ib": 4.317286014556885, + "ce_orig": 0.7294511198997498, + "epoch": 0.6346969588036523, + "kl_loss": 0.05028877034783363, + "loss_ib": 0.000934616313315928, + "step": 2207 + }, + { + "ce_ib": 5.614342212677002, + "ce_orig": 0.8278399705886841, + "epoch": 0.6349845423826299, + "kl_loss": 0.08747602999210358, + "loss_ib": 0.0014361944049596786, + "step": 2208 + }, + { + "ce_ib": 3.588707447052002, + "ce_orig": 0.5632727146148682, + "epoch": 0.6349845423826299, + "kl_loss": 0.030800411477684975, + "loss_ib": 0.0006668748683296144, + "step": 2208 + }, + { + "ce_ib": 4.012096881866455, + "ce_orig": 0.7664457559585571, + "epoch": 0.6349845423826299, + "kl_loss": 0.06815490871667862, + "loss_ib": 0.0010827587684616446, + "step": 2208 + }, + { + "ce_ib": 6.289650917053223, + "ce_orig": 1.288836121559143, + "epoch": 0.6349845423826299, + "kl_loss": 0.04646693542599678, + "loss_ib": 0.001093634287826717, + "step": 2208 + }, + { + "ce_ib": 3.764090061187744, + "ce_orig": 0.8961365222930908, + "epoch": 0.6352721259616076, + "kl_loss": 0.06687641143798828, + "loss_ib": 0.0010451730340719223, + "step": 2209 + }, + { + "ce_ib": 4.999477863311768, + "ce_orig": 0.8983562588691711, + "epoch": 0.6352721259616076, + "kl_loss": 0.08829592168331146, + "loss_ib": 0.0013829070376232266, + "step": 2209 + }, + { + "ce_ib": 2.1238136291503906, + "ce_orig": 0.43929967284202576, + "epoch": 0.6352721259616076, + "kl_loss": 0.0395226925611496, + "loss_ib": 0.0006076082354411483, + "step": 2209 + }, + { + "ce_ib": 4.656732082366943, + "ce_orig": 0.683840274810791, + "epoch": 0.6352721259616076, + "kl_loss": 0.1488896906375885, + "loss_ib": 0.0019545701798051596, + "step": 2209 + }, + { + "epoch": 0.6355597095405853, + "grad_norm": 0.12004818767309189, + "learning_rate": 4.581902167349566e-05, + "loss": 0.8437, + "step": 2210 + }, + { + "ce_ib": 4.953585147857666, + "ce_orig": 0.6557213664054871, + "epoch": 0.6355597095405853, + "kl_loss": 0.0857425183057785, + "loss_ib": 0.0013527837581932545, + "step": 2210 + }, + { + "ce_ib": 5.108480930328369, + "ce_orig": 1.1735361814498901, + "epoch": 0.6355597095405853, + "kl_loss": 0.0714813619852066, + "loss_ib": 0.0012256617192178965, + "step": 2210 + }, + { + "ce_ib": 4.325215816497803, + "ce_orig": 0.851255476474762, + "epoch": 0.6355597095405853, + "kl_loss": 0.125925675034523, + "loss_ib": 0.0016917783068493009, + "step": 2210 + }, + { + "ce_ib": 2.562511444091797, + "ce_orig": 0.5601713061332703, + "epoch": 0.6355597095405853, + "kl_loss": 0.03705168515443802, + "loss_ib": 0.000626767985522747, + "step": 2210 + }, + { + "ce_ib": 3.022651195526123, + "ce_orig": 0.6991404294967651, + "epoch": 0.6358472931195629, + "kl_loss": 0.09272313117980957, + "loss_ib": 0.0012294964399188757, + "step": 2211 + }, + { + "ce_ib": 3.2856836318969727, + "ce_orig": 0.43252676725387573, + "epoch": 0.6358472931195629, + "kl_loss": 0.06020241975784302, + "loss_ib": 0.0009305925341323018, + "step": 2211 + }, + { + "ce_ib": 5.865083694458008, + "ce_orig": 1.1297314167022705, + "epoch": 0.6358472931195629, + "kl_loss": 0.05896099656820297, + "loss_ib": 0.0011761182686313987, + "step": 2211 + }, + { + "ce_ib": 5.900834083557129, + "ce_orig": 1.3908593654632568, + "epoch": 0.6358472931195629, + "kl_loss": 0.07564721256494522, + "loss_ib": 0.0013465554220601916, + "step": 2211 + }, + { + "ce_ib": 5.290252685546875, + "ce_orig": 1.1528258323669434, + "epoch": 0.6361348766985405, + "kl_loss": 0.06677541136741638, + "loss_ib": 0.0011967793107032776, + "step": 2212 + }, + { + "ce_ib": 3.2269599437713623, + "ce_orig": 0.7694026231765747, + "epoch": 0.6361348766985405, + "kl_loss": 0.0705614686012268, + "loss_ib": 0.0010283106239512563, + "step": 2212 + }, + { + "ce_ib": 5.287619113922119, + "ce_orig": 0.8135850429534912, + "epoch": 0.6361348766985405, + "kl_loss": 0.08706799894571304, + "loss_ib": 0.0013994419714435935, + "step": 2212 + }, + { + "ce_ib": 4.54655122756958, + "ce_orig": 0.7042271494865417, + "epoch": 0.6361348766985405, + "kl_loss": 0.08373017609119415, + "loss_ib": 0.0012919568689540029, + "step": 2212 + }, + { + "ce_ib": 5.717927932739258, + "ce_orig": 0.7851917743682861, + "epoch": 0.6364224602775181, + "kl_loss": 0.17250239849090576, + "loss_ib": 0.0022968165576457977, + "step": 2213 + }, + { + "ce_ib": 2.219917058944702, + "ce_orig": 0.21261939406394958, + "epoch": 0.6364224602775181, + "kl_loss": 0.056329548358917236, + "loss_ib": 0.0007852871785871685, + "step": 2213 + }, + { + "ce_ib": 5.945958137512207, + "ce_orig": 0.8111812472343445, + "epoch": 0.6364224602775181, + "kl_loss": 0.08558132499456406, + "loss_ib": 0.001450408948585391, + "step": 2213 + }, + { + "ce_ib": 4.802860260009766, + "ce_orig": 0.8917756080627441, + "epoch": 0.6364224602775181, + "kl_loss": 0.06436464935541153, + "loss_ib": 0.001123932539485395, + "step": 2213 + }, + { + "ce_ib": 2.458232879638672, + "ce_orig": 0.43304911255836487, + "epoch": 0.6367100438564958, + "kl_loss": 0.052341073751449585, + "loss_ib": 0.0007692339713685215, + "step": 2214 + }, + { + "ce_ib": 3.5167863368988037, + "ce_orig": 0.5963485836982727, + "epoch": 0.6367100438564958, + "kl_loss": 0.07959683984518051, + "loss_ib": 0.0011476470390334725, + "step": 2214 + }, + { + "ce_ib": 3.334390878677368, + "ce_orig": 0.8390439748764038, + "epoch": 0.6367100438564958, + "kl_loss": 0.07169817388057709, + "loss_ib": 0.0010504208039492369, + "step": 2214 + }, + { + "ce_ib": 3.502380847930908, + "ce_orig": 0.7240342497825623, + "epoch": 0.6367100438564958, + "kl_loss": 0.06379472464323044, + "loss_ib": 0.0009881852893158793, + "step": 2214 + }, + { + "epoch": 0.6369976274354734, + "grad_norm": 0.09044720977544785, + "learning_rate": 4.5797513291545744e-05, + "loss": 0.8628, + "step": 2215 + }, + { + "ce_ib": 5.466261863708496, + "ce_orig": 0.8240851759910583, + "epoch": 0.6369976274354734, + "kl_loss": 0.0697450190782547, + "loss_ib": 0.001244076294824481, + "step": 2215 + }, + { + "ce_ib": 3.6158933639526367, + "ce_orig": 0.7123931646347046, + "epoch": 0.6369976274354734, + "kl_loss": 0.0867309421300888, + "loss_ib": 0.0012288987636566162, + "step": 2215 + }, + { + "ce_ib": 5.868069171905518, + "ce_orig": 0.7105886936187744, + "epoch": 0.6369976274354734, + "kl_loss": 0.10605274140834808, + "loss_ib": 0.0016473343130201101, + "step": 2215 + }, + { + "ce_ib": 3.2240638732910156, + "ce_orig": 0.4953426420688629, + "epoch": 0.6369976274354734, + "kl_loss": 0.06575021147727966, + "loss_ib": 0.0009799085091799498, + "step": 2215 + }, + { + "ce_ib": 4.508887767791748, + "ce_orig": 0.9461336135864258, + "epoch": 0.6372852110144511, + "kl_loss": 0.056713320314884186, + "loss_ib": 0.0010180219542235136, + "step": 2216 + }, + { + "ce_ib": 5.605930805206299, + "ce_orig": 1.311377763748169, + "epoch": 0.6372852110144511, + "kl_loss": 0.10911963880062103, + "loss_ib": 0.001651789527386427, + "step": 2216 + }, + { + "ce_ib": 3.1155707836151123, + "ce_orig": 0.37900155782699585, + "epoch": 0.6372852110144511, + "kl_loss": 0.045605067163705826, + "loss_ib": 0.000767607765737921, + "step": 2216 + }, + { + "ce_ib": 3.1133041381835938, + "ce_orig": 0.6968961358070374, + "epoch": 0.6372852110144511, + "kl_loss": 0.05716117471456528, + "loss_ib": 0.0008829421130940318, + "step": 2216 + }, + { + "ce_ib": 3.77815580368042, + "ce_orig": 0.8887144327163696, + "epoch": 0.6375727945934287, + "kl_loss": 0.05708356201648712, + "loss_ib": 0.0009486511698924005, + "step": 2217 + }, + { + "ce_ib": 5.029364585876465, + "ce_orig": 0.6774278879165649, + "epoch": 0.6375727945934287, + "kl_loss": 0.2680452764034271, + "loss_ib": 0.003183389315381646, + "step": 2217 + }, + { + "ce_ib": 3.9821553230285645, + "ce_orig": 0.588654100894928, + "epoch": 0.6375727945934287, + "kl_loss": 0.10517823696136475, + "loss_ib": 0.0014499978860840201, + "step": 2217 + }, + { + "ce_ib": 3.7988438606262207, + "ce_orig": 0.3067951500415802, + "epoch": 0.6375727945934287, + "kl_loss": 0.2243574857711792, + "loss_ib": 0.002623459091410041, + "step": 2217 + }, + { + "ce_ib": 4.020514965057373, + "ce_orig": 0.9358695149421692, + "epoch": 0.6378603781724064, + "kl_loss": 0.061065539717674255, + "loss_ib": 0.001012706896290183, + "step": 2218 + }, + { + "ce_ib": 5.4049072265625, + "ce_orig": 0.9634561538696289, + "epoch": 0.6378603781724064, + "kl_loss": 0.07294603437185287, + "loss_ib": 0.0012699509970843792, + "step": 2218 + }, + { + "ce_ib": 4.281907081604004, + "ce_orig": 0.5055077075958252, + "epoch": 0.6378603781724064, + "kl_loss": 0.05845058336853981, + "loss_ib": 0.0010126965353265405, + "step": 2218 + }, + { + "ce_ib": 6.84290075302124, + "ce_orig": 1.0905770063400269, + "epoch": 0.6378603781724064, + "kl_loss": 0.06175382435321808, + "loss_ib": 0.0013018283061683178, + "step": 2218 + }, + { + "ce_ib": 4.930593967437744, + "ce_orig": 1.133259654045105, + "epoch": 0.638147961751384, + "kl_loss": 0.15387198328971863, + "loss_ib": 0.0020317791495472193, + "step": 2219 + }, + { + "ce_ib": 4.863285064697266, + "ce_orig": 0.6738739013671875, + "epoch": 0.638147961751384, + "kl_loss": 0.08276741951704025, + "loss_ib": 0.0013140026712790132, + "step": 2219 + }, + { + "ce_ib": 4.264922142028809, + "ce_orig": 0.8176272511482239, + "epoch": 0.638147961751384, + "kl_loss": 0.06782460957765579, + "loss_ib": 0.0011047382140532136, + "step": 2219 + }, + { + "ce_ib": 2.4368903636932373, + "ce_orig": 0.543185293674469, + "epoch": 0.638147961751384, + "kl_loss": 0.051190294325351715, + "loss_ib": 0.0007555919582955539, + "step": 2219 + }, + { + "epoch": 0.6384355453303616, + "grad_norm": 0.09832285344600677, + "learning_rate": 4.577595480355911e-05, + "loss": 0.8439, + "step": 2220 + }, + { + "ce_ib": 3.590710401535034, + "ce_orig": 0.7949073314666748, + "epoch": 0.6384355453303616, + "kl_loss": 0.05359743535518646, + "loss_ib": 0.0008950454066507518, + "step": 2220 + }, + { + "ce_ib": 4.668309688568115, + "ce_orig": 0.9648749232292175, + "epoch": 0.6384355453303616, + "kl_loss": 0.03965630382299423, + "loss_ib": 0.0008633940014988184, + "step": 2220 + }, + { + "ce_ib": 3.0342254638671875, + "ce_orig": 0.5672757029533386, + "epoch": 0.6384355453303616, + "kl_loss": 0.05524477735161781, + "loss_ib": 0.0008558703120797873, + "step": 2220 + }, + { + "ce_ib": 3.4529004096984863, + "ce_orig": 0.5830516219139099, + "epoch": 0.6384355453303616, + "kl_loss": 0.09725745022296906, + "loss_ib": 0.001317864516749978, + "step": 2220 + }, + { + "ce_ib": 5.814901351928711, + "ce_orig": 0.9935370087623596, + "epoch": 0.6387231289093392, + "kl_loss": 0.08104930818080902, + "loss_ib": 0.00139198312535882, + "step": 2221 + }, + { + "ce_ib": 3.850423812866211, + "ce_orig": 0.5264503955841064, + "epoch": 0.6387231289093392, + "kl_loss": 0.0741264745593071, + "loss_ib": 0.001126307062804699, + "step": 2221 + }, + { + "ce_ib": 2.242478609085083, + "ce_orig": 0.33198556303977966, + "epoch": 0.6387231289093392, + "kl_loss": 0.06540630757808685, + "loss_ib": 0.0008783109369687736, + "step": 2221 + }, + { + "ce_ib": 4.535075664520264, + "ce_orig": 0.5803776383399963, + "epoch": 0.6387231289093392, + "kl_loss": 0.09596718102693558, + "loss_ib": 0.0014131793286651373, + "step": 2221 + }, + { + "ce_ib": 4.9298577308654785, + "ce_orig": 1.030724287033081, + "epoch": 0.6390107124883169, + "kl_loss": 0.08670561015605927, + "loss_ib": 0.0013600417878478765, + "step": 2222 + }, + { + "ce_ib": 5.375624656677246, + "ce_orig": 1.020103096961975, + "epoch": 0.6390107124883169, + "kl_loss": 0.0731312483549118, + "loss_ib": 0.0012688749702647328, + "step": 2222 + }, + { + "ce_ib": 3.324521780014038, + "ce_orig": 0.5970034003257751, + "epoch": 0.6390107124883169, + "kl_loss": 0.08222803473472595, + "loss_ib": 0.001154732541181147, + "step": 2222 + }, + { + "ce_ib": 4.8109846115112305, + "ce_orig": 1.1306519508361816, + "epoch": 0.6390107124883169, + "kl_loss": 0.07524999231100082, + "loss_ib": 0.0012335983337834477, + "step": 2222 + }, + { + "ce_ib": 5.286017894744873, + "ce_orig": 1.0928597450256348, + "epoch": 0.6392982960672946, + "kl_loss": 0.07988932728767395, + "loss_ib": 0.0013274949742481112, + "step": 2223 + }, + { + "ce_ib": 4.411930084228516, + "ce_orig": 0.7644689679145813, + "epoch": 0.6392982960672946, + "kl_loss": 0.1324423849582672, + "loss_ib": 0.0017656168201938272, + "step": 2223 + }, + { + "ce_ib": 4.360787868499756, + "ce_orig": 0.8022552728652954, + "epoch": 0.6392982960672946, + "kl_loss": 0.10440322756767273, + "loss_ib": 0.0014801110373809934, + "step": 2223 + }, + { + "ce_ib": 4.960841178894043, + "ce_orig": 0.3991324305534363, + "epoch": 0.6392982960672946, + "kl_loss": 0.12275563180446625, + "loss_ib": 0.0017236403655260801, + "step": 2223 + }, + { + "ce_ib": 4.730855464935303, + "ce_orig": 0.8222056031227112, + "epoch": 0.6395858796462722, + "kl_loss": 0.05832873284816742, + "loss_ib": 0.0010563727701082826, + "step": 2224 + }, + { + "ce_ib": 4.602653980255127, + "ce_orig": 1.125233769416809, + "epoch": 0.6395858796462722, + "kl_loss": 0.11231646686792374, + "loss_ib": 0.0015834299847483635, + "step": 2224 + }, + { + "ce_ib": 3.28989577293396, + "ce_orig": 0.6597758531570435, + "epoch": 0.6395858796462722, + "kl_loss": 0.051773801445961, + "loss_ib": 0.0008467276347801089, + "step": 2224 + }, + { + "ce_ib": 5.847143650054932, + "ce_orig": 1.4217113256454468, + "epoch": 0.6395858796462722, + "kl_loss": 0.07630711793899536, + "loss_ib": 0.0013477855827659369, + "step": 2224 + }, + { + "epoch": 0.6398734632252498, + "grad_norm": 0.10354489088058472, + "learning_rate": 4.5754346261475136e-05, + "loss": 0.8537, + "step": 2225 + }, + { + "ce_ib": 5.6565752029418945, + "ce_orig": 1.0730429887771606, + "epoch": 0.6398734632252498, + "kl_loss": 0.11065739393234253, + "loss_ib": 0.0016722313594073057, + "step": 2225 + }, + { + "ce_ib": 2.97402024269104, + "ce_orig": 0.6255046129226685, + "epoch": 0.6398734632252498, + "kl_loss": 0.09544001519680023, + "loss_ib": 0.0012518020812422037, + "step": 2225 + }, + { + "ce_ib": 3.396244525909424, + "ce_orig": 0.6824309229850769, + "epoch": 0.6398734632252498, + "kl_loss": 0.07141007483005524, + "loss_ib": 0.0010537251364439726, + "step": 2225 + }, + { + "ce_ib": 5.122203350067139, + "ce_orig": 1.0480408668518066, + "epoch": 0.6398734632252498, + "kl_loss": 0.09138929843902588, + "loss_ib": 0.0014261131873354316, + "step": 2225 + }, + { + "ce_ib": 4.9580864906311035, + "ce_orig": 0.9273549914360046, + "epoch": 0.6401610468042275, + "kl_loss": 0.0594140961766243, + "loss_ib": 0.0010899496264755726, + "step": 2226 + }, + { + "ce_ib": 7.629662036895752, + "ce_orig": 1.752103567123413, + "epoch": 0.6401610468042275, + "kl_loss": 0.09209112823009491, + "loss_ib": 0.0016838773153722286, + "step": 2226 + }, + { + "ce_ib": 3.1797921657562256, + "ce_orig": 0.7229439616203308, + "epoch": 0.6401610468042275, + "kl_loss": 0.08156383037567139, + "loss_ib": 0.0011336174793541431, + "step": 2226 + }, + { + "ce_ib": 7.554971218109131, + "ce_orig": 1.6303410530090332, + "epoch": 0.6401610468042275, + "kl_loss": 0.07429289817810059, + "loss_ib": 0.001498425961472094, + "step": 2226 + }, + { + "ce_ib": 5.206170558929443, + "ce_orig": 0.8232458233833313, + "epoch": 0.6404486303832051, + "kl_loss": 0.10364764928817749, + "loss_ib": 0.0015570935793220997, + "step": 2227 + }, + { + "ce_ib": 5.712701320648193, + "ce_orig": 1.2618986368179321, + "epoch": 0.6404486303832051, + "kl_loss": 0.0681428462266922, + "loss_ib": 0.001252698595635593, + "step": 2227 + }, + { + "ce_ib": 5.148933410644531, + "ce_orig": 0.9243124723434448, + "epoch": 0.6404486303832051, + "kl_loss": 0.10917998850345612, + "loss_ib": 0.0016066932585090399, + "step": 2227 + }, + { + "ce_ib": 4.583498954772949, + "ce_orig": 1.1261999607086182, + "epoch": 0.6404486303832051, + "kl_loss": 0.061709899455308914, + "loss_ib": 0.0010754488175734878, + "step": 2227 + }, + { + "ce_ib": 5.027528762817383, + "ce_orig": 0.7961782217025757, + "epoch": 0.6407362139621827, + "kl_loss": 0.07926402986049652, + "loss_ib": 0.0012953931000083685, + "step": 2228 + }, + { + "ce_ib": 4.816097736358643, + "ce_orig": 0.9044475555419922, + "epoch": 0.6407362139621827, + "kl_loss": 0.07412540167570114, + "loss_ib": 0.001222863793373108, + "step": 2228 + }, + { + "ce_ib": 5.582714557647705, + "ce_orig": 1.0352410078048706, + "epoch": 0.6407362139621827, + "kl_loss": 0.10370562970638275, + "loss_ib": 0.0015953276306390762, + "step": 2228 + }, + { + "ce_ib": 3.800222396850586, + "ce_orig": 0.833646833896637, + "epoch": 0.6407362139621827, + "kl_loss": 0.13548040390014648, + "loss_ib": 0.001734826248139143, + "step": 2228 + }, + { + "ce_ib": 5.396977424621582, + "ce_orig": 1.2705157995224, + "epoch": 0.6410237975411603, + "kl_loss": 0.06845897436141968, + "loss_ib": 0.0012242874363437295, + "step": 2229 + }, + { + "ce_ib": 5.31843376159668, + "ce_orig": 1.1857287883758545, + "epoch": 0.6410237975411603, + "kl_loss": 0.0714961439371109, + "loss_ib": 0.0012468048371374607, + "step": 2229 + }, + { + "ce_ib": 5.128273963928223, + "ce_orig": 1.2327933311462402, + "epoch": 0.6410237975411603, + "kl_loss": 0.11936938762664795, + "loss_ib": 0.0017065211432054639, + "step": 2229 + }, + { + "ce_ib": 6.229401111602783, + "ce_orig": 1.2832454442977905, + "epoch": 0.6410237975411603, + "kl_loss": 0.049115654081106186, + "loss_ib": 0.0011140966089442372, + "step": 2229 + }, + { + "epoch": 0.6413113811201381, + "grad_norm": 0.10486283153295517, + "learning_rate": 4.5732687717353844e-05, + "loss": 0.95, + "step": 2230 + }, + { + "ce_ib": 3.4886844158172607, + "ce_orig": 0.618911623954773, + "epoch": 0.6413113811201381, + "kl_loss": 0.06665097177028656, + "loss_ib": 0.001015378162264824, + "step": 2230 + }, + { + "ce_ib": 5.4652934074401855, + "ce_orig": 0.904254138469696, + "epoch": 0.6413113811201381, + "kl_loss": 0.09085063636302948, + "loss_ib": 0.001455035642720759, + "step": 2230 + }, + { + "ce_ib": 5.347981929779053, + "ce_orig": 0.6432874202728271, + "epoch": 0.6413113811201381, + "kl_loss": 0.10321113467216492, + "loss_ib": 0.0015669094864279032, + "step": 2230 + }, + { + "ce_ib": 3.7306559085845947, + "ce_orig": 0.6401677131652832, + "epoch": 0.6413113811201381, + "kl_loss": 0.07179635763168335, + "loss_ib": 0.0010910291457548738, + "step": 2230 + }, + { + "ce_ib": 5.999794006347656, + "ce_orig": 1.2879626750946045, + "epoch": 0.6415989646991157, + "kl_loss": 0.07111871987581253, + "loss_ib": 0.0013111665612086654, + "step": 2231 + }, + { + "ce_ib": 4.975278377532959, + "ce_orig": 0.9707606434822083, + "epoch": 0.6415989646991157, + "kl_loss": 0.11816352605819702, + "loss_ib": 0.001679163076914847, + "step": 2231 + }, + { + "ce_ib": 4.4670538902282715, + "ce_orig": 0.9862544536590576, + "epoch": 0.6415989646991157, + "kl_loss": 0.07443384826183319, + "loss_ib": 0.00119104387704283, + "step": 2231 + }, + { + "ce_ib": 3.650481939315796, + "ce_orig": 0.5174126625061035, + "epoch": 0.6415989646991157, + "kl_loss": 0.0765133947134018, + "loss_ib": 0.0011301821796223521, + "step": 2231 + }, + { + "ce_ib": 5.60222053527832, + "ce_orig": 1.5269510746002197, + "epoch": 0.6418865482780933, + "kl_loss": 0.08366281539201736, + "loss_ib": 0.0013968502171337605, + "step": 2232 + }, + { + "ce_ib": 5.2017621994018555, + "ce_orig": 0.8872727751731873, + "epoch": 0.6418865482780933, + "kl_loss": 0.09039013087749481, + "loss_ib": 0.001424077432602644, + "step": 2232 + }, + { + "ce_ib": 3.7759287357330322, + "ce_orig": 0.7030375599861145, + "epoch": 0.6418865482780933, + "kl_loss": 0.081936314702034, + "loss_ib": 0.0011969560291618109, + "step": 2232 + }, + { + "ce_ib": 4.692392349243164, + "ce_orig": 1.1309425830841064, + "epoch": 0.6418865482780933, + "kl_loss": 0.09513752907514572, + "loss_ib": 0.00142061454243958, + "step": 2232 + }, + { + "ce_ib": 5.003069877624512, + "ce_orig": 1.011318325996399, + "epoch": 0.642174131857071, + "kl_loss": 0.09941715002059937, + "loss_ib": 0.0014944784343242645, + "step": 2233 + }, + { + "ce_ib": 3.637000322341919, + "ce_orig": 0.5968040823936462, + "epoch": 0.642174131857071, + "kl_loss": 0.1205047070980072, + "loss_ib": 0.0015687471022829413, + "step": 2233 + }, + { + "ce_ib": 4.004416465759277, + "ce_orig": 0.8547906279563904, + "epoch": 0.642174131857071, + "kl_loss": 0.07011885195970535, + "loss_ib": 0.001101630157791078, + "step": 2233 + }, + { + "ce_ib": 4.530745506286621, + "ce_orig": 1.030186414718628, + "epoch": 0.642174131857071, + "kl_loss": 0.06667307019233704, + "loss_ib": 0.0011198052670806646, + "step": 2233 + }, + { + "ce_ib": 3.1671440601348877, + "ce_orig": 0.26483121514320374, + "epoch": 0.6424617154360486, + "kl_loss": 0.08316045254468918, + "loss_ib": 0.0011483188718557358, + "step": 2234 + }, + { + "ce_ib": 4.244889259338379, + "ce_orig": 0.7033414840698242, + "epoch": 0.6424617154360486, + "kl_loss": 0.11508256942033768, + "loss_ib": 0.0015753146726638079, + "step": 2234 + }, + { + "ce_ib": 4.059351921081543, + "ce_orig": 0.4279976785182953, + "epoch": 0.6424617154360486, + "kl_loss": 0.09839731454849243, + "loss_ib": 0.0013899082550778985, + "step": 2234 + }, + { + "ce_ib": 1.5879151821136475, + "ce_orig": 0.33087652921676636, + "epoch": 0.6424617154360486, + "kl_loss": 0.17114633321762085, + "loss_ib": 0.0018702548695728183, + "step": 2234 + }, + { + "epoch": 0.6427492990150262, + "grad_norm": 0.1081383153796196, + "learning_rate": 4.571097922337568e-05, + "loss": 0.8595, + "step": 2235 + }, + { + "ce_ib": 2.541903257369995, + "ce_orig": 0.22244945168495178, + "epoch": 0.6427492990150262, + "kl_loss": 0.1017913967370987, + "loss_ib": 0.0012721042148768902, + "step": 2235 + }, + { + "ce_ib": 2.6913692951202393, + "ce_orig": 0.5399989485740662, + "epoch": 0.6427492990150262, + "kl_loss": 0.09031626582145691, + "loss_ib": 0.0011722996132448316, + "step": 2235 + }, + { + "ce_ib": 2.703094482421875, + "ce_orig": 0.7507355213165283, + "epoch": 0.6427492990150262, + "kl_loss": 0.04895135015249252, + "loss_ib": 0.000759822956752032, + "step": 2235 + }, + { + "ce_ib": 8.725822448730469, + "ce_orig": 1.8899520635604858, + "epoch": 0.6427492990150262, + "kl_loss": 0.11096983402967453, + "loss_ib": 0.001982280518859625, + "step": 2235 + }, + { + "ce_ib": 5.633505821228027, + "ce_orig": 0.9107649922370911, + "epoch": 0.6430368825940039, + "kl_loss": 0.16162686049938202, + "loss_ib": 0.0021796191576868296, + "step": 2236 + }, + { + "ce_ib": 2.9916138648986816, + "ce_orig": 0.3433312177658081, + "epoch": 0.6430368825940039, + "kl_loss": 0.0626898854970932, + "loss_ib": 0.0009260601946152747, + "step": 2236 + }, + { + "ce_ib": 3.6991634368896484, + "ce_orig": 0.6991357207298279, + "epoch": 0.6430368825940039, + "kl_loss": 0.0771259069442749, + "loss_ib": 0.0011411753948777914, + "step": 2236 + }, + { + "ce_ib": 2.5044384002685547, + "ce_orig": 0.631290853023529, + "epoch": 0.6430368825940039, + "kl_loss": 0.04751031845808029, + "loss_ib": 0.0007255469681695104, + "step": 2236 + }, + { + "ce_ib": 5.135547637939453, + "ce_orig": 1.1683127880096436, + "epoch": 0.6433244661729816, + "kl_loss": 0.1311001181602478, + "loss_ib": 0.0018245559185743332, + "step": 2237 + }, + { + "ce_ib": 4.849590301513672, + "ce_orig": 0.6875205039978027, + "epoch": 0.6433244661729816, + "kl_loss": 0.09412701427936554, + "loss_ib": 0.0014262291369959712, + "step": 2237 + }, + { + "ce_ib": 5.847722053527832, + "ce_orig": 1.1477434635162354, + "epoch": 0.6433244661729816, + "kl_loss": 0.08795696496963501, + "loss_ib": 0.001464341883547604, + "step": 2237 + }, + { + "ce_ib": 3.191795825958252, + "ce_orig": 0.8214234709739685, + "epoch": 0.6433244661729816, + "kl_loss": 0.04983691871166229, + "loss_ib": 0.0008175487746484578, + "step": 2237 + }, + { + "ce_ib": 2.8259029388427734, + "ce_orig": 0.6842032074928284, + "epoch": 0.6436120497519592, + "kl_loss": 0.04106111079454422, + "loss_ib": 0.0006932013784535229, + "step": 2238 + }, + { + "ce_ib": 3.6049575805664062, + "ce_orig": 0.3575763702392578, + "epoch": 0.6436120497519592, + "kl_loss": 0.10939747095108032, + "loss_ib": 0.0014544704463332891, + "step": 2238 + }, + { + "ce_ib": 3.03627610206604, + "ce_orig": 0.6013334393501282, + "epoch": 0.6436120497519592, + "kl_loss": 0.05404996871948242, + "loss_ib": 0.0008441272657364607, + "step": 2238 + }, + { + "ce_ib": 3.99049973487854, + "ce_orig": 1.0503952503204346, + "epoch": 0.6436120497519592, + "kl_loss": 0.05178070068359375, + "loss_ib": 0.0009168569813482463, + "step": 2238 + }, + { + "ce_ib": 4.414327621459961, + "ce_orig": 0.9181632995605469, + "epoch": 0.6438996333309368, + "kl_loss": 0.06560538709163666, + "loss_ib": 0.001097486587241292, + "step": 2239 + }, + { + "ce_ib": 5.37255859375, + "ce_orig": 1.1513558626174927, + "epoch": 0.6438996333309368, + "kl_loss": 0.0985565260052681, + "loss_ib": 0.0015228211414068937, + "step": 2239 + }, + { + "ce_ib": 5.764385223388672, + "ce_orig": 1.2393784523010254, + "epoch": 0.6438996333309368, + "kl_loss": 0.102507084608078, + "loss_ib": 0.0016015092842280865, + "step": 2239 + }, + { + "ce_ib": 3.18369460105896, + "ce_orig": 0.3954504728317261, + "epoch": 0.6438996333309368, + "kl_loss": 0.08067043125629425, + "loss_ib": 0.0011250737588852644, + "step": 2239 + }, + { + "epoch": 0.6441872169099144, + "grad_norm": 0.09200676530599594, + "learning_rate": 4.568922083184144e-05, + "loss": 0.8533, + "step": 2240 + }, + { + "ce_ib": 4.210375785827637, + "ce_orig": 0.8139750361442566, + "epoch": 0.6441872169099144, + "kl_loss": 0.06457894295454025, + "loss_ib": 0.0010668269824236631, + "step": 2240 + }, + { + "ce_ib": 8.073270797729492, + "ce_orig": 1.5012484788894653, + "epoch": 0.6441872169099144, + "kl_loss": 0.11669165641069412, + "loss_ib": 0.001974243437871337, + "step": 2240 + }, + { + "ce_ib": 5.769414901733398, + "ce_orig": 1.2035630941390991, + "epoch": 0.6441872169099144, + "kl_loss": 0.06806989759206772, + "loss_ib": 0.0012576404260471463, + "step": 2240 + }, + { + "ce_ib": 2.985184907913208, + "ce_orig": 0.338777095079422, + "epoch": 0.6441872169099144, + "kl_loss": 0.15819907188415527, + "loss_ib": 0.001880509196780622, + "step": 2240 + }, + { + "ce_ib": 4.233379364013672, + "ce_orig": 0.8791909217834473, + "epoch": 0.644474800488892, + "kl_loss": 0.07315323501825333, + "loss_ib": 0.0011548702605068684, + "step": 2241 + }, + { + "ce_ib": 3.234675645828247, + "ce_orig": 0.5491044521331787, + "epoch": 0.644474800488892, + "kl_loss": 0.0748339593410492, + "loss_ib": 0.0010718071134760976, + "step": 2241 + }, + { + "ce_ib": 4.62645149230957, + "ce_orig": 0.9359559416770935, + "epoch": 0.644474800488892, + "kl_loss": 0.10505035519599915, + "loss_ib": 0.001513148657977581, + "step": 2241 + }, + { + "ce_ib": 4.727909564971924, + "ce_orig": 0.9530536532402039, + "epoch": 0.644474800488892, + "kl_loss": 0.0776815414428711, + "loss_ib": 0.001249606255441904, + "step": 2241 + }, + { + "ce_ib": 3.1498990058898926, + "ce_orig": 0.8561684489250183, + "epoch": 0.6447623840678697, + "kl_loss": 0.07827848196029663, + "loss_ib": 0.0010977747151628137, + "step": 2242 + }, + { + "ce_ib": 6.110605716705322, + "ce_orig": 0.6302587985992432, + "epoch": 0.6447623840678697, + "kl_loss": 0.08272241055965424, + "loss_ib": 0.0014382846420630813, + "step": 2242 + }, + { + "ce_ib": 3.599996566772461, + "ce_orig": 0.6757697463035583, + "epoch": 0.6447623840678697, + "kl_loss": 0.09432416409254074, + "loss_ib": 0.0013032412389293313, + "step": 2242 + }, + { + "ce_ib": 2.7530288696289062, + "ce_orig": 0.5918030738830566, + "epoch": 0.6447623840678697, + "kl_loss": 0.04350406676530838, + "loss_ib": 0.0007103435345925391, + "step": 2242 + }, + { + "ce_ib": 4.007449150085449, + "ce_orig": 0.6537938714027405, + "epoch": 0.6450499676468474, + "kl_loss": 0.08944808691740036, + "loss_ib": 0.0012952256947755814, + "step": 2243 + }, + { + "ce_ib": 2.883246660232544, + "ce_orig": 0.732550323009491, + "epoch": 0.6450499676468474, + "kl_loss": 0.039158716797828674, + "loss_ib": 0.0006799118127673864, + "step": 2243 + }, + { + "ce_ib": 4.056617736816406, + "ce_orig": 0.6576982140541077, + "epoch": 0.6450499676468474, + "kl_loss": 0.08198338747024536, + "loss_ib": 0.0012254955945536494, + "step": 2243 + }, + { + "ce_ib": 3.323707103729248, + "ce_orig": 0.658359169960022, + "epoch": 0.6450499676468474, + "kl_loss": 0.04193057864904404, + "loss_ib": 0.0007516764453612268, + "step": 2243 + }, + { + "ce_ib": 4.133695125579834, + "ce_orig": 1.1541327238082886, + "epoch": 0.645337551225825, + "kl_loss": 0.05788007751107216, + "loss_ib": 0.0009921703021973372, + "step": 2244 + }, + { + "ce_ib": 5.588550090789795, + "ce_orig": 0.8417210578918457, + "epoch": 0.645337551225825, + "kl_loss": 0.09692901372909546, + "loss_ib": 0.0015281450469046831, + "step": 2244 + }, + { + "ce_ib": 6.7572340965271, + "ce_orig": 1.035165786743164, + "epoch": 0.645337551225825, + "kl_loss": 0.10930627584457397, + "loss_ib": 0.0017687861109152436, + "step": 2244 + }, + { + "ce_ib": 3.599705457687378, + "ce_orig": 0.6933870315551758, + "epoch": 0.645337551225825, + "kl_loss": 0.03358640521764755, + "loss_ib": 0.0006958345766179264, + "step": 2244 + }, + { + "epoch": 0.6456251348048027, + "grad_norm": 0.0923142060637474, + "learning_rate": 4.566741259517214e-05, + "loss": 0.8383, + "step": 2245 + }, + { + "ce_ib": 5.978185653686523, + "ce_orig": 1.2322280406951904, + "epoch": 0.6456251348048027, + "kl_loss": 0.09457875788211823, + "loss_ib": 0.0015436061657965183, + "step": 2245 + }, + { + "ce_ib": 5.419113636016846, + "ce_orig": 1.255495548248291, + "epoch": 0.6456251348048027, + "kl_loss": 0.05820336937904358, + "loss_ib": 0.0011239449959248304, + "step": 2245 + }, + { + "ce_ib": 4.476484298706055, + "ce_orig": 1.01193106174469, + "epoch": 0.6456251348048027, + "kl_loss": 0.047243863344192505, + "loss_ib": 0.0009200870990753174, + "step": 2245 + }, + { + "ce_ib": 5.452282428741455, + "ce_orig": 0.8271125555038452, + "epoch": 0.6456251348048027, + "kl_loss": 0.08815623074769974, + "loss_ib": 0.0014267904916778207, + "step": 2245 + }, + { + "ce_ib": 4.242177963256836, + "ce_orig": 0.5627428889274597, + "epoch": 0.6459127183837803, + "kl_loss": 0.07026271522045135, + "loss_ib": 0.0011268449015915394, + "step": 2246 + }, + { + "ce_ib": 4.525074481964111, + "ce_orig": 0.8133836984634399, + "epoch": 0.6459127183837803, + "kl_loss": 0.056008581072092056, + "loss_ib": 0.0010125931585207582, + "step": 2246 + }, + { + "ce_ib": 2.9996485710144043, + "ce_orig": 0.48396024107933044, + "epoch": 0.6459127183837803, + "kl_loss": 0.04674283415079117, + "loss_ib": 0.000767393154092133, + "step": 2246 + }, + { + "ce_ib": 6.890351295471191, + "ce_orig": 0.7132139801979065, + "epoch": 0.6459127183837803, + "kl_loss": 0.10355844348669052, + "loss_ib": 0.0017246195347979665, + "step": 2246 + }, + { + "ce_ib": 2.747797727584839, + "ce_orig": 0.7812958359718323, + "epoch": 0.6462003019627579, + "kl_loss": 0.050304561853408813, + "loss_ib": 0.0007778254221193492, + "step": 2247 + }, + { + "ce_ib": 4.131923198699951, + "ce_orig": 0.8744434714317322, + "epoch": 0.6462003019627579, + "kl_loss": 0.068601593375206, + "loss_ib": 0.0010992082534357905, + "step": 2247 + }, + { + "ce_ib": 6.047113418579102, + "ce_orig": 1.2204774618148804, + "epoch": 0.6462003019627579, + "kl_loss": 0.09449388086795807, + "loss_ib": 0.0015496501000598073, + "step": 2247 + }, + { + "ce_ib": 4.262847423553467, + "ce_orig": 0.754558801651001, + "epoch": 0.6462003019627579, + "kl_loss": 0.06580022722482681, + "loss_ib": 0.0010842869523912668, + "step": 2247 + }, + { + "ce_ib": 2.3977456092834473, + "ce_orig": 0.5272009968757629, + "epoch": 0.6464878855417355, + "kl_loss": 0.04708137735724449, + "loss_ib": 0.0007105882978066802, + "step": 2248 + }, + { + "ce_ib": 4.640843391418457, + "ce_orig": 0.8003717064857483, + "epoch": 0.6464878855417355, + "kl_loss": 0.12465635687112808, + "loss_ib": 0.001710647949948907, + "step": 2248 + }, + { + "ce_ib": 2.0123777389526367, + "ce_orig": 0.31317487359046936, + "epoch": 0.6464878855417355, + "kl_loss": 0.13874095678329468, + "loss_ib": 0.0015886473702266812, + "step": 2248 + }, + { + "ce_ib": 2.934152603149414, + "ce_orig": 0.29173144698143005, + "epoch": 0.6464878855417355, + "kl_loss": 0.1157584935426712, + "loss_ib": 0.0014510001055896282, + "step": 2248 + }, + { + "ce_ib": 4.418186187744141, + "ce_orig": 0.6324502825737, + "epoch": 0.6467754691207132, + "kl_loss": 0.09208233654499054, + "loss_ib": 0.001362642040476203, + "step": 2249 + }, + { + "ce_ib": 5.0803914070129395, + "ce_orig": 1.067771077156067, + "epoch": 0.6467754691207132, + "kl_loss": 0.07347364723682404, + "loss_ib": 0.0012427755864337087, + "step": 2249 + }, + { + "ce_ib": 4.163920879364014, + "ce_orig": 1.1027624607086182, + "epoch": 0.6467754691207132, + "kl_loss": 0.05640750378370285, + "loss_ib": 0.0009804670698940754, + "step": 2249 + }, + { + "ce_ib": 5.3402557373046875, + "ce_orig": 1.1835896968841553, + "epoch": 0.6467754691207132, + "kl_loss": 0.06858684122562408, + "loss_ib": 0.001219893922097981, + "step": 2249 + }, + { + "epoch": 0.6470630526996909, + "grad_norm": 0.09092248231172562, + "learning_rate": 4.56455545659089e-05, + "loss": 0.8393, + "step": 2250 + }, + { + "ce_ib": 3.6515896320343018, + "ce_orig": 0.7155677080154419, + "epoch": 0.6470630526996909, + "kl_loss": 0.0929686427116394, + "loss_ib": 0.0012948453659191728, + "step": 2250 + }, + { + "ce_ib": 5.044528007507324, + "ce_orig": 1.083160161972046, + "epoch": 0.6470630526996909, + "kl_loss": 0.0892648696899414, + "loss_ib": 0.001397101441398263, + "step": 2250 + }, + { + "ce_ib": 3.1212098598480225, + "ce_orig": 0.7073313593864441, + "epoch": 0.6470630526996909, + "kl_loss": 0.05777765065431595, + "loss_ib": 0.0008898974629119039, + "step": 2250 + }, + { + "ce_ib": 3.9030306339263916, + "ce_orig": 0.7377669215202332, + "epoch": 0.6470630526996909, + "kl_loss": 0.11938148736953735, + "loss_ib": 0.0015841179993003607, + "step": 2250 + }, + { + "ce_ib": 4.870665550231934, + "ce_orig": 0.7691740393638611, + "epoch": 0.6473506362786685, + "kl_loss": 0.07717746496200562, + "loss_ib": 0.0012588411336764693, + "step": 2251 + }, + { + "ce_ib": 6.211418151855469, + "ce_orig": 1.6345127820968628, + "epoch": 0.6473506362786685, + "kl_loss": 0.08106111735105515, + "loss_ib": 0.0014317530440166593, + "step": 2251 + }, + { + "ce_ib": 4.7566118240356445, + "ce_orig": 0.5651468634605408, + "epoch": 0.6473506362786685, + "kl_loss": 0.12495700269937515, + "loss_ib": 0.0017252310644835234, + "step": 2251 + }, + { + "ce_ib": 4.174144744873047, + "ce_orig": 0.9576300382614136, + "epoch": 0.6473506362786685, + "kl_loss": 0.06946859508752823, + "loss_ib": 0.0011121004354208708, + "step": 2251 + }, + { + "ce_ib": 5.575498580932617, + "ce_orig": 1.26664400100708, + "epoch": 0.6476382198576461, + "kl_loss": 0.05170952528715134, + "loss_ib": 0.0010746450861915946, + "step": 2252 + }, + { + "ce_ib": 2.4584405422210693, + "ce_orig": 0.3117712736129761, + "epoch": 0.6476382198576461, + "kl_loss": 0.09197953343391418, + "loss_ib": 0.0011656393762677908, + "step": 2252 + }, + { + "ce_ib": 2.886988639831543, + "ce_orig": 0.4482458829879761, + "epoch": 0.6476382198576461, + "kl_loss": 0.05393718555569649, + "loss_ib": 0.0008280706824734807, + "step": 2252 + }, + { + "ce_ib": 5.042593479156494, + "ce_orig": 0.9169831871986389, + "epoch": 0.6476382198576461, + "kl_loss": 0.08578374981880188, + "loss_ib": 0.0013620967511087656, + "step": 2252 + }, + { + "ce_ib": 3.8594863414764404, + "ce_orig": 0.7838860154151917, + "epoch": 0.6479258034366238, + "kl_loss": 0.042746998369693756, + "loss_ib": 0.0008134185918606818, + "step": 2253 + }, + { + "ce_ib": 5.89522647857666, + "ce_orig": 1.0631190538406372, + "epoch": 0.6479258034366238, + "kl_loss": 0.08355875313282013, + "loss_ib": 0.0014251100365072489, + "step": 2253 + }, + { + "ce_ib": 5.083580493927002, + "ce_orig": 0.7040472030639648, + "epoch": 0.6479258034366238, + "kl_loss": 0.058801114559173584, + "loss_ib": 0.0010963691165670753, + "step": 2253 + }, + { + "ce_ib": 2.2211806774139404, + "ce_orig": 0.4939582943916321, + "epoch": 0.6479258034366238, + "kl_loss": 0.04186480864882469, + "loss_ib": 0.0006407661712728441, + "step": 2253 + }, + { + "ce_ib": 3.5483171939849854, + "ce_orig": 0.5922026634216309, + "epoch": 0.6482133870156014, + "kl_loss": 0.10362020879983902, + "loss_ib": 0.0013910337584093213, + "step": 2254 + }, + { + "ce_ib": 4.561907768249512, + "ce_orig": 1.0587536096572876, + "epoch": 0.6482133870156014, + "kl_loss": 0.0927678793668747, + "loss_ib": 0.0013838695595040917, + "step": 2254 + }, + { + "ce_ib": 3.918771505355835, + "ce_orig": 1.06467604637146, + "epoch": 0.6482133870156014, + "kl_loss": 0.03836088255047798, + "loss_ib": 0.0007754859398119152, + "step": 2254 + }, + { + "ce_ib": 4.510466575622559, + "ce_orig": 0.6127709150314331, + "epoch": 0.6482133870156014, + "kl_loss": 0.10040029883384705, + "loss_ib": 0.0014550496125593781, + "step": 2254 + }, + { + "epoch": 0.648500970594579, + "grad_norm": 0.1001354306936264, + "learning_rate": 4.5623646796712774e-05, + "loss": 0.8243, + "step": 2255 + }, + { + "ce_ib": 5.828423023223877, + "ce_orig": 1.3417309522628784, + "epoch": 0.648500970594579, + "kl_loss": 0.08130457252264023, + "loss_ib": 0.0013958880444988608, + "step": 2255 + }, + { + "ce_ib": 4.684239387512207, + "ce_orig": 0.38564202189445496, + "epoch": 0.648500970594579, + "kl_loss": 0.12258496880531311, + "loss_ib": 0.0016942736692726612, + "step": 2255 + }, + { + "ce_ib": 6.321585178375244, + "ce_orig": 1.2882518768310547, + "epoch": 0.648500970594579, + "kl_loss": 0.08591645956039429, + "loss_ib": 0.0014913231134414673, + "step": 2255 + }, + { + "ce_ib": 3.2147138118743896, + "ce_orig": 0.5695244669914246, + "epoch": 0.648500970594579, + "kl_loss": 0.07764263451099396, + "loss_ib": 0.0010978977661579847, + "step": 2255 + }, + { + "ce_ib": 3.0978622436523438, + "ce_orig": 0.5359968543052673, + "epoch": 0.6487885541735567, + "kl_loss": 0.08678986132144928, + "loss_ib": 0.001177684753201902, + "step": 2256 + }, + { + "ce_ib": 8.228111267089844, + "ce_orig": 1.4944881200790405, + "epoch": 0.6487885541735567, + "kl_loss": 0.05712651461362839, + "loss_ib": 0.0013940762728452682, + "step": 2256 + }, + { + "ce_ib": 4.540656089782715, + "ce_orig": 0.756723940372467, + "epoch": 0.6487885541735567, + "kl_loss": 0.07504387944936752, + "loss_ib": 0.0012045043986290693, + "step": 2256 + }, + { + "ce_ib": 4.685283660888672, + "ce_orig": 1.0155675411224365, + "epoch": 0.6487885541735567, + "kl_loss": 0.06247735023498535, + "loss_ib": 0.0010933018056675792, + "step": 2256 + }, + { + "ce_ib": 3.354616165161133, + "ce_orig": 0.6835289597511292, + "epoch": 0.6490761377525344, + "kl_loss": 0.08929853141307831, + "loss_ib": 0.0012284469557926059, + "step": 2257 + }, + { + "ce_ib": 2.7293875217437744, + "ce_orig": 0.5408226847648621, + "epoch": 0.6490761377525344, + "kl_loss": 0.05878724902868271, + "loss_ib": 0.000860811211168766, + "step": 2257 + }, + { + "ce_ib": 3.1879544258117676, + "ce_orig": 0.6054509282112122, + "epoch": 0.6490761377525344, + "kl_loss": 0.03403601422905922, + "loss_ib": 0.0006591555429622531, + "step": 2257 + }, + { + "ce_ib": 4.417482376098633, + "ce_orig": 1.0192781686782837, + "epoch": 0.6490761377525344, + "kl_loss": 0.06106653809547424, + "loss_ib": 0.0010524136014282703, + "step": 2257 + }, + { + "ce_ib": 3.6743974685668945, + "ce_orig": 0.7927655577659607, + "epoch": 0.649363721331512, + "kl_loss": 0.05335573852062225, + "loss_ib": 0.000900997023563832, + "step": 2258 + }, + { + "ce_ib": 5.903752326965332, + "ce_orig": 1.2490837574005127, + "epoch": 0.649363721331512, + "kl_loss": 0.05551975220441818, + "loss_ib": 0.0011455727508291602, + "step": 2258 + }, + { + "ce_ib": 4.232627868652344, + "ce_orig": 0.6127696633338928, + "epoch": 0.649363721331512, + "kl_loss": 0.11703859269618988, + "loss_ib": 0.00159364880528301, + "step": 2258 + }, + { + "ce_ib": 4.324455261230469, + "ce_orig": 0.69388347864151, + "epoch": 0.649363721331512, + "kl_loss": 0.04948773980140686, + "loss_ib": 0.0009273229516111314, + "step": 2258 + }, + { + "ce_ib": 4.747666835784912, + "ce_orig": 0.9629707336425781, + "epoch": 0.6496513049104896, + "kl_loss": 0.05812857300043106, + "loss_ib": 0.0010560523951426148, + "step": 2259 + }, + { + "ce_ib": 6.453109264373779, + "ce_orig": 1.0056365728378296, + "epoch": 0.6496513049104896, + "kl_loss": 0.11627036333084106, + "loss_ib": 0.001808014465495944, + "step": 2259 + }, + { + "ce_ib": 5.255545616149902, + "ce_orig": 1.3267358541488647, + "epoch": 0.6496513049104896, + "kl_loss": 0.06045638024806976, + "loss_ib": 0.001130118384025991, + "step": 2259 + }, + { + "ce_ib": 3.1262059211730957, + "ce_orig": 0.5447742342948914, + "epoch": 0.6496513049104896, + "kl_loss": 0.05449337512254715, + "loss_ib": 0.0008575543761253357, + "step": 2259 + }, + { + "epoch": 0.6499388884894672, + "grad_norm": 0.10189750045537949, + "learning_rate": 4.560168934036466e-05, + "loss": 0.8402, + "step": 2260 + }, + { + "ce_ib": 3.393195867538452, + "ce_orig": 0.9720718264579773, + "epoch": 0.6499388884894672, + "kl_loss": 0.04837355390191078, + "loss_ib": 0.00082305510295555, + "step": 2260 + }, + { + "ce_ib": 3.42690372467041, + "ce_orig": 0.7921063899993896, + "epoch": 0.6499388884894672, + "kl_loss": 0.07183823734521866, + "loss_ib": 0.0010610726894810796, + "step": 2260 + }, + { + "ce_ib": 3.388145923614502, + "ce_orig": 0.5620659589767456, + "epoch": 0.6499388884894672, + "kl_loss": 0.11142948269844055, + "loss_ib": 0.0014531093183904886, + "step": 2260 + }, + { + "ce_ib": 3.476191997528076, + "ce_orig": 0.6332195401191711, + "epoch": 0.6499388884894672, + "kl_loss": 0.09548868238925934, + "loss_ib": 0.0013025059597566724, + "step": 2260 + }, + { + "ce_ib": 4.748948574066162, + "ce_orig": 0.826274573802948, + "epoch": 0.6502264720684449, + "kl_loss": 0.10041515529155731, + "loss_ib": 0.0014790463028475642, + "step": 2261 + }, + { + "ce_ib": 4.560025691986084, + "ce_orig": 0.583285927772522, + "epoch": 0.6502264720684449, + "kl_loss": 0.08703190088272095, + "loss_ib": 0.0013263215078040957, + "step": 2261 + }, + { + "ce_ib": 3.7938625812530518, + "ce_orig": 0.48033952713012695, + "epoch": 0.6502264720684449, + "kl_loss": 0.10424364358186722, + "loss_ib": 0.001421822584234178, + "step": 2261 + }, + { + "ce_ib": 2.8533387184143066, + "ce_orig": 0.6417758464813232, + "epoch": 0.6502264720684449, + "kl_loss": 0.06373397260904312, + "loss_ib": 0.0009226735564880073, + "step": 2261 + }, + { + "ce_ib": 5.911954402923584, + "ce_orig": 1.281350016593933, + "epoch": 0.6505140556474225, + "kl_loss": 0.0610538125038147, + "loss_ib": 0.0012017334811389446, + "step": 2262 + }, + { + "ce_ib": 3.4531102180480957, + "ce_orig": 0.5086814761161804, + "epoch": 0.6505140556474225, + "kl_loss": 0.07311820238828659, + "loss_ib": 0.0010764930630102754, + "step": 2262 + }, + { + "ce_ib": 4.972541332244873, + "ce_orig": 0.9404982328414917, + "epoch": 0.6505140556474225, + "kl_loss": 0.06925240159034729, + "loss_ib": 0.0011897780932486057, + "step": 2262 + }, + { + "ce_ib": 4.162656784057617, + "ce_orig": 0.5425086617469788, + "epoch": 0.6505140556474225, + "kl_loss": 0.09209723770618439, + "loss_ib": 0.0013372378889471292, + "step": 2262 + }, + { + "ce_ib": 4.0468831062316895, + "ce_orig": 0.672670304775238, + "epoch": 0.6508016392264002, + "kl_loss": 0.07417095452547073, + "loss_ib": 0.0011463979026302695, + "step": 2263 + }, + { + "ce_ib": 3.5027401447296143, + "ce_orig": 0.514998197555542, + "epoch": 0.6508016392264002, + "kl_loss": 0.0724518671631813, + "loss_ib": 0.0010747927008196712, + "step": 2263 + }, + { + "ce_ib": 7.32985782623291, + "ce_orig": 1.6105339527130127, + "epoch": 0.6508016392264002, + "kl_loss": 0.10354353487491608, + "loss_ib": 0.0017684210324659944, + "step": 2263 + }, + { + "ce_ib": 4.01119327545166, + "ce_orig": 0.7137694954872131, + "epoch": 0.6508016392264002, + "kl_loss": 0.07605791091918945, + "loss_ib": 0.0011616983683779836, + "step": 2263 + }, + { + "ce_ib": 4.87504243850708, + "ce_orig": 1.182365894317627, + "epoch": 0.6510892228053778, + "kl_loss": 0.06628237664699554, + "loss_ib": 0.001150327967479825, + "step": 2264 + }, + { + "ce_ib": 5.280245304107666, + "ce_orig": 0.7392207384109497, + "epoch": 0.6510892228053778, + "kl_loss": 0.07168431580066681, + "loss_ib": 0.0012448676861822605, + "step": 2264 + }, + { + "ce_ib": 4.722143173217773, + "ce_orig": 0.6919271945953369, + "epoch": 0.6510892228053778, + "kl_loss": 0.09763983637094498, + "loss_ib": 0.0014486126601696014, + "step": 2264 + }, + { + "ce_ib": 5.984767913818359, + "ce_orig": 0.8624404072761536, + "epoch": 0.6510892228053778, + "kl_loss": 0.09041304886341095, + "loss_ib": 0.001502607250586152, + "step": 2264 + }, + { + "epoch": 0.6513768063843555, + "grad_norm": 0.08562284708023071, + "learning_rate": 4.557968224976518e-05, + "loss": 0.8313, + "step": 2265 + }, + { + "ce_ib": 3.689687967300415, + "ce_orig": 0.6775491237640381, + "epoch": 0.6513768063843555, + "kl_loss": 0.08978244662284851, + "loss_ib": 0.0012667932314798236, + "step": 2265 + }, + { + "ce_ib": 5.424607276916504, + "ce_orig": 0.9179068207740784, + "epoch": 0.6513768063843555, + "kl_loss": 0.11513639986515045, + "loss_ib": 0.0016938246553763747, + "step": 2265 + }, + { + "ce_ib": 4.219823837280273, + "ce_orig": 0.7224557399749756, + "epoch": 0.6513768063843555, + "kl_loss": 0.05617956817150116, + "loss_ib": 0.0009837780380621552, + "step": 2265 + }, + { + "ce_ib": 5.096823215484619, + "ce_orig": 0.8156121373176575, + "epoch": 0.6513768063843555, + "kl_loss": 0.0888018012046814, + "loss_ib": 0.0013977002818137407, + "step": 2265 + }, + { + "ce_ib": 5.084685325622559, + "ce_orig": 0.9011462926864624, + "epoch": 0.6516643899633331, + "kl_loss": 0.06627698242664337, + "loss_ib": 0.0011712383711710572, + "step": 2266 + }, + { + "ce_ib": 5.014079570770264, + "ce_orig": 0.9363381266593933, + "epoch": 0.6516643899633331, + "kl_loss": 0.0812264084815979, + "loss_ib": 0.0013136720517650247, + "step": 2266 + }, + { + "ce_ib": 3.392813205718994, + "ce_orig": 0.7315462827682495, + "epoch": 0.6516643899633331, + "kl_loss": 0.06856728345155716, + "loss_ib": 0.001024954137392342, + "step": 2266 + }, + { + "ce_ib": 4.7515668869018555, + "ce_orig": 1.4170714616775513, + "epoch": 0.6516643899633331, + "kl_loss": 0.06912566721439362, + "loss_ib": 0.0011664133053272963, + "step": 2266 + }, + { + "ce_ib": 4.8838934898376465, + "ce_orig": 0.816562831401825, + "epoch": 0.6519519735423107, + "kl_loss": 0.09491956233978271, + "loss_ib": 0.0014375848695635796, + "step": 2267 + }, + { + "ce_ib": 6.923623085021973, + "ce_orig": 1.5589663982391357, + "epoch": 0.6519519735423107, + "kl_loss": 0.10504873096942902, + "loss_ib": 0.0017428495921194553, + "step": 2267 + }, + { + "ce_ib": 3.3356943130493164, + "ce_orig": 0.5972881317138672, + "epoch": 0.6519519735423107, + "kl_loss": 0.07560107111930847, + "loss_ib": 0.001089580124244094, + "step": 2267 + }, + { + "ce_ib": 5.425398349761963, + "ce_orig": 0.614367663860321, + "epoch": 0.6519519735423107, + "kl_loss": 0.10973822325468063, + "loss_ib": 0.0016399219166487455, + "step": 2267 + }, + { + "ce_ib": 6.249393463134766, + "ce_orig": 0.8280339241027832, + "epoch": 0.6522395571212883, + "kl_loss": 0.07958365976810455, + "loss_ib": 0.001420775894075632, + "step": 2268 + }, + { + "ce_ib": 4.183853626251221, + "ce_orig": 0.8928884267807007, + "epoch": 0.6522395571212883, + "kl_loss": 0.08208107948303223, + "loss_ib": 0.001239196164533496, + "step": 2268 + }, + { + "ce_ib": 6.015268802642822, + "ce_orig": 1.313049077987671, + "epoch": 0.6522395571212883, + "kl_loss": 0.08784487098455429, + "loss_ib": 0.0014799755299463868, + "step": 2268 + }, + { + "ce_ib": 4.124445915222168, + "ce_orig": 0.9583092331886292, + "epoch": 0.6522395571212883, + "kl_loss": 0.07219264656305313, + "loss_ib": 0.001134371035732329, + "step": 2268 + }, + { + "ce_ib": 4.334308624267578, + "ce_orig": 0.9356188178062439, + "epoch": 0.652527140700266, + "kl_loss": 0.051878005266189575, + "loss_ib": 0.0009522108593955636, + "step": 2269 + }, + { + "ce_ib": 5.041485786437988, + "ce_orig": 0.838961660861969, + "epoch": 0.652527140700266, + "kl_loss": 0.10010100901126862, + "loss_ib": 0.0015051586087793112, + "step": 2269 + }, + { + "ce_ib": 6.477016925811768, + "ce_orig": 1.6458380222320557, + "epoch": 0.652527140700266, + "kl_loss": 0.058974601328372955, + "loss_ib": 0.0012374477228149772, + "step": 2269 + }, + { + "ce_ib": 3.950042247772217, + "ce_orig": 0.9245351552963257, + "epoch": 0.652527140700266, + "kl_loss": 0.05961986631155014, + "loss_ib": 0.0009912028908729553, + "step": 2269 + }, + { + "epoch": 0.6528147242792437, + "grad_norm": 0.09461739659309387, + "learning_rate": 4.5557625577934504e-05, + "loss": 0.8929, + "step": 2270 + }, + { + "ce_ib": 5.349925994873047, + "ce_orig": 1.0550038814544678, + "epoch": 0.6528147242792437, + "kl_loss": 0.05185253918170929, + "loss_ib": 0.0010535179171711206, + "step": 2270 + }, + { + "ce_ib": 4.127248287200928, + "ce_orig": 0.7954692244529724, + "epoch": 0.6528147242792437, + "kl_loss": 0.08532535284757614, + "loss_ib": 0.001265978324227035, + "step": 2270 + }, + { + "ce_ib": 3.3121883869171143, + "ce_orig": 0.6129387021064758, + "epoch": 0.6528147242792437, + "kl_loss": 0.050515368580818176, + "loss_ib": 0.0008363724919036031, + "step": 2270 + }, + { + "ce_ib": 4.475091934204102, + "ce_orig": 1.0875869989395142, + "epoch": 0.6528147242792437, + "kl_loss": 0.09005168825387955, + "loss_ib": 0.0013480260968208313, + "step": 2270 + }, + { + "ce_ib": 3.040416955947876, + "ce_orig": 0.8704392313957214, + "epoch": 0.6531023078582213, + "kl_loss": 0.06819519400596619, + "loss_ib": 0.0009859935380518436, + "step": 2271 + }, + { + "ce_ib": 5.702722072601318, + "ce_orig": 1.2912049293518066, + "epoch": 0.6531023078582213, + "kl_loss": 0.10002905130386353, + "loss_ib": 0.0015705627156421542, + "step": 2271 + }, + { + "ce_ib": 6.078077793121338, + "ce_orig": 1.2164020538330078, + "epoch": 0.6531023078582213, + "kl_loss": 0.05532166361808777, + "loss_ib": 0.0011610244400799274, + "step": 2271 + }, + { + "ce_ib": 4.267163276672363, + "ce_orig": 0.5811713933944702, + "epoch": 0.6531023078582213, + "kl_loss": 0.06989666819572449, + "loss_ib": 0.0011256829602643847, + "step": 2271 + }, + { + "ce_ib": 4.3365559577941895, + "ce_orig": 0.7762757539749146, + "epoch": 0.6533898914371989, + "kl_loss": 0.05434329807758331, + "loss_ib": 0.000977088464424014, + "step": 2272 + }, + { + "ce_ib": 4.164851188659668, + "ce_orig": 1.0501207113265991, + "epoch": 0.6533898914371989, + "kl_loss": 0.06601358950138092, + "loss_ib": 0.0010766208870336413, + "step": 2272 + }, + { + "ce_ib": 4.730634689331055, + "ce_orig": 1.1071280241012573, + "epoch": 0.6533898914371989, + "kl_loss": 0.06709802150726318, + "loss_ib": 0.0011440436355769634, + "step": 2272 + }, + { + "ce_ib": 3.4250998497009277, + "ce_orig": 0.7877690196037292, + "epoch": 0.6533898914371989, + "kl_loss": 0.0746195837855339, + "loss_ib": 0.0010887057287618518, + "step": 2272 + }, + { + "ce_ib": 3.2136123180389404, + "ce_orig": 0.6821689605712891, + "epoch": 0.6536774750161766, + "kl_loss": 0.08651071041822433, + "loss_ib": 0.0011864682892337441, + "step": 2273 + }, + { + "ce_ib": 3.543617010116577, + "ce_orig": 0.8363583087921143, + "epoch": 0.6536774750161766, + "kl_loss": 0.04644445329904556, + "loss_ib": 0.0008188061765395105, + "step": 2273 + }, + { + "ce_ib": 6.52175760269165, + "ce_orig": 1.526800274848938, + "epoch": 0.6536774750161766, + "kl_loss": 0.08130738139152527, + "loss_ib": 0.0014652495738118887, + "step": 2273 + }, + { + "ce_ib": 5.275723457336426, + "ce_orig": 0.8912367820739746, + "epoch": 0.6536774750161766, + "kl_loss": 0.09127356857061386, + "loss_ib": 0.0014403080567717552, + "step": 2273 + }, + { + "ce_ib": 3.402512788772583, + "ce_orig": 0.6144426465034485, + "epoch": 0.6539650585951542, + "kl_loss": 0.06492749601602554, + "loss_ib": 0.000989526160992682, + "step": 2274 + }, + { + "ce_ib": 3.4656591415405273, + "ce_orig": 0.539176881313324, + "epoch": 0.6539650585951542, + "kl_loss": 0.07373885810375214, + "loss_ib": 0.001083954470232129, + "step": 2274 + }, + { + "ce_ib": 3.307788133621216, + "ce_orig": 0.8017793297767639, + "epoch": 0.6539650585951542, + "kl_loss": 0.06078724563121796, + "loss_ib": 0.0009386512683704495, + "step": 2274 + }, + { + "ce_ib": 7.612823963165283, + "ce_orig": 1.8478108644485474, + "epoch": 0.6539650585951542, + "kl_loss": 0.29333508014678955, + "loss_ib": 0.0036946332547813654, + "step": 2274 + }, + { + "epoch": 0.6542526421741318, + "grad_norm": 0.08989392220973969, + "learning_rate": 4.5535519378012295e-05, + "loss": 0.8647, + "step": 2275 + }, + { + "ce_ib": 4.669591426849365, + "ce_orig": 0.48698288202285767, + "epoch": 0.6542526421741318, + "kl_loss": 0.08301065862178802, + "loss_ib": 0.0012970657553523779, + "step": 2275 + }, + { + "ce_ib": 4.229344367980957, + "ce_orig": 0.9067175388336182, + "epoch": 0.6542526421741318, + "kl_loss": 0.07277588546276093, + "loss_ib": 0.001150693278759718, + "step": 2275 + }, + { + "ce_ib": 4.696434020996094, + "ce_orig": 0.7402951121330261, + "epoch": 0.6542526421741318, + "kl_loss": 0.07746754586696625, + "loss_ib": 0.0012443187879398465, + "step": 2275 + }, + { + "ce_ib": 5.579097270965576, + "ce_orig": 0.950671911239624, + "epoch": 0.6542526421741318, + "kl_loss": 0.10657227039337158, + "loss_ib": 0.0016236323863267899, + "step": 2275 + }, + { + "ce_ib": 3.785494804382324, + "ce_orig": 0.7026271224021912, + "epoch": 0.6545402257531096, + "kl_loss": 0.039121344685554504, + "loss_ib": 0.0007697629043832421, + "step": 2276 + }, + { + "ce_ib": 5.041788101196289, + "ce_orig": 0.5288264155387878, + "epoch": 0.6545402257531096, + "kl_loss": 0.06385783851146698, + "loss_ib": 0.0011427572462707758, + "step": 2276 + }, + { + "ce_ib": 4.646267414093018, + "ce_orig": 0.767318606376648, + "epoch": 0.6545402257531096, + "kl_loss": 0.09078411757946014, + "loss_ib": 0.0013724678428843617, + "step": 2276 + }, + { + "ce_ib": 4.901246070861816, + "ce_orig": 1.0622847080230713, + "epoch": 0.6545402257531096, + "kl_loss": 0.08497358858585358, + "loss_ib": 0.0013398604933172464, + "step": 2276 + }, + { + "ce_ib": 4.878903388977051, + "ce_orig": 0.708929181098938, + "epoch": 0.6548278093320872, + "kl_loss": 0.06434676051139832, + "loss_ib": 0.0011313578579574823, + "step": 2277 + }, + { + "ce_ib": 3.246945381164551, + "ce_orig": 0.6561176776885986, + "epoch": 0.6548278093320872, + "kl_loss": 0.07015529274940491, + "loss_ib": 0.0010262473952025175, + "step": 2277 + }, + { + "ce_ib": 2.95664119720459, + "ce_orig": 0.44515833258628845, + "epoch": 0.6548278093320872, + "kl_loss": 0.05494384467601776, + "loss_ib": 0.0008451024768874049, + "step": 2277 + }, + { + "ce_ib": 5.423230171203613, + "ce_orig": 1.0733256340026855, + "epoch": 0.6548278093320872, + "kl_loss": 0.062211956828832626, + "loss_ib": 0.00116444262675941, + "step": 2277 + }, + { + "ce_ib": 2.7047672271728516, + "ce_orig": 0.5896720290184021, + "epoch": 0.6551153929110648, + "kl_loss": 0.061353884637355804, + "loss_ib": 0.0008840155205689371, + "step": 2278 + }, + { + "ce_ib": 4.247391700744629, + "ce_orig": 0.7403078079223633, + "epoch": 0.6551153929110648, + "kl_loss": 0.06098688393831253, + "loss_ib": 0.0010346079943701625, + "step": 2278 + }, + { + "ce_ib": 3.235067367553711, + "ce_orig": 0.6209512948989868, + "epoch": 0.6551153929110648, + "kl_loss": 0.07574208080768585, + "loss_ib": 0.001080927555449307, + "step": 2278 + }, + { + "ce_ib": 5.207133769989014, + "ce_orig": 0.9501504302024841, + "epoch": 0.6551153929110648, + "kl_loss": 0.09136837720870972, + "loss_ib": 0.0014343970688059926, + "step": 2278 + }, + { + "ce_ib": 4.254356384277344, + "ce_orig": 0.7882441878318787, + "epoch": 0.6554029764900424, + "kl_loss": 0.07934015989303589, + "loss_ib": 0.0012188372202217579, + "step": 2279 + }, + { + "ce_ib": 2.548619270324707, + "ce_orig": 0.7700033783912659, + "epoch": 0.6554029764900424, + "kl_loss": 0.04540827497839928, + "loss_ib": 0.0007089446298778057, + "step": 2279 + }, + { + "ce_ib": 5.388217926025391, + "ce_orig": 1.3758680820465088, + "epoch": 0.6554029764900424, + "kl_loss": 0.09303371608257294, + "loss_ib": 0.001469158916734159, + "step": 2279 + }, + { + "ce_ib": 5.713489532470703, + "ce_orig": 0.8652381896972656, + "epoch": 0.6554029764900424, + "kl_loss": 0.0896613672375679, + "loss_ib": 0.0014679625164717436, + "step": 2279 + }, + { + "epoch": 0.65569056006902, + "grad_norm": 0.08211017400026321, + "learning_rate": 4.5513363703257496e-05, + "loss": 0.8207, + "step": 2280 + }, + { + "ce_ib": 3.457378387451172, + "ce_orig": 0.5909416675567627, + "epoch": 0.65569056006902, + "kl_loss": 0.06717115640640259, + "loss_ib": 0.001017449307255447, + "step": 2280 + }, + { + "ce_ib": 6.158926010131836, + "ce_orig": 1.4350898265838623, + "epoch": 0.65569056006902, + "kl_loss": 0.08906790614128113, + "loss_ib": 0.0015065715415403247, + "step": 2280 + }, + { + "ce_ib": 5.63430643081665, + "ce_orig": 1.0570200681686401, + "epoch": 0.65569056006902, + "kl_loss": 0.05327045917510986, + "loss_ib": 0.001096135238185525, + "step": 2280 + }, + { + "ce_ib": 2.965975046157837, + "ce_orig": 0.47790539264678955, + "epoch": 0.65569056006902, + "kl_loss": 0.08677102625370026, + "loss_ib": 0.0011643077014014125, + "step": 2280 + }, + { + "ce_ib": 5.527815341949463, + "ce_orig": 1.0988918542861938, + "epoch": 0.6559781436479977, + "kl_loss": 0.07420121133327484, + "loss_ib": 0.0012947935611009598, + "step": 2281 + }, + { + "ce_ib": 4.5638837814331055, + "ce_orig": 0.6286882162094116, + "epoch": 0.6559781436479977, + "kl_loss": 0.08235512673854828, + "loss_ib": 0.0012799396645277739, + "step": 2281 + }, + { + "ce_ib": 4.3961873054504395, + "ce_orig": 1.0633037090301514, + "epoch": 0.6559781436479977, + "kl_loss": 0.08458787947893143, + "loss_ib": 0.0012854975648224354, + "step": 2281 + }, + { + "ce_ib": 2.3273751735687256, + "ce_orig": 0.5219329595565796, + "epoch": 0.6559781436479977, + "kl_loss": 0.05435352027416229, + "loss_ib": 0.0007762726745568216, + "step": 2281 + }, + { + "ce_ib": 2.2179982662200928, + "ce_orig": 0.38114312291145325, + "epoch": 0.6562657272269753, + "kl_loss": 0.11536914855241776, + "loss_ib": 0.001375491265207529, + "step": 2282 + }, + { + "ce_ib": 5.180620193481445, + "ce_orig": 0.48903876543045044, + "epoch": 0.6562657272269753, + "kl_loss": 0.18777668476104736, + "loss_ib": 0.0023958287201821804, + "step": 2282 + }, + { + "ce_ib": 5.783363342285156, + "ce_orig": 1.3418450355529785, + "epoch": 0.6562657272269753, + "kl_loss": 0.06747318804264069, + "loss_ib": 0.0012530680978670716, + "step": 2282 + }, + { + "ce_ib": 7.086777210235596, + "ce_orig": 1.5074928998947144, + "epoch": 0.6562657272269753, + "kl_loss": 0.11783495545387268, + "loss_ib": 0.001887027290649712, + "step": 2282 + }, + { + "ce_ib": 3.1762819290161133, + "ce_orig": 0.8175716996192932, + "epoch": 0.656553310805953, + "kl_loss": 0.052870314568281174, + "loss_ib": 0.0008463312988169491, + "step": 2283 + }, + { + "ce_ib": 5.902597427368164, + "ce_orig": 1.362978458404541, + "epoch": 0.656553310805953, + "kl_loss": 0.10212495923042297, + "loss_ib": 0.0016115092439576983, + "step": 2283 + }, + { + "ce_ib": 3.5722718238830566, + "ce_orig": 0.7464645504951477, + "epoch": 0.656553310805953, + "kl_loss": 0.07006088644266129, + "loss_ib": 0.0010578359942883253, + "step": 2283 + }, + { + "ce_ib": 5.471219539642334, + "ce_orig": 0.9850662350654602, + "epoch": 0.656553310805953, + "kl_loss": 0.07114933431148529, + "loss_ib": 0.0012586151715368032, + "step": 2283 + }, + { + "ce_ib": 3.429659605026245, + "ce_orig": 0.6074689030647278, + "epoch": 0.6568408943849307, + "kl_loss": 0.08498065918684006, + "loss_ib": 0.0011927725281566381, + "step": 2284 + }, + { + "ce_ib": 4.881903171539307, + "ce_orig": 0.6370232701301575, + "epoch": 0.6568408943849307, + "kl_loss": 0.07819897681474686, + "loss_ib": 0.0012701799860224128, + "step": 2284 + }, + { + "ce_ib": 4.186735153198242, + "ce_orig": 0.8643299341201782, + "epoch": 0.6568408943849307, + "kl_loss": 0.06687933206558228, + "loss_ib": 0.0010874667204916477, + "step": 2284 + }, + { + "ce_ib": 2.8134377002716064, + "ce_orig": 0.5309114456176758, + "epoch": 0.6568408943849307, + "kl_loss": 0.09403102099895477, + "loss_ib": 0.0012216538889333606, + "step": 2284 + }, + { + "epoch": 0.6571284779639083, + "grad_norm": 0.11286040395498276, + "learning_rate": 4.549115860704829e-05, + "loss": 0.8109, + "step": 2285 + }, + { + "ce_ib": 7.26323127746582, + "ce_orig": 1.785958170890808, + "epoch": 0.6571284779639083, + "kl_loss": 0.07916676998138428, + "loss_ib": 0.0015179908368736506, + "step": 2285 + }, + { + "ce_ib": 4.309074401855469, + "ce_orig": 1.0697309970855713, + "epoch": 0.6571284779639083, + "kl_loss": 0.06697022169828415, + "loss_ib": 0.0011006095446646214, + "step": 2285 + }, + { + "ce_ib": 4.751814842224121, + "ce_orig": 0.7198655605316162, + "epoch": 0.6571284779639083, + "kl_loss": 0.07770450413227081, + "loss_ib": 0.0012522265315055847, + "step": 2285 + }, + { + "ce_ib": 4.2611260414123535, + "ce_orig": 0.931083619594574, + "epoch": 0.6571284779639083, + "kl_loss": 0.06077158451080322, + "loss_ib": 0.0010338283609598875, + "step": 2285 + }, + { + "ce_ib": 2.8614182472229004, + "ce_orig": 0.5164410471916199, + "epoch": 0.6574160615428859, + "kl_loss": 0.05914750322699547, + "loss_ib": 0.0008776168106123805, + "step": 2286 + }, + { + "ce_ib": 3.8371880054473877, + "ce_orig": 0.4602975845336914, + "epoch": 0.6574160615428859, + "kl_loss": 0.0901448205113411, + "loss_ib": 0.0012851670617237687, + "step": 2286 + }, + { + "ce_ib": 3.1333019733428955, + "ce_orig": 0.3193384110927582, + "epoch": 0.6574160615428859, + "kl_loss": 0.11290770024061203, + "loss_ib": 0.0014424071414396167, + "step": 2286 + }, + { + "ce_ib": 4.781826496124268, + "ce_orig": 0.9378849267959595, + "epoch": 0.6574160615428859, + "kl_loss": 0.08547717332839966, + "loss_ib": 0.0013329542707651854, + "step": 2286 + }, + { + "ce_ib": 2.6607635021209717, + "ce_orig": 0.6439882516860962, + "epoch": 0.6577036451218635, + "kl_loss": 0.053400225937366486, + "loss_ib": 0.0008000785601325333, + "step": 2287 + }, + { + "ce_ib": 6.513638973236084, + "ce_orig": 1.0941194295883179, + "epoch": 0.6577036451218635, + "kl_loss": 0.1061209887266159, + "loss_ib": 0.001712573692202568, + "step": 2287 + }, + { + "ce_ib": 3.80399489402771, + "ce_orig": 0.9507548809051514, + "epoch": 0.6577036451218635, + "kl_loss": 0.08741088211536407, + "loss_ib": 0.0012545082718133926, + "step": 2287 + }, + { + "ce_ib": 4.019896507263184, + "ce_orig": 0.5741574168205261, + "epoch": 0.6577036451218635, + "kl_loss": 0.10077936202287674, + "loss_ib": 0.0014097831444814801, + "step": 2287 + }, + { + "ce_ib": 3.0287888050079346, + "ce_orig": 0.553284764289856, + "epoch": 0.6579912287008411, + "kl_loss": 0.06870901584625244, + "loss_ib": 0.0009899690048769116, + "step": 2288 + }, + { + "ce_ib": 3.316420316696167, + "ce_orig": 0.9187297224998474, + "epoch": 0.6579912287008411, + "kl_loss": 0.06690701842308044, + "loss_ib": 0.0010007121600210667, + "step": 2288 + }, + { + "ce_ib": 3.937014579772949, + "ce_orig": 0.967896044254303, + "epoch": 0.6579912287008411, + "kl_loss": 0.28837424516677856, + "loss_ib": 0.003277443815022707, + "step": 2288 + }, + { + "ce_ib": 4.611119747161865, + "ce_orig": 0.47611191868782043, + "epoch": 0.6579912287008411, + "kl_loss": 0.07134867459535599, + "loss_ib": 0.0011745986994355917, + "step": 2288 + }, + { + "ce_ib": 3.3859286308288574, + "ce_orig": 0.4725525975227356, + "epoch": 0.6582788122798188, + "kl_loss": 0.05970991402864456, + "loss_ib": 0.0009356920490972698, + "step": 2289 + }, + { + "ce_ib": 6.599321365356445, + "ce_orig": 0.7391117215156555, + "epoch": 0.6582788122798188, + "kl_loss": 0.061128243803977966, + "loss_ib": 0.001271214452572167, + "step": 2289 + }, + { + "ce_ib": 3.508474588394165, + "ce_orig": 0.6224064230918884, + "epoch": 0.6582788122798188, + "kl_loss": 0.04278264194726944, + "loss_ib": 0.0007786738569848239, + "step": 2289 + }, + { + "ce_ib": 3.6264708042144775, + "ce_orig": 0.6128821969032288, + "epoch": 0.6582788122798188, + "kl_loss": 0.07624612748622894, + "loss_ib": 0.001125108334235847, + "step": 2289 + }, + { + "epoch": 0.6585663958587965, + "grad_norm": 0.09676265716552734, + "learning_rate": 4.5468904142881894e-05, + "loss": 0.8466, + "step": 2290 + }, + { + "ce_ib": 3.5421624183654785, + "ce_orig": 0.7433175444602966, + "epoch": 0.6585663958587965, + "kl_loss": 0.07465595006942749, + "loss_ib": 0.0011007756693288684, + "step": 2290 + }, + { + "ce_ib": 3.6902921199798584, + "ce_orig": 0.6768090128898621, + "epoch": 0.6585663958587965, + "kl_loss": 0.09259873628616333, + "loss_ib": 0.0012950164964422584, + "step": 2290 + }, + { + "ce_ib": 4.667980194091797, + "ce_orig": 0.8149797916412354, + "epoch": 0.6585663958587965, + "kl_loss": 0.06969562917947769, + "loss_ib": 0.0011637542629614472, + "step": 2290 + }, + { + "ce_ib": 4.281612873077393, + "ce_orig": 0.39050352573394775, + "epoch": 0.6585663958587965, + "kl_loss": 0.1346023827791214, + "loss_ib": 0.0017741851042956114, + "step": 2290 + }, + { + "ce_ib": 5.816831111907959, + "ce_orig": 1.0380734205245972, + "epoch": 0.6588539794377741, + "kl_loss": 0.08832317590713501, + "loss_ib": 0.0014649147633463144, + "step": 2291 + }, + { + "ce_ib": 7.487266540527344, + "ce_orig": 1.5106734037399292, + "epoch": 0.6588539794377741, + "kl_loss": 0.0850745216012001, + "loss_ib": 0.0015994717832654715, + "step": 2291 + }, + { + "ce_ib": 3.3617560863494873, + "ce_orig": 0.7871743440628052, + "epoch": 0.6588539794377741, + "kl_loss": 0.04560401290655136, + "loss_ib": 0.0007922157528810203, + "step": 2291 + }, + { + "ce_ib": 3.1831438541412354, + "ce_orig": 0.4353175759315491, + "epoch": 0.6588539794377741, + "kl_loss": 0.07327171415090561, + "loss_ib": 0.001051031518727541, + "step": 2291 + }, + { + "ce_ib": 3.227412462234497, + "ce_orig": 0.5139637589454651, + "epoch": 0.6591415630167518, + "kl_loss": 0.04596748948097229, + "loss_ib": 0.0007824161439202726, + "step": 2292 + }, + { + "ce_ib": 4.058830738067627, + "ce_orig": 0.9570779800415039, + "epoch": 0.6591415630167518, + "kl_loss": 0.0787658765912056, + "loss_ib": 0.0011935418006032705, + "step": 2292 + }, + { + "ce_ib": 3.6830649375915527, + "ce_orig": 0.7775994539260864, + "epoch": 0.6591415630167518, + "kl_loss": 0.07403576374053955, + "loss_ib": 0.0011086640879511833, + "step": 2292 + }, + { + "ce_ib": 4.285210609436035, + "ce_orig": 0.8347422480583191, + "epoch": 0.6591415630167518, + "kl_loss": 0.0726555585861206, + "loss_ib": 0.0011550765484571457, + "step": 2292 + }, + { + "ce_ib": 5.635944843292236, + "ce_orig": 1.5229945182800293, + "epoch": 0.6594291465957294, + "kl_loss": 0.0614401251077652, + "loss_ib": 0.0011779956985265017, + "step": 2293 + }, + { + "ce_ib": 4.682469844818115, + "ce_orig": 0.8855236768722534, + "epoch": 0.6594291465957294, + "kl_loss": 0.062151357531547546, + "loss_ib": 0.0010897604515776038, + "step": 2293 + }, + { + "ce_ib": 4.739254474639893, + "ce_orig": 1.1957552433013916, + "epoch": 0.6594291465957294, + "kl_loss": 0.07707645744085312, + "loss_ib": 0.0012446899199858308, + "step": 2293 + }, + { + "ce_ib": 3.405355930328369, + "ce_orig": 0.7179161906242371, + "epoch": 0.6594291465957294, + "kl_loss": 0.06103839352726936, + "loss_ib": 0.0009509195224381983, + "step": 2293 + }, + { + "ce_ib": 6.018336772918701, + "ce_orig": 1.1201651096343994, + "epoch": 0.659716730174707, + "kl_loss": 0.06943506002426147, + "loss_ib": 0.0012961841421201825, + "step": 2294 + }, + { + "ce_ib": 3.1760988235473633, + "ce_orig": 0.6044064164161682, + "epoch": 0.659716730174707, + "kl_loss": 0.062195923179388046, + "loss_ib": 0.0009395690867677331, + "step": 2294 + }, + { + "ce_ib": 3.416560173034668, + "ce_orig": 0.6867579817771912, + "epoch": 0.659716730174707, + "kl_loss": 0.059903666377067566, + "loss_ib": 0.0009406927274540067, + "step": 2294 + }, + { + "ce_ib": 6.241181373596191, + "ce_orig": 1.2581113576889038, + "epoch": 0.659716730174707, + "kl_loss": 0.08198889344930649, + "loss_ib": 0.0014440069207921624, + "step": 2294 + }, + { + "epoch": 0.6600043137536846, + "grad_norm": 0.10291100293397903, + "learning_rate": 4.544660036437449e-05, + "loss": 0.9184, + "step": 2295 + }, + { + "ce_ib": 2.2460129261016846, + "ce_orig": 0.48867541551589966, + "epoch": 0.6600043137536846, + "kl_loss": 0.07091905176639557, + "loss_ib": 0.0009337918017990887, + "step": 2295 + }, + { + "ce_ib": 5.934546947479248, + "ce_orig": 1.2902241945266724, + "epoch": 0.6600043137536846, + "kl_loss": 0.14199915528297424, + "loss_ib": 0.0020134462974965572, + "step": 2295 + }, + { + "ce_ib": 1.782610535621643, + "ce_orig": 0.41154971718788147, + "epoch": 0.6600043137536846, + "kl_loss": 0.05529959499835968, + "loss_ib": 0.0007312569650821388, + "step": 2295 + }, + { + "ce_ib": 3.991591215133667, + "ce_orig": 1.0878674983978271, + "epoch": 0.6600043137536846, + "kl_loss": 0.07420437783002853, + "loss_ib": 0.0011412028688937426, + "step": 2295 + }, + { + "ce_ib": 1.9446866512298584, + "ce_orig": 0.5166288614273071, + "epoch": 0.6602918973326624, + "kl_loss": 0.039557769894599915, + "loss_ib": 0.0005900463438592851, + "step": 2296 + }, + { + "ce_ib": 2.90800404548645, + "ce_orig": 0.7340528964996338, + "epoch": 0.6602918973326624, + "kl_loss": 0.05472474545240402, + "loss_ib": 0.0008380478248000145, + "step": 2296 + }, + { + "ce_ib": 3.2899534702301025, + "ce_orig": 0.8600157499313354, + "epoch": 0.6602918973326624, + "kl_loss": 0.06304998695850372, + "loss_ib": 0.0009594951407052577, + "step": 2296 + }, + { + "ce_ib": 3.3343920707702637, + "ce_orig": 0.6089526414871216, + "epoch": 0.6602918973326624, + "kl_loss": 0.05324772000312805, + "loss_ib": 0.0008659163722768426, + "step": 2296 + }, + { + "ce_ib": 3.61568546295166, + "ce_orig": 0.7560431361198425, + "epoch": 0.66057948091164, + "kl_loss": 0.11938367038965225, + "loss_ib": 0.0015554052079096437, + "step": 2297 + }, + { + "ce_ib": 3.7070071697235107, + "ce_orig": 0.6657878160476685, + "epoch": 0.66057948091164, + "kl_loss": 0.10293921828269958, + "loss_ib": 0.001400092849507928, + "step": 2297 + }, + { + "ce_ib": 5.661707401275635, + "ce_orig": 1.5501595735549927, + "epoch": 0.66057948091164, + "kl_loss": 0.055913448333740234, + "loss_ib": 0.0011253051925450563, + "step": 2297 + }, + { + "ce_ib": 5.243333339691162, + "ce_orig": 0.767796516418457, + "epoch": 0.66057948091164, + "kl_loss": 0.08871394395828247, + "loss_ib": 0.0014114726800471544, + "step": 2297 + }, + { + "ce_ib": 4.1427106857299805, + "ce_orig": 1.0138976573944092, + "epoch": 0.6608670644906176, + "kl_loss": 0.10364150255918503, + "loss_ib": 0.001450686133466661, + "step": 2298 + }, + { + "ce_ib": 5.277493476867676, + "ce_orig": 1.0618740320205688, + "epoch": 0.6608670644906176, + "kl_loss": 0.060346439480781555, + "loss_ib": 0.0011312137357890606, + "step": 2298 + }, + { + "ce_ib": 5.565991401672363, + "ce_orig": 1.229979157447815, + "epoch": 0.6608670644906176, + "kl_loss": 0.054697006940841675, + "loss_ib": 0.0011035691713914275, + "step": 2298 + }, + { + "ce_ib": 4.33672571182251, + "ce_orig": 0.9295956492424011, + "epoch": 0.6608670644906176, + "kl_loss": 0.06083536893129349, + "loss_ib": 0.0010420262115076184, + "step": 2298 + }, + { + "ce_ib": 4.9498772621154785, + "ce_orig": 0.5971134901046753, + "epoch": 0.6611546480695952, + "kl_loss": 0.10583186149597168, + "loss_ib": 0.001553306239657104, + "step": 2299 + }, + { + "ce_ib": 5.638417720794678, + "ce_orig": 1.022097110748291, + "epoch": 0.6611546480695952, + "kl_loss": 0.08376124501228333, + "loss_ib": 0.001401454210281372, + "step": 2299 + }, + { + "ce_ib": 4.44361686706543, + "ce_orig": 0.6903504133224487, + "epoch": 0.6611546480695952, + "kl_loss": 0.08465462923049927, + "loss_ib": 0.0012909079669043422, + "step": 2299 + }, + { + "ce_ib": 3.1570181846618652, + "ce_orig": 0.6455008387565613, + "epoch": 0.6611546480695952, + "kl_loss": 0.037843670696020126, + "loss_ib": 0.0006941385217942297, + "step": 2299 + }, + { + "epoch": 0.6614422316485729, + "grad_norm": 0.09163826704025269, + "learning_rate": 4.542424732526105e-05, + "loss": 0.8594, + "step": 2300 + }, + { + "ce_ib": 4.585714340209961, + "ce_orig": 0.536469578742981, + "epoch": 0.6614422316485729, + "kl_loss": 0.1393735110759735, + "loss_ib": 0.0018523065373301506, + "step": 2300 + }, + { + "ce_ib": 6.702412128448486, + "ce_orig": 1.151282548904419, + "epoch": 0.6614422316485729, + "kl_loss": 0.12131333351135254, + "loss_ib": 0.0018833744106814265, + "step": 2300 + }, + { + "ce_ib": 4.403960227966309, + "ce_orig": 0.702271044254303, + "epoch": 0.6614422316485729, + "kl_loss": 0.10177986323833466, + "loss_ib": 0.0014581945724785328, + "step": 2300 + }, + { + "ce_ib": 3.9570798873901367, + "ce_orig": 0.6132133603096008, + "epoch": 0.6614422316485729, + "kl_loss": 0.0918266773223877, + "loss_ib": 0.0013139747316017747, + "step": 2300 + }, + { + "ce_ib": 4.8402886390686035, + "ce_orig": 0.5338279604911804, + "epoch": 0.6617298152275505, + "kl_loss": 0.07608143240213394, + "loss_ib": 0.001244843122549355, + "step": 2301 + }, + { + "ce_ib": 3.0979831218719482, + "ce_orig": 0.5718993544578552, + "epoch": 0.6617298152275505, + "kl_loss": 0.061364851891994476, + "loss_ib": 0.0009234468452632427, + "step": 2301 + }, + { + "ce_ib": 5.552011013031006, + "ce_orig": 1.2815015316009521, + "epoch": 0.6617298152275505, + "kl_loss": 0.08428889513015747, + "loss_ib": 0.0013980900403112173, + "step": 2301 + }, + { + "ce_ib": 3.5236427783966064, + "ce_orig": 0.8205747604370117, + "epoch": 0.6617298152275505, + "kl_loss": 0.05883049964904785, + "loss_ib": 0.0009406692115589976, + "step": 2301 + }, + { + "ce_ib": 4.369736671447754, + "ce_orig": 0.31366175413131714, + "epoch": 0.6620173988065281, + "kl_loss": 0.10158488154411316, + "loss_ib": 0.0014528223546221852, + "step": 2302 + }, + { + "ce_ib": 5.318525791168213, + "ce_orig": 0.9881439805030823, + "epoch": 0.6620173988065281, + "kl_loss": 0.06270694732666016, + "loss_ib": 0.001158921979367733, + "step": 2302 + }, + { + "ce_ib": 4.183155059814453, + "ce_orig": 0.9580113291740417, + "epoch": 0.6620173988065281, + "kl_loss": 0.055890072137117386, + "loss_ib": 0.0009772161720320582, + "step": 2302 + }, + { + "ce_ib": 4.115776062011719, + "ce_orig": 0.806951642036438, + "epoch": 0.6620173988065281, + "kl_loss": 0.06703796237707138, + "loss_ib": 0.0010819572489708662, + "step": 2302 + }, + { + "ce_ib": 5.604039192199707, + "ce_orig": 1.2454640865325928, + "epoch": 0.6623049823855058, + "kl_loss": 0.09659774601459503, + "loss_ib": 0.001526381354779005, + "step": 2303 + }, + { + "ce_ib": 4.685688018798828, + "ce_orig": 1.0254878997802734, + "epoch": 0.6623049823855058, + "kl_loss": 0.07071443647146225, + "loss_ib": 0.001175713143311441, + "step": 2303 + }, + { + "ce_ib": 3.7580740451812744, + "ce_orig": 0.7312368750572205, + "epoch": 0.6623049823855058, + "kl_loss": 0.09944295883178711, + "loss_ib": 0.0013702369760721922, + "step": 2303 + }, + { + "ce_ib": 2.496598243713379, + "ce_orig": 0.40819889307022095, + "epoch": 0.6623049823855058, + "kl_loss": 0.06462891399860382, + "loss_ib": 0.0008959489059634507, + "step": 2303 + }, + { + "ce_ib": 4.274606227874756, + "ce_orig": 1.0119645595550537, + "epoch": 0.6625925659644835, + "kl_loss": 0.05080642178654671, + "loss_ib": 0.0009355248184874654, + "step": 2304 + }, + { + "ce_ib": 2.735414743423462, + "ce_orig": 0.32366201281547546, + "epoch": 0.6625925659644835, + "kl_loss": 0.0759478211402893, + "loss_ib": 0.0010330197401344776, + "step": 2304 + }, + { + "ce_ib": 6.116103172302246, + "ce_orig": 1.4057141542434692, + "epoch": 0.6625925659644835, + "kl_loss": 0.05163117125630379, + "loss_ib": 0.0011279219761490822, + "step": 2304 + }, + { + "ce_ib": 3.491584300994873, + "ce_orig": 0.6918932199478149, + "epoch": 0.6625925659644835, + "kl_loss": 0.050732940435409546, + "loss_ib": 0.0008564877789467573, + "step": 2304 + }, + { + "epoch": 0.6628801495434611, + "grad_norm": 0.08765435963869095, + "learning_rate": 4.540184507939523e-05, + "loss": 0.781, + "step": 2305 + }, + { + "ce_ib": 3.3366353511810303, + "ce_orig": 0.7002195119857788, + "epoch": 0.6628801495434611, + "kl_loss": 0.06932632625102997, + "loss_ib": 0.0010269267950206995, + "step": 2305 + }, + { + "ce_ib": 6.39858865737915, + "ce_orig": 1.4925941228866577, + "epoch": 0.6628801495434611, + "kl_loss": 0.07594285905361176, + "loss_ib": 0.0013992873718962073, + "step": 2305 + }, + { + "ce_ib": 5.899816513061523, + "ce_orig": 1.278214931488037, + "epoch": 0.6628801495434611, + "kl_loss": 0.06852123141288757, + "loss_ib": 0.0012751939939334989, + "step": 2305 + }, + { + "ce_ib": 4.523046493530273, + "ce_orig": 1.0756926536560059, + "epoch": 0.6628801495434611, + "kl_loss": 0.05748523771762848, + "loss_ib": 0.0010271569481119514, + "step": 2305 + }, + { + "ce_ib": 4.579641342163086, + "ce_orig": 0.8675640225410461, + "epoch": 0.6631677331224387, + "kl_loss": 0.09013731777667999, + "loss_ib": 0.00135933724232018, + "step": 2306 + }, + { + "ce_ib": 3.546947479248047, + "ce_orig": 0.7701714038848877, + "epoch": 0.6631677331224387, + "kl_loss": 0.039793942123651505, + "loss_ib": 0.0007526340777985752, + "step": 2306 + }, + { + "ce_ib": 4.942210674285889, + "ce_orig": 1.13329017162323, + "epoch": 0.6631677331224387, + "kl_loss": 0.05907517671585083, + "loss_ib": 0.0010849727550521493, + "step": 2306 + }, + { + "ce_ib": 4.249905586242676, + "ce_orig": 0.6675190329551697, + "epoch": 0.6631677331224387, + "kl_loss": 0.079674631357193, + "loss_ib": 0.0012217367766425014, + "step": 2306 + }, + { + "ce_ib": 4.1743621826171875, + "ce_orig": 0.905890166759491, + "epoch": 0.6634553167014163, + "kl_loss": 0.07811474800109863, + "loss_ib": 0.0011985836317762733, + "step": 2307 + }, + { + "ce_ib": 4.968560695648193, + "ce_orig": 0.8021719455718994, + "epoch": 0.6634553167014163, + "kl_loss": 0.07434044033288956, + "loss_ib": 0.0012402604334056377, + "step": 2307 + }, + { + "ce_ib": 8.804271697998047, + "ce_orig": 1.8410823345184326, + "epoch": 0.6634553167014163, + "kl_loss": 0.08040080219507217, + "loss_ib": 0.0016844351775944233, + "step": 2307 + }, + { + "ce_ib": 3.779813289642334, + "ce_orig": 0.39875727891921997, + "epoch": 0.6634553167014163, + "kl_loss": 0.08137817680835724, + "loss_ib": 0.0011917630909010768, + "step": 2307 + }, + { + "ce_ib": 3.6819381713867188, + "ce_orig": 0.8359904289245605, + "epoch": 0.663742900280394, + "kl_loss": 0.04314136877655983, + "loss_ib": 0.0007996074855327606, + "step": 2308 + }, + { + "ce_ib": 2.8889591693878174, + "ce_orig": 0.6500442028045654, + "epoch": 0.663742900280394, + "kl_loss": 0.05466313660144806, + "loss_ib": 0.0008355272584594786, + "step": 2308 + }, + { + "ce_ib": 4.25671911239624, + "ce_orig": 1.0391381978988647, + "epoch": 0.663742900280394, + "kl_loss": 0.08229251205921173, + "loss_ib": 0.0012485970510169864, + "step": 2308 + }, + { + "ce_ib": 4.4570393562316895, + "ce_orig": 0.8161414861679077, + "epoch": 0.663742900280394, + "kl_loss": 0.06268543750047684, + "loss_ib": 0.0010725583415478468, + "step": 2308 + }, + { + "ce_ib": 3.5389633178710938, + "ce_orig": 0.8339385986328125, + "epoch": 0.6640304838593716, + "kl_loss": 0.07316018640995026, + "loss_ib": 0.0010854981373995543, + "step": 2309 + }, + { + "ce_ib": 3.2108728885650635, + "ce_orig": 0.5334968566894531, + "epoch": 0.6640304838593716, + "kl_loss": 0.06187506765127182, + "loss_ib": 0.0009398379479534924, + "step": 2309 + }, + { + "ce_ib": 2.72507905960083, + "ce_orig": 0.43666666746139526, + "epoch": 0.6640304838593716, + "kl_loss": 0.046911273151636124, + "loss_ib": 0.0007416206644847989, + "step": 2309 + }, + { + "ce_ib": 2.807110548019409, + "ce_orig": 0.5605517029762268, + "epoch": 0.6640304838593716, + "kl_loss": 0.035666439682245255, + "loss_ib": 0.0006373754004016519, + "step": 2309 + }, + { + "epoch": 0.6643180674383493, + "grad_norm": 0.1078149825334549, + "learning_rate": 4.5379393680749255e-05, + "loss": 0.8985, + "step": 2310 + }, + { + "ce_ib": 4.10946798324585, + "ce_orig": 0.7619176506996155, + "epoch": 0.6643180674383493, + "kl_loss": 0.0623946487903595, + "loss_ib": 0.0010348932119086385, + "step": 2310 + }, + { + "ce_ib": 3.658240795135498, + "ce_orig": 0.6661192774772644, + "epoch": 0.6643180674383493, + "kl_loss": 0.0781165137887001, + "loss_ib": 0.0011469891760498285, + "step": 2310 + }, + { + "ce_ib": 3.8725290298461914, + "ce_orig": 0.9578630328178406, + "epoch": 0.6643180674383493, + "kl_loss": 0.07227423787117004, + "loss_ib": 0.0011099951807409525, + "step": 2310 + }, + { + "ce_ib": 5.512388706207275, + "ce_orig": 1.1715093851089478, + "epoch": 0.6643180674383493, + "kl_loss": 0.06936092674732208, + "loss_ib": 0.0012448480119928718, + "step": 2310 + }, + { + "ce_ib": 3.792863607406616, + "ce_orig": 0.9075955748558044, + "epoch": 0.6646056510173269, + "kl_loss": 0.05448092520236969, + "loss_ib": 0.0009240955696441233, + "step": 2311 + }, + { + "ce_ib": 2.5508925914764404, + "ce_orig": 0.6044411063194275, + "epoch": 0.6646056510173269, + "kl_loss": 0.05194834619760513, + "loss_ib": 0.0007745727198198438, + "step": 2311 + }, + { + "ce_ib": 3.603067398071289, + "ce_orig": 0.9242309331893921, + "epoch": 0.6646056510173269, + "kl_loss": 0.05516711622476578, + "loss_ib": 0.0009119778405874968, + "step": 2311 + }, + { + "ce_ib": 3.932664394378662, + "ce_orig": 0.8475385308265686, + "epoch": 0.6646056510173269, + "kl_loss": 0.06720563024282455, + "loss_ib": 0.0010653227800503373, + "step": 2311 + }, + { + "ce_ib": 4.070559978485107, + "ce_orig": 0.8391806483268738, + "epoch": 0.6648932345963046, + "kl_loss": 0.0766218900680542, + "loss_ib": 0.0011732748243957758, + "step": 2312 + }, + { + "ce_ib": 5.435855388641357, + "ce_orig": 1.090121865272522, + "epoch": 0.6648932345963046, + "kl_loss": 0.11195050925016403, + "loss_ib": 0.0016630905447527766, + "step": 2312 + }, + { + "ce_ib": 2.809833526611328, + "ce_orig": 0.7536491751670837, + "epoch": 0.6648932345963046, + "kl_loss": 0.06222258880734444, + "loss_ib": 0.0009032092057168484, + "step": 2312 + }, + { + "ce_ib": 3.900941848754883, + "ce_orig": 0.6897080540657043, + "epoch": 0.6648932345963046, + "kl_loss": 0.035024285316467285, + "loss_ib": 0.0007403370691463351, + "step": 2312 + }, + { + "ce_ib": 3.819096565246582, + "ce_orig": 0.7415339350700378, + "epoch": 0.6651808181752822, + "kl_loss": 0.0635252445936203, + "loss_ib": 0.001017161994241178, + "step": 2313 + }, + { + "ce_ib": 3.401242256164551, + "ce_orig": 0.7179132699966431, + "epoch": 0.6651808181752822, + "kl_loss": 0.04783964902162552, + "loss_ib": 0.0008185206679627299, + "step": 2313 + }, + { + "ce_ib": 2.8339178562164307, + "ce_orig": 0.6475207209587097, + "epoch": 0.6651808181752822, + "kl_loss": 0.058281440287828445, + "loss_ib": 0.0008662061300128698, + "step": 2313 + }, + { + "ce_ib": 4.520089626312256, + "ce_orig": 1.2674369812011719, + "epoch": 0.6651808181752822, + "kl_loss": 0.07202841341495514, + "loss_ib": 0.0011722930939868093, + "step": 2313 + }, + { + "ce_ib": 3.992109537124634, + "ce_orig": 0.5580715537071228, + "epoch": 0.6654684017542598, + "kl_loss": 0.11784952133893967, + "loss_ib": 0.0015777061926200986, + "step": 2314 + }, + { + "ce_ib": 4.0978803634643555, + "ce_orig": 0.524011492729187, + "epoch": 0.6654684017542598, + "kl_loss": 0.10812173783779144, + "loss_ib": 0.0014910054160282016, + "step": 2314 + }, + { + "ce_ib": 2.897897481918335, + "ce_orig": 0.4538169801235199, + "epoch": 0.6654684017542598, + "kl_loss": 0.10538367927074432, + "loss_ib": 0.0013436265289783478, + "step": 2314 + }, + { + "ce_ib": 5.390155792236328, + "ce_orig": 1.0273525714874268, + "epoch": 0.6654684017542598, + "kl_loss": 0.11931593716144562, + "loss_ib": 0.0017321748891845345, + "step": 2314 + }, + { + "epoch": 0.6657559853332374, + "grad_norm": 0.08208002895116806, + "learning_rate": 4.535689318341374e-05, + "loss": 0.7954, + "step": 2315 + }, + { + "ce_ib": 4.29964542388916, + "ce_orig": 0.5795935392379761, + "epoch": 0.6657559853332374, + "kl_loss": 0.09401943534612656, + "loss_ib": 0.0013701588613912463, + "step": 2315 + }, + { + "ce_ib": 2.9749085903167725, + "ce_orig": 0.5558741092681885, + "epoch": 0.6657559853332374, + "kl_loss": 0.06129830330610275, + "loss_ib": 0.0009104738710448146, + "step": 2315 + }, + { + "ce_ib": 4.906274318695068, + "ce_orig": 0.6952655911445618, + "epoch": 0.6657559853332374, + "kl_loss": 0.07042737305164337, + "loss_ib": 0.0011949011823162436, + "step": 2315 + }, + { + "ce_ib": 7.453649997711182, + "ce_orig": 1.7100341320037842, + "epoch": 0.6657559853332374, + "kl_loss": 0.18771900236606598, + "loss_ib": 0.0026225547771900892, + "step": 2315 + }, + { + "ce_ib": 2.5767016410827637, + "ce_orig": 0.6184089183807373, + "epoch": 0.6660435689122152, + "kl_loss": 0.0614604726433754, + "loss_ib": 0.000872274860739708, + "step": 2316 + }, + { + "ce_ib": 6.3059492111206055, + "ce_orig": 0.9996453523635864, + "epoch": 0.6660435689122152, + "kl_loss": 0.11540191620588303, + "loss_ib": 0.0017846139380708337, + "step": 2316 + }, + { + "ce_ib": 2.8794901371002197, + "ce_orig": 0.5142540335655212, + "epoch": 0.6660435689122152, + "kl_loss": 0.09201455861330032, + "loss_ib": 0.0012080945307388902, + "step": 2316 + }, + { + "ce_ib": 3.330183744430542, + "ce_orig": 0.7130447626113892, + "epoch": 0.6660435689122152, + "kl_loss": 0.15324272215366364, + "loss_ib": 0.0018654456362128258, + "step": 2316 + }, + { + "ce_ib": 2.6079494953155518, + "ce_orig": 0.3930950462818146, + "epoch": 0.6663311524911928, + "kl_loss": 0.037840958684682846, + "loss_ib": 0.0006392045179381967, + "step": 2317 + }, + { + "ce_ib": 3.3945980072021484, + "ce_orig": 0.8469601273536682, + "epoch": 0.6663311524911928, + "kl_loss": 0.08757445961236954, + "loss_ib": 0.0012152043636888266, + "step": 2317 + }, + { + "ce_ib": 7.074010372161865, + "ce_orig": 1.6447701454162598, + "epoch": 0.6663311524911928, + "kl_loss": 0.08114694058895111, + "loss_ib": 0.001518870354630053, + "step": 2317 + }, + { + "ce_ib": 3.8817989826202393, + "ce_orig": 0.671375572681427, + "epoch": 0.6663311524911928, + "kl_loss": 0.07580038905143738, + "loss_ib": 0.001146183698438108, + "step": 2317 + }, + { + "ce_ib": 4.534895420074463, + "ce_orig": 0.9483494758605957, + "epoch": 0.6666187360701704, + "kl_loss": 0.11395417153835297, + "loss_ib": 0.0015930312220007181, + "step": 2318 + }, + { + "ce_ib": 5.007517337799072, + "ce_orig": 0.5803803205490112, + "epoch": 0.6666187360701704, + "kl_loss": 0.05594553425908089, + "loss_ib": 0.0010602070251479745, + "step": 2318 + }, + { + "ce_ib": 4.075723171234131, + "ce_orig": 0.7099723219871521, + "epoch": 0.6666187360701704, + "kl_loss": 0.06146550923585892, + "loss_ib": 0.0010222273413091898, + "step": 2318 + }, + { + "ce_ib": 4.050246238708496, + "ce_orig": 0.9485629796981812, + "epoch": 0.6666187360701704, + "kl_loss": 0.09994609653949738, + "loss_ib": 0.0014044855488464236, + "step": 2318 + }, + { + "ce_ib": 4.3477253913879395, + "ce_orig": 0.7831006646156311, + "epoch": 0.666906319649148, + "kl_loss": 0.07043857127428055, + "loss_ib": 0.001139158266596496, + "step": 2319 + }, + { + "ce_ib": 3.3641722202301025, + "ce_orig": 0.38998591899871826, + "epoch": 0.666906319649148, + "kl_loss": 0.1574057638645172, + "loss_ib": 0.0019104748498648405, + "step": 2319 + }, + { + "ce_ib": 5.230608940124512, + "ce_orig": 1.0831204652786255, + "epoch": 0.666906319649148, + "kl_loss": 0.07203826308250427, + "loss_ib": 0.0012434434611350298, + "step": 2319 + }, + { + "ce_ib": 5.765401840209961, + "ce_orig": 1.3809008598327637, + "epoch": 0.666906319649148, + "kl_loss": 0.098860964179039, + "loss_ib": 0.0015651496360078454, + "step": 2319 + }, + { + "epoch": 0.6671939032281257, + "grad_norm": 0.0904807299375534, + "learning_rate": 4.533434364159761e-05, + "loss": 0.8101, + "step": 2320 + }, + { + "ce_ib": 5.550869464874268, + "ce_orig": 1.3476725816726685, + "epoch": 0.6671939032281257, + "kl_loss": 0.06633864343166351, + "loss_ib": 0.0012184733059257269, + "step": 2320 + }, + { + "ce_ib": 4.980867385864258, + "ce_orig": 1.01884925365448, + "epoch": 0.6671939032281257, + "kl_loss": 0.07042741775512695, + "loss_ib": 0.00120236084330827, + "step": 2320 + }, + { + "ce_ib": 4.913198471069336, + "ce_orig": 1.161201000213623, + "epoch": 0.6671939032281257, + "kl_loss": 0.07731471210718155, + "loss_ib": 0.001264466904103756, + "step": 2320 + }, + { + "ce_ib": 4.198073387145996, + "ce_orig": 1.1210319995880127, + "epoch": 0.6671939032281257, + "kl_loss": 0.09281370043754578, + "loss_ib": 0.001347944256849587, + "step": 2320 + }, + { + "ce_ib": 6.8892292976379395, + "ce_orig": 1.4147008657455444, + "epoch": 0.6674814868071033, + "kl_loss": 0.09945371747016907, + "loss_ib": 0.0016834600828588009, + "step": 2321 + }, + { + "ce_ib": 4.80649471282959, + "ce_orig": 0.5403802990913391, + "epoch": 0.6674814868071033, + "kl_loss": 0.10284711420536041, + "loss_ib": 0.0015091205714270473, + "step": 2321 + }, + { + "ce_ib": 4.035510063171387, + "ce_orig": 0.8067759275436401, + "epoch": 0.6674814868071033, + "kl_loss": 0.06901931762695312, + "loss_ib": 0.0010937441838905215, + "step": 2321 + }, + { + "ce_ib": 3.112593650817871, + "ce_orig": 0.559760332107544, + "epoch": 0.6674814868071033, + "kl_loss": 0.04468610882759094, + "loss_ib": 0.0007581203826703131, + "step": 2321 + }, + { + "ce_ib": 3.8905210494995117, + "ce_orig": 0.7455503940582275, + "epoch": 0.6677690703860809, + "kl_loss": 0.0547509528696537, + "loss_ib": 0.000936561671551317, + "step": 2322 + }, + { + "ce_ib": 2.0035006999969482, + "ce_orig": 0.45797860622406006, + "epoch": 0.6677690703860809, + "kl_loss": 0.046770863234996796, + "loss_ib": 0.0006680586957372725, + "step": 2322 + }, + { + "ce_ib": 3.670503854751587, + "ce_orig": 1.026633620262146, + "epoch": 0.6677690703860809, + "kl_loss": 0.0749804675579071, + "loss_ib": 0.0011168550699949265, + "step": 2322 + }, + { + "ce_ib": 6.569978713989258, + "ce_orig": 1.625328540802002, + "epoch": 0.6677690703860809, + "kl_loss": 0.0906650722026825, + "loss_ib": 0.001563648576848209, + "step": 2322 + }, + { + "ce_ib": 5.043107509613037, + "ce_orig": 1.2650187015533447, + "epoch": 0.6680566539650586, + "kl_loss": 0.09104341268539429, + "loss_ib": 0.001414744765497744, + "step": 2323 + }, + { + "ce_ib": 3.7177481651306152, + "ce_orig": 0.9913787245750427, + "epoch": 0.6680566539650586, + "kl_loss": 0.06612144410610199, + "loss_ib": 0.001032989239320159, + "step": 2323 + }, + { + "ce_ib": 2.7801132202148438, + "ce_orig": 0.6442387104034424, + "epoch": 0.6680566539650586, + "kl_loss": 0.03747297078371048, + "loss_ib": 0.000652741058729589, + "step": 2323 + }, + { + "ce_ib": 6.609991073608398, + "ce_orig": 1.59720778465271, + "epoch": 0.6680566539650586, + "kl_loss": 0.07868288457393646, + "loss_ib": 0.001447827904485166, + "step": 2323 + }, + { + "ce_ib": 3.9504082202911377, + "ce_orig": 0.777961254119873, + "epoch": 0.6683442375440363, + "kl_loss": 0.0839567631483078, + "loss_ib": 0.00123460846953094, + "step": 2324 + }, + { + "ce_ib": 0.9511605501174927, + "ce_orig": 0.05136101692914963, + "epoch": 0.6683442375440363, + "kl_loss": 0.1553187370300293, + "loss_ib": 0.0016483033541589975, + "step": 2324 + }, + { + "ce_ib": 5.089809417724609, + "ce_orig": 0.8727638125419617, + "epoch": 0.6683442375440363, + "kl_loss": 0.05790979415178299, + "loss_ib": 0.0010880789486691356, + "step": 2324 + }, + { + "ce_ib": 4.343912601470947, + "ce_orig": 1.0105496644973755, + "epoch": 0.6683442375440363, + "kl_loss": 0.0919429212808609, + "loss_ib": 0.0013538204366341233, + "step": 2324 + }, + { + "epoch": 0.6686318211230139, + "grad_norm": 0.08504663407802582, + "learning_rate": 4.531174510962794e-05, + "loss": 0.8615, + "step": 2325 + }, + { + "ce_ib": 4.156410217285156, + "ce_orig": 0.5536832213401794, + "epoch": 0.6686318211230139, + "kl_loss": 0.118343286216259, + "loss_ib": 0.001599073875695467, + "step": 2325 + }, + { + "ce_ib": 3.124356746673584, + "ce_orig": 0.3156042695045471, + "epoch": 0.6686318211230139, + "kl_loss": 0.1961500644683838, + "loss_ib": 0.002273936290293932, + "step": 2325 + }, + { + "ce_ib": 3.0425851345062256, + "ce_orig": 0.5372470617294312, + "epoch": 0.6686318211230139, + "kl_loss": 0.05462384596467018, + "loss_ib": 0.0008504969300702214, + "step": 2325 + }, + { + "ce_ib": 5.227597713470459, + "ce_orig": 0.762674868106842, + "epoch": 0.6686318211230139, + "kl_loss": 0.09933719784021378, + "loss_ib": 0.0015161316841840744, + "step": 2325 + }, + { + "ce_ib": 3.9852519035339355, + "ce_orig": 0.7440446615219116, + "epoch": 0.6689194047019915, + "kl_loss": 0.11053875088691711, + "loss_ib": 0.0015039126155897975, + "step": 2326 + }, + { + "ce_ib": 4.537551403045654, + "ce_orig": 0.9567805528640747, + "epoch": 0.6689194047019915, + "kl_loss": 0.05468638986349106, + "loss_ib": 0.0010006190277636051, + "step": 2326 + }, + { + "ce_ib": 2.1228787899017334, + "ce_orig": 0.6466413736343384, + "epoch": 0.6689194047019915, + "kl_loss": 0.05510219186544418, + "loss_ib": 0.0007633097120560706, + "step": 2326 + }, + { + "ce_ib": 5.227719783782959, + "ce_orig": 1.1621975898742676, + "epoch": 0.6689194047019915, + "kl_loss": 0.05818916857242584, + "loss_ib": 0.0011046635918319225, + "step": 2326 + }, + { + "ce_ib": 5.185980319976807, + "ce_orig": 0.8216467499732971, + "epoch": 0.6692069882809691, + "kl_loss": 0.06687350571155548, + "loss_ib": 0.0011873330222442746, + "step": 2327 + }, + { + "ce_ib": 6.432191848754883, + "ce_orig": 1.4891397953033447, + "epoch": 0.6692069882809691, + "kl_loss": 0.18233469128608704, + "loss_ib": 0.002466566162183881, + "step": 2327 + }, + { + "ce_ib": 5.62655782699585, + "ce_orig": 1.0524921417236328, + "epoch": 0.6692069882809691, + "kl_loss": 0.057445965707302094, + "loss_ib": 0.001137115410529077, + "step": 2327 + }, + { + "ce_ib": 4.043249607086182, + "ce_orig": 0.9246647357940674, + "epoch": 0.6692069882809691, + "kl_loss": 0.14621132612228394, + "loss_ib": 0.0018664381932467222, + "step": 2327 + }, + { + "ce_ib": 3.1795194149017334, + "ce_orig": 0.6710485219955444, + "epoch": 0.6694945718599468, + "kl_loss": 0.09667327255010605, + "loss_ib": 0.001284684636630118, + "step": 2328 + }, + { + "ce_ib": 6.067139625549316, + "ce_orig": 1.2619585990905762, + "epoch": 0.6694945718599468, + "kl_loss": 0.0659581869840622, + "loss_ib": 0.001266295905224979, + "step": 2328 + }, + { + "ce_ib": 3.926442861557007, + "ce_orig": 1.0653356313705444, + "epoch": 0.6694945718599468, + "kl_loss": 0.06911227107048035, + "loss_ib": 0.001083766925148666, + "step": 2328 + }, + { + "ce_ib": 4.11264181137085, + "ce_orig": 0.7078129649162292, + "epoch": 0.6694945718599468, + "kl_loss": 0.07340046763420105, + "loss_ib": 0.00114526879042387, + "step": 2328 + }, + { + "ce_ib": 4.52550745010376, + "ce_orig": 0.7215232849121094, + "epoch": 0.6697821554389244, + "kl_loss": 0.06420797854661942, + "loss_ib": 0.00109463045373559, + "step": 2329 + }, + { + "ce_ib": 6.0001397132873535, + "ce_orig": 1.4058009386062622, + "epoch": 0.6697821554389244, + "kl_loss": 0.08288180828094482, + "loss_ib": 0.0014288320671766996, + "step": 2329 + }, + { + "ce_ib": 2.775533676147461, + "ce_orig": 0.7339069247245789, + "epoch": 0.6697821554389244, + "kl_loss": 0.0486435666680336, + "loss_ib": 0.0007639889954589307, + "step": 2329 + }, + { + "ce_ib": 3.616960287094116, + "ce_orig": 1.0729107856750488, + "epoch": 0.6697821554389244, + "kl_loss": 0.036291513592004776, + "loss_ib": 0.0007246111053973436, + "step": 2329 + }, + { + "epoch": 0.6700697390179021, + "grad_norm": 0.10095444321632385, + "learning_rate": 4.528909764194985e-05, + "loss": 0.8393, + "step": 2330 + }, + { + "ce_ib": 5.752561569213867, + "ce_orig": 0.6287232041358948, + "epoch": 0.6700697390179021, + "kl_loss": 0.06999038904905319, + "loss_ib": 0.0012751600006595254, + "step": 2330 + }, + { + "ce_ib": 5.325689792633057, + "ce_orig": 0.7426400184631348, + "epoch": 0.6700697390179021, + "kl_loss": 0.053764328360557556, + "loss_ib": 0.0010702122235670686, + "step": 2330 + }, + { + "ce_ib": 5.399483680725098, + "ce_orig": 1.0266982316970825, + "epoch": 0.6700697390179021, + "kl_loss": 0.09918960928916931, + "loss_ib": 0.0015318443765863776, + "step": 2330 + }, + { + "ce_ib": 4.1048665046691895, + "ce_orig": 1.197453260421753, + "epoch": 0.6700697390179021, + "kl_loss": 0.06097786873579025, + "loss_ib": 0.0010202653938904405, + "step": 2330 + }, + { + "ce_ib": 4.390247821807861, + "ce_orig": 0.765121579170227, + "epoch": 0.6703573225968797, + "kl_loss": 0.07734306901693344, + "loss_ib": 0.0012124554486945271, + "step": 2331 + }, + { + "ce_ib": 3.13266658782959, + "ce_orig": 0.7988690137863159, + "epoch": 0.6703573225968797, + "kl_loss": 0.058783773332834244, + "loss_ib": 0.0009011043584905565, + "step": 2331 + }, + { + "ce_ib": 3.3222885131835938, + "ce_orig": 0.4198530614376068, + "epoch": 0.6703573225968797, + "kl_loss": 0.06264559924602509, + "loss_ib": 0.0009586848318576813, + "step": 2331 + }, + { + "ce_ib": 3.1700668334960938, + "ce_orig": 0.7576343417167664, + "epoch": 0.6703573225968797, + "kl_loss": 0.058337949216365814, + "loss_ib": 0.0009003860759548843, + "step": 2331 + }, + { + "ce_ib": 3.557924270629883, + "ce_orig": 0.8625192046165466, + "epoch": 0.6706449061758574, + "kl_loss": 0.0775996744632721, + "loss_ib": 0.0011317891767248511, + "step": 2332 + }, + { + "ce_ib": 4.949527740478516, + "ce_orig": 1.0993982553482056, + "epoch": 0.6706449061758574, + "kl_loss": 0.06721162050962448, + "loss_ib": 0.0011670689564198256, + "step": 2332 + }, + { + "ce_ib": 3.6613872051239014, + "ce_orig": 0.6822810769081116, + "epoch": 0.6706449061758574, + "kl_loss": 0.06285718083381653, + "loss_ib": 0.0009947104845196009, + "step": 2332 + }, + { + "ce_ib": 5.613789081573486, + "ce_orig": 1.2783747911453247, + "epoch": 0.6706449061758574, + "kl_loss": 0.07804320752620697, + "loss_ib": 0.001341810915619135, + "step": 2332 + }, + { + "ce_ib": 2.469487190246582, + "ce_orig": 0.5588794946670532, + "epoch": 0.670932489754835, + "kl_loss": 0.03728429600596428, + "loss_ib": 0.0006197916809469461, + "step": 2333 + }, + { + "ce_ib": 5.557516098022461, + "ce_orig": 1.359696865081787, + "epoch": 0.670932489754835, + "kl_loss": 0.06465465575456619, + "loss_ib": 0.0012022980954498053, + "step": 2333 + }, + { + "ce_ib": 3.306607723236084, + "ce_orig": 0.7515954375267029, + "epoch": 0.670932489754835, + "kl_loss": 0.06735484302043915, + "loss_ib": 0.001004209159873426, + "step": 2333 + }, + { + "ce_ib": 6.646660327911377, + "ce_orig": 1.4280188083648682, + "epoch": 0.670932489754835, + "kl_loss": 0.06424468010663986, + "loss_ib": 0.0013071128632873297, + "step": 2333 + }, + { + "ce_ib": 3.1993932723999023, + "ce_orig": 0.690685510635376, + "epoch": 0.6712200733338126, + "kl_loss": 0.05731090158224106, + "loss_ib": 0.0008930483018048108, + "step": 2334 + }, + { + "ce_ib": 4.613617420196533, + "ce_orig": 1.312070369720459, + "epoch": 0.6712200733338126, + "kl_loss": 0.04431382566690445, + "loss_ib": 0.0009045000188052654, + "step": 2334 + }, + { + "ce_ib": 1.9247506856918335, + "ce_orig": 0.5102055668830872, + "epoch": 0.6712200733338126, + "kl_loss": 0.06339417397975922, + "loss_ib": 0.0008264167699962854, + "step": 2334 + }, + { + "ce_ib": 2.8813629150390625, + "ce_orig": 0.6228950619697571, + "epoch": 0.6712200733338126, + "kl_loss": 0.06543630361557007, + "loss_ib": 0.000942499318625778, + "step": 2334 + }, + { + "epoch": 0.6715076569127902, + "grad_norm": 0.09536433219909668, + "learning_rate": 4.5266401293126336e-05, + "loss": 0.8222, + "step": 2335 + }, + { + "ce_ib": 2.7992734909057617, + "ce_orig": 0.5085657238960266, + "epoch": 0.6715076569127902, + "kl_loss": 0.054011840373277664, + "loss_ib": 0.0008200457668863237, + "step": 2335 + }, + { + "ce_ib": 2.354738235473633, + "ce_orig": 0.40272119641304016, + "epoch": 0.6715076569127902, + "kl_loss": 0.046297915279865265, + "loss_ib": 0.0006984529318287969, + "step": 2335 + }, + { + "ce_ib": 2.826906204223633, + "ce_orig": 0.5593550205230713, + "epoch": 0.6715076569127902, + "kl_loss": 0.04355494678020477, + "loss_ib": 0.000718240044079721, + "step": 2335 + }, + { + "ce_ib": 5.023959159851074, + "ce_orig": 0.5790183544158936, + "epoch": 0.6715076569127902, + "kl_loss": 0.11107636988162994, + "loss_ib": 0.001613159547559917, + "step": 2335 + }, + { + "ce_ib": 4.558361530303955, + "ce_orig": 1.1150665283203125, + "epoch": 0.671795240491768, + "kl_loss": 0.06770940124988556, + "loss_ib": 0.001132930163294077, + "step": 2336 + }, + { + "ce_ib": 5.240286350250244, + "ce_orig": 0.8448647260665894, + "epoch": 0.671795240491768, + "kl_loss": 0.07042264193296432, + "loss_ib": 0.0012282549869269133, + "step": 2336 + }, + { + "ce_ib": 6.131394863128662, + "ce_orig": 1.2260792255401611, + "epoch": 0.671795240491768, + "kl_loss": 0.11199796199798584, + "loss_ib": 0.001733119017444551, + "step": 2336 + }, + { + "ce_ib": 6.1582465171813965, + "ce_orig": 0.622921884059906, + "epoch": 0.671795240491768, + "kl_loss": 0.05602339282631874, + "loss_ib": 0.0011760585475713015, + "step": 2336 + }, + { + "ce_ib": 2.3441386222839355, + "ce_orig": 0.5129339098930359, + "epoch": 0.6720828240707456, + "kl_loss": 0.11836494505405426, + "loss_ib": 0.0014180633006617427, + "step": 2337 + }, + { + "ce_ib": 3.7549996376037598, + "ce_orig": 0.7464841604232788, + "epoch": 0.6720828240707456, + "kl_loss": 0.09116454422473907, + "loss_ib": 0.0012871453072875738, + "step": 2337 + }, + { + "ce_ib": 2.4007718563079834, + "ce_orig": 0.6772271394729614, + "epoch": 0.6720828240707456, + "kl_loss": 0.07132670283317566, + "loss_ib": 0.0009533441625535488, + "step": 2337 + }, + { + "ce_ib": 4.376418113708496, + "ce_orig": 0.8214682340621948, + "epoch": 0.6720828240707456, + "kl_loss": 0.06515570729970932, + "loss_ib": 0.0010891988640651107, + "step": 2337 + }, + { + "ce_ib": 5.387823581695557, + "ce_orig": 1.1784050464630127, + "epoch": 0.6723704076497232, + "kl_loss": 0.04967402666807175, + "loss_ib": 0.0010355225531384349, + "step": 2338 + }, + { + "ce_ib": 1.948256492614746, + "ce_orig": 0.32438793778419495, + "epoch": 0.6723704076497232, + "kl_loss": 0.14566317200660706, + "loss_ib": 0.0016514573944732547, + "step": 2338 + }, + { + "ce_ib": 4.893378734588623, + "ce_orig": 0.9664208889007568, + "epoch": 0.6723704076497232, + "kl_loss": 0.0636010617017746, + "loss_ib": 0.001125348499044776, + "step": 2338 + }, + { + "ce_ib": 5.4178595542907715, + "ce_orig": 0.8270339369773865, + "epoch": 0.6723704076497232, + "kl_loss": 0.12017965316772461, + "loss_ib": 0.001743582426570356, + "step": 2338 + }, + { + "ce_ib": 4.231266975402832, + "ce_orig": 0.8210704922676086, + "epoch": 0.6726579912287008, + "kl_loss": 0.07726576924324036, + "loss_ib": 0.0011957843089476228, + "step": 2339 + }, + { + "ce_ib": 2.561232805252075, + "ce_orig": 0.49728143215179443, + "epoch": 0.6726579912287008, + "kl_loss": 0.0638236552476883, + "loss_ib": 0.0008943597786128521, + "step": 2339 + }, + { + "ce_ib": 4.856182098388672, + "ce_orig": 1.2321163415908813, + "epoch": 0.6726579912287008, + "kl_loss": 0.08723242580890656, + "loss_ib": 0.0013579424703493714, + "step": 2339 + }, + { + "ce_ib": 3.3694396018981934, + "ce_orig": 0.8225352764129639, + "epoch": 0.6726579912287008, + "kl_loss": 0.05948465317487717, + "loss_ib": 0.000931790447793901, + "step": 2339 + }, + { + "epoch": 0.6729455748076785, + "grad_norm": 0.1231447085738182, + "learning_rate": 4.524365611783818e-05, + "loss": 0.8333, + "step": 2340 + }, + { + "ce_ib": 5.383634090423584, + "ce_orig": 1.300133466720581, + "epoch": 0.6729455748076785, + "kl_loss": 0.07802054286003113, + "loss_ib": 0.001318568829447031, + "step": 2340 + }, + { + "ce_ib": 4.581700325012207, + "ce_orig": 0.8436693549156189, + "epoch": 0.6729455748076785, + "kl_loss": 0.08568571507930756, + "loss_ib": 0.0013150271261110902, + "step": 2340 + }, + { + "ce_ib": 2.6762521266937256, + "ce_orig": 0.5653050541877747, + "epoch": 0.6729455748076785, + "kl_loss": 0.0606236532330513, + "loss_ib": 0.0008738617179915309, + "step": 2340 + }, + { + "ce_ib": 3.5776028633117676, + "ce_orig": 0.8022984266281128, + "epoch": 0.6729455748076785, + "kl_loss": 0.05445438623428345, + "loss_ib": 0.0009023041347973049, + "step": 2340 + }, + { + "ce_ib": 3.7604360580444336, + "ce_orig": 1.092064619064331, + "epoch": 0.6732331583866561, + "kl_loss": 0.10540948808193207, + "loss_ib": 0.0014301384799182415, + "step": 2341 + }, + { + "ce_ib": 3.5611393451690674, + "ce_orig": 0.6238676905632019, + "epoch": 0.6732331583866561, + "kl_loss": 0.062144432216882706, + "loss_ib": 0.0009775582002475858, + "step": 2341 + }, + { + "ce_ib": 5.046367645263672, + "ce_orig": 1.072721242904663, + "epoch": 0.6732331583866561, + "kl_loss": 0.14300626516342163, + "loss_ib": 0.001934699364937842, + "step": 2341 + }, + { + "ce_ib": 3.7733418941497803, + "ce_orig": 0.9515407085418701, + "epoch": 0.6732331583866561, + "kl_loss": 0.10047587752342224, + "loss_ib": 0.0013820929452776909, + "step": 2341 + }, + { + "ce_ib": 3.398378849029541, + "ce_orig": 0.5044198036193848, + "epoch": 0.6735207419656337, + "kl_loss": 0.05231235921382904, + "loss_ib": 0.0008629614603705704, + "step": 2342 + }, + { + "ce_ib": 7.702817440032959, + "ce_orig": 1.3929275274276733, + "epoch": 0.6735207419656337, + "kl_loss": 0.13297533988952637, + "loss_ib": 0.0021000350825488567, + "step": 2342 + }, + { + "ce_ib": 4.305813312530518, + "ce_orig": 0.8352392315864563, + "epoch": 0.6735207419656337, + "kl_loss": 0.08007320761680603, + "loss_ib": 0.0012313133338466287, + "step": 2342 + }, + { + "ce_ib": 4.777695178985596, + "ce_orig": 0.5871140360832214, + "epoch": 0.6735207419656337, + "kl_loss": 0.06987899541854858, + "loss_ib": 0.0011765594827011228, + "step": 2342 + }, + { + "ce_ib": 2.869696617126465, + "ce_orig": 0.6723451018333435, + "epoch": 0.6738083255446115, + "kl_loss": 0.04676450043916702, + "loss_ib": 0.0007546145934611559, + "step": 2343 + }, + { + "ce_ib": 3.938778877258301, + "ce_orig": 0.7568756341934204, + "epoch": 0.6738083255446115, + "kl_loss": 0.0732978880405426, + "loss_ib": 0.0011268567759543657, + "step": 2343 + }, + { + "ce_ib": 4.566656589508057, + "ce_orig": 0.3587321937084198, + "epoch": 0.6738083255446115, + "kl_loss": 0.07018304616212845, + "loss_ib": 0.00115849613212049, + "step": 2343 + }, + { + "ce_ib": 3.902167320251465, + "ce_orig": 0.45034560561180115, + "epoch": 0.6738083255446115, + "kl_loss": 0.13324205577373505, + "loss_ib": 0.0017226372146978974, + "step": 2343 + }, + { + "ce_ib": 4.39942741394043, + "ce_orig": 0.793448269367218, + "epoch": 0.6740959091235891, + "kl_loss": 0.08057791739702225, + "loss_ib": 0.0012457218253985047, + "step": 2344 + }, + { + "ce_ib": 3.5021018981933594, + "ce_orig": 0.6936731338500977, + "epoch": 0.6740959091235891, + "kl_loss": 0.06683695316314697, + "loss_ib": 0.0010185797000303864, + "step": 2344 + }, + { + "ce_ib": 5.853850841522217, + "ce_orig": 1.4052269458770752, + "epoch": 0.6740959091235891, + "kl_loss": 0.07364781200885773, + "loss_ib": 0.0013218631502240896, + "step": 2344 + }, + { + "ce_ib": 3.311544418334961, + "ce_orig": 0.9437717795372009, + "epoch": 0.6740959091235891, + "kl_loss": 0.0794367790222168, + "loss_ib": 0.0011255221907049417, + "step": 2344 + }, + { + "epoch": 0.6743834927025667, + "grad_norm": 0.09441088885068893, + "learning_rate": 4.522086217088378e-05, + "loss": 0.8654, + "step": 2345 + }, + { + "ce_ib": 5.062776565551758, + "ce_orig": 0.8115209937095642, + "epoch": 0.6743834927025667, + "kl_loss": 0.17053493857383728, + "loss_ib": 0.002211627084761858, + "step": 2345 + }, + { + "ce_ib": 3.3677570819854736, + "ce_orig": 0.699272632598877, + "epoch": 0.6743834927025667, + "kl_loss": 0.09216322004795074, + "loss_ib": 0.0012584078358486295, + "step": 2345 + }, + { + "ce_ib": 7.441214561462402, + "ce_orig": 1.889093279838562, + "epoch": 0.6743834927025667, + "kl_loss": 0.10662440955638885, + "loss_ib": 0.0018103655893355608, + "step": 2345 + }, + { + "ce_ib": 3.2980809211730957, + "ce_orig": 0.8491880893707275, + "epoch": 0.6743834927025667, + "kl_loss": 0.04651125147938728, + "loss_ib": 0.0007949206046760082, + "step": 2345 + }, + { + "ce_ib": 3.556356430053711, + "ce_orig": 0.9111829400062561, + "epoch": 0.6746710762815443, + "kl_loss": 0.04343552887439728, + "loss_ib": 0.0007899908814579248, + "step": 2346 + }, + { + "ce_ib": 4.374747276306152, + "ce_orig": 0.7436111569404602, + "epoch": 0.6746710762815443, + "kl_loss": 0.10729756951332092, + "loss_ib": 0.0015104503836482763, + "step": 2346 + }, + { + "ce_ib": 6.6478071212768555, + "ce_orig": 1.4652659893035889, + "epoch": 0.6746710762815443, + "kl_loss": 0.08524242788553238, + "loss_ib": 0.001517204917035997, + "step": 2346 + }, + { + "ce_ib": 4.940809726715088, + "ce_orig": 0.8578914999961853, + "epoch": 0.6746710762815443, + "kl_loss": 0.07197269052267075, + "loss_ib": 0.0012138078454881907, + "step": 2346 + }, + { + "ce_ib": 4.831655025482178, + "ce_orig": 0.7743867635726929, + "epoch": 0.674958659860522, + "kl_loss": 0.12833386659622192, + "loss_ib": 0.001766504137776792, + "step": 2347 + }, + { + "ce_ib": 5.112598419189453, + "ce_orig": 1.0994750261306763, + "epoch": 0.674958659860522, + "kl_loss": 0.08550111949443817, + "loss_ib": 0.001366271055303514, + "step": 2347 + }, + { + "ce_ib": 4.055953025817871, + "ce_orig": 0.7213334441184998, + "epoch": 0.674958659860522, + "kl_loss": 0.0799262598156929, + "loss_ib": 0.001204857835546136, + "step": 2347 + }, + { + "ce_ib": 2.8027408123016357, + "ce_orig": 0.6014555096626282, + "epoch": 0.674958659860522, + "kl_loss": 0.031270161271095276, + "loss_ib": 0.0005929757026024163, + "step": 2347 + }, + { + "ce_ib": 4.39713191986084, + "ce_orig": 0.6965343356132507, + "epoch": 0.6752462434394996, + "kl_loss": 0.06559377908706665, + "loss_ib": 0.001095650950446725, + "step": 2348 + }, + { + "ce_ib": 4.784936904907227, + "ce_orig": 0.8699886202812195, + "epoch": 0.6752462434394996, + "kl_loss": 0.09045187383890152, + "loss_ib": 0.00138301239348948, + "step": 2348 + }, + { + "ce_ib": 3.6558730602264404, + "ce_orig": 0.6282638907432556, + "epoch": 0.6752462434394996, + "kl_loss": 0.06578533351421356, + "loss_ib": 0.0010234406217932701, + "step": 2348 + }, + { + "ce_ib": 7.341830730438232, + "ce_orig": 1.6113260984420776, + "epoch": 0.6752462434394996, + "kl_loss": 0.08778535574674606, + "loss_ib": 0.0016120364889502525, + "step": 2348 + }, + { + "ce_ib": 3.95459246635437, + "ce_orig": 0.645257830619812, + "epoch": 0.6755338270184772, + "kl_loss": 0.07721763849258423, + "loss_ib": 0.0011676355497911572, + "step": 2349 + }, + { + "ce_ib": 5.204736232757568, + "ce_orig": 1.2076115608215332, + "epoch": 0.6755338270184772, + "kl_loss": 0.07544884085655212, + "loss_ib": 0.0012749619781970978, + "step": 2349 + }, + { + "ce_ib": 3.983389377593994, + "ce_orig": 0.7909812927246094, + "epoch": 0.6755338270184772, + "kl_loss": 0.03823942691087723, + "loss_ib": 0.0007807331858202815, + "step": 2349 + }, + { + "ce_ib": 8.540934562683105, + "ce_orig": 2.062216281890869, + "epoch": 0.6755338270184772, + "kl_loss": 0.07940573990345001, + "loss_ib": 0.0016481508500874043, + "step": 2349 + }, + { + "epoch": 0.6758214105974549, + "grad_norm": 0.09969067573547363, + "learning_rate": 4.519801950717905e-05, + "loss": 0.9129, + "step": 2350 + }, + { + "ce_ib": 2.807394027709961, + "ce_orig": 0.6516605615615845, + "epoch": 0.6758214105974549, + "kl_loss": 0.07641524076461792, + "loss_ib": 0.0010448917746543884, + "step": 2350 + }, + { + "ce_ib": 3.552865743637085, + "ce_orig": 0.5992064476013184, + "epoch": 0.6758214105974549, + "kl_loss": 0.04745316505432129, + "loss_ib": 0.0008298182510770857, + "step": 2350 + }, + { + "ce_ib": 1.213832974433899, + "ce_orig": 0.19332973659038544, + "epoch": 0.6758214105974549, + "kl_loss": 0.13761843740940094, + "loss_ib": 0.0014975675148889422, + "step": 2350 + }, + { + "ce_ib": 2.9414851665496826, + "ce_orig": 0.7690237164497375, + "epoch": 0.6758214105974549, + "kl_loss": 0.060536645352840424, + "loss_ib": 0.0008995149983093143, + "step": 2350 + }, + { + "ce_ib": 3.766172409057617, + "ce_orig": 0.6510964632034302, + "epoch": 0.6761089941764326, + "kl_loss": 0.1041039377450943, + "loss_ib": 0.0014176565455272794, + "step": 2351 + }, + { + "ce_ib": 3.299255132675171, + "ce_orig": 0.44790542125701904, + "epoch": 0.6761089941764326, + "kl_loss": 0.1360345482826233, + "loss_ib": 0.0016902708448469639, + "step": 2351 + }, + { + "ce_ib": 4.47609281539917, + "ce_orig": 0.6144552826881409, + "epoch": 0.6761089941764326, + "kl_loss": 0.06488990038633347, + "loss_ib": 0.0010965082328766584, + "step": 2351 + }, + { + "ce_ib": 3.9631896018981934, + "ce_orig": 0.78432697057724, + "epoch": 0.6761089941764326, + "kl_loss": 0.06447555124759674, + "loss_ib": 0.0010410743998363614, + "step": 2351 + }, + { + "ce_ib": 2.3894827365875244, + "ce_orig": 0.3234657645225525, + "epoch": 0.6763965777554102, + "kl_loss": 0.061039410531520844, + "loss_ib": 0.0008493423229083419, + "step": 2352 + }, + { + "ce_ib": 1.6446903944015503, + "ce_orig": 0.2665823996067047, + "epoch": 0.6763965777554102, + "kl_loss": 0.1623561829328537, + "loss_ib": 0.001788030844181776, + "step": 2352 + }, + { + "ce_ib": 4.190186023712158, + "ce_orig": 1.024111270904541, + "epoch": 0.6763965777554102, + "kl_loss": 0.10762232542037964, + "loss_ib": 0.0014952417695894837, + "step": 2352 + }, + { + "ce_ib": 5.005459785461426, + "ce_orig": 0.5109586715698242, + "epoch": 0.6763965777554102, + "kl_loss": 0.12536078691482544, + "loss_ib": 0.0017541537526994944, + "step": 2352 + }, + { + "ce_ib": 2.3601248264312744, + "ce_orig": 0.541144847869873, + "epoch": 0.6766841613343878, + "kl_loss": 0.042578816413879395, + "loss_ib": 0.0006618006154894829, + "step": 2353 + }, + { + "ce_ib": 4.919713973999023, + "ce_orig": 1.3390673398971558, + "epoch": 0.6766841613343878, + "kl_loss": 0.26599279046058655, + "loss_ib": 0.003151899203658104, + "step": 2353 + }, + { + "ce_ib": 6.746940612792969, + "ce_orig": 1.309516191482544, + "epoch": 0.6766841613343878, + "kl_loss": 0.06598269939422607, + "loss_ib": 0.0013345209881663322, + "step": 2353 + }, + { + "ce_ib": 3.8660736083984375, + "ce_orig": 0.8872079253196716, + "epoch": 0.6766841613343878, + "kl_loss": 0.06126921996474266, + "loss_ib": 0.0009992995765060186, + "step": 2353 + }, + { + "ce_ib": 6.6313157081604, + "ce_orig": 1.4609990119934082, + "epoch": 0.6769717449133654, + "kl_loss": 0.06460058689117432, + "loss_ib": 0.0013091373257339, + "step": 2354 + }, + { + "ce_ib": 6.101470470428467, + "ce_orig": 1.2691885232925415, + "epoch": 0.6769717449133654, + "kl_loss": 0.08702749013900757, + "loss_ib": 0.0014804219827055931, + "step": 2354 + }, + { + "ce_ib": 3.8085744380950928, + "ce_orig": 0.547160267829895, + "epoch": 0.6769717449133654, + "kl_loss": 0.18820711970329285, + "loss_ib": 0.0022629285231232643, + "step": 2354 + }, + { + "ce_ib": 5.849277019500732, + "ce_orig": 1.4335964918136597, + "epoch": 0.6769717449133654, + "kl_loss": 0.1268499344587326, + "loss_ib": 0.001853426918387413, + "step": 2354 + }, + { + "epoch": 0.677259328492343, + "grad_norm": 0.10071748495101929, + "learning_rate": 4.517512818175726e-05, + "loss": 0.8213, + "step": 2355 + }, + { + "ce_ib": 4.858452796936035, + "ce_orig": 1.1973932981491089, + "epoch": 0.677259328492343, + "kl_loss": 0.06980141997337341, + "loss_ib": 0.0011838594218716025, + "step": 2355 + }, + { + "ce_ib": 3.0154268741607666, + "ce_orig": 0.5534801483154297, + "epoch": 0.677259328492343, + "kl_loss": 0.054177042096853256, + "loss_ib": 0.000843313115183264, + "step": 2355 + }, + { + "ce_ib": 3.4313926696777344, + "ce_orig": 0.4799686074256897, + "epoch": 0.677259328492343, + "kl_loss": 0.082373708486557, + "loss_ib": 0.001166876289062202, + "step": 2355 + }, + { + "ce_ib": 3.468970775604248, + "ce_orig": 0.3694916069507599, + "epoch": 0.677259328492343, + "kl_loss": 0.10563535988330841, + "loss_ib": 0.0014032506151124835, + "step": 2355 + }, + { + "ce_ib": 4.739312648773193, + "ce_orig": 0.6991109251976013, + "epoch": 0.6775469120713207, + "kl_loss": 0.08805115520954132, + "loss_ib": 0.0013544427929446101, + "step": 2356 + }, + { + "ce_ib": 4.530791282653809, + "ce_orig": 0.8552468419075012, + "epoch": 0.6775469120713207, + "kl_loss": 0.07603997737169266, + "loss_ib": 0.0012134788557887077, + "step": 2356 + }, + { + "ce_ib": 5.710221767425537, + "ce_orig": 1.021727204322815, + "epoch": 0.6775469120713207, + "kl_loss": 0.09536243975162506, + "loss_ib": 0.00152464653365314, + "step": 2356 + }, + { + "ce_ib": 5.723595142364502, + "ce_orig": 0.9670653343200684, + "epoch": 0.6775469120713207, + "kl_loss": 0.13732868432998657, + "loss_ib": 0.0019456463633105159, + "step": 2356 + }, + { + "ce_ib": 3.181032657623291, + "ce_orig": 0.6803690791130066, + "epoch": 0.6778344956502984, + "kl_loss": 0.0422971248626709, + "loss_ib": 0.0007410745020024478, + "step": 2357 + }, + { + "ce_ib": 5.176339149475098, + "ce_orig": 0.6800485849380493, + "epoch": 0.6778344956502984, + "kl_loss": 0.11409227550029755, + "loss_ib": 0.001658556517213583, + "step": 2357 + }, + { + "ce_ib": 4.755335807800293, + "ce_orig": 0.6468582153320312, + "epoch": 0.6778344956502984, + "kl_loss": 0.05592215806245804, + "loss_ib": 0.0010347551433369517, + "step": 2357 + }, + { + "ce_ib": 4.76873779296875, + "ce_orig": 1.2869858741760254, + "epoch": 0.6778344956502984, + "kl_loss": 0.08207343518733978, + "loss_ib": 0.0012976081343367696, + "step": 2357 + }, + { + "ce_ib": 3.4028573036193848, + "ce_orig": 0.8830423355102539, + "epoch": 0.678122079229276, + "kl_loss": 0.2584499418735504, + "loss_ib": 0.002924785017967224, + "step": 2358 + }, + { + "ce_ib": 4.30031681060791, + "ce_orig": 0.622747540473938, + "epoch": 0.678122079229276, + "kl_loss": 0.07918980717658997, + "loss_ib": 0.0012219297932460904, + "step": 2358 + }, + { + "ce_ib": 3.0111260414123535, + "ce_orig": 1.0559265613555908, + "epoch": 0.678122079229276, + "kl_loss": 0.055594656616449356, + "loss_ib": 0.0008570591453462839, + "step": 2358 + }, + { + "ce_ib": 3.686088800430298, + "ce_orig": 0.47006309032440186, + "epoch": 0.678122079229276, + "kl_loss": 0.09560932219028473, + "loss_ib": 0.0013247020542621613, + "step": 2358 + }, + { + "ce_ib": 4.381927967071533, + "ce_orig": 0.4970502555370331, + "epoch": 0.6784096628082537, + "kl_loss": 0.10028770565986633, + "loss_ib": 0.0014410697622224689, + "step": 2359 + }, + { + "ce_ib": 1.608655571937561, + "ce_orig": 0.23726926743984222, + "epoch": 0.6784096628082537, + "kl_loss": 0.13768664002418518, + "loss_ib": 0.0015377318486571312, + "step": 2359 + }, + { + "ce_ib": 4.99705696105957, + "ce_orig": 0.7741214036941528, + "epoch": 0.6784096628082537, + "kl_loss": 0.05800304934382439, + "loss_ib": 0.0010797361610457301, + "step": 2359 + }, + { + "ce_ib": 5.599058151245117, + "ce_orig": 1.103107213973999, + "epoch": 0.6784096628082537, + "kl_loss": 0.06653723120689392, + "loss_ib": 0.0012252780143171549, + "step": 2359 + }, + { + "epoch": 0.6786972463872313, + "grad_norm": 0.13324923813343048, + "learning_rate": 4.515218824976895e-05, + "loss": 0.8481, + "step": 2360 + }, + { + "ce_ib": 5.5933685302734375, + "ce_orig": 0.8991292119026184, + "epoch": 0.6786972463872313, + "kl_loss": 0.0714542493224144, + "loss_ib": 0.0012738794321194291, + "step": 2360 + }, + { + "ce_ib": 4.776568412780762, + "ce_orig": 1.310439109802246, + "epoch": 0.6786972463872313, + "kl_loss": 0.05904984846711159, + "loss_ib": 0.0010681552812457085, + "step": 2360 + }, + { + "ce_ib": 4.027004718780518, + "ce_orig": 0.8606640100479126, + "epoch": 0.6786972463872313, + "kl_loss": 0.05494123324751854, + "loss_ib": 0.0009521127794869244, + "step": 2360 + }, + { + "ce_ib": 5.949367046356201, + "ce_orig": 1.0277644395828247, + "epoch": 0.6786972463872313, + "kl_loss": 0.08532886207103729, + "loss_ib": 0.0014482253463938832, + "step": 2360 + }, + { + "ce_ib": 3.030498504638672, + "ce_orig": 0.796371579170227, + "epoch": 0.6789848299662089, + "kl_loss": 0.05132900923490524, + "loss_ib": 0.0008163399179466069, + "step": 2361 + }, + { + "ce_ib": 5.4557671546936035, + "ce_orig": 0.8622795939445496, + "epoch": 0.6789848299662089, + "kl_loss": 0.11755703389644623, + "loss_ib": 0.0017211469821631908, + "step": 2361 + }, + { + "ce_ib": 3.892008066177368, + "ce_orig": 0.6399655938148499, + "epoch": 0.6789848299662089, + "kl_loss": 0.06192721053957939, + "loss_ib": 0.0010084728710353374, + "step": 2361 + }, + { + "ce_ib": 5.812283992767334, + "ce_orig": 1.16305673122406, + "epoch": 0.6789848299662089, + "kl_loss": 0.08915567398071289, + "loss_ib": 0.0014727851375937462, + "step": 2361 + }, + { + "ce_ib": 2.329178810119629, + "ce_orig": 0.6151803731918335, + "epoch": 0.6792724135451865, + "kl_loss": 0.03346136584877968, + "loss_ib": 0.0005675315042026341, + "step": 2362 + }, + { + "ce_ib": 3.91125750541687, + "ce_orig": 0.876040518283844, + "epoch": 0.6792724135451865, + "kl_loss": 0.04902535676956177, + "loss_ib": 0.0008813792956061661, + "step": 2362 + }, + { + "ce_ib": 4.793557643890381, + "ce_orig": 1.0395740270614624, + "epoch": 0.6792724135451865, + "kl_loss": 0.07909566909074783, + "loss_ib": 0.00127031235024333, + "step": 2362 + }, + { + "ce_ib": 3.203479290008545, + "ce_orig": 0.5631647706031799, + "epoch": 0.6792724135451865, + "kl_loss": 0.07862997055053711, + "loss_ib": 0.001106647658161819, + "step": 2362 + }, + { + "ce_ib": 6.020439624786377, + "ce_orig": 1.2751753330230713, + "epoch": 0.6795599971241643, + "kl_loss": 0.08655031025409698, + "loss_ib": 0.0014675470301881433, + "step": 2363 + }, + { + "ce_ib": 4.361606121063232, + "ce_orig": 0.7346363067626953, + "epoch": 0.6795599971241643, + "kl_loss": 0.08631746470928192, + "loss_ib": 0.0012993351556360722, + "step": 2363 + }, + { + "ce_ib": 4.5856475830078125, + "ce_orig": 0.4414523243904114, + "epoch": 0.6795599971241643, + "kl_loss": 0.2636999487876892, + "loss_ib": 0.0030955641996115446, + "step": 2363 + }, + { + "ce_ib": 4.270997524261475, + "ce_orig": 0.9079581499099731, + "epoch": 0.6795599971241643, + "kl_loss": 0.03402726352214813, + "loss_ib": 0.0007673723739571869, + "step": 2363 + }, + { + "ce_ib": 2.9930615425109863, + "ce_orig": 0.5769110918045044, + "epoch": 0.6798475807031419, + "kl_loss": 0.053628869354724884, + "loss_ib": 0.0008355948375537992, + "step": 2364 + }, + { + "ce_ib": 2.9722635746002197, + "ce_orig": 0.7473849654197693, + "epoch": 0.6798475807031419, + "kl_loss": 0.05315898731350899, + "loss_ib": 0.0008288162061944604, + "step": 2364 + }, + { + "ce_ib": 4.126216888427734, + "ce_orig": 0.8117655515670776, + "epoch": 0.6798475807031419, + "kl_loss": 0.050634801387786865, + "loss_ib": 0.0009189696284011006, + "step": 2364 + }, + { + "ce_ib": 4.856286525726318, + "ce_orig": 1.12398362159729, + "epoch": 0.6798475807031419, + "kl_loss": 0.05676986277103424, + "loss_ib": 0.00105332734528929, + "step": 2364 + }, + { + "epoch": 0.6801351642821195, + "grad_norm": 0.0915922075510025, + "learning_rate": 4.512919976648171e-05, + "loss": 0.8335, + "step": 2365 + }, + { + "ce_ib": 4.097760200500488, + "ce_orig": 0.5240581035614014, + "epoch": 0.6801351642821195, + "kl_loss": 0.11602523922920227, + "loss_ib": 0.0015700283693149686, + "step": 2365 + }, + { + "ce_ib": 5.5975565910339355, + "ce_orig": 1.0269001722335815, + "epoch": 0.6801351642821195, + "kl_loss": 0.12049694359302521, + "loss_ib": 0.0017647250788286328, + "step": 2365 + }, + { + "ce_ib": 2.9131391048431396, + "ce_orig": 0.7318040132522583, + "epoch": 0.6801351642821195, + "kl_loss": 0.048626791685819626, + "loss_ib": 0.0007775817648507655, + "step": 2365 + }, + { + "ce_ib": 4.6686625480651855, + "ce_orig": 1.0797736644744873, + "epoch": 0.6801351642821195, + "kl_loss": 0.04841184616088867, + "loss_ib": 0.0009509846568107605, + "step": 2365 + }, + { + "ce_ib": 5.291426181793213, + "ce_orig": 1.0283656120300293, + "epoch": 0.6804227478610971, + "kl_loss": 0.07209806144237518, + "loss_ib": 0.0012501232558861375, + "step": 2366 + }, + { + "ce_ib": 4.187557697296143, + "ce_orig": 0.43934953212738037, + "epoch": 0.6804227478610971, + "kl_loss": 0.08126208186149597, + "loss_ib": 0.0012313765473663807, + "step": 2366 + }, + { + "ce_ib": 2.4467687606811523, + "ce_orig": 0.4631514847278595, + "epoch": 0.6804227478610971, + "kl_loss": 0.12538418173789978, + "loss_ib": 0.0014985187444835901, + "step": 2366 + }, + { + "ce_ib": 4.843488693237305, + "ce_orig": 0.7209252715110779, + "epoch": 0.6804227478610971, + "kl_loss": 0.08611969649791718, + "loss_ib": 0.0013455457519739866, + "step": 2366 + }, + { + "ce_ib": 5.288057804107666, + "ce_orig": 1.2054836750030518, + "epoch": 0.6807103314400748, + "kl_loss": 0.1102331280708313, + "loss_ib": 0.0016311371000483632, + "step": 2367 + }, + { + "ce_ib": 3.4613418579101562, + "ce_orig": 0.6198671460151672, + "epoch": 0.6807103314400748, + "kl_loss": 0.12549957633018494, + "loss_ib": 0.0016011298866942525, + "step": 2367 + }, + { + "ce_ib": 4.9068922996521, + "ce_orig": 0.8056033253669739, + "epoch": 0.6807103314400748, + "kl_loss": 0.07279548048973083, + "loss_ib": 0.0012186439707875252, + "step": 2367 + }, + { + "ce_ib": 3.4269721508026123, + "ce_orig": 0.8333673477172852, + "epoch": 0.6807103314400748, + "kl_loss": 0.1494365632534027, + "loss_ib": 0.0018370627658441663, + "step": 2367 + }, + { + "ce_ib": 2.2460885047912598, + "ce_orig": 0.4481543004512787, + "epoch": 0.6809979150190524, + "kl_loss": 0.03808562457561493, + "loss_ib": 0.0006054650875739753, + "step": 2368 + }, + { + "ce_ib": 2.6902987957000732, + "ce_orig": 0.7006246447563171, + "epoch": 0.6809979150190524, + "kl_loss": 0.04278700798749924, + "loss_ib": 0.0006968999514356256, + "step": 2368 + }, + { + "ce_ib": 4.735789775848389, + "ce_orig": 1.0977898836135864, + "epoch": 0.6809979150190524, + "kl_loss": 0.06206683814525604, + "loss_ib": 0.0010942473309114575, + "step": 2368 + }, + { + "ce_ib": 4.480602741241455, + "ce_orig": 0.8755375742912292, + "epoch": 0.6809979150190524, + "kl_loss": 0.07436387240886688, + "loss_ib": 0.0011916989460587502, + "step": 2368 + }, + { + "ce_ib": 6.438822269439697, + "ce_orig": 1.3127914667129517, + "epoch": 0.68128549859803, + "kl_loss": 0.0598125196993351, + "loss_ib": 0.0012420074781402946, + "step": 2369 + }, + { + "ce_ib": 5.851805210113525, + "ce_orig": 1.387368083000183, + "epoch": 0.68128549859803, + "kl_loss": 0.06897658854722977, + "loss_ib": 0.0012749462621286511, + "step": 2369 + }, + { + "ce_ib": 5.016909599304199, + "ce_orig": 1.1099350452423096, + "epoch": 0.68128549859803, + "kl_loss": 0.0965472161769867, + "loss_ib": 0.001467163092456758, + "step": 2369 + }, + { + "ce_ib": 4.001672267913818, + "ce_orig": 1.0258426666259766, + "epoch": 0.68128549859803, + "kl_loss": 0.04375666379928589, + "loss_ib": 0.0008377338526770473, + "step": 2369 + }, + { + "epoch": 0.6815730821770077, + "grad_norm": 0.11016444861888885, + "learning_rate": 4.510616278728015e-05, + "loss": 0.8017, + "step": 2370 + }, + { + "ce_ib": 6.944091796875, + "ce_orig": 1.1735879182815552, + "epoch": 0.6815730821770077, + "kl_loss": 0.1110323965549469, + "loss_ib": 0.0018047330668196082, + "step": 2370 + }, + { + "ce_ib": 3.7935614585876465, + "ce_orig": 0.8287927508354187, + "epoch": 0.6815730821770077, + "kl_loss": 0.07988609373569489, + "loss_ib": 0.0011782170040532947, + "step": 2370 + }, + { + "ce_ib": 5.079182147979736, + "ce_orig": 0.881047785282135, + "epoch": 0.6815730821770077, + "kl_loss": 0.17493757605552673, + "loss_ib": 0.002257293788716197, + "step": 2370 + }, + { + "ce_ib": 5.445065498352051, + "ce_orig": 1.1195857524871826, + "epoch": 0.6815730821770077, + "kl_loss": 0.058698371052742004, + "loss_ib": 0.0011314902221783996, + "step": 2370 + }, + { + "ce_ib": 4.317038536071777, + "ce_orig": 1.093035101890564, + "epoch": 0.6818606657559854, + "kl_loss": 0.07689093053340912, + "loss_ib": 0.0012006131000816822, + "step": 2371 + }, + { + "ce_ib": 3.9225051403045654, + "ce_orig": 0.9147409200668335, + "epoch": 0.6818606657559854, + "kl_loss": 0.059253960847854614, + "loss_ib": 0.0009847900364547968, + "step": 2371 + }, + { + "ce_ib": 3.9064676761627197, + "ce_orig": 0.6961843967437744, + "epoch": 0.6818606657559854, + "kl_loss": 0.06174233555793762, + "loss_ib": 0.0010080700740218163, + "step": 2371 + }, + { + "ce_ib": 1.6449000835418701, + "ce_orig": 0.35380589962005615, + "epoch": 0.6818606657559854, + "kl_loss": 0.11586003750562668, + "loss_ib": 0.0013230902841314673, + "step": 2371 + }, + { + "ce_ib": 4.102778911590576, + "ce_orig": 0.7697730660438538, + "epoch": 0.682148249334963, + "kl_loss": 0.08069413155317307, + "loss_ib": 0.0012172191636636853, + "step": 2372 + }, + { + "ce_ib": 3.2852284908294678, + "ce_orig": 0.4865918755531311, + "epoch": 0.682148249334963, + "kl_loss": 0.08099428564310074, + "loss_ib": 0.0011384657118469477, + "step": 2372 + }, + { + "ce_ib": 4.208924770355225, + "ce_orig": 0.6900352835655212, + "epoch": 0.682148249334963, + "kl_loss": 0.08081613481044769, + "loss_ib": 0.0012290538288652897, + "step": 2372 + }, + { + "ce_ib": 2.1896238327026367, + "ce_orig": 0.37474513053894043, + "epoch": 0.682148249334963, + "kl_loss": 0.04967765882611275, + "loss_ib": 0.0007157389773055911, + "step": 2372 + }, + { + "ce_ib": 3.771662950515747, + "ce_orig": 0.8076186776161194, + "epoch": 0.6824358329139406, + "kl_loss": 0.08618397265672684, + "loss_ib": 0.0012390059418976307, + "step": 2373 + }, + { + "ce_ib": 5.378939628601074, + "ce_orig": 1.0583337545394897, + "epoch": 0.6824358329139406, + "kl_loss": 0.08256299048662186, + "loss_ib": 0.001363523886539042, + "step": 2373 + }, + { + "ce_ib": 3.659956216812134, + "ce_orig": 0.6407950520515442, + "epoch": 0.6824358329139406, + "kl_loss": 0.12763281166553497, + "loss_ib": 0.001642323681153357, + "step": 2373 + }, + { + "ce_ib": 3.6054892539978027, + "ce_orig": 0.5225866436958313, + "epoch": 0.6824358329139406, + "kl_loss": 0.06692127883434296, + "loss_ib": 0.001029761740937829, + "step": 2373 + }, + { + "ce_ib": 6.252756118774414, + "ce_orig": 1.2088062763214111, + "epoch": 0.6827234164929182, + "kl_loss": 0.08299087733030319, + "loss_ib": 0.001455184305086732, + "step": 2374 + }, + { + "ce_ib": 4.672387599945068, + "ce_orig": 1.0233503580093384, + "epoch": 0.6827234164929182, + "kl_loss": 0.06512218713760376, + "loss_ib": 0.0011184605536982417, + "step": 2374 + }, + { + "ce_ib": 3.4768033027648926, + "ce_orig": 0.6293911933898926, + "epoch": 0.6827234164929182, + "kl_loss": 0.062224969267845154, + "loss_ib": 0.000969929969869554, + "step": 2374 + }, + { + "ce_ib": 2.757728338241577, + "ce_orig": 0.5879848599433899, + "epoch": 0.6827234164929182, + "kl_loss": 0.04531679302453995, + "loss_ib": 0.0007289407076314092, + "step": 2374 + }, + { + "epoch": 0.6830110000718959, + "grad_norm": 0.10339463502168655, + "learning_rate": 4.5083077367665695e-05, + "loss": 0.8401, + "step": 2375 + }, + { + "ce_ib": 3.803887128829956, + "ce_orig": 0.9248018860816956, + "epoch": 0.6830110000718959, + "kl_loss": 0.07683758437633514, + "loss_ib": 0.0011487645097076893, + "step": 2375 + }, + { + "ce_ib": 6.982426643371582, + "ce_orig": 1.7689875364303589, + "epoch": 0.6830110000718959, + "kl_loss": 0.06737598776817322, + "loss_ib": 0.0013720025308430195, + "step": 2375 + }, + { + "ce_ib": 3.9819090366363525, + "ce_orig": 0.4501799941062927, + "epoch": 0.6830110000718959, + "kl_loss": 0.11452930420637131, + "loss_ib": 0.0015434838132932782, + "step": 2375 + }, + { + "ce_ib": 1.879157304763794, + "ce_orig": 0.25453323125839233, + "epoch": 0.6830110000718959, + "kl_loss": 0.05647023767232895, + "loss_ib": 0.000752618070691824, + "step": 2375 + }, + { + "ce_ib": 2.2203335762023926, + "ce_orig": 0.4958862066268921, + "epoch": 0.6832985836508735, + "kl_loss": 0.04689696803689003, + "loss_ib": 0.000691003049723804, + "step": 2376 + }, + { + "ce_ib": 6.16979455947876, + "ce_orig": 1.1716549396514893, + "epoch": 0.6832985836508735, + "kl_loss": 0.06888240575790405, + "loss_ib": 0.0013058034237474203, + "step": 2376 + }, + { + "ce_ib": 2.781658411026001, + "ce_orig": 0.5314446687698364, + "epoch": 0.6832985836508735, + "kl_loss": 0.043509453535079956, + "loss_ib": 0.0007132603786885738, + "step": 2376 + }, + { + "ce_ib": 4.028306007385254, + "ce_orig": 1.1244102716445923, + "epoch": 0.6832985836508735, + "kl_loss": 0.05534365028142929, + "loss_ib": 0.0009562671184539795, + "step": 2376 + }, + { + "ce_ib": 3.7927114963531494, + "ce_orig": 0.4870842397212982, + "epoch": 0.6835861672298512, + "kl_loss": 0.06997610628604889, + "loss_ib": 0.0010790321975946426, + "step": 2377 + }, + { + "ce_ib": 5.707444667816162, + "ce_orig": 1.4286458492279053, + "epoch": 0.6835861672298512, + "kl_loss": 0.06784400343894958, + "loss_ib": 0.001249184482730925, + "step": 2377 + }, + { + "ce_ib": 4.879306793212891, + "ce_orig": 1.0052940845489502, + "epoch": 0.6835861672298512, + "kl_loss": 0.05651932954788208, + "loss_ib": 0.0010531238513067365, + "step": 2377 + }, + { + "ce_ib": 3.346207857131958, + "ce_orig": 0.555307924747467, + "epoch": 0.6835861672298512, + "kl_loss": 0.06000287085771561, + "loss_ib": 0.0009346494334749877, + "step": 2377 + }, + { + "ce_ib": 5.13154935836792, + "ce_orig": 0.8888310790061951, + "epoch": 0.6838737508088288, + "kl_loss": 0.0875050276517868, + "loss_ib": 0.0013882052153348923, + "step": 2378 + }, + { + "ce_ib": 3.2569408416748047, + "ce_orig": 0.522498607635498, + "epoch": 0.6838737508088288, + "kl_loss": 0.059237755835056305, + "loss_ib": 0.0009180716006085277, + "step": 2378 + }, + { + "ce_ib": 0.8005009293556213, + "ce_orig": 0.10670509189367294, + "epoch": 0.6838737508088288, + "kl_loss": 0.13523335754871368, + "loss_ib": 0.0014323836658149958, + "step": 2378 + }, + { + "ce_ib": 3.668679714202881, + "ce_orig": 0.6353479623794556, + "epoch": 0.6838737508088288, + "kl_loss": 0.06668281555175781, + "loss_ib": 0.001033696113154292, + "step": 2378 + }, + { + "ce_ib": 6.268472194671631, + "ce_orig": 1.7528010606765747, + "epoch": 0.6841613343878065, + "kl_loss": 0.062474459409713745, + "loss_ib": 0.0012515917187556624, + "step": 2379 + }, + { + "ce_ib": 4.306081295013428, + "ce_orig": 0.7660561800003052, + "epoch": 0.6841613343878065, + "kl_loss": 0.06854882091283798, + "loss_ib": 0.0011160963913425803, + "step": 2379 + }, + { + "ce_ib": 2.173487663269043, + "ce_orig": 0.3125285506248474, + "epoch": 0.6841613343878065, + "kl_loss": 0.17184247076511383, + "loss_ib": 0.0019357734126970172, + "step": 2379 + }, + { + "ce_ib": 3.4797756671905518, + "ce_orig": 0.7164185643196106, + "epoch": 0.6841613343878065, + "kl_loss": 0.06608693301677704, + "loss_ib": 0.0010088469134643674, + "step": 2379 + }, + { + "epoch": 0.6844489179667841, + "grad_norm": 0.09607995301485062, + "learning_rate": 4.5059943563256476e-05, + "loss": 0.7591, + "step": 2380 + }, + { + "ce_ib": 2.9154000282287598, + "ce_orig": 0.5503300428390503, + "epoch": 0.6844489179667841, + "kl_loss": 0.05782308429479599, + "loss_ib": 0.0008697707671672106, + "step": 2380 + }, + { + "ce_ib": 3.11104679107666, + "ce_orig": 0.49342772364616394, + "epoch": 0.6844489179667841, + "kl_loss": 0.06838034093379974, + "loss_ib": 0.0009949080413207412, + "step": 2380 + }, + { + "ce_ib": 3.825946569442749, + "ce_orig": 0.52889484167099, + "epoch": 0.6844489179667841, + "kl_loss": 0.16279849410057068, + "loss_ib": 0.002010579453781247, + "step": 2380 + }, + { + "ce_ib": 2.8907129764556885, + "ce_orig": 0.7702388763427734, + "epoch": 0.6844489179667841, + "kl_loss": 0.0413312166929245, + "loss_ib": 0.0007023834041319788, + "step": 2380 + }, + { + "ce_ib": 5.852891445159912, + "ce_orig": 1.061463475227356, + "epoch": 0.6847365015457617, + "kl_loss": 0.08384833484888077, + "loss_ib": 0.0014237724244594574, + "step": 2381 + }, + { + "ce_ib": 6.276810646057129, + "ce_orig": 1.66095769405365, + "epoch": 0.6847365015457617, + "kl_loss": 0.0856628566980362, + "loss_ib": 0.0014843095559626818, + "step": 2381 + }, + { + "ce_ib": 2.9701812267303467, + "ce_orig": 0.6323562264442444, + "epoch": 0.6847365015457617, + "kl_loss": 0.06166598200798035, + "loss_ib": 0.0009136779117397964, + "step": 2381 + }, + { + "ce_ib": 3.5807392597198486, + "ce_orig": 0.7741387486457825, + "epoch": 0.6847365015457617, + "kl_loss": 0.042232297360897064, + "loss_ib": 0.0007803969201631844, + "step": 2381 + }, + { + "ce_ib": 5.263377666473389, + "ce_orig": 1.3021589517593384, + "epoch": 0.6850240851247393, + "kl_loss": 0.05716366320848465, + "loss_ib": 0.0010979743674397469, + "step": 2382 + }, + { + "ce_ib": 3.296444892883301, + "ce_orig": 0.6773521900177002, + "epoch": 0.6850240851247393, + "kl_loss": 0.06006632372736931, + "loss_ib": 0.0009303077240474522, + "step": 2382 + }, + { + "ce_ib": 3.611212968826294, + "ce_orig": 0.6983643174171448, + "epoch": 0.6850240851247393, + "kl_loss": 0.0485423244535923, + "loss_ib": 0.0008465445134788752, + "step": 2382 + }, + { + "ce_ib": 5.503469467163086, + "ce_orig": 1.3654491901397705, + "epoch": 0.6850240851247393, + "kl_loss": 0.0910867303609848, + "loss_ib": 0.0014612142695114017, + "step": 2382 + }, + { + "ce_ib": 4.344907760620117, + "ce_orig": 0.8362290263175964, + "epoch": 0.6853116687037171, + "kl_loss": 0.0839262306690216, + "loss_ib": 0.001273753005079925, + "step": 2383 + }, + { + "ce_ib": 5.520389080047607, + "ce_orig": 1.187247633934021, + "epoch": 0.6853116687037171, + "kl_loss": 0.09120158851146698, + "loss_ib": 0.001464054686948657, + "step": 2383 + }, + { + "ce_ib": 4.818489074707031, + "ce_orig": 0.9105939865112305, + "epoch": 0.6853116687037171, + "kl_loss": 0.07776856422424316, + "loss_ib": 0.0012595345033332705, + "step": 2383 + }, + { + "ce_ib": 5.564348220825195, + "ce_orig": 1.1761729717254639, + "epoch": 0.6853116687037171, + "kl_loss": 0.058388933539390564, + "loss_ib": 0.0011403241660445929, + "step": 2383 + }, + { + "ce_ib": 3.728081464767456, + "ce_orig": 0.7174080610275269, + "epoch": 0.6855992522826947, + "kl_loss": 0.08852941542863846, + "loss_ib": 0.0012581023620441556, + "step": 2384 + }, + { + "ce_ib": 5.138340950012207, + "ce_orig": 1.1182996034622192, + "epoch": 0.6855992522826947, + "kl_loss": 0.08831809461116791, + "loss_ib": 0.0013970149448141456, + "step": 2384 + }, + { + "ce_ib": 3.9753613471984863, + "ce_orig": 0.7840017080307007, + "epoch": 0.6855992522826947, + "kl_loss": 0.08785945922136307, + "loss_ib": 0.0012761306716129184, + "step": 2384 + }, + { + "ce_ib": 4.214541912078857, + "ce_orig": 0.5649296641349792, + "epoch": 0.6855992522826947, + "kl_loss": 0.2813422977924347, + "loss_ib": 0.0032348772510886192, + "step": 2384 + }, + { + "epoch": 0.6858868358616723, + "grad_norm": 0.08792214840650558, + "learning_rate": 4.50367614297872e-05, + "loss": 0.8571, + "step": 2385 + }, + { + "ce_ib": 1.7841172218322754, + "ce_orig": 0.37206581234931946, + "epoch": 0.6858868358616723, + "kl_loss": 0.04595661163330078, + "loss_ib": 0.0006379778496921062, + "step": 2385 + }, + { + "ce_ib": 2.718514919281006, + "ce_orig": 0.4330751895904541, + "epoch": 0.6858868358616723, + "kl_loss": 0.0710272565484047, + "loss_ib": 0.0009821240091696382, + "step": 2385 + }, + { + "ce_ib": 2.7772040367126465, + "ce_orig": 0.33493879437446594, + "epoch": 0.6858868358616723, + "kl_loss": 0.08640769124031067, + "loss_ib": 0.0011417972855269909, + "step": 2385 + }, + { + "ce_ib": 4.946574687957764, + "ce_orig": 0.9643908739089966, + "epoch": 0.6858868358616723, + "kl_loss": 0.049996331334114075, + "loss_ib": 0.0009946207283064723, + "step": 2385 + }, + { + "ce_ib": 3.991344690322876, + "ce_orig": 0.9889262318611145, + "epoch": 0.6861744194406499, + "kl_loss": 0.07016021013259888, + "loss_ib": 0.0011007365537807345, + "step": 2386 + }, + { + "ce_ib": 3.9557337760925293, + "ce_orig": 0.9389891624450684, + "epoch": 0.6861744194406499, + "kl_loss": 0.07007039338350296, + "loss_ib": 0.0010962772648781538, + "step": 2386 + }, + { + "ce_ib": 3.131747245788574, + "ce_orig": 0.5720566511154175, + "epoch": 0.6861744194406499, + "kl_loss": 0.04919755831360817, + "loss_ib": 0.0008051502518355846, + "step": 2386 + }, + { + "ce_ib": 6.6983466148376465, + "ce_orig": 1.0567456483840942, + "epoch": 0.6861744194406499, + "kl_loss": 0.12974557280540466, + "loss_ib": 0.0019672901835292578, + "step": 2386 + }, + { + "ce_ib": 4.160679817199707, + "ce_orig": 0.9091314673423767, + "epoch": 0.6864620030196276, + "kl_loss": 0.07021360099315643, + "loss_ib": 0.0011182039743289351, + "step": 2387 + }, + { + "ce_ib": 3.658273935317993, + "ce_orig": 0.6426052451133728, + "epoch": 0.6864620030196276, + "kl_loss": 0.06429222226142883, + "loss_ib": 0.00100874959025532, + "step": 2387 + }, + { + "ce_ib": 5.404973983764648, + "ce_orig": 0.7060353755950928, + "epoch": 0.6864620030196276, + "kl_loss": 0.07609620690345764, + "loss_ib": 0.0013014593860134482, + "step": 2387 + }, + { + "ce_ib": 3.884572744369507, + "ce_orig": 0.6886847615242004, + "epoch": 0.6864620030196276, + "kl_loss": 0.08830083906650543, + "loss_ib": 0.0012714656768366694, + "step": 2387 + }, + { + "ce_ib": 3.9030895233154297, + "ce_orig": 0.7029653191566467, + "epoch": 0.6867495865986052, + "kl_loss": 0.05640052258968353, + "loss_ib": 0.0009543141350150108, + "step": 2388 + }, + { + "ce_ib": 6.369217395782471, + "ce_orig": 1.6111299991607666, + "epoch": 0.6867495865986052, + "kl_loss": 0.09301546216011047, + "loss_ib": 0.001567076425999403, + "step": 2388 + }, + { + "ce_ib": 4.430602550506592, + "ce_orig": 0.9101887345314026, + "epoch": 0.6867495865986052, + "kl_loss": 0.043910134583711624, + "loss_ib": 0.0008821615483611822, + "step": 2388 + }, + { + "ce_ib": 4.180103778839111, + "ce_orig": 0.637823760509491, + "epoch": 0.6867495865986052, + "kl_loss": 0.06550019979476929, + "loss_ib": 0.0010730123613029718, + "step": 2388 + }, + { + "ce_ib": 4.5657243728637695, + "ce_orig": 1.0906542539596558, + "epoch": 0.6870371701775828, + "kl_loss": 0.05895843729376793, + "loss_ib": 0.00104615674354136, + "step": 2389 + }, + { + "ce_ib": 3.3322198390960693, + "ce_orig": 0.6196792125701904, + "epoch": 0.6870371701775828, + "kl_loss": 0.04948567971587181, + "loss_ib": 0.0008280787151306868, + "step": 2389 + }, + { + "ce_ib": 6.282787322998047, + "ce_orig": 1.228327751159668, + "epoch": 0.6870371701775828, + "kl_loss": 0.06262929737567902, + "loss_ib": 0.0012545716017484665, + "step": 2389 + }, + { + "ce_ib": 5.183629035949707, + "ce_orig": 0.883438229560852, + "epoch": 0.6870371701775828, + "kl_loss": 0.08445407450199127, + "loss_ib": 0.001362903625704348, + "step": 2389 + }, + { + "epoch": 0.6873247537565605, + "grad_norm": 0.09329061210155487, + "learning_rate": 4.5013531023109014e-05, + "loss": 0.8027, + "step": 2390 + }, + { + "ce_ib": 4.602840900421143, + "ce_orig": 0.8900063037872314, + "epoch": 0.6873247537565605, + "kl_loss": 0.0560818612575531, + "loss_ib": 0.0010211026528850198, + "step": 2390 + }, + { + "ce_ib": 6.718210220336914, + "ce_orig": 1.421173334121704, + "epoch": 0.6873247537565605, + "kl_loss": 0.09698675572872162, + "loss_ib": 0.001641688635572791, + "step": 2390 + }, + { + "ce_ib": 3.1996653079986572, + "ce_orig": 0.6522877812385559, + "epoch": 0.6873247537565605, + "kl_loss": 0.054536331444978714, + "loss_ib": 0.0008653297554701567, + "step": 2390 + }, + { + "ce_ib": 4.778492450714111, + "ce_orig": 0.6798626780509949, + "epoch": 0.6873247537565605, + "kl_loss": 0.07839198410511017, + "loss_ib": 0.0012617690954357386, + "step": 2390 + }, + { + "ce_ib": 4.35453987121582, + "ce_orig": 0.5972073674201965, + "epoch": 0.6876123373355382, + "kl_loss": 0.11946751177310944, + "loss_ib": 0.0016301290597766638, + "step": 2391 + }, + { + "ce_ib": 4.751070022583008, + "ce_orig": 0.8722823262214661, + "epoch": 0.6876123373355382, + "kl_loss": 0.05450456216931343, + "loss_ib": 0.0010201525874435902, + "step": 2391 + }, + { + "ce_ib": 3.1686642169952393, + "ce_orig": 0.6746863126754761, + "epoch": 0.6876123373355382, + "kl_loss": 0.09476751834154129, + "loss_ib": 0.001264541526325047, + "step": 2391 + }, + { + "ce_ib": 3.321824550628662, + "ce_orig": 0.6461423635482788, + "epoch": 0.6876123373355382, + "kl_loss": 0.06612546741962433, + "loss_ib": 0.0009934371337294579, + "step": 2391 + }, + { + "ce_ib": 2.398550033569336, + "ce_orig": 0.487697035074234, + "epoch": 0.6878999209145158, + "kl_loss": 0.061880357563495636, + "loss_ib": 0.0008586585172452033, + "step": 2392 + }, + { + "ce_ib": 5.066722393035889, + "ce_orig": 0.7410906553268433, + "epoch": 0.6878999209145158, + "kl_loss": 0.09472712874412537, + "loss_ib": 0.0014539435505867004, + "step": 2392 + }, + { + "ce_ib": 7.368229389190674, + "ce_orig": 0.7774073481559753, + "epoch": 0.6878999209145158, + "kl_loss": 0.06957130134105682, + "loss_ib": 0.0014325359370559454, + "step": 2392 + }, + { + "ce_ib": 2.8934214115142822, + "ce_orig": 0.8016295433044434, + "epoch": 0.6878999209145158, + "kl_loss": 0.06220569834113121, + "loss_ib": 0.0009113990818150342, + "step": 2392 + }, + { + "ce_ib": 3.755826234817505, + "ce_orig": 0.5021089911460876, + "epoch": 0.6881875044934934, + "kl_loss": 0.10134477913379669, + "loss_ib": 0.0013890303671360016, + "step": 2393 + }, + { + "ce_ib": 5.163618087768555, + "ce_orig": 1.072688341140747, + "epoch": 0.6881875044934934, + "kl_loss": 0.08374626189470291, + "loss_ib": 0.0013538243947550654, + "step": 2393 + }, + { + "ce_ib": 3.600046396255493, + "ce_orig": 0.6004895567893982, + "epoch": 0.6881875044934934, + "kl_loss": 0.056912850588560104, + "loss_ib": 0.0009291331516578794, + "step": 2393 + }, + { + "ce_ib": 3.335738182067871, + "ce_orig": 0.7645314931869507, + "epoch": 0.6881875044934934, + "kl_loss": 0.05543137714266777, + "loss_ib": 0.0008878876105882227, + "step": 2393 + }, + { + "ce_ib": 2.283447265625, + "ce_orig": 0.38531094789505005, + "epoch": 0.688475088072471, + "kl_loss": 0.08036352694034576, + "loss_ib": 0.0010319799184799194, + "step": 2394 + }, + { + "ce_ib": 4.510163307189941, + "ce_orig": 1.1682887077331543, + "epoch": 0.688475088072471, + "kl_loss": 0.055774327367544174, + "loss_ib": 0.0010087596019729972, + "step": 2394 + }, + { + "ce_ib": 3.899970293045044, + "ce_orig": 0.6640347838401794, + "epoch": 0.688475088072471, + "kl_loss": 0.06354688853025436, + "loss_ib": 0.0010254658991470933, + "step": 2394 + }, + { + "ce_ib": 5.264353275299072, + "ce_orig": 0.9449872970581055, + "epoch": 0.688475088072471, + "kl_loss": 0.07547201216220856, + "loss_ib": 0.0012811552733182907, + "step": 2394 + }, + { + "epoch": 0.6887626716514487, + "grad_norm": 0.10147140920162201, + "learning_rate": 4.4990252399189346e-05, + "loss": 0.8471, + "step": 2395 + }, + { + "ce_ib": 2.48022198677063, + "ce_orig": 0.7178166508674622, + "epoch": 0.6887626716514487, + "kl_loss": 0.036385904997587204, + "loss_ib": 0.000611881201621145, + "step": 2395 + }, + { + "ce_ib": 4.868823528289795, + "ce_orig": 0.6047196984291077, + "epoch": 0.6887626716514487, + "kl_loss": 0.13441461324691772, + "loss_ib": 0.0018310283776372671, + "step": 2395 + }, + { + "ce_ib": 4.179558753967285, + "ce_orig": 0.8722028732299805, + "epoch": 0.6887626716514487, + "kl_loss": 0.09974934160709381, + "loss_ib": 0.0014154491946101189, + "step": 2395 + }, + { + "ce_ib": 4.046128749847412, + "ce_orig": 0.7782866954803467, + "epoch": 0.6887626716514487, + "kl_loss": 0.07842613756656647, + "loss_ib": 0.0011888742446899414, + "step": 2395 + }, + { + "ce_ib": 3.916594982147217, + "ce_orig": 1.141455888748169, + "epoch": 0.6890502552304263, + "kl_loss": 0.06095770746469498, + "loss_ib": 0.001001236611045897, + "step": 2396 + }, + { + "ce_ib": 5.298075199127197, + "ce_orig": 1.1941957473754883, + "epoch": 0.6890502552304263, + "kl_loss": 0.08878438174724579, + "loss_ib": 0.0014176513068377972, + "step": 2396 + }, + { + "ce_ib": 4.252720355987549, + "ce_orig": 0.7679389119148254, + "epoch": 0.6890502552304263, + "kl_loss": 0.06731608510017395, + "loss_ib": 0.0010984328109771013, + "step": 2396 + }, + { + "ce_ib": 5.570112705230713, + "ce_orig": 1.35520339012146, + "epoch": 0.6890502552304263, + "kl_loss": 0.05816281959414482, + "loss_ib": 0.0011386394035071135, + "step": 2396 + }, + { + "ce_ib": 3.3620336055755615, + "ce_orig": 0.7842528820037842, + "epoch": 0.689337838809404, + "kl_loss": 0.08570797741413116, + "loss_ib": 0.001193283125758171, + "step": 2397 + }, + { + "ce_ib": 3.7103538513183594, + "ce_orig": 0.9287613034248352, + "epoch": 0.689337838809404, + "kl_loss": 0.07754450291395187, + "loss_ib": 0.0011464804410934448, + "step": 2397 + }, + { + "ce_ib": 3.2517616748809814, + "ce_orig": 0.7256904244422913, + "epoch": 0.689337838809404, + "kl_loss": 0.04245951399207115, + "loss_ib": 0.000749771308619529, + "step": 2397 + }, + { + "ce_ib": 4.544743537902832, + "ce_orig": 0.7028631567955017, + "epoch": 0.689337838809404, + "kl_loss": 0.1085115522146225, + "loss_ib": 0.001539589837193489, + "step": 2397 + }, + { + "ce_ib": 5.018190860748291, + "ce_orig": 0.8440736532211304, + "epoch": 0.6896254223883816, + "kl_loss": 0.07311636209487915, + "loss_ib": 0.001232982729561627, + "step": 2398 + }, + { + "ce_ib": 3.1788179874420166, + "ce_orig": 0.7326139807701111, + "epoch": 0.6896254223883816, + "kl_loss": 0.10009107738733292, + "loss_ib": 0.0013187925796955824, + "step": 2398 + }, + { + "ce_ib": 5.628873348236084, + "ce_orig": 1.346350073814392, + "epoch": 0.6896254223883816, + "kl_loss": 0.07939018309116364, + "loss_ib": 0.001356789143756032, + "step": 2398 + }, + { + "ce_ib": 4.874034404754639, + "ce_orig": 1.088059425354004, + "epoch": 0.6896254223883816, + "kl_loss": 0.04589637368917465, + "loss_ib": 0.000946367159485817, + "step": 2398 + }, + { + "ce_ib": 5.961834907531738, + "ce_orig": 1.3695114850997925, + "epoch": 0.6899130059673593, + "kl_loss": 0.07710470259189606, + "loss_ib": 0.0013672304339706898, + "step": 2399 + }, + { + "ce_ib": 4.562358856201172, + "ce_orig": 0.9286516308784485, + "epoch": 0.6899130059673593, + "kl_loss": 0.08390610665082932, + "loss_ib": 0.0012952969409525394, + "step": 2399 + }, + { + "ce_ib": 4.9585862159729, + "ce_orig": 0.72636878490448, + "epoch": 0.6899130059673593, + "kl_loss": 0.0773414671421051, + "loss_ib": 0.0012692732270807028, + "step": 2399 + }, + { + "ce_ib": 4.033093452453613, + "ce_orig": 0.6793612241744995, + "epoch": 0.6899130059673593, + "kl_loss": 0.09849656373262405, + "loss_ib": 0.0013882749481126666, + "step": 2399 + }, + { + "epoch": 0.6902005895463369, + "grad_norm": 0.08657790720462799, + "learning_rate": 4.496692561411182e-05, + "loss": 0.8572, + "step": 2400 + }, + { + "ce_ib": 4.9370012283325195, + "ce_orig": 1.0778354406356812, + "epoch": 0.6902005895463369, + "kl_loss": 0.11346911638975143, + "loss_ib": 0.0016283912118524313, + "step": 2400 + }, + { + "ce_ib": 3.1718249320983887, + "ce_orig": 0.6910493969917297, + "epoch": 0.6902005895463369, + "kl_loss": 0.043131273239851, + "loss_ib": 0.0007484952220693231, + "step": 2400 + }, + { + "ce_ib": 3.653409481048584, + "ce_orig": 0.7212897539138794, + "epoch": 0.6902005895463369, + "kl_loss": 0.0962454229593277, + "loss_ib": 0.001327795092947781, + "step": 2400 + }, + { + "ce_ib": 5.395859718322754, + "ce_orig": 1.2446403503417969, + "epoch": 0.6902005895463369, + "kl_loss": 0.08307822793722153, + "loss_ib": 0.001370368292555213, + "step": 2400 + }, + { + "ce_ib": 3.157407283782959, + "ce_orig": 0.5043870806694031, + "epoch": 0.6904881731253145, + "kl_loss": 0.07756854593753815, + "loss_ib": 0.0010914261220023036, + "step": 2401 + }, + { + "ce_ib": 4.692742824554443, + "ce_orig": 1.0417239665985107, + "epoch": 0.6904881731253145, + "kl_loss": 0.0552489347755909, + "loss_ib": 0.0010217635426670313, + "step": 2401 + }, + { + "ce_ib": 2.6966564655303955, + "ce_orig": 0.638038694858551, + "epoch": 0.6904881731253145, + "kl_loss": 0.0638963133096695, + "loss_ib": 0.0009086287464015186, + "step": 2401 + }, + { + "ce_ib": 4.241952419281006, + "ce_orig": 1.0051885843276978, + "epoch": 0.6904881731253145, + "kl_loss": 0.05318768322467804, + "loss_ib": 0.0009560720645822585, + "step": 2401 + }, + { + "ce_ib": 3.765841245651245, + "ce_orig": 0.6108748316764832, + "epoch": 0.6907757567042921, + "kl_loss": 0.05182775855064392, + "loss_ib": 0.000894861645065248, + "step": 2402 + }, + { + "ce_ib": 3.6464767456054688, + "ce_orig": 0.7311961650848389, + "epoch": 0.6907757567042921, + "kl_loss": 0.04409867152571678, + "loss_ib": 0.000805634364951402, + "step": 2402 + }, + { + "ce_ib": 5.139959335327148, + "ce_orig": 1.4126787185668945, + "epoch": 0.6907757567042921, + "kl_loss": 0.06447149813175201, + "loss_ib": 0.001158710801973939, + "step": 2402 + }, + { + "ce_ib": 2.902367353439331, + "ce_orig": 0.5759242177009583, + "epoch": 0.6907757567042921, + "kl_loss": 0.0442953035235405, + "loss_ib": 0.0007331896922551095, + "step": 2402 + }, + { + "ce_ib": 3.3541018962860107, + "ce_orig": 0.813486635684967, + "epoch": 0.6910633402832699, + "kl_loss": 0.09643542766571045, + "loss_ib": 0.00129976449534297, + "step": 2403 + }, + { + "ce_ib": 6.840709209442139, + "ce_orig": 1.480482578277588, + "epoch": 0.6910633402832699, + "kl_loss": 0.10524459928274155, + "loss_ib": 0.0017365169478580356, + "step": 2403 + }, + { + "ce_ib": 2.7844879627227783, + "ce_orig": 0.6575375199317932, + "epoch": 0.6910633402832699, + "kl_loss": 0.06125100702047348, + "loss_ib": 0.0008909588796086609, + "step": 2403 + }, + { + "ce_ib": 4.387655258178711, + "ce_orig": 0.6589735746383667, + "epoch": 0.6910633402832699, + "kl_loss": 0.07157374918460846, + "loss_ib": 0.0011545029701665044, + "step": 2403 + }, + { + "ce_ib": 3.629495859146118, + "ce_orig": 0.8516076803207397, + "epoch": 0.6913509238622475, + "kl_loss": 0.04929491505026817, + "loss_ib": 0.000855898717418313, + "step": 2404 + }, + { + "ce_ib": 3.0300674438476562, + "ce_orig": 0.4390190541744232, + "epoch": 0.6913509238622475, + "kl_loss": 0.08101572096347809, + "loss_ib": 0.0011131640058010817, + "step": 2404 + }, + { + "ce_ib": 6.173761367797852, + "ce_orig": 1.0861191749572754, + "epoch": 0.6913509238622475, + "kl_loss": 0.054766591638326645, + "loss_ib": 0.0011650420492514968, + "step": 2404 + }, + { + "ce_ib": 3.4908721446990967, + "ce_orig": 0.9791658520698547, + "epoch": 0.6913509238622475, + "kl_loss": 0.07784635573625565, + "loss_ib": 0.001127550727687776, + "step": 2404 + }, + { + "epoch": 0.6916385074412251, + "grad_norm": 0.09216935187578201, + "learning_rate": 4.4943550724076063e-05, + "loss": 0.9226, + "step": 2405 + }, + { + "ce_ib": 5.0021796226501465, + "ce_orig": 1.2096168994903564, + "epoch": 0.6916385074412251, + "kl_loss": 0.046264536678791046, + "loss_ib": 0.0009628632687963545, + "step": 2405 + }, + { + "ce_ib": 4.511892318725586, + "ce_orig": 1.065072774887085, + "epoch": 0.6916385074412251, + "kl_loss": 0.09597969055175781, + "loss_ib": 0.0014109861804172397, + "step": 2405 + }, + { + "ce_ib": 2.9196510314941406, + "ce_orig": 0.7079920172691345, + "epoch": 0.6916385074412251, + "kl_loss": 0.06995537132024765, + "loss_ib": 0.0009915187256410718, + "step": 2405 + }, + { + "ce_ib": 3.8347561359405518, + "ce_orig": 0.40000176429748535, + "epoch": 0.6916385074412251, + "kl_loss": 0.0656663179397583, + "loss_ib": 0.0010401387698948383, + "step": 2405 + }, + { + "ce_ib": 2.7992067337036133, + "ce_orig": 0.787947952747345, + "epoch": 0.6919260910202027, + "kl_loss": 0.04667407274246216, + "loss_ib": 0.0007466613897122443, + "step": 2406 + }, + { + "ce_ib": 6.040639877319336, + "ce_orig": 1.3102067708969116, + "epoch": 0.6919260910202027, + "kl_loss": 0.05797279626131058, + "loss_ib": 0.001183791900984943, + "step": 2406 + }, + { + "ce_ib": 5.317227363586426, + "ce_orig": 0.5310835242271423, + "epoch": 0.6919260910202027, + "kl_loss": 0.1352420151233673, + "loss_ib": 0.0018841428682208061, + "step": 2406 + }, + { + "ce_ib": 4.730186939239502, + "ce_orig": 1.0843673944473267, + "epoch": 0.6919260910202027, + "kl_loss": 0.06417645514011383, + "loss_ib": 0.0011147832265123725, + "step": 2406 + }, + { + "ce_ib": 2.9693779945373535, + "ce_orig": 0.8443106412887573, + "epoch": 0.6922136745991804, + "kl_loss": 0.039845868945121765, + "loss_ib": 0.0006953965057618916, + "step": 2407 + }, + { + "ce_ib": 5.569772243499756, + "ce_orig": 0.872257649898529, + "epoch": 0.6922136745991804, + "kl_loss": 0.27120593190193176, + "loss_ib": 0.0032690363004803658, + "step": 2407 + }, + { + "ce_ib": 4.101600170135498, + "ce_orig": 0.8461344242095947, + "epoch": 0.6922136745991804, + "kl_loss": 0.10466500371694565, + "loss_ib": 0.001456810045056045, + "step": 2407 + }, + { + "ce_ib": 7.397150993347168, + "ce_orig": 1.3108196258544922, + "epoch": 0.6922136745991804, + "kl_loss": 0.08420944213867188, + "loss_ib": 0.0015818094834685326, + "step": 2407 + }, + { + "ce_ib": 2.96337628364563, + "ce_orig": 0.6210533380508423, + "epoch": 0.692501258178158, + "kl_loss": 0.06368958950042725, + "loss_ib": 0.0009332334739156067, + "step": 2408 + }, + { + "ce_ib": 6.2130656242370605, + "ce_orig": 1.7537107467651367, + "epoch": 0.692501258178158, + "kl_loss": 0.08413614332675934, + "loss_ib": 0.0014626679476350546, + "step": 2408 + }, + { + "ce_ib": 4.797325134277344, + "ce_orig": 1.1587156057357788, + "epoch": 0.692501258178158, + "kl_loss": 0.0444721058011055, + "loss_ib": 0.0009244535467587411, + "step": 2408 + }, + { + "ce_ib": 5.243434906005859, + "ce_orig": 0.9368939995765686, + "epoch": 0.692501258178158, + "kl_loss": 0.058525241911411285, + "loss_ib": 0.001109595876187086, + "step": 2408 + }, + { + "ce_ib": 3.2221312522888184, + "ce_orig": 0.8689323663711548, + "epoch": 0.6927888417571356, + "kl_loss": 0.05020219832658768, + "loss_ib": 0.0008242350886575878, + "step": 2409 + }, + { + "ce_ib": 5.558706760406494, + "ce_orig": 1.1151801347732544, + "epoch": 0.6927888417571356, + "kl_loss": 0.11840584874153137, + "loss_ib": 0.001739929080940783, + "step": 2409 + }, + { + "ce_ib": 4.101006507873535, + "ce_orig": 0.617148220539093, + "epoch": 0.6927888417571356, + "kl_loss": 0.04457172006368637, + "loss_ib": 0.0008558177505619824, + "step": 2409 + }, + { + "ce_ib": 5.075506687164307, + "ce_orig": 1.0550310611724854, + "epoch": 0.6927888417571356, + "kl_loss": 0.14307713508605957, + "loss_ib": 0.0019383219769224524, + "step": 2409 + }, + { + "epoch": 0.6930764253361134, + "grad_norm": 0.10090930759906769, + "learning_rate": 4.492012778539762e-05, + "loss": 0.8213, + "step": 2410 + }, + { + "ce_ib": 2.871002435684204, + "ce_orig": 0.4256100654602051, + "epoch": 0.6930764253361134, + "kl_loss": 0.056130632758140564, + "loss_ib": 0.0008484065183438361, + "step": 2410 + }, + { + "ce_ib": 5.427347660064697, + "ce_orig": 1.1786848306655884, + "epoch": 0.6930764253361134, + "kl_loss": 0.09445418417453766, + "loss_ib": 0.001487276516854763, + "step": 2410 + }, + { + "ce_ib": 4.175151348114014, + "ce_orig": 1.039307713508606, + "epoch": 0.6930764253361134, + "kl_loss": 0.08813652396202087, + "loss_ib": 0.0012988802045583725, + "step": 2410 + }, + { + "ce_ib": 4.338197708129883, + "ce_orig": 0.9571808576583862, + "epoch": 0.6930764253361134, + "kl_loss": 0.0873841792345047, + "loss_ib": 0.0013076615286991, + "step": 2410 + }, + { + "ce_ib": 6.326228141784668, + "ce_orig": 0.8943297863006592, + "epoch": 0.693364008915091, + "kl_loss": 0.05130557715892792, + "loss_ib": 0.001145678455941379, + "step": 2411 + }, + { + "ce_ib": 4.436978816986084, + "ce_orig": 0.8963078856468201, + "epoch": 0.693364008915091, + "kl_loss": 0.06266219913959503, + "loss_ib": 0.0010703197913244367, + "step": 2411 + }, + { + "ce_ib": 5.189483642578125, + "ce_orig": 0.6847471594810486, + "epoch": 0.693364008915091, + "kl_loss": 0.08382858335971832, + "loss_ib": 0.0013572340831160545, + "step": 2411 + }, + { + "ce_ib": 3.6775970458984375, + "ce_orig": 0.6934906244277954, + "epoch": 0.693364008915091, + "kl_loss": 0.08581317961215973, + "loss_ib": 0.001225891406647861, + "step": 2411 + }, + { + "ce_ib": 4.536906719207764, + "ce_orig": 0.6553398370742798, + "epoch": 0.6936515924940686, + "kl_loss": 0.0782172828912735, + "loss_ib": 0.0012358634267002344, + "step": 2412 + }, + { + "ce_ib": 5.1787309646606445, + "ce_orig": 1.1194020509719849, + "epoch": 0.6936515924940686, + "kl_loss": 0.04803486540913582, + "loss_ib": 0.0009982218034565449, + "step": 2412 + }, + { + "ce_ib": 4.987370491027832, + "ce_orig": 1.209346055984497, + "epoch": 0.6936515924940686, + "kl_loss": 0.12007644027471542, + "loss_ib": 0.0016995014157146215, + "step": 2412 + }, + { + "ce_ib": 5.079449653625488, + "ce_orig": 1.0091747045516968, + "epoch": 0.6936515924940686, + "kl_loss": 0.11099615693092346, + "loss_ib": 0.001617906498722732, + "step": 2412 + }, + { + "ce_ib": 4.768636703491211, + "ce_orig": 1.1728335618972778, + "epoch": 0.6939391760730462, + "kl_loss": 0.07302531599998474, + "loss_ib": 0.0012071167584508657, + "step": 2413 + }, + { + "ce_ib": 5.194084167480469, + "ce_orig": 1.226790428161621, + "epoch": 0.6939391760730462, + "kl_loss": 0.08596241474151611, + "loss_ib": 0.0013790325028821826, + "step": 2413 + }, + { + "ce_ib": 1.6872254610061646, + "ce_orig": 0.23666377365589142, + "epoch": 0.6939391760730462, + "kl_loss": 0.16429464519023895, + "loss_ib": 0.0018116689752787352, + "step": 2413 + }, + { + "ce_ib": 4.188203811645508, + "ce_orig": 0.7789223790168762, + "epoch": 0.6939391760730462, + "kl_loss": 0.08524826169013977, + "loss_ib": 0.0012713029282167554, + "step": 2413 + }, + { + "ce_ib": 4.067956447601318, + "ce_orig": 1.0364197492599487, + "epoch": 0.6942267596520239, + "kl_loss": 0.032415274530649185, + "loss_ib": 0.0007309483480639756, + "step": 2414 + }, + { + "ce_ib": 2.419804334640503, + "ce_orig": 0.3926176130771637, + "epoch": 0.6942267596520239, + "kl_loss": 0.08412323892116547, + "loss_ib": 0.0010832127882167697, + "step": 2414 + }, + { + "ce_ib": 3.6541967391967773, + "ce_orig": 0.6048765778541565, + "epoch": 0.6942267596520239, + "kl_loss": 0.09116160869598389, + "loss_ib": 0.0012770358007401228, + "step": 2414 + }, + { + "ce_ib": 3.886138916015625, + "ce_orig": 0.7660588026046753, + "epoch": 0.6942267596520239, + "kl_loss": 0.07183912396430969, + "loss_ib": 0.0011070050531998277, + "step": 2414 + }, + { + "epoch": 0.6945143432310015, + "grad_norm": 0.09994565695524216, + "learning_rate": 4.489665685450778e-05, + "loss": 0.9399, + "step": 2415 + }, + { + "ce_ib": 2.6576530933380127, + "ce_orig": 0.4457158148288727, + "epoch": 0.6945143432310015, + "kl_loss": 0.06834821403026581, + "loss_ib": 0.0009492474491707981, + "step": 2415 + }, + { + "ce_ib": 1.8866108655929565, + "ce_orig": 0.4177325665950775, + "epoch": 0.6945143432310015, + "kl_loss": 0.05127159506082535, + "loss_ib": 0.0007013769936747849, + "step": 2415 + }, + { + "ce_ib": 3.156304121017456, + "ce_orig": 0.5318323373794556, + "epoch": 0.6945143432310015, + "kl_loss": 0.0813676193356514, + "loss_ib": 0.00112930650357157, + "step": 2415 + }, + { + "ce_ib": 3.0987770557403564, + "ce_orig": 0.5087038278579712, + "epoch": 0.6945143432310015, + "kl_loss": 0.050896573811769485, + "loss_ib": 0.0008188433712348342, + "step": 2415 + }, + { + "ce_ib": 5.562070846557617, + "ce_orig": 1.1888585090637207, + "epoch": 0.6948019268099791, + "kl_loss": 0.07006698846817017, + "loss_ib": 0.0012568768579512835, + "step": 2416 + }, + { + "ce_ib": 2.988525390625, + "ce_orig": 0.5458453297615051, + "epoch": 0.6948019268099791, + "kl_loss": 0.05878884345293045, + "loss_ib": 0.0008867409778758883, + "step": 2416 + }, + { + "ce_ib": 4.019321441650391, + "ce_orig": 0.8385211229324341, + "epoch": 0.6948019268099791, + "kl_loss": 0.06410448998212814, + "loss_ib": 0.001042976975440979, + "step": 2416 + }, + { + "ce_ib": 4.556407451629639, + "ce_orig": 0.9664219617843628, + "epoch": 0.6948019268099791, + "kl_loss": 0.05726923421025276, + "loss_ib": 0.0010283330921083689, + "step": 2416 + }, + { + "ce_ib": 2.533841609954834, + "ce_orig": 0.20921021699905396, + "epoch": 0.6950895103889568, + "kl_loss": 0.03259589895606041, + "loss_ib": 0.0005793431191705167, + "step": 2417 + }, + { + "ce_ib": 4.045670986175537, + "ce_orig": 0.8279528021812439, + "epoch": 0.6950895103889568, + "kl_loss": 0.08246147632598877, + "loss_ib": 0.0012291818857192993, + "step": 2417 + }, + { + "ce_ib": 1.672650933265686, + "ce_orig": 0.5017266869544983, + "epoch": 0.6950895103889568, + "kl_loss": 0.0434282124042511, + "loss_ib": 0.0006015471881255507, + "step": 2417 + }, + { + "ce_ib": 4.14917516708374, + "ce_orig": 0.5667518377304077, + "epoch": 0.6950895103889568, + "kl_loss": 0.0536496601998806, + "loss_ib": 0.0009514141129329801, + "step": 2417 + }, + { + "ce_ib": 5.399324893951416, + "ce_orig": 1.1622483730316162, + "epoch": 0.6953770939679345, + "kl_loss": 0.08754502236843109, + "loss_ib": 0.0014153826050460339, + "step": 2418 + }, + { + "ce_ib": 3.0251479148864746, + "ce_orig": 0.6901456117630005, + "epoch": 0.6953770939679345, + "kl_loss": 0.03301297873258591, + "loss_ib": 0.0006326445727609098, + "step": 2418 + }, + { + "ce_ib": 5.895893096923828, + "ce_orig": 1.415846586227417, + "epoch": 0.6953770939679345, + "kl_loss": 0.09628024697303772, + "loss_ib": 0.0015523917973041534, + "step": 2418 + }, + { + "ce_ib": 3.882730007171631, + "ce_orig": 1.115507960319519, + "epoch": 0.6953770939679345, + "kl_loss": 0.055306777358055115, + "loss_ib": 0.0009413407533429563, + "step": 2418 + }, + { + "ce_ib": 8.152046203613281, + "ce_orig": 1.7239480018615723, + "epoch": 0.6956646775469121, + "kl_loss": 0.07704395055770874, + "loss_ib": 0.0015856442041695118, + "step": 2419 + }, + { + "ce_ib": 4.261550426483154, + "ce_orig": 0.756473183631897, + "epoch": 0.6956646775469121, + "kl_loss": 0.07325805723667145, + "loss_ib": 0.001158735598437488, + "step": 2419 + }, + { + "ce_ib": 3.3830292224884033, + "ce_orig": 0.7236105799674988, + "epoch": 0.6956646775469121, + "kl_loss": 0.0835438147187233, + "loss_ib": 0.0011737410677596927, + "step": 2419 + }, + { + "ce_ib": 3.852938652038574, + "ce_orig": 0.566871166229248, + "epoch": 0.6956646775469121, + "kl_loss": 0.08276958763599396, + "loss_ib": 0.001212989678606391, + "step": 2419 + }, + { + "epoch": 0.6959522611258897, + "grad_norm": 0.08467966318130493, + "learning_rate": 4.487313798795346e-05, + "loss": 0.8453, + "step": 2420 + }, + { + "ce_ib": 5.128081798553467, + "ce_orig": 0.9661197066307068, + "epoch": 0.6959522611258897, + "kl_loss": 0.06729994714260101, + "loss_ib": 0.0011858075158670545, + "step": 2420 + }, + { + "ce_ib": 5.577975749969482, + "ce_orig": 1.238182544708252, + "epoch": 0.6959522611258897, + "kl_loss": 0.06433675438165665, + "loss_ib": 0.0012011650251224637, + "step": 2420 + }, + { + "ce_ib": 3.5458621978759766, + "ce_orig": 0.4433455765247345, + "epoch": 0.6959522611258897, + "kl_loss": 0.06447719037532806, + "loss_ib": 0.0009993581334128976, + "step": 2420 + }, + { + "ce_ib": 3.17144775390625, + "ce_orig": 0.5882195830345154, + "epoch": 0.6959522611258897, + "kl_loss": 0.06645147502422333, + "loss_ib": 0.0009816595120355487, + "step": 2420 + }, + { + "ce_ib": 5.088037014007568, + "ce_orig": 1.1159788370132446, + "epoch": 0.6962398447048673, + "kl_loss": 0.07028113305568695, + "loss_ib": 0.0012116150464862585, + "step": 2421 + }, + { + "ce_ib": 2.7338130474090576, + "ce_orig": 0.6431488990783691, + "epoch": 0.6962398447048673, + "kl_loss": 0.051388438791036606, + "loss_ib": 0.0007872656569816172, + "step": 2421 + }, + { + "ce_ib": 5.470101356506348, + "ce_orig": 1.5208131074905396, + "epoch": 0.6962398447048673, + "kl_loss": 0.07896239310503006, + "loss_ib": 0.0013366339262574911, + "step": 2421 + }, + { + "ce_ib": 2.593456506729126, + "ce_orig": 0.5829979777336121, + "epoch": 0.6962398447048673, + "kl_loss": 0.06037526577711105, + "loss_ib": 0.0008630982483737171, + "step": 2421 + }, + { + "ce_ib": 4.1568169593811035, + "ce_orig": 0.740251362323761, + "epoch": 0.696527428283845, + "kl_loss": 0.07949431985616684, + "loss_ib": 0.0012106248177587986, + "step": 2422 + }, + { + "ce_ib": 4.6218061447143555, + "ce_orig": 0.8164442777633667, + "epoch": 0.696527428283845, + "kl_loss": 0.09065210819244385, + "loss_ib": 0.0013687016908079386, + "step": 2422 + }, + { + "ce_ib": 4.927943229675293, + "ce_orig": 1.0574220418930054, + "epoch": 0.696527428283845, + "kl_loss": 0.08861935138702393, + "loss_ib": 0.001378987799398601, + "step": 2422 + }, + { + "ce_ib": 3.3201518058776855, + "ce_orig": 0.6958391666412354, + "epoch": 0.696527428283845, + "kl_loss": 0.05570698529481888, + "loss_ib": 0.0008890850003808737, + "step": 2422 + }, + { + "ce_ib": 3.640284538269043, + "ce_orig": 0.6122000217437744, + "epoch": 0.6968150118628227, + "kl_loss": 0.10310137271881104, + "loss_ib": 0.0013950421707704663, + "step": 2423 + }, + { + "ce_ib": 4.365780830383301, + "ce_orig": 0.9285838603973389, + "epoch": 0.6968150118628227, + "kl_loss": 0.10030068457126617, + "loss_ib": 0.0014395848847925663, + "step": 2423 + }, + { + "ce_ib": 2.9177463054656982, + "ce_orig": 0.6814598441123962, + "epoch": 0.6968150118628227, + "kl_loss": 0.04493485018610954, + "loss_ib": 0.0007411231053993106, + "step": 2423 + }, + { + "ce_ib": 3.252166509628296, + "ce_orig": 0.5251758098602295, + "epoch": 0.6968150118628227, + "kl_loss": 0.07403828948736191, + "loss_ib": 0.00106559949927032, + "step": 2423 + }, + { + "ce_ib": 3.348069429397583, + "ce_orig": 0.852552592754364, + "epoch": 0.6971025954418003, + "kl_loss": 0.03675362840294838, + "loss_ib": 0.0007023431826382875, + "step": 2424 + }, + { + "ce_ib": 2.803931951522827, + "ce_orig": 0.7383123636245728, + "epoch": 0.6971025954418003, + "kl_loss": 0.05664676055312157, + "loss_ib": 0.000846860755700618, + "step": 2424 + }, + { + "ce_ib": 3.326809883117676, + "ce_orig": 0.5987116098403931, + "epoch": 0.6971025954418003, + "kl_loss": 0.07386401295661926, + "loss_ib": 0.0010713210795074701, + "step": 2424 + }, + { + "ce_ib": 3.863417625427246, + "ce_orig": 0.8767144083976746, + "epoch": 0.6971025954418003, + "kl_loss": 0.04931942746043205, + "loss_ib": 0.0008795359754003584, + "step": 2424 + }, + { + "epoch": 0.6973901790207779, + "grad_norm": 0.09347673505544662, + "learning_rate": 4.484957124239707e-05, + "loss": 0.7991, + "step": 2425 + }, + { + "ce_ib": 5.678686141967773, + "ce_orig": 0.806731641292572, + "epoch": 0.6973901790207779, + "kl_loss": 0.085017628967762, + "loss_ib": 0.0014180447906255722, + "step": 2425 + }, + { + "ce_ib": 5.577174663543701, + "ce_orig": 1.3204976320266724, + "epoch": 0.6973901790207779, + "kl_loss": 0.059431008994579315, + "loss_ib": 0.0011520275147631764, + "step": 2425 + }, + { + "ce_ib": 5.84564208984375, + "ce_orig": 1.6411858797073364, + "epoch": 0.6973901790207779, + "kl_loss": 0.05319777876138687, + "loss_ib": 0.0011165420291945338, + "step": 2425 + }, + { + "ce_ib": 5.67768669128418, + "ce_orig": 1.1154669523239136, + "epoch": 0.6973901790207779, + "kl_loss": 0.07535646855831146, + "loss_ib": 0.0013213333440944552, + "step": 2425 + }, + { + "ce_ib": 4.068757057189941, + "ce_orig": 0.7483729720115662, + "epoch": 0.6976777625997556, + "kl_loss": 0.08998892456293106, + "loss_ib": 0.001306764897890389, + "step": 2426 + }, + { + "ce_ib": 3.562678337097168, + "ce_orig": 0.8839237689971924, + "epoch": 0.6976777625997556, + "kl_loss": 0.06667760759592056, + "loss_ib": 0.001023043878376484, + "step": 2426 + }, + { + "ce_ib": 3.442366361618042, + "ce_orig": 0.8839514255523682, + "epoch": 0.6976777625997556, + "kl_loss": 0.050994448363780975, + "loss_ib": 0.0008541811257600784, + "step": 2426 + }, + { + "ce_ib": 3.5261595249176025, + "ce_orig": 0.9456899762153625, + "epoch": 0.6976777625997556, + "kl_loss": 0.04719686508178711, + "loss_ib": 0.0008245845674537122, + "step": 2426 + }, + { + "ce_ib": 5.661541938781738, + "ce_orig": 0.8350020051002502, + "epoch": 0.6979653461787332, + "kl_loss": 0.05362590774893761, + "loss_ib": 0.001102413167245686, + "step": 2427 + }, + { + "ce_ib": 4.381192207336426, + "ce_orig": 0.6200610399246216, + "epoch": 0.6979653461787332, + "kl_loss": 0.11215183138847351, + "loss_ib": 0.0015596374869346619, + "step": 2427 + }, + { + "ce_ib": 2.13877010345459, + "ce_orig": 0.3050120770931244, + "epoch": 0.6979653461787332, + "kl_loss": 0.0483175665140152, + "loss_ib": 0.0006970526301302016, + "step": 2427 + }, + { + "ce_ib": 2.9289515018463135, + "ce_orig": 1.0179545879364014, + "epoch": 0.6979653461787332, + "kl_loss": 0.03347410261631012, + "loss_ib": 0.0006276361527852714, + "step": 2427 + }, + { + "ce_ib": 2.6790096759796143, + "ce_orig": 0.6311802268028259, + "epoch": 0.6982529297577108, + "kl_loss": 0.06813117861747742, + "loss_ib": 0.0009492127574048936, + "step": 2428 + }, + { + "ce_ib": 3.5683438777923584, + "ce_orig": 0.5922291278839111, + "epoch": 0.6982529297577108, + "kl_loss": 0.05492416396737099, + "loss_ib": 0.0009060759912244976, + "step": 2428 + }, + { + "ce_ib": 6.132636070251465, + "ce_orig": 1.1435695886611938, + "epoch": 0.6982529297577108, + "kl_loss": 0.07812712341547012, + "loss_ib": 0.0013945348327979445, + "step": 2428 + }, + { + "ce_ib": 6.888500213623047, + "ce_orig": 1.7110567092895508, + "epoch": 0.6982529297577108, + "kl_loss": 0.08394025266170502, + "loss_ib": 0.0015282524982467294, + "step": 2428 + }, + { + "ce_ib": 3.585756778717041, + "ce_orig": 0.4531008005142212, + "epoch": 0.6985405133366884, + "kl_loss": 0.0736529603600502, + "loss_ib": 0.0010951051954180002, + "step": 2429 + }, + { + "ce_ib": 5.441714763641357, + "ce_orig": 1.0572446584701538, + "epoch": 0.6985405133366884, + "kl_loss": 0.062079183757305145, + "loss_ib": 0.00116496323607862, + "step": 2429 + }, + { + "ce_ib": 2.975109577178955, + "ce_orig": 0.4132767915725708, + "epoch": 0.6985405133366884, + "kl_loss": 0.1066233441233635, + "loss_ib": 0.0013637443771585822, + "step": 2429 + }, + { + "ce_ib": 2.3111343383789062, + "ce_orig": 0.4956235885620117, + "epoch": 0.6985405133366884, + "kl_loss": 0.0438164621591568, + "loss_ib": 0.0006692780298180878, + "step": 2429 + }, + { + "epoch": 0.6988280969156662, + "grad_norm": 0.09799016267061234, + "learning_rate": 4.482595667461639e-05, + "loss": 0.8297, + "step": 2430 + }, + { + "ce_ib": 2.236515522003174, + "ce_orig": 0.6192347407341003, + "epoch": 0.6988280969156662, + "kl_loss": 0.0370117723941803, + "loss_ib": 0.0005937692476436496, + "step": 2430 + }, + { + "ce_ib": 4.043331146240234, + "ce_orig": 0.8102149963378906, + "epoch": 0.6988280969156662, + "kl_loss": 0.06850084662437439, + "loss_ib": 0.0010893415892496705, + "step": 2430 + }, + { + "ce_ib": 3.1724069118499756, + "ce_orig": 0.7252013087272644, + "epoch": 0.6988280969156662, + "kl_loss": 0.07418900728225708, + "loss_ib": 0.0010591306490823627, + "step": 2430 + }, + { + "ce_ib": 3.163860559463501, + "ce_orig": 0.6080918908119202, + "epoch": 0.6988280969156662, + "kl_loss": 0.020674899220466614, + "loss_ib": 0.0005231350078247488, + "step": 2430 + }, + { + "ce_ib": 6.194910049438477, + "ce_orig": 1.5870405435562134, + "epoch": 0.6991156804946438, + "kl_loss": 0.05882204324007034, + "loss_ib": 0.0012077114079147577, + "step": 2431 + }, + { + "ce_ib": 3.632702589035034, + "ce_orig": 0.6964841485023499, + "epoch": 0.6991156804946438, + "kl_loss": 0.06265025585889816, + "loss_ib": 0.0009897728450596333, + "step": 2431 + }, + { + "ce_ib": 6.306859493255615, + "ce_orig": 1.3499929904937744, + "epoch": 0.6991156804946438, + "kl_loss": 0.04649018496274948, + "loss_ib": 0.0010955877369269729, + "step": 2431 + }, + { + "ce_ib": 3.558377265930176, + "ce_orig": 0.811074435710907, + "epoch": 0.6991156804946438, + "kl_loss": 0.06599580496549606, + "loss_ib": 0.0010157957440242171, + "step": 2431 + }, + { + "ce_ib": 5.995537757873535, + "ce_orig": 1.5453393459320068, + "epoch": 0.6994032640736214, + "kl_loss": 0.06946267932653427, + "loss_ib": 0.0012941805180162191, + "step": 2432 + }, + { + "ce_ib": 3.3417458534240723, + "ce_orig": 0.518341600894928, + "epoch": 0.6994032640736214, + "kl_loss": 0.09017793834209442, + "loss_ib": 0.0012359539978206158, + "step": 2432 + }, + { + "ce_ib": 7.279566764831543, + "ce_orig": 1.6375384330749512, + "epoch": 0.6994032640736214, + "kl_loss": 0.07953529059886932, + "loss_ib": 0.0015233096200972795, + "step": 2432 + }, + { + "ce_ib": 3.9120852947235107, + "ce_orig": 0.562882661819458, + "epoch": 0.6994032640736214, + "kl_loss": 0.08976616710424423, + "loss_ib": 0.0012888702331110835, + "step": 2432 + }, + { + "ce_ib": 4.7368621826171875, + "ce_orig": 0.3427463471889496, + "epoch": 0.699690847652599, + "kl_loss": 0.13071399927139282, + "loss_ib": 0.0017808261327445507, + "step": 2433 + }, + { + "ce_ib": 2.9356493949890137, + "ce_orig": 0.6133739352226257, + "epoch": 0.699690847652599, + "kl_loss": 0.06651619076728821, + "loss_ib": 0.0009587268577888608, + "step": 2433 + }, + { + "ce_ib": 3.9988298416137695, + "ce_orig": 0.9660942554473877, + "epoch": 0.699690847652599, + "kl_loss": 0.05492332577705383, + "loss_ib": 0.0009491161908954382, + "step": 2433 + }, + { + "ce_ib": 4.850404739379883, + "ce_orig": 1.0836856365203857, + "epoch": 0.699690847652599, + "kl_loss": 0.0663650631904602, + "loss_ib": 0.0011486910516396165, + "step": 2433 + }, + { + "ce_ib": 3.9864070415496826, + "ce_orig": 0.7523148059844971, + "epoch": 0.6999784312315767, + "kl_loss": 0.08964260667562485, + "loss_ib": 0.0012950667878612876, + "step": 2434 + }, + { + "ce_ib": 5.567244052886963, + "ce_orig": 1.2794591188430786, + "epoch": 0.6999784312315767, + "kl_loss": 0.07891793549060822, + "loss_ib": 0.0013459037290886045, + "step": 2434 + }, + { + "ce_ib": 7.122055530548096, + "ce_orig": 1.2646336555480957, + "epoch": 0.6999784312315767, + "kl_loss": 0.09100532531738281, + "loss_ib": 0.001622258685529232, + "step": 2434 + }, + { + "ce_ib": 5.263060569763184, + "ce_orig": 0.9940396547317505, + "epoch": 0.6999784312315767, + "kl_loss": 0.0644381195306778, + "loss_ib": 0.0011706871446222067, + "step": 2434 + }, + { + "epoch": 0.7002660148105543, + "grad_norm": 0.09461139142513275, + "learning_rate": 4.480229434150436e-05, + "loss": 0.849, + "step": 2435 + }, + { + "ce_ib": 5.3667073249816895, + "ce_orig": 1.0803425312042236, + "epoch": 0.7002660148105543, + "kl_loss": 0.085472971200943, + "loss_ib": 0.0013914003502577543, + "step": 2435 + }, + { + "ce_ib": 6.543687343597412, + "ce_orig": 1.5147833824157715, + "epoch": 0.7002660148105543, + "kl_loss": 0.07317483425140381, + "loss_ib": 0.0013861169572919607, + "step": 2435 + }, + { + "ce_ib": 4.939040660858154, + "ce_orig": 0.9310745000839233, + "epoch": 0.7002660148105543, + "kl_loss": 0.0953943133354187, + "loss_ib": 0.0014478471130132675, + "step": 2435 + }, + { + "ce_ib": 3.5520734786987305, + "ce_orig": 0.6873761415481567, + "epoch": 0.7002660148105543, + "kl_loss": 0.07793775200843811, + "loss_ib": 0.0011345847742632031, + "step": 2435 + }, + { + "ce_ib": 3.037790536880493, + "ce_orig": 0.7553188800811768, + "epoch": 0.7005535983895319, + "kl_loss": 0.07641607522964478, + "loss_ib": 0.0010679397964850068, + "step": 2436 + }, + { + "ce_ib": 5.946833610534668, + "ce_orig": 1.5358357429504395, + "epoch": 0.7005535983895319, + "kl_loss": 0.08466943353414536, + "loss_ib": 0.001441377680748701, + "step": 2436 + }, + { + "ce_ib": 1.8540916442871094, + "ce_orig": 0.49209675192832947, + "epoch": 0.7005535983895319, + "kl_loss": 0.02997838519513607, + "loss_ib": 0.0004851929843425751, + "step": 2436 + }, + { + "ce_ib": 4.6678056716918945, + "ce_orig": 0.8505120277404785, + "epoch": 0.7005535983895319, + "kl_loss": 0.06323128938674927, + "loss_ib": 0.0010990933515131474, + "step": 2436 + }, + { + "ce_ib": 6.735151290893555, + "ce_orig": 1.5315630435943604, + "epoch": 0.7008411819685096, + "kl_loss": 0.0484260693192482, + "loss_ib": 0.001157775754109025, + "step": 2437 + }, + { + "ce_ib": 3.246838092803955, + "ce_orig": 0.7642510533332825, + "epoch": 0.7008411819685096, + "kl_loss": 0.05810908228158951, + "loss_ib": 0.0009057745919562876, + "step": 2437 + }, + { + "ce_ib": 2.5224974155426025, + "ce_orig": 0.4799850583076477, + "epoch": 0.7008411819685096, + "kl_loss": 0.12292974442243576, + "loss_ib": 0.0014815471367910504, + "step": 2437 + }, + { + "ce_ib": 3.691371202468872, + "ce_orig": 0.8822781443595886, + "epoch": 0.7008411819685096, + "kl_loss": 0.04815828055143356, + "loss_ib": 0.0008507199236191809, + "step": 2437 + }, + { + "ce_ib": 2.7413623332977295, + "ce_orig": 0.5302308201789856, + "epoch": 0.7011287655474873, + "kl_loss": 0.07758566737174988, + "loss_ib": 0.001049992861226201, + "step": 2438 + }, + { + "ce_ib": 4.607650279998779, + "ce_orig": 0.7928907871246338, + "epoch": 0.7011287655474873, + "kl_loss": 0.056179992854595184, + "loss_ib": 0.001022564945742488, + "step": 2438 + }, + { + "ce_ib": 7.729616641998291, + "ce_orig": 1.404163122177124, + "epoch": 0.7011287655474873, + "kl_loss": 0.06223555654287338, + "loss_ib": 0.0013953172601759434, + "step": 2438 + }, + { + "ce_ib": 3.3299121856689453, + "ce_orig": 0.750211238861084, + "epoch": 0.7011287655474873, + "kl_loss": 0.06666648387908936, + "loss_ib": 0.0009996560402214527, + "step": 2438 + }, + { + "ce_ib": 2.782268762588501, + "ce_orig": 0.5007914304733276, + "epoch": 0.7014163491264649, + "kl_loss": 0.05887405201792717, + "loss_ib": 0.0008669673698022962, + "step": 2439 + }, + { + "ce_ib": 4.493607521057129, + "ce_orig": 0.5572299361228943, + "epoch": 0.7014163491264649, + "kl_loss": 0.07305562496185303, + "loss_ib": 0.0011799170169979334, + "step": 2439 + }, + { + "ce_ib": 5.844841003417969, + "ce_orig": 1.0876977443695068, + "epoch": 0.7014163491264649, + "kl_loss": 0.07622945308685303, + "loss_ib": 0.0013467785902321339, + "step": 2439 + }, + { + "ce_ib": 2.8612918853759766, + "ce_orig": 0.7050551176071167, + "epoch": 0.7014163491264649, + "kl_loss": 0.040577568113803864, + "loss_ib": 0.0006919048610143363, + "step": 2439 + }, + { + "epoch": 0.7017039327054425, + "grad_norm": 0.09617263078689575, + "learning_rate": 4.477858430006906e-05, + "loss": 0.8608, + "step": 2440 + }, + { + "ce_ib": 3.8840410709381104, + "ce_orig": 0.6927491426467896, + "epoch": 0.7017039327054425, + "kl_loss": 0.09754123538732529, + "loss_ib": 0.0013638163218274713, + "step": 2440 + }, + { + "ce_ib": 4.291858673095703, + "ce_orig": 1.003179907798767, + "epoch": 0.7017039327054425, + "kl_loss": 0.048747193068265915, + "loss_ib": 0.0009166577365249395, + "step": 2440 + }, + { + "ce_ib": 4.679376602172852, + "ce_orig": 1.1057898998260498, + "epoch": 0.7017039327054425, + "kl_loss": 0.06715390831232071, + "loss_ib": 0.0011394767789170146, + "step": 2440 + }, + { + "ce_ib": 4.094161033630371, + "ce_orig": 0.5228696465492249, + "epoch": 0.7017039327054425, + "kl_loss": 0.06996893882751465, + "loss_ib": 0.0011091054184362292, + "step": 2440 + }, + { + "ce_ib": 5.361659526824951, + "ce_orig": 0.912357747554779, + "epoch": 0.7019915162844201, + "kl_loss": 0.11304466426372528, + "loss_ib": 0.0016666125738993287, + "step": 2441 + }, + { + "ce_ib": 5.807859420776367, + "ce_orig": 0.8833421468734741, + "epoch": 0.7019915162844201, + "kl_loss": 0.054955556988716125, + "loss_ib": 0.0011303414357826114, + "step": 2441 + }, + { + "ce_ib": 1.8670817613601685, + "ce_orig": 0.49550360441207886, + "epoch": 0.7019915162844201, + "kl_loss": 0.0370902381837368, + "loss_ib": 0.0005576105322688818, + "step": 2441 + }, + { + "ce_ib": 3.271066427230835, + "ce_orig": 0.6104457974433899, + "epoch": 0.7019915162844201, + "kl_loss": 0.09203866124153137, + "loss_ib": 0.0012474932009354234, + "step": 2441 + }, + { + "ce_ib": 4.268223762512207, + "ce_orig": 1.0487993955612183, + "epoch": 0.7022790998633978, + "kl_loss": 0.07842513918876648, + "loss_ib": 0.0012110737152397633, + "step": 2442 + }, + { + "ce_ib": 7.673248291015625, + "ce_orig": 1.4217948913574219, + "epoch": 0.7022790998633978, + "kl_loss": 0.16436880826950073, + "loss_ib": 0.002411013003438711, + "step": 2442 + }, + { + "ce_ib": 3.9976983070373535, + "ce_orig": 0.7052599191665649, + "epoch": 0.7022790998633978, + "kl_loss": 0.10316755622625351, + "loss_ib": 0.0014314452419057488, + "step": 2442 + }, + { + "ce_ib": 4.167831897735596, + "ce_orig": 0.9135403037071228, + "epoch": 0.7022790998633978, + "kl_loss": 0.10235833376646042, + "loss_ib": 0.0014403664972633123, + "step": 2442 + }, + { + "ce_ib": 4.124255180358887, + "ce_orig": 0.9855893850326538, + "epoch": 0.7025666834423755, + "kl_loss": 0.04707842320203781, + "loss_ib": 0.000883209693711251, + "step": 2443 + }, + { + "ce_ib": 5.089686393737793, + "ce_orig": 0.7453948855400085, + "epoch": 0.7025666834423755, + "kl_loss": 0.06257013976573944, + "loss_ib": 0.0011346701066941023, + "step": 2443 + }, + { + "ce_ib": 7.1745429039001465, + "ce_orig": 0.9626012444496155, + "epoch": 0.7025666834423755, + "kl_loss": 0.11492922902107239, + "loss_ib": 0.00186674646101892, + "step": 2443 + }, + { + "ce_ib": 2.4846420288085938, + "ce_orig": 0.6248784065246582, + "epoch": 0.7025666834423755, + "kl_loss": 0.031301893293857574, + "loss_ib": 0.0005614831461571157, + "step": 2443 + }, + { + "ce_ib": 2.6017982959747314, + "ce_orig": 0.571060299873352, + "epoch": 0.7028542670213531, + "kl_loss": 0.09496860951185226, + "loss_ib": 0.0012098659062758088, + "step": 2444 + }, + { + "ce_ib": 6.799217700958252, + "ce_orig": 1.1922132968902588, + "epoch": 0.7028542670213531, + "kl_loss": 0.07654416561126709, + "loss_ib": 0.0014453633921220899, + "step": 2444 + }, + { + "ce_ib": 6.172910690307617, + "ce_orig": 1.1639057397842407, + "epoch": 0.7028542670213531, + "kl_loss": 0.05755811184644699, + "loss_ib": 0.001192872179672122, + "step": 2444 + }, + { + "ce_ib": 5.339524745941162, + "ce_orig": 0.8451376557350159, + "epoch": 0.7028542670213531, + "kl_loss": 0.06624536216259003, + "loss_ib": 0.0011964059667661786, + "step": 2444 + }, + { + "epoch": 0.7031418506003307, + "grad_norm": 0.09280931204557419, + "learning_rate": 4.475482660743347e-05, + "loss": 0.8441, + "step": 2445 + }, + { + "ce_ib": 5.525257110595703, + "ce_orig": 1.2090442180633545, + "epoch": 0.7031418506003307, + "kl_loss": 0.05937904492020607, + "loss_ib": 0.001146316179074347, + "step": 2445 + }, + { + "ce_ib": 3.931814193725586, + "ce_orig": 0.7651978135108948, + "epoch": 0.7031418506003307, + "kl_loss": 0.06502370536327362, + "loss_ib": 0.0010434184223413467, + "step": 2445 + }, + { + "ce_ib": 1.5169264078140259, + "ce_orig": 0.2752300798892975, + "epoch": 0.7031418506003307, + "kl_loss": 0.14779391884803772, + "loss_ib": 0.0016296317335218191, + "step": 2445 + }, + { + "ce_ib": 2.642292022705078, + "ce_orig": 0.514089822769165, + "epoch": 0.7031418506003307, + "kl_loss": 0.051863446831703186, + "loss_ib": 0.0007828636444173753, + "step": 2445 + }, + { + "ce_ib": 4.49675989151001, + "ce_orig": 0.7476747035980225, + "epoch": 0.7034294341793084, + "kl_loss": 0.1546691358089447, + "loss_ib": 0.0019963672384619713, + "step": 2446 + }, + { + "ce_ib": 4.27454137802124, + "ce_orig": 0.8611135482788086, + "epoch": 0.7034294341793084, + "kl_loss": 0.06605615466833115, + "loss_ib": 0.0010880156187340617, + "step": 2446 + }, + { + "ce_ib": 3.624872922897339, + "ce_orig": 0.5186425447463989, + "epoch": 0.7034294341793084, + "kl_loss": 0.05422455817461014, + "loss_ib": 0.0009047328494489193, + "step": 2446 + }, + { + "ce_ib": 4.676347732543945, + "ce_orig": 0.8394465446472168, + "epoch": 0.7034294341793084, + "kl_loss": 0.059910498559474945, + "loss_ib": 0.001066739670932293, + "step": 2446 + }, + { + "ce_ib": 4.398399353027344, + "ce_orig": 0.611301600933075, + "epoch": 0.703717017758286, + "kl_loss": 0.07329360395669937, + "loss_ib": 0.0011727758683264256, + "step": 2447 + }, + { + "ce_ib": 3.658702850341797, + "ce_orig": 1.1008598804473877, + "epoch": 0.703717017758286, + "kl_loss": 0.05751960724592209, + "loss_ib": 0.0009410662460140884, + "step": 2447 + }, + { + "ce_ib": 3.632070779800415, + "ce_orig": 0.9607521891593933, + "epoch": 0.703717017758286, + "kl_loss": 0.061652906239032745, + "loss_ib": 0.000979736098088324, + "step": 2447 + }, + { + "ce_ib": 3.3523082733154297, + "ce_orig": 0.7329584360122681, + "epoch": 0.703717017758286, + "kl_loss": 0.04269731044769287, + "loss_ib": 0.0007622039411216974, + "step": 2447 + }, + { + "ce_ib": 3.75700306892395, + "ce_orig": 0.6924781203269958, + "epoch": 0.7040046013372636, + "kl_loss": 0.08325391262769699, + "loss_ib": 0.0012082393513992429, + "step": 2448 + }, + { + "ce_ib": 4.9653825759887695, + "ce_orig": 0.7734203934669495, + "epoch": 0.7040046013372636, + "kl_loss": 0.05607933551073074, + "loss_ib": 0.0010573315666988492, + "step": 2448 + }, + { + "ce_ib": 4.12546968460083, + "ce_orig": 0.9861577749252319, + "epoch": 0.7040046013372636, + "kl_loss": 0.054867543280124664, + "loss_ib": 0.000961222336627543, + "step": 2448 + }, + { + "ce_ib": 3.6605236530303955, + "ce_orig": 0.9320517182350159, + "epoch": 0.7040046013372636, + "kl_loss": 0.06553182005882263, + "loss_ib": 0.0010213705245405436, + "step": 2448 + }, + { + "ce_ib": 4.500462532043457, + "ce_orig": 0.9375230669975281, + "epoch": 0.7042921849162412, + "kl_loss": 0.03811377286911011, + "loss_ib": 0.0008311839192174375, + "step": 2449 + }, + { + "ce_ib": 3.9264121055603027, + "ce_orig": 0.8090680837631226, + "epoch": 0.7042921849162412, + "kl_loss": 0.08422176539897919, + "loss_ib": 0.0012348588788881898, + "step": 2449 + }, + { + "ce_ib": 4.418134689331055, + "ce_orig": 1.0470235347747803, + "epoch": 0.7042921849162412, + "kl_loss": 0.08948887884616852, + "loss_ib": 0.0013367022620514035, + "step": 2449 + }, + { + "ce_ib": 4.395148754119873, + "ce_orig": 0.9732404351234436, + "epoch": 0.7042921849162412, + "kl_loss": 0.06800919026136398, + "loss_ib": 0.0011196067789569497, + "step": 2449 + }, + { + "epoch": 0.704579768495219, + "grad_norm": 0.10638634860515594, + "learning_rate": 4.4731021320835386e-05, + "loss": 0.8367, + "step": 2450 + }, + { + "ce_ib": 2.0127692222595215, + "ce_orig": 0.3801999092102051, + "epoch": 0.704579768495219, + "kl_loss": 0.03445838391780853, + "loss_ib": 0.0005458607338368893, + "step": 2450 + }, + { + "ce_ib": 7.637469291687012, + "ce_orig": 1.9613330364227295, + "epoch": 0.704579768495219, + "kl_loss": 0.07507511228322983, + "loss_ib": 0.0015144979115575552, + "step": 2450 + }, + { + "ce_ib": 1.9336552619934082, + "ce_orig": 0.4746708571910858, + "epoch": 0.704579768495219, + "kl_loss": 0.04308144748210907, + "loss_ib": 0.0006241799565032125, + "step": 2450 + }, + { + "ce_ib": 3.5081188678741455, + "ce_orig": 0.729441225528717, + "epoch": 0.704579768495219, + "kl_loss": 0.06269555538892746, + "loss_ib": 0.0009777673985809088, + "step": 2450 + }, + { + "ce_ib": 4.486143589019775, + "ce_orig": 0.6143993735313416, + "epoch": 0.7048673520741966, + "kl_loss": 0.08442427217960358, + "loss_ib": 0.0012928571086376905, + "step": 2451 + }, + { + "ce_ib": 3.079157590866089, + "ce_orig": 0.6395124793052673, + "epoch": 0.7048673520741966, + "kl_loss": 0.038676343858242035, + "loss_ib": 0.000694679154548794, + "step": 2451 + }, + { + "ce_ib": 6.441850662231445, + "ce_orig": 1.5758235454559326, + "epoch": 0.7048673520741966, + "kl_loss": 0.07839981466531754, + "loss_ib": 0.0014281831681728363, + "step": 2451 + }, + { + "ce_ib": 2.7358453273773193, + "ce_orig": 0.6065284609794617, + "epoch": 0.7048673520741966, + "kl_loss": 0.03857909142971039, + "loss_ib": 0.0006593753932975233, + "step": 2451 + }, + { + "ce_ib": 4.946217060089111, + "ce_orig": 0.9583625197410583, + "epoch": 0.7051549356531742, + "kl_loss": 0.09457166492938995, + "loss_ib": 0.0014403383247554302, + "step": 2452 + }, + { + "ce_ib": 2.5050339698791504, + "ce_orig": 0.5582999587059021, + "epoch": 0.7051549356531742, + "kl_loss": 0.04148471727967262, + "loss_ib": 0.0006653505261056125, + "step": 2452 + }, + { + "ce_ib": 5.3092122077941895, + "ce_orig": 1.2030603885650635, + "epoch": 0.7051549356531742, + "kl_loss": 0.08235003799200058, + "loss_ib": 0.0013544216053560376, + "step": 2452 + }, + { + "ce_ib": 4.320234775543213, + "ce_orig": 0.6054648160934448, + "epoch": 0.7051549356531742, + "kl_loss": 0.09406498074531555, + "loss_ib": 0.0013726731995120645, + "step": 2452 + }, + { + "ce_ib": 6.725493907928467, + "ce_orig": 1.3805580139160156, + "epoch": 0.7054425192321518, + "kl_loss": 0.07330088317394257, + "loss_ib": 0.001405558199621737, + "step": 2453 + }, + { + "ce_ib": 4.833137035369873, + "ce_orig": 1.4005980491638184, + "epoch": 0.7054425192321518, + "kl_loss": 0.05038394406437874, + "loss_ib": 0.0009871531510725617, + "step": 2453 + }, + { + "ce_ib": 1.996078372001648, + "ce_orig": 0.3463900685310364, + "epoch": 0.7054425192321518, + "kl_loss": 0.15916860103607178, + "loss_ib": 0.0017912938492372632, + "step": 2453 + }, + { + "ce_ib": 6.175534248352051, + "ce_orig": 1.5757560729980469, + "epoch": 0.7054425192321518, + "kl_loss": 0.092034250497818, + "loss_ib": 0.0015378958778455853, + "step": 2453 + }, + { + "ce_ib": 4.30781364440918, + "ce_orig": 0.7356980443000793, + "epoch": 0.7057301028111295, + "kl_loss": 0.06711861491203308, + "loss_ib": 0.0011019675293937325, + "step": 2454 + }, + { + "ce_ib": 4.545855522155762, + "ce_orig": 1.1702476739883423, + "epoch": 0.7057301028111295, + "kl_loss": 0.062229156494140625, + "loss_ib": 0.0010768771171569824, + "step": 2454 + }, + { + "ce_ib": 4.3658952713012695, + "ce_orig": 0.8853712677955627, + "epoch": 0.7057301028111295, + "kl_loss": 0.05716421455144882, + "loss_ib": 0.001008231658488512, + "step": 2454 + }, + { + "ce_ib": 3.7477517127990723, + "ce_orig": 0.7165572643280029, + "epoch": 0.7057301028111295, + "kl_loss": 0.0748642086982727, + "loss_ib": 0.0011234171688556671, + "step": 2454 + }, + { + "epoch": 0.7060176863901071, + "grad_norm": 0.0874132439494133, + "learning_rate": 4.4707168497627286e-05, + "loss": 0.8593, + "step": 2455 + }, + { + "ce_ib": 3.3512728214263916, + "ce_orig": 0.615624725818634, + "epoch": 0.7060176863901071, + "kl_loss": 0.04761355370283127, + "loss_ib": 0.0008112627547234297, + "step": 2455 + }, + { + "ce_ib": 3.868187665939331, + "ce_orig": 0.8121145367622375, + "epoch": 0.7060176863901071, + "kl_loss": 0.07529988884925842, + "loss_ib": 0.0011398176429793239, + "step": 2455 + }, + { + "ce_ib": 2.2528507709503174, + "ce_orig": 0.6525576710700989, + "epoch": 0.7060176863901071, + "kl_loss": 0.07533807307481766, + "loss_ib": 0.0009786657756194472, + "step": 2455 + }, + { + "ce_ib": 5.847594738006592, + "ce_orig": 0.8312925696372986, + "epoch": 0.7060176863901071, + "kl_loss": 0.06994178146123886, + "loss_ib": 0.0012841772986575961, + "step": 2455 + }, + { + "ce_ib": 4.272310256958008, + "ce_orig": 0.9176694750785828, + "epoch": 0.7063052699690847, + "kl_loss": 0.08874399214982986, + "loss_ib": 0.0013146708952262998, + "step": 2456 + }, + { + "ce_ib": 4.431949615478516, + "ce_orig": 1.069342851638794, + "epoch": 0.7063052699690847, + "kl_loss": 0.058535147458314896, + "loss_ib": 0.0010285463649779558, + "step": 2456 + }, + { + "ce_ib": 5.0232768058776855, + "ce_orig": 0.937240481376648, + "epoch": 0.7063052699690847, + "kl_loss": 0.10510456562042236, + "loss_ib": 0.0015533732948824763, + "step": 2456 + }, + { + "ce_ib": 2.627418041229248, + "ce_orig": 0.714735746383667, + "epoch": 0.7063052699690847, + "kl_loss": 0.036921847611665726, + "loss_ib": 0.0006319602252915502, + "step": 2456 + }, + { + "ce_ib": 3.095309019088745, + "ce_orig": 0.7008824348449707, + "epoch": 0.7065928535480624, + "kl_loss": 0.04088623449206352, + "loss_ib": 0.0007183932466432452, + "step": 2457 + }, + { + "ce_ib": 4.108094692230225, + "ce_orig": 0.738994836807251, + "epoch": 0.7065928535480624, + "kl_loss": 0.07390367984771729, + "loss_ib": 0.0011498462408781052, + "step": 2457 + }, + { + "ce_ib": 2.5761401653289795, + "ce_orig": 0.4607084393501282, + "epoch": 0.7065928535480624, + "kl_loss": 0.04441787675023079, + "loss_ib": 0.0007017927709966898, + "step": 2457 + }, + { + "ce_ib": 4.593289375305176, + "ce_orig": 0.5390825867652893, + "epoch": 0.7065928535480624, + "kl_loss": 0.08886592835187912, + "loss_ib": 0.0013479882618412375, + "step": 2457 + }, + { + "ce_ib": 3.5470693111419678, + "ce_orig": 0.7110878229141235, + "epoch": 0.7068804371270401, + "kl_loss": 0.06283411383628845, + "loss_ib": 0.0009830481139943004, + "step": 2458 + }, + { + "ce_ib": 3.234903573989868, + "ce_orig": 0.5314695239067078, + "epoch": 0.7068804371270401, + "kl_loss": 0.04659136384725571, + "loss_ib": 0.0007894039736129344, + "step": 2458 + }, + { + "ce_ib": 3.864187240600586, + "ce_orig": 0.6532856822013855, + "epoch": 0.7068804371270401, + "kl_loss": 0.0789119079709053, + "loss_ib": 0.0011755377054214478, + "step": 2458 + }, + { + "ce_ib": 2.371051788330078, + "ce_orig": 0.5439738631248474, + "epoch": 0.7068804371270401, + "kl_loss": 0.08973756432533264, + "loss_ib": 0.0011344808153808117, + "step": 2458 + }, + { + "ce_ib": 3.240872859954834, + "ce_orig": 0.7896727919578552, + "epoch": 0.7071680207060177, + "kl_loss": 0.08087287098169327, + "loss_ib": 0.0011328159598633647, + "step": 2459 + }, + { + "ce_ib": 4.276877403259277, + "ce_orig": 0.9298912286758423, + "epoch": 0.7071680207060177, + "kl_loss": 0.0774746686220169, + "loss_ib": 0.0012024344177916646, + "step": 2459 + }, + { + "ce_ib": 5.647823333740234, + "ce_orig": 1.349510908126831, + "epoch": 0.7071680207060177, + "kl_loss": 0.06119902431964874, + "loss_ib": 0.001176772522740066, + "step": 2459 + }, + { + "ce_ib": 2.391134023666382, + "ce_orig": 0.7633427977561951, + "epoch": 0.7071680207060177, + "kl_loss": 0.037791069597005844, + "loss_ib": 0.0006170240812934935, + "step": 2459 + }, + { + "epoch": 0.7074556042849953, + "grad_norm": 0.09779242426156998, + "learning_rate": 4.4683268195276126e-05, + "loss": 0.8721, + "step": 2460 + }, + { + "ce_ib": 3.3721251487731934, + "ce_orig": 0.7034766674041748, + "epoch": 0.7074556042849953, + "kl_loss": 0.24393221735954285, + "loss_ib": 0.002776534529402852, + "step": 2460 + }, + { + "ce_ib": 4.278869152069092, + "ce_orig": 0.8220384120941162, + "epoch": 0.7074556042849953, + "kl_loss": 0.0925118625164032, + "loss_ib": 0.001353005412966013, + "step": 2460 + }, + { + "ce_ib": 3.4454662799835205, + "ce_orig": 0.552661120891571, + "epoch": 0.7074556042849953, + "kl_loss": 0.22607477009296417, + "loss_ib": 0.002605294343084097, + "step": 2460 + }, + { + "ce_ib": 3.8634629249572754, + "ce_orig": 0.9206970930099487, + "epoch": 0.7074556042849953, + "kl_loss": 0.04420977830886841, + "loss_ib": 0.0008284440264105797, + "step": 2460 + }, + { + "ce_ib": 2.679903030395508, + "ce_orig": 0.6922663450241089, + "epoch": 0.707743187863973, + "kl_loss": 0.0314762108027935, + "loss_ib": 0.0005827524000778794, + "step": 2461 + }, + { + "ce_ib": 2.389035224914551, + "ce_orig": 0.43596401810646057, + "epoch": 0.707743187863973, + "kl_loss": 0.04024985432624817, + "loss_ib": 0.0006414020899683237, + "step": 2461 + }, + { + "ce_ib": 3.0289740562438965, + "ce_orig": 0.5766894817352295, + "epoch": 0.707743187863973, + "kl_loss": 0.09254838526248932, + "loss_ib": 0.0012283811811357737, + "step": 2461 + }, + { + "ce_ib": 5.578298568725586, + "ce_orig": 1.064693570137024, + "epoch": 0.707743187863973, + "kl_loss": 0.08779986202716827, + "loss_ib": 0.0014358285116031766, + "step": 2461 + }, + { + "ce_ib": 4.742644786834717, + "ce_orig": 0.48527196049690247, + "epoch": 0.7080307714429506, + "kl_loss": 0.11261700093746185, + "loss_ib": 0.0016004344215616584, + "step": 2462 + }, + { + "ce_ib": 5.045293807983398, + "ce_orig": 0.6543939709663391, + "epoch": 0.7080307714429506, + "kl_loss": 0.09011992812156677, + "loss_ib": 0.0014057286316528916, + "step": 2462 + }, + { + "ce_ib": 4.083078384399414, + "ce_orig": 0.8978660106658936, + "epoch": 0.7080307714429506, + "kl_loss": 0.04814693331718445, + "loss_ib": 0.0008897771476767957, + "step": 2462 + }, + { + "ce_ib": 3.297593832015991, + "ce_orig": 0.6179133653640747, + "epoch": 0.7080307714429506, + "kl_loss": 0.09365937113761902, + "loss_ib": 0.001266353065147996, + "step": 2462 + }, + { + "ce_ib": 4.297399044036865, + "ce_orig": 0.9301415681838989, + "epoch": 0.7083183550219283, + "kl_loss": 0.05947175249457359, + "loss_ib": 0.0010244573932141066, + "step": 2463 + }, + { + "ce_ib": 3.796590805053711, + "ce_orig": 0.45465368032455444, + "epoch": 0.7083183550219283, + "kl_loss": 0.20761260390281677, + "loss_ib": 0.002455785172060132, + "step": 2463 + }, + { + "ce_ib": 2.3017709255218506, + "ce_orig": 0.251491904258728, + "epoch": 0.7083183550219283, + "kl_loss": 0.05885554477572441, + "loss_ib": 0.000818732485640794, + "step": 2463 + }, + { + "ce_ib": 7.2562971115112305, + "ce_orig": 1.6845715045928955, + "epoch": 0.7083183550219283, + "kl_loss": 0.10674712061882019, + "loss_ib": 0.0017931008478626609, + "step": 2463 + }, + { + "ce_ib": 3.012697458267212, + "ce_orig": 0.8551570773124695, + "epoch": 0.7086059386009059, + "kl_loss": 0.09420502185821533, + "loss_ib": 0.0012433199444785714, + "step": 2464 + }, + { + "ce_ib": 4.501144886016846, + "ce_orig": 0.8444926738739014, + "epoch": 0.7086059386009059, + "kl_loss": 0.07693564891815186, + "loss_ib": 0.001219470868818462, + "step": 2464 + }, + { + "ce_ib": 4.166062355041504, + "ce_orig": 0.6024538278579712, + "epoch": 0.7086059386009059, + "kl_loss": 0.07325442135334015, + "loss_ib": 0.0011491504264995456, + "step": 2464 + }, + { + "ce_ib": 3.5494203567504883, + "ce_orig": 0.7270805835723877, + "epoch": 0.7086059386009059, + "kl_loss": 0.049862563610076904, + "loss_ib": 0.0008535676752217114, + "step": 2464 + }, + { + "epoch": 0.7088935221798836, + "grad_norm": 0.08930348604917526, + "learning_rate": 4.4659320471363314e-05, + "loss": 0.749, + "step": 2465 + }, + { + "ce_ib": 3.368718385696411, + "ce_orig": 0.7352393865585327, + "epoch": 0.7088935221798836, + "kl_loss": 0.06908942759037018, + "loss_ib": 0.0010277660330757499, + "step": 2465 + }, + { + "ce_ib": 2.1695353984832764, + "ce_orig": 0.41568851470947266, + "epoch": 0.7088935221798836, + "kl_loss": 0.12951114773750305, + "loss_ib": 0.001512064947746694, + "step": 2465 + }, + { + "ce_ib": 5.142531871795654, + "ce_orig": 0.9847705960273743, + "epoch": 0.7088935221798836, + "kl_loss": 0.07134226709604263, + "loss_ib": 0.0012276758207008243, + "step": 2465 + }, + { + "ce_ib": 2.8645315170288086, + "ce_orig": 0.3198161721229553, + "epoch": 0.7088935221798836, + "kl_loss": 0.09043452888727188, + "loss_ib": 0.001190798357129097, + "step": 2465 + }, + { + "ce_ib": 4.104280948638916, + "ce_orig": 0.6447741985321045, + "epoch": 0.7091811057588612, + "kl_loss": 0.06662202626466751, + "loss_ib": 0.0010766483610495925, + "step": 2466 + }, + { + "ce_ib": 4.588134765625, + "ce_orig": 0.956866979598999, + "epoch": 0.7091811057588612, + "kl_loss": 0.10753054171800613, + "loss_ib": 0.0015341188991442323, + "step": 2466 + }, + { + "ce_ib": 4.339702129364014, + "ce_orig": 0.7645941972732544, + "epoch": 0.7091811057588612, + "kl_loss": 0.06682245433330536, + "loss_ib": 0.0011021947721019387, + "step": 2466 + }, + { + "ce_ib": 5.745256423950195, + "ce_orig": 0.3915524482727051, + "epoch": 0.7091811057588612, + "kl_loss": 0.14280155301094055, + "loss_ib": 0.0020025409758090973, + "step": 2466 + }, + { + "ce_ib": 3.060087203979492, + "ce_orig": 0.6042104363441467, + "epoch": 0.7094686893378388, + "kl_loss": 0.03409036993980408, + "loss_ib": 0.0006469124346040189, + "step": 2467 + }, + { + "ce_ib": 3.8071043491363525, + "ce_orig": 0.7688999176025391, + "epoch": 0.7094686893378388, + "kl_loss": 0.07678074389696121, + "loss_ib": 0.001148517825640738, + "step": 2467 + }, + { + "ce_ib": 2.3900249004364014, + "ce_orig": 0.5517880320549011, + "epoch": 0.7094686893378388, + "kl_loss": 0.028091009706258774, + "loss_ib": 0.0005199125735089183, + "step": 2467 + }, + { + "ce_ib": 4.335521221160889, + "ce_orig": 0.4041826128959656, + "epoch": 0.7094686893378388, + "kl_loss": 0.06611642241477966, + "loss_ib": 0.0010947163682430983, + "step": 2467 + }, + { + "ce_ib": 3.1154661178588867, + "ce_orig": 0.6135067939758301, + "epoch": 0.7097562729168164, + "kl_loss": 0.07161054015159607, + "loss_ib": 0.0010276519460603595, + "step": 2468 + }, + { + "ce_ib": 4.963098526000977, + "ce_orig": 0.853329598903656, + "epoch": 0.7097562729168164, + "kl_loss": 0.11775326728820801, + "loss_ib": 0.0016738424310460687, + "step": 2468 + }, + { + "ce_ib": 3.079561233520508, + "ce_orig": 0.6469029188156128, + "epoch": 0.7097562729168164, + "kl_loss": 0.06164021044969559, + "loss_ib": 0.0009243582026101649, + "step": 2468 + }, + { + "ce_ib": 4.954494476318359, + "ce_orig": 1.0484414100646973, + "epoch": 0.7097562729168164, + "kl_loss": 0.05881786718964577, + "loss_ib": 0.0010836280416697264, + "step": 2468 + }, + { + "ce_ib": 3.8130991458892822, + "ce_orig": 0.7735584378242493, + "epoch": 0.710043856495794, + "kl_loss": 0.06564754247665405, + "loss_ib": 0.001037785317748785, + "step": 2469 + }, + { + "ce_ib": 4.3138580322265625, + "ce_orig": 0.8114487528800964, + "epoch": 0.710043856495794, + "kl_loss": 0.04754916578531265, + "loss_ib": 0.0009068773942999542, + "step": 2469 + }, + { + "ce_ib": 3.942394495010376, + "ce_orig": 0.6763074994087219, + "epoch": 0.710043856495794, + "kl_loss": 0.10736332833766937, + "loss_ib": 0.0014678726438432932, + "step": 2469 + }, + { + "ce_ib": 2.3449959754943848, + "ce_orig": 0.4732280671596527, + "epoch": 0.710043856495794, + "kl_loss": 0.052730120718479156, + "loss_ib": 0.0007618007366545498, + "step": 2469 + }, + { + "epoch": 0.7103314400747718, + "grad_norm": 0.08481067419052124, + "learning_rate": 4.463532538358446e-05, + "loss": 0.789, + "step": 2470 + }, + { + "ce_ib": 3.3307621479034424, + "ce_orig": 0.6510723829269409, + "epoch": 0.7103314400747718, + "kl_loss": 0.05923522636294365, + "loss_ib": 0.0009254284086637199, + "step": 2470 + }, + { + "ce_ib": 4.532038688659668, + "ce_orig": 1.131734848022461, + "epoch": 0.7103314400747718, + "kl_loss": 0.07520109415054321, + "loss_ib": 0.0012052147649228573, + "step": 2470 + }, + { + "ce_ib": 6.066849708557129, + "ce_orig": 0.8016252517700195, + "epoch": 0.7103314400747718, + "kl_loss": 0.07603708654642105, + "loss_ib": 0.0013670556945726275, + "step": 2470 + }, + { + "ce_ib": 6.1097564697265625, + "ce_orig": 1.0713250637054443, + "epoch": 0.7103314400747718, + "kl_loss": 0.06564067304134369, + "loss_ib": 0.0012673822930082679, + "step": 2470 + }, + { + "ce_ib": 4.3541107177734375, + "ce_orig": 0.8734663724899292, + "epoch": 0.7106190236537494, + "kl_loss": 0.06653627753257751, + "loss_ib": 0.0011007738066837192, + "step": 2471 + }, + { + "ce_ib": 4.018720626831055, + "ce_orig": 0.8277319073677063, + "epoch": 0.7106190236537494, + "kl_loss": 0.07808080315589905, + "loss_ib": 0.0011826801346614957, + "step": 2471 + }, + { + "ce_ib": 4.0927839279174805, + "ce_orig": 0.6928671002388, + "epoch": 0.7106190236537494, + "kl_loss": 0.05716496706008911, + "loss_ib": 0.00098092807456851, + "step": 2471 + }, + { + "ce_ib": 3.102800130844116, + "ce_orig": 0.6153552532196045, + "epoch": 0.7106190236537494, + "kl_loss": 0.07717233151197433, + "loss_ib": 0.0010820033494383097, + "step": 2471 + }, + { + "ce_ib": 7.498143672943115, + "ce_orig": 1.7522863149642944, + "epoch": 0.710906607232727, + "kl_loss": 0.10836684703826904, + "loss_ib": 0.0018334827618673444, + "step": 2472 + }, + { + "ce_ib": 2.971794843673706, + "ce_orig": 0.7886481285095215, + "epoch": 0.710906607232727, + "kl_loss": 0.05858233943581581, + "loss_ib": 0.0008830028818920255, + "step": 2472 + }, + { + "ce_ib": 5.681220531463623, + "ce_orig": 0.886928915977478, + "epoch": 0.710906607232727, + "kl_loss": 0.09084787964820862, + "loss_ib": 0.0014766007661819458, + "step": 2472 + }, + { + "ce_ib": 6.054305076599121, + "ce_orig": 0.8948971033096313, + "epoch": 0.710906607232727, + "kl_loss": 0.0784926563501358, + "loss_ib": 0.0013903570361435413, + "step": 2472 + }, + { + "ce_ib": 3.778740406036377, + "ce_orig": 0.5217180848121643, + "epoch": 0.7111941908117047, + "kl_loss": 0.06950059533119202, + "loss_ib": 0.0010728799970820546, + "step": 2473 + }, + { + "ce_ib": 2.48878812789917, + "ce_orig": 0.6334192156791687, + "epoch": 0.7111941908117047, + "kl_loss": 0.04890771955251694, + "loss_ib": 0.0007379560265690088, + "step": 2473 + }, + { + "ce_ib": 1.7395235300064087, + "ce_orig": 0.33303049206733704, + "epoch": 0.7111941908117047, + "kl_loss": 0.1624894142150879, + "loss_ib": 0.0017988464096561074, + "step": 2473 + }, + { + "ce_ib": 3.54797101020813, + "ce_orig": 0.7278929352760315, + "epoch": 0.7111941908117047, + "kl_loss": 0.03628005087375641, + "loss_ib": 0.000717597606126219, + "step": 2473 + }, + { + "ce_ib": 7.8455119132995605, + "ce_orig": 1.0026957988739014, + "epoch": 0.7114817743906823, + "kl_loss": 0.08762629330158234, + "loss_ib": 0.001660814043134451, + "step": 2474 + }, + { + "ce_ib": 3.5084707736968994, + "ce_orig": 0.804724931716919, + "epoch": 0.7114817743906823, + "kl_loss": 0.061394304037094116, + "loss_ib": 0.000964790116995573, + "step": 2474 + }, + { + "ce_ib": 2.405052423477173, + "ce_orig": 0.4542389512062073, + "epoch": 0.7114817743906823, + "kl_loss": 0.0699644461274147, + "loss_ib": 0.0009401497081853449, + "step": 2474 + }, + { + "ce_ib": 2.2263739109039307, + "ce_orig": 0.3748561441898346, + "epoch": 0.7114817743906823, + "kl_loss": 0.06206116825342178, + "loss_ib": 0.0008432490867562592, + "step": 2474 + }, + { + "epoch": 0.7117693579696599, + "grad_norm": 0.08293458074331284, + "learning_rate": 4.461128298974929e-05, + "loss": 0.8738, + "step": 2475 + }, + { + "ce_ib": 4.252569198608398, + "ce_orig": 0.9419707655906677, + "epoch": 0.7117693579696599, + "kl_loss": 0.08116728067398071, + "loss_ib": 0.0012369296746328473, + "step": 2475 + }, + { + "ce_ib": 4.268518924713135, + "ce_orig": 1.0212091207504272, + "epoch": 0.7117693579696599, + "kl_loss": 0.058205798268318176, + "loss_ib": 0.0010089098941534758, + "step": 2475 + }, + { + "ce_ib": 6.135821342468262, + "ce_orig": 0.5580954551696777, + "epoch": 0.7117693579696599, + "kl_loss": 0.25101906061172485, + "loss_ib": 0.0031237725634127855, + "step": 2475 + }, + { + "ce_ib": 3.448345422744751, + "ce_orig": 0.8414466381072998, + "epoch": 0.7117693579696599, + "kl_loss": 0.07144638895988464, + "loss_ib": 0.0010592984035611153, + "step": 2475 + }, + { + "ce_ib": 3.3013083934783936, + "ce_orig": 0.5287490487098694, + "epoch": 0.7120569415486375, + "kl_loss": 0.09331430494785309, + "loss_ib": 0.0012632738798856735, + "step": 2476 + }, + { + "ce_ib": 2.402777671813965, + "ce_orig": 0.5421953201293945, + "epoch": 0.7120569415486375, + "kl_loss": 0.05409309267997742, + "loss_ib": 0.0007812086842022836, + "step": 2476 + }, + { + "ce_ib": 2.393768072128296, + "ce_orig": 0.2894938588142395, + "epoch": 0.7120569415486375, + "kl_loss": 0.058476440608501434, + "loss_ib": 0.0008241411997005343, + "step": 2476 + }, + { + "ce_ib": 5.710258483886719, + "ce_orig": 1.2866765260696411, + "epoch": 0.7120569415486375, + "kl_loss": 0.05450168624520302, + "loss_ib": 0.001116042723879218, + "step": 2476 + }, + { + "ce_ib": 3.475041151046753, + "ce_orig": 0.8161251544952393, + "epoch": 0.7123445251276153, + "kl_loss": 0.06651251018047333, + "loss_ib": 0.0010126291308552027, + "step": 2477 + }, + { + "ce_ib": 3.292191505432129, + "ce_orig": 0.6881237626075745, + "epoch": 0.7123445251276153, + "kl_loss": 0.06563001871109009, + "loss_ib": 0.0009855192620307207, + "step": 2477 + }, + { + "ce_ib": 2.4911279678344727, + "ce_orig": 0.4633689820766449, + "epoch": 0.7123445251276153, + "kl_loss": 0.044973358511924744, + "loss_ib": 0.000698846357408911, + "step": 2477 + }, + { + "ce_ib": 4.288124084472656, + "ce_orig": 0.6266053915023804, + "epoch": 0.7123445251276153, + "kl_loss": 0.04644102603197098, + "loss_ib": 0.0008932226919569075, + "step": 2477 + }, + { + "ce_ib": 3.778156042098999, + "ce_orig": 0.682131290435791, + "epoch": 0.7126321087065929, + "kl_loss": 0.07371369004249573, + "loss_ib": 0.0011149524943903089, + "step": 2478 + }, + { + "ce_ib": 4.108396530151367, + "ce_orig": 0.6130662560462952, + "epoch": 0.7126321087065929, + "kl_loss": 0.07182661443948746, + "loss_ib": 0.0011291058035567403, + "step": 2478 + }, + { + "ce_ib": 4.586065292358398, + "ce_orig": 1.2393933534622192, + "epoch": 0.7126321087065929, + "kl_loss": 0.07316775619983673, + "loss_ib": 0.0011902840342372656, + "step": 2478 + }, + { + "ce_ib": 5.991560935974121, + "ce_orig": 1.5675946474075317, + "epoch": 0.7126321087065929, + "kl_loss": 0.0760963037610054, + "loss_ib": 0.0013601190876215696, + "step": 2478 + }, + { + "ce_ib": 3.492835283279419, + "ce_orig": 0.679515540599823, + "epoch": 0.7129196922855705, + "kl_loss": 0.06356733292341232, + "loss_ib": 0.0009849568596109748, + "step": 2479 + }, + { + "ce_ib": 6.101678371429443, + "ce_orig": 1.156787395477295, + "epoch": 0.7129196922855705, + "kl_loss": 0.05128641426563263, + "loss_ib": 0.0011230319505557418, + "step": 2479 + }, + { + "ce_ib": 2.229016065597534, + "ce_orig": 0.652623176574707, + "epoch": 0.7129196922855705, + "kl_loss": 0.0381455197930336, + "loss_ib": 0.0006043568137101829, + "step": 2479 + }, + { + "ce_ib": 6.164759159088135, + "ce_orig": 1.324988842010498, + "epoch": 0.7129196922855705, + "kl_loss": 0.0898704081773758, + "loss_ib": 0.0015151799889281392, + "step": 2479 + }, + { + "epoch": 0.7132072758645481, + "grad_norm": 0.09627386927604675, + "learning_rate": 4.458719334778153e-05, + "loss": 0.8082, + "step": 2480 + }, + { + "ce_ib": 3.0716376304626465, + "ce_orig": 0.7416693568229675, + "epoch": 0.7132072758645481, + "kl_loss": 0.06277188658714294, + "loss_ib": 0.0009348826133646071, + "step": 2480 + }, + { + "ce_ib": 3.200453281402588, + "ce_orig": 0.8663844466209412, + "epoch": 0.7132072758645481, + "kl_loss": 0.055416494607925415, + "loss_ib": 0.0008742102654650807, + "step": 2480 + }, + { + "ce_ib": 4.581921577453613, + "ce_orig": 0.9108496308326721, + "epoch": 0.7132072758645481, + "kl_loss": 0.03574289008975029, + "loss_ib": 0.0008156209951266646, + "step": 2480 + }, + { + "ce_ib": 4.348526477813721, + "ce_orig": 1.0028400421142578, + "epoch": 0.7132072758645481, + "kl_loss": 0.07629738748073578, + "loss_ib": 0.0011978265829384327, + "step": 2480 + }, + { + "ce_ib": 2.399322748184204, + "ce_orig": 0.29405975341796875, + "epoch": 0.7134948594435258, + "kl_loss": 0.05986557900905609, + "loss_ib": 0.0008385879918932915, + "step": 2481 + }, + { + "ce_ib": 6.372483253479004, + "ce_orig": 1.1779887676239014, + "epoch": 0.7134948594435258, + "kl_loss": 0.0593823567032814, + "loss_ib": 0.0012310718884691596, + "step": 2481 + }, + { + "ce_ib": 3.9093523025512695, + "ce_orig": 0.932524561882019, + "epoch": 0.7134948594435258, + "kl_loss": 0.06685782968997955, + "loss_ib": 0.0010595135390758514, + "step": 2481 + }, + { + "ce_ib": 6.906336784362793, + "ce_orig": 1.561110019683838, + "epoch": 0.7134948594435258, + "kl_loss": 0.08958755433559418, + "loss_ib": 0.001586509170010686, + "step": 2481 + }, + { + "ce_ib": 6.141745090484619, + "ce_orig": 1.069779634475708, + "epoch": 0.7137824430225034, + "kl_loss": 0.092012420296669, + "loss_ib": 0.001534298644401133, + "step": 2482 + }, + { + "ce_ib": 3.5181424617767334, + "ce_orig": 0.6814113259315491, + "epoch": 0.7137824430225034, + "kl_loss": 0.051980867981910706, + "loss_ib": 0.0008716229349374771, + "step": 2482 + }, + { + "ce_ib": 6.612294673919678, + "ce_orig": 1.6019073724746704, + "epoch": 0.7137824430225034, + "kl_loss": 0.0852557048201561, + "loss_ib": 0.001513786381110549, + "step": 2482 + }, + { + "ce_ib": 3.61376690864563, + "ce_orig": 1.1580986976623535, + "epoch": 0.7137824430225034, + "kl_loss": 0.04581739008426666, + "loss_ib": 0.0008195505943149328, + "step": 2482 + }, + { + "ce_ib": 2.148751974105835, + "ce_orig": 0.5414494276046753, + "epoch": 0.714070026601481, + "kl_loss": 0.058729540556669235, + "loss_ib": 0.0008021706016734242, + "step": 2483 + }, + { + "ce_ib": 3.2508444786071777, + "ce_orig": 0.39077746868133545, + "epoch": 0.714070026601481, + "kl_loss": 0.09127286821603775, + "loss_ib": 0.00123781303409487, + "step": 2483 + }, + { + "ce_ib": 3.174884080886841, + "ce_orig": 0.5687984824180603, + "epoch": 0.714070026601481, + "kl_loss": 0.05667026713490486, + "loss_ib": 0.0008841910748742521, + "step": 2483 + }, + { + "ce_ib": 5.279588222503662, + "ce_orig": 1.3416664600372314, + "epoch": 0.714070026601481, + "kl_loss": 0.09653843939304352, + "loss_ib": 0.0014933430356904864, + "step": 2483 + }, + { + "ce_ib": 3.515720844268799, + "ce_orig": 0.7290142774581909, + "epoch": 0.7143576101804587, + "kl_loss": 0.07293741405010223, + "loss_ib": 0.0010809461819007993, + "step": 2484 + }, + { + "ce_ib": 3.0716285705566406, + "ce_orig": 0.8131110072135925, + "epoch": 0.7143576101804587, + "kl_loss": 0.03295387327671051, + "loss_ib": 0.0006367015885189176, + "step": 2484 + }, + { + "ce_ib": 4.619172096252441, + "ce_orig": 0.7437143325805664, + "epoch": 0.7143576101804587, + "kl_loss": 0.05647268518805504, + "loss_ib": 0.0010266440222039819, + "step": 2484 + }, + { + "ce_ib": 2.626728057861328, + "ce_orig": 0.22839970886707306, + "epoch": 0.7143576101804587, + "kl_loss": 0.07185496389865875, + "loss_ib": 0.0009812224889174104, + "step": 2484 + }, + { + "epoch": 0.7146451937594364, + "grad_norm": 0.1048688217997551, + "learning_rate": 4.4563056515718714e-05, + "loss": 0.896, + "step": 2485 + }, + { + "ce_ib": 4.7008748054504395, + "ce_orig": 0.9941673278808594, + "epoch": 0.7146451937594364, + "kl_loss": 0.09823766350746155, + "loss_ib": 0.0014524641446769238, + "step": 2485 + }, + { + "ce_ib": 4.28904914855957, + "ce_orig": 0.9701245427131653, + "epoch": 0.7146451937594364, + "kl_loss": 0.07147714495658875, + "loss_ib": 0.0011436763452365994, + "step": 2485 + }, + { + "ce_ib": 2.7858877182006836, + "ce_orig": 0.7195507884025574, + "epoch": 0.7146451937594364, + "kl_loss": 0.05879398435354233, + "loss_ib": 0.0008665286004543304, + "step": 2485 + }, + { + "ce_ib": 3.2580819129943848, + "ce_orig": 0.9430491328239441, + "epoch": 0.7146451937594364, + "kl_loss": 0.06230747327208519, + "loss_ib": 0.000948882894590497, + "step": 2485 + }, + { + "ce_ib": 3.8087430000305176, + "ce_orig": 0.9125425815582275, + "epoch": 0.714932777338414, + "kl_loss": 0.0461852140724659, + "loss_ib": 0.0008427263819612563, + "step": 2486 + }, + { + "ce_ib": 5.257542610168457, + "ce_orig": 1.1523751020431519, + "epoch": 0.714932777338414, + "kl_loss": 0.05144429951906204, + "loss_ib": 0.0010401972103863955, + "step": 2486 + }, + { + "ce_ib": 4.400386333465576, + "ce_orig": 0.9992987513542175, + "epoch": 0.714932777338414, + "kl_loss": 0.1557035744190216, + "loss_ib": 0.001997074345126748, + "step": 2486 + }, + { + "ce_ib": 7.195793628692627, + "ce_orig": 1.6515491008758545, + "epoch": 0.714932777338414, + "kl_loss": 0.07862049341201782, + "loss_ib": 0.001505784341134131, + "step": 2486 + }, + { + "ce_ib": 3.5827012062072754, + "ce_orig": 0.7072409987449646, + "epoch": 0.7152203609173916, + "kl_loss": 0.14137421548366547, + "loss_ib": 0.0017720122123137116, + "step": 2487 + }, + { + "ce_ib": 6.60667085647583, + "ce_orig": 1.4756971597671509, + "epoch": 0.7152203609173916, + "kl_loss": 0.052493758499622345, + "loss_ib": 0.0011856046039611101, + "step": 2487 + }, + { + "ce_ib": 3.918893337249756, + "ce_orig": 1.217162013053894, + "epoch": 0.7152203609173916, + "kl_loss": 0.04138485714793205, + "loss_ib": 0.0008057378581725061, + "step": 2487 + }, + { + "ce_ib": 2.8760488033294678, + "ce_orig": 0.5402053594589233, + "epoch": 0.7152203609173916, + "kl_loss": 0.054024435579776764, + "loss_ib": 0.000827849202323705, + "step": 2487 + }, + { + "ce_ib": 6.882384300231934, + "ce_orig": 1.6752797365188599, + "epoch": 0.7155079444963692, + "kl_loss": 0.07988215237855911, + "loss_ib": 0.001487059867940843, + "step": 2488 + }, + { + "ce_ib": 5.793782711029053, + "ce_orig": 0.80655437707901, + "epoch": 0.7155079444963692, + "kl_loss": 0.11155011504888535, + "loss_ib": 0.0016948793781921268, + "step": 2488 + }, + { + "ce_ib": 3.6792287826538086, + "ce_orig": 0.8342434763908386, + "epoch": 0.7155079444963692, + "kl_loss": 0.06972579658031464, + "loss_ib": 0.0010651807533577085, + "step": 2488 + }, + { + "ce_ib": 4.46274471282959, + "ce_orig": 0.8047752976417542, + "epoch": 0.7155079444963692, + "kl_loss": 0.0945175439119339, + "loss_ib": 0.0013914498267695308, + "step": 2488 + }, + { + "ce_ib": 3.6118006706237793, + "ce_orig": 0.7247437238693237, + "epoch": 0.7157955280753469, + "kl_loss": 0.061056625097990036, + "loss_ib": 0.0009717462817206979, + "step": 2489 + }, + { + "ce_ib": 6.222522735595703, + "ce_orig": 0.9197909235954285, + "epoch": 0.7157955280753469, + "kl_loss": 0.08779767155647278, + "loss_ib": 0.0015002288855612278, + "step": 2489 + }, + { + "ce_ib": 4.040431499481201, + "ce_orig": 0.9533820748329163, + "epoch": 0.7157955280753469, + "kl_loss": 0.15640714764595032, + "loss_ib": 0.001968114636838436, + "step": 2489 + }, + { + "ce_ib": 3.4571549892425537, + "ce_orig": 0.5709390640258789, + "epoch": 0.7157955280753469, + "kl_loss": 0.06064267084002495, + "loss_ib": 0.0009521421743556857, + "step": 2489 + }, + { + "epoch": 0.7160831116543246, + "grad_norm": 0.12143053114414215, + "learning_rate": 4.453887255171206e-05, + "loss": 0.8811, + "step": 2490 + }, + { + "ce_ib": 5.762470722198486, + "ce_orig": 1.1445187330245972, + "epoch": 0.7160831116543246, + "kl_loss": 0.07524527609348297, + "loss_ib": 0.00132869987282902, + "step": 2490 + }, + { + "ce_ib": 5.9828596115112305, + "ce_orig": 1.5162875652313232, + "epoch": 0.7160831116543246, + "kl_loss": 0.07361647486686707, + "loss_ib": 0.0013344506733119488, + "step": 2490 + }, + { + "ce_ib": 4.002218723297119, + "ce_orig": 0.6994504332542419, + "epoch": 0.7160831116543246, + "kl_loss": 0.07100704312324524, + "loss_ib": 0.0011102922726422548, + "step": 2490 + }, + { + "ce_ib": 3.184886932373047, + "ce_orig": 0.5948696136474609, + "epoch": 0.7160831116543246, + "kl_loss": 0.06929668039083481, + "loss_ib": 0.0010114554315805435, + "step": 2490 + }, + { + "ce_ib": 4.7134833335876465, + "ce_orig": 0.8486917614936829, + "epoch": 0.7163706952333022, + "kl_loss": 0.05780864134430885, + "loss_ib": 0.001049434649758041, + "step": 2491 + }, + { + "ce_ib": 2.7392807006835938, + "ce_orig": 0.38656774163246155, + "epoch": 0.7163706952333022, + "kl_loss": 0.08510546386241913, + "loss_ib": 0.001124982605688274, + "step": 2491 + }, + { + "ce_ib": 4.215832710266113, + "ce_orig": 0.8222890496253967, + "epoch": 0.7163706952333022, + "kl_loss": 0.0642717182636261, + "loss_ib": 0.0010643004206940532, + "step": 2491 + }, + { + "ce_ib": 3.6374127864837646, + "ce_orig": 0.6957947015762329, + "epoch": 0.7163706952333022, + "kl_loss": 0.11818782985210419, + "loss_ib": 0.0015456194523721933, + "step": 2491 + }, + { + "ce_ib": 2.8301069736480713, + "ce_orig": 0.5624188184738159, + "epoch": 0.7166582788122798, + "kl_loss": 0.059430599212646484, + "loss_ib": 0.0008773166337050498, + "step": 2492 + }, + { + "ce_ib": 4.840952396392822, + "ce_orig": 1.1258659362792969, + "epoch": 0.7166582788122798, + "kl_loss": 0.07017441093921661, + "loss_ib": 0.0011858392972499132, + "step": 2492 + }, + { + "ce_ib": 3.5279486179351807, + "ce_orig": 0.7238861322402954, + "epoch": 0.7166582788122798, + "kl_loss": 0.06752358376979828, + "loss_ib": 0.0010280306451022625, + "step": 2492 + }, + { + "ce_ib": 6.354328632354736, + "ce_orig": 1.4426466226577759, + "epoch": 0.7166582788122798, + "kl_loss": 0.0845097154378891, + "loss_ib": 0.0014805298997089267, + "step": 2492 + }, + { + "ce_ib": 5.049655437469482, + "ce_orig": 0.6907108426094055, + "epoch": 0.7169458623912575, + "kl_loss": 0.08083859086036682, + "loss_ib": 0.0013133513275533915, + "step": 2493 + }, + { + "ce_ib": 3.9551496505737305, + "ce_orig": 0.7936204075813293, + "epoch": 0.7169458623912575, + "kl_loss": 0.08357883244752884, + "loss_ib": 0.0012313032057136297, + "step": 2493 + }, + { + "ce_ib": 4.233648777008057, + "ce_orig": 0.8080227375030518, + "epoch": 0.7169458623912575, + "kl_loss": 0.0754157081246376, + "loss_ib": 0.001177521888166666, + "step": 2493 + }, + { + "ce_ib": 3.081719398498535, + "ce_orig": 0.9312008023262024, + "epoch": 0.7169458623912575, + "kl_loss": 0.060940228402614594, + "loss_ib": 0.0009175742161460221, + "step": 2493 + }, + { + "ce_ib": 3.3064160346984863, + "ce_orig": 0.7993355393409729, + "epoch": 0.7172334459702351, + "kl_loss": 0.04161113500595093, + "loss_ib": 0.0007467528921552002, + "step": 2494 + }, + { + "ce_ib": 3.94814133644104, + "ce_orig": 1.047865390777588, + "epoch": 0.7172334459702351, + "kl_loss": 0.06388489902019501, + "loss_ib": 0.001033663167618215, + "step": 2494 + }, + { + "ce_ib": 5.61794376373291, + "ce_orig": 1.0958690643310547, + "epoch": 0.7172334459702351, + "kl_loss": 0.07595770806074142, + "loss_ib": 0.0013213714119046926, + "step": 2494 + }, + { + "ce_ib": 4.666260242462158, + "ce_orig": 0.730616569519043, + "epoch": 0.7172334459702351, + "kl_loss": 0.06953883916139603, + "loss_ib": 0.0011620144359767437, + "step": 2494 + }, + { + "epoch": 0.7175210295492127, + "grad_norm": 0.10552120953798294, + "learning_rate": 4.451464151402637e-05, + "loss": 0.862, + "step": 2495 + }, + { + "ce_ib": 3.293606758117676, + "ce_orig": 0.6658942699432373, + "epoch": 0.7175210295492127, + "kl_loss": 0.08916455507278442, + "loss_ib": 0.0012210061540827155, + "step": 2495 + }, + { + "ce_ib": 7.116178512573242, + "ce_orig": 1.6929001808166504, + "epoch": 0.7175210295492127, + "kl_loss": 0.08938782662153244, + "loss_ib": 0.0016054960433393717, + "step": 2495 + }, + { + "ce_ib": 3.197575807571411, + "ce_orig": 0.6377484202384949, + "epoch": 0.7175210295492127, + "kl_loss": 0.0681009516119957, + "loss_ib": 0.0010007669916376472, + "step": 2495 + }, + { + "ce_ib": 2.798036575317383, + "ce_orig": 0.584926187992096, + "epoch": 0.7175210295492127, + "kl_loss": 0.06283008307218552, + "loss_ib": 0.000908104469999671, + "step": 2495 + }, + { + "ce_ib": 5.6863532066345215, + "ce_orig": 1.3527158498764038, + "epoch": 0.7178086131281903, + "kl_loss": 0.07124589383602142, + "loss_ib": 0.0012810942716896534, + "step": 2496 + }, + { + "ce_ib": 3.996487617492676, + "ce_orig": 0.7507157921791077, + "epoch": 0.7178086131281903, + "kl_loss": 0.08453623950481415, + "loss_ib": 0.0012450111098587513, + "step": 2496 + }, + { + "ce_ib": 3.463732957839966, + "ce_orig": 0.7905276417732239, + "epoch": 0.7178086131281903, + "kl_loss": 0.051370568573474884, + "loss_ib": 0.0008600789005868137, + "step": 2496 + }, + { + "ce_ib": 4.703672409057617, + "ce_orig": 1.2457382678985596, + "epoch": 0.7178086131281903, + "kl_loss": 0.10494688153266907, + "loss_ib": 0.0015198360197246075, + "step": 2496 + }, + { + "ce_ib": 5.416543483734131, + "ce_orig": 1.1120857000350952, + "epoch": 0.7180961967071681, + "kl_loss": 0.07543879002332687, + "loss_ib": 0.0012960422318428755, + "step": 2497 + }, + { + "ce_ib": 4.305575847625732, + "ce_orig": 1.2039988040924072, + "epoch": 0.7180961967071681, + "kl_loss": 0.06686088442802429, + "loss_ib": 0.0010991663439199328, + "step": 2497 + }, + { + "ce_ib": 4.486831188201904, + "ce_orig": 0.7788097262382507, + "epoch": 0.7180961967071681, + "kl_loss": 0.0836104154586792, + "loss_ib": 0.0012847871985286474, + "step": 2497 + }, + { + "ce_ib": 4.496500492095947, + "ce_orig": 0.6620306968688965, + "epoch": 0.7180961967071681, + "kl_loss": 0.08001721650362015, + "loss_ib": 0.001249822205863893, + "step": 2497 + }, + { + "ce_ib": 4.074254989624023, + "ce_orig": 0.6558361649513245, + "epoch": 0.7183837802861457, + "kl_loss": 0.06879603862762451, + "loss_ib": 0.001095385872758925, + "step": 2498 + }, + { + "ce_ib": 4.1718854904174805, + "ce_orig": 0.48279350996017456, + "epoch": 0.7183837802861457, + "kl_loss": 0.09531471133232117, + "loss_ib": 0.0013703355798497796, + "step": 2498 + }, + { + "ce_ib": 2.996084213256836, + "ce_orig": 0.4806993007659912, + "epoch": 0.7183837802861457, + "kl_loss": 0.06362958252429962, + "loss_ib": 0.0009359042160212994, + "step": 2498 + }, + { + "ce_ib": 5.055881500244141, + "ce_orig": 0.8321597576141357, + "epoch": 0.7183837802861457, + "kl_loss": 0.087398961186409, + "loss_ib": 0.00137957779224962, + "step": 2498 + }, + { + "ce_ib": 4.712131500244141, + "ce_orig": 0.6918990612030029, + "epoch": 0.7186713638651233, + "kl_loss": 0.06400211155414581, + "loss_ib": 0.0011112343054264784, + "step": 2499 + }, + { + "ce_ib": 4.429751873016357, + "ce_orig": 0.8629217743873596, + "epoch": 0.7186713638651233, + "kl_loss": 0.10206570476293564, + "loss_ib": 0.001463632215745747, + "step": 2499 + }, + { + "ce_ib": 3.7669031620025635, + "ce_orig": 0.6723566055297852, + "epoch": 0.7186713638651233, + "kl_loss": 0.05196370556950569, + "loss_ib": 0.0008963273139670491, + "step": 2499 + }, + { + "ce_ib": 3.0728585720062256, + "ce_orig": 0.7828332185745239, + "epoch": 0.7186713638651233, + "kl_loss": 0.09714056551456451, + "loss_ib": 0.0012786914594471455, + "step": 2499 + }, + { + "epoch": 0.7189589474441009, + "grad_norm": 0.09986384958028793, + "learning_rate": 4.449036346103982e-05, + "loss": 0.8551, + "step": 2500 + }, + { + "ce_ib": 3.4666082859039307, + "ce_orig": 0.5022187232971191, + "epoch": 0.7189589474441009, + "kl_loss": 0.04652781784534454, + "loss_ib": 0.0008119390113279223, + "step": 2500 + }, + { + "ce_ib": 3.555588483810425, + "ce_orig": 0.29726648330688477, + "epoch": 0.7189589474441009, + "kl_loss": 0.10847494006156921, + "loss_ib": 0.001440308173187077, + "step": 2500 + }, + { + "ce_ib": 3.563683271408081, + "ce_orig": 0.6578286290168762, + "epoch": 0.7189589474441009, + "kl_loss": 0.0645580142736435, + "loss_ib": 0.0010019483743235469, + "step": 2500 + }, + { + "ce_ib": 3.199897527694702, + "ce_orig": 0.7867447733879089, + "epoch": 0.7189589474441009, + "kl_loss": 0.03795589134097099, + "loss_ib": 0.000699548632837832, + "step": 2500 + }, + { + "ce_ib": 3.798292398452759, + "ce_orig": 0.8359459638595581, + "epoch": 0.7192465310230786, + "kl_loss": 0.10292378067970276, + "loss_ib": 0.0014090670738369226, + "step": 2501 + }, + { + "ce_ib": 5.395669937133789, + "ce_orig": 1.0032111406326294, + "epoch": 0.7192465310230786, + "kl_loss": 0.08924266695976257, + "loss_ib": 0.0014319936744868755, + "step": 2501 + }, + { + "ce_ib": 6.09841251373291, + "ce_orig": 0.7053411602973938, + "epoch": 0.7192465310230786, + "kl_loss": 0.06713879853487015, + "loss_ib": 0.0012812291970476508, + "step": 2501 + }, + { + "ce_ib": 4.1601104736328125, + "ce_orig": 0.5654179453849792, + "epoch": 0.7192465310230786, + "kl_loss": 0.10214321315288544, + "loss_ib": 0.0014374431921169162, + "step": 2501 + }, + { + "ce_ib": 5.235673904418945, + "ce_orig": 1.1465309858322144, + "epoch": 0.7195341146020562, + "kl_loss": 0.07755856961011887, + "loss_ib": 0.0012991530820727348, + "step": 2502 + }, + { + "ce_ib": 2.994769334793091, + "ce_orig": 0.49645015597343445, + "epoch": 0.7195341146020562, + "kl_loss": 0.05268247425556183, + "loss_ib": 0.0008263016352429986, + "step": 2502 + }, + { + "ce_ib": 4.976837158203125, + "ce_orig": 0.6708213090896606, + "epoch": 0.7195341146020562, + "kl_loss": 0.05100115388631821, + "loss_ib": 0.0010076952166855335, + "step": 2502 + }, + { + "ce_ib": 6.5362935066223145, + "ce_orig": 1.3008201122283936, + "epoch": 0.7195341146020562, + "kl_loss": 0.11490561813116074, + "loss_ib": 0.001802685554139316, + "step": 2502 + }, + { + "ce_ib": 5.305369853973389, + "ce_orig": 1.4667848348617554, + "epoch": 0.7198216981810338, + "kl_loss": 0.0744345486164093, + "loss_ib": 0.0012748823501169682, + "step": 2503 + }, + { + "ce_ib": 3.14485239982605, + "ce_orig": 0.5923724174499512, + "epoch": 0.7198216981810338, + "kl_loss": 0.045998625457286835, + "loss_ib": 0.0007744714384898543, + "step": 2503 + }, + { + "ce_ib": 3.2136991024017334, + "ce_orig": 0.4522950053215027, + "epoch": 0.7198216981810338, + "kl_loss": 0.049951642751693726, + "loss_ib": 0.0008208863437175751, + "step": 2503 + }, + { + "ce_ib": 4.126372337341309, + "ce_orig": 0.8969367742538452, + "epoch": 0.7198216981810338, + "kl_loss": 0.09940312802791595, + "loss_ib": 0.0014066683361306787, + "step": 2503 + }, + { + "ce_ib": 3.297671318054199, + "ce_orig": 0.6156386733055115, + "epoch": 0.7201092817600115, + "kl_loss": 0.042213406413793564, + "loss_ib": 0.0007519011851400137, + "step": 2504 + }, + { + "ce_ib": 2.414529800415039, + "ce_orig": 0.5796786546707153, + "epoch": 0.7201092817600115, + "kl_loss": 0.03580652177333832, + "loss_ib": 0.0005995181854814291, + "step": 2504 + }, + { + "ce_ib": 2.5596044063568115, + "ce_orig": 0.5128830075263977, + "epoch": 0.7201092817600115, + "kl_loss": 0.08379323780536652, + "loss_ib": 0.0010938928462564945, + "step": 2504 + }, + { + "ce_ib": 2.067023754119873, + "ce_orig": 0.3827531039714813, + "epoch": 0.7201092817600115, + "kl_loss": 0.1819780170917511, + "loss_ib": 0.0020264824852347374, + "step": 2504 + }, + { + "epoch": 0.7203968653389892, + "grad_norm": 0.12291832268238068, + "learning_rate": 4.446603845124388e-05, + "loss": 0.807, + "step": 2505 + }, + { + "ce_ib": 3.8733785152435303, + "ce_orig": 0.7505314946174622, + "epoch": 0.7203968653389892, + "kl_loss": 0.04337753728032112, + "loss_ib": 0.0008211131789721549, + "step": 2505 + }, + { + "ce_ib": 2.5143826007843018, + "ce_orig": 0.571890652179718, + "epoch": 0.7203968653389892, + "kl_loss": 0.035364892333745956, + "loss_ib": 0.0006050871452316642, + "step": 2505 + }, + { + "ce_ib": 4.5452423095703125, + "ce_orig": 0.8931484222412109, + "epoch": 0.7203968653389892, + "kl_loss": 0.08819201588630676, + "loss_ib": 0.001336444285698235, + "step": 2505 + }, + { + "ce_ib": 2.542088508605957, + "ce_orig": 0.3461896777153015, + "epoch": 0.7203968653389892, + "kl_loss": 0.13746455311775208, + "loss_ib": 0.0016288544284179807, + "step": 2505 + }, + { + "ce_ib": 6.9970550537109375, + "ce_orig": 1.4761524200439453, + "epoch": 0.7206844489179668, + "kl_loss": 0.0843179002404213, + "loss_ib": 0.0015428843908011913, + "step": 2506 + }, + { + "ce_ib": 2.1347551345825195, + "ce_orig": 0.22786208987236023, + "epoch": 0.7206844489179668, + "kl_loss": 0.09229382127523422, + "loss_ib": 0.0011364136589691043, + "step": 2506 + }, + { + "ce_ib": 2.651665687561035, + "ce_orig": 0.5953783988952637, + "epoch": 0.7206844489179668, + "kl_loss": 0.10860519111156464, + "loss_ib": 0.0013512184377759695, + "step": 2506 + }, + { + "ce_ib": 3.8186991214752197, + "ce_orig": 0.9932923913002014, + "epoch": 0.7206844489179668, + "kl_loss": 0.06171686202287674, + "loss_ib": 0.0009990384569391608, + "step": 2506 + }, + { + "ce_ib": 5.185868740081787, + "ce_orig": 1.3650621175765991, + "epoch": 0.7209720324969444, + "kl_loss": 0.06976290047168732, + "loss_ib": 0.001216215780004859, + "step": 2507 + }, + { + "ce_ib": 3.063645124435425, + "ce_orig": 0.8990799784660339, + "epoch": 0.7209720324969444, + "kl_loss": 0.04837596416473389, + "loss_ib": 0.0007901241187937558, + "step": 2507 + }, + { + "ce_ib": 6.755120277404785, + "ce_orig": 1.643056869506836, + "epoch": 0.7209720324969444, + "kl_loss": 0.07545918226242065, + "loss_ib": 0.001430103788152337, + "step": 2507 + }, + { + "ce_ib": 4.302093982696533, + "ce_orig": 0.795421302318573, + "epoch": 0.7209720324969444, + "kl_loss": 0.09718292951583862, + "loss_ib": 0.0014020386151969433, + "step": 2507 + }, + { + "ce_ib": 4.340654373168945, + "ce_orig": 1.044073462486267, + "epoch": 0.721259616075922, + "kl_loss": 0.08204951137304306, + "loss_ib": 0.001254560425877571, + "step": 2508 + }, + { + "ce_ib": 5.11433744430542, + "ce_orig": 0.6995687484741211, + "epoch": 0.721259616075922, + "kl_loss": 0.0661940947175026, + "loss_ib": 0.0011733745923265815, + "step": 2508 + }, + { + "ce_ib": 5.047739028930664, + "ce_orig": 0.7598916888237, + "epoch": 0.721259616075922, + "kl_loss": 0.09712298959493637, + "loss_ib": 0.0014760037884116173, + "step": 2508 + }, + { + "ce_ib": 3.7557716369628906, + "ce_orig": 0.6227285861968994, + "epoch": 0.721259616075922, + "kl_loss": 0.07119263708591461, + "loss_ib": 0.001087503507733345, + "step": 2508 + }, + { + "ce_ib": 3.378626585006714, + "ce_orig": 0.6502926349639893, + "epoch": 0.7215471996548997, + "kl_loss": 0.05330148711800575, + "loss_ib": 0.0008708774694241583, + "step": 2509 + }, + { + "ce_ib": 4.1831464767456055, + "ce_orig": 0.8311187028884888, + "epoch": 0.7215471996548997, + "kl_loss": 0.06966935098171234, + "loss_ib": 0.0011150081409141421, + "step": 2509 + }, + { + "ce_ib": 3.0929741859436035, + "ce_orig": 0.7939823865890503, + "epoch": 0.7215471996548997, + "kl_loss": 0.04570823907852173, + "loss_ib": 0.0007663798169232905, + "step": 2509 + }, + { + "ce_ib": 5.528732776641846, + "ce_orig": 0.9578753709793091, + "epoch": 0.7215471996548997, + "kl_loss": 0.09166350960731506, + "loss_ib": 0.0014695083955302835, + "step": 2509 + }, + { + "epoch": 0.7218347832338774, + "grad_norm": 0.12486255913972855, + "learning_rate": 4.4441666543243156e-05, + "loss": 0.8279, + "step": 2510 + }, + { + "ce_ib": 2.654970407485962, + "ce_orig": 0.6484200358390808, + "epoch": 0.7218347832338774, + "kl_loss": 0.04031450301408768, + "loss_ib": 0.0006686420529149473, + "step": 2510 + }, + { + "ce_ib": 4.656977653503418, + "ce_orig": 1.1066426038742065, + "epoch": 0.7218347832338774, + "kl_loss": 0.03091743402183056, + "loss_ib": 0.0007748720818199217, + "step": 2510 + }, + { + "ce_ib": 3.445368528366089, + "ce_orig": 0.77411288022995, + "epoch": 0.7218347832338774, + "kl_loss": 0.053091540932655334, + "loss_ib": 0.0008754523005336523, + "step": 2510 + }, + { + "ce_ib": 3.2160887718200684, + "ce_orig": 0.36531519889831543, + "epoch": 0.7218347832338774, + "kl_loss": 0.08263958990573883, + "loss_ib": 0.0011480047833174467, + "step": 2510 + }, + { + "ce_ib": 4.057005882263184, + "ce_orig": 1.1467152833938599, + "epoch": 0.722122366812855, + "kl_loss": 0.08678203821182251, + "loss_ib": 0.0012735208729282022, + "step": 2511 + }, + { + "ce_ib": 7.762226581573486, + "ce_orig": 1.3962137699127197, + "epoch": 0.722122366812855, + "kl_loss": 0.07603676617145538, + "loss_ib": 0.0015365902800112963, + "step": 2511 + }, + { + "ce_ib": 3.085076093673706, + "ce_orig": 0.6340494155883789, + "epoch": 0.722122366812855, + "kl_loss": 0.05327055975794792, + "loss_ib": 0.0008412131574004889, + "step": 2511 + }, + { + "ce_ib": 4.079852104187012, + "ce_orig": 1.1506277322769165, + "epoch": 0.722122366812855, + "kl_loss": 0.05135380104184151, + "loss_ib": 0.0009215231984853745, + "step": 2511 + }, + { + "ce_ib": 3.086840867996216, + "ce_orig": 0.5932393670082092, + "epoch": 0.7224099503918326, + "kl_loss": 0.04675854742527008, + "loss_ib": 0.0007762695895507932, + "step": 2512 + }, + { + "ce_ib": 4.780213356018066, + "ce_orig": 0.8380338549613953, + "epoch": 0.7224099503918326, + "kl_loss": 0.07210764288902283, + "loss_ib": 0.001199097721837461, + "step": 2512 + }, + { + "ce_ib": 3.8120198249816895, + "ce_orig": 0.5866168141365051, + "epoch": 0.7224099503918326, + "kl_loss": 0.07524831593036652, + "loss_ib": 0.0011336851166561246, + "step": 2512 + }, + { + "ce_ib": 4.054421901702881, + "ce_orig": 0.5778701305389404, + "epoch": 0.7224099503918326, + "kl_loss": 0.08686588704586029, + "loss_ib": 0.0012741010868921876, + "step": 2512 + }, + { + "ce_ib": 5.282199382781982, + "ce_orig": 1.0757616758346558, + "epoch": 0.7226975339708103, + "kl_loss": 0.05383358150720596, + "loss_ib": 0.0010665557347238064, + "step": 2513 + }, + { + "ce_ib": 2.898329496383667, + "ce_orig": 0.6191767454147339, + "epoch": 0.7226975339708103, + "kl_loss": 0.058228928595781326, + "loss_ib": 0.0008721221820451319, + "step": 2513 + }, + { + "ce_ib": 7.347433090209961, + "ce_orig": 1.736725926399231, + "epoch": 0.7226975339708103, + "kl_loss": 0.05374414846301079, + "loss_ib": 0.0012721847742795944, + "step": 2513 + }, + { + "ce_ib": 3.3175671100616455, + "ce_orig": 0.8922802805900574, + "epoch": 0.7226975339708103, + "kl_loss": 0.05767374485731125, + "loss_ib": 0.0009084941120818257, + "step": 2513 + }, + { + "ce_ib": 3.715005397796631, + "ce_orig": 0.6575178503990173, + "epoch": 0.7229851175497879, + "kl_loss": 0.046224445104599, + "loss_ib": 0.0008337449980899692, + "step": 2514 + }, + { + "ce_ib": 5.110638618469238, + "ce_orig": 1.2282531261444092, + "epoch": 0.7229851175497879, + "kl_loss": 0.0843357965350151, + "loss_ib": 0.0013544218381866813, + "step": 2514 + }, + { + "ce_ib": 6.2423481941223145, + "ce_orig": 1.488637089729309, + "epoch": 0.7229851175497879, + "kl_loss": 0.051694080233573914, + "loss_ib": 0.001141175627708435, + "step": 2514 + }, + { + "ce_ib": 4.658631324768066, + "ce_orig": 0.6972258687019348, + "epoch": 0.7229851175497879, + "kl_loss": 0.11087486147880554, + "loss_ib": 0.001574611640535295, + "step": 2514 + }, + { + "epoch": 0.7232727011287655, + "grad_norm": 0.09210823476314545, + "learning_rate": 4.441724779575521e-05, + "loss": 0.8867, + "step": 2515 + }, + { + "ce_ib": 6.987382888793945, + "ce_orig": 1.7075897455215454, + "epoch": 0.7232727011287655, + "kl_loss": 0.050043828785419464, + "loss_ib": 0.0011991765350103378, + "step": 2515 + }, + { + "ce_ib": 2.471082925796509, + "ce_orig": 0.5287762880325317, + "epoch": 0.7232727011287655, + "kl_loss": 0.060710158199071884, + "loss_ib": 0.0008542098803445697, + "step": 2515 + }, + { + "ce_ib": 3.9245989322662354, + "ce_orig": 1.0647910833358765, + "epoch": 0.7232727011287655, + "kl_loss": 0.03572811558842659, + "loss_ib": 0.000749741040635854, + "step": 2515 + }, + { + "ce_ib": 2.5149033069610596, + "ce_orig": 0.564926028251648, + "epoch": 0.7232727011287655, + "kl_loss": 0.04322795569896698, + "loss_ib": 0.000683769874740392, + "step": 2515 + }, + { + "ce_ib": 2.843897819519043, + "ce_orig": 0.661108672618866, + "epoch": 0.7235602847077431, + "kl_loss": 0.06030695512890816, + "loss_ib": 0.0008874593186192214, + "step": 2516 + }, + { + "ce_ib": 2.843977451324463, + "ce_orig": 0.7228829860687256, + "epoch": 0.7235602847077431, + "kl_loss": 0.04763541370630264, + "loss_ib": 0.0007607518928125501, + "step": 2516 + }, + { + "ce_ib": 1.4183530807495117, + "ce_orig": 0.24931171536445618, + "epoch": 0.7235602847077431, + "kl_loss": 0.17432504892349243, + "loss_ib": 0.0018850858323276043, + "step": 2516 + }, + { + "ce_ib": 3.686619997024536, + "ce_orig": 0.6049343943595886, + "epoch": 0.7235602847077431, + "kl_loss": 0.07531294971704483, + "loss_ib": 0.001121791428886354, + "step": 2516 + }, + { + "ce_ib": 4.135274887084961, + "ce_orig": 1.1932634115219116, + "epoch": 0.7238478682867209, + "kl_loss": 0.05055537819862366, + "loss_ib": 0.0009190812706947327, + "step": 2517 + }, + { + "ce_ib": 4.61160945892334, + "ce_orig": 0.75896155834198, + "epoch": 0.7238478682867209, + "kl_loss": 0.21808795630931854, + "loss_ib": 0.0026420406065881252, + "step": 2517 + }, + { + "ce_ib": 2.602882146835327, + "ce_orig": 0.7164777517318726, + "epoch": 0.7238478682867209, + "kl_loss": 0.05935150384902954, + "loss_ib": 0.0008538032416254282, + "step": 2517 + }, + { + "ce_ib": 3.039844274520874, + "ce_orig": 0.6557605862617493, + "epoch": 0.7238478682867209, + "kl_loss": 0.05641894042491913, + "loss_ib": 0.0008681738399900496, + "step": 2517 + }, + { + "ce_ib": 2.6259653568267822, + "ce_orig": 0.5459722280502319, + "epoch": 0.7241354518656985, + "kl_loss": 0.07542313635349274, + "loss_ib": 0.0010168278822675347, + "step": 2518 + }, + { + "ce_ib": 2.7983005046844482, + "ce_orig": 0.5853060483932495, + "epoch": 0.7241354518656985, + "kl_loss": 0.17558130621910095, + "loss_ib": 0.002035643206909299, + "step": 2518 + }, + { + "ce_ib": 1.9805042743682861, + "ce_orig": 0.5449436902999878, + "epoch": 0.7241354518656985, + "kl_loss": 0.03751155734062195, + "loss_ib": 0.0005731660057790577, + "step": 2518 + }, + { + "ce_ib": 3.7767863273620605, + "ce_orig": 0.6449462175369263, + "epoch": 0.7241354518656985, + "kl_loss": 0.08682431280612946, + "loss_ib": 0.0012459217105060816, + "step": 2518 + }, + { + "ce_ib": 5.150989055633545, + "ce_orig": 1.065609097480774, + "epoch": 0.7244230354446761, + "kl_loss": 0.05291251838207245, + "loss_ib": 0.0010442240163683891, + "step": 2519 + }, + { + "ce_ib": 7.386144161224365, + "ce_orig": 1.5487664937973022, + "epoch": 0.7244230354446761, + "kl_loss": 0.13174355030059814, + "loss_ib": 0.002056049881502986, + "step": 2519 + }, + { + "ce_ib": 3.3005130290985107, + "ce_orig": 0.5541477799415588, + "epoch": 0.7244230354446761, + "kl_loss": 0.05411140248179436, + "loss_ib": 0.0008711653063073754, + "step": 2519 + }, + { + "ce_ib": 3.450654983520508, + "ce_orig": 0.7580965161323547, + "epoch": 0.7244230354446761, + "kl_loss": 0.0831499695777893, + "loss_ib": 0.0011765650706365705, + "step": 2519 + }, + { + "epoch": 0.7247106190236537, + "grad_norm": 0.10518350452184677, + "learning_rate": 4.43927822676105e-05, + "loss": 0.8507, + "step": 2520 + }, + { + "ce_ib": 2.508821725845337, + "ce_orig": 0.6316868662834167, + "epoch": 0.7247106190236537, + "kl_loss": 0.042383402585983276, + "loss_ib": 0.0006747161969542503, + "step": 2520 + }, + { + "ce_ib": 3.5339584350585938, + "ce_orig": 0.9015162587165833, + "epoch": 0.7247106190236537, + "kl_loss": 0.04187385365366936, + "loss_ib": 0.0007721343426965177, + "step": 2520 + }, + { + "ce_ib": 4.181256294250488, + "ce_orig": 0.6805806159973145, + "epoch": 0.7247106190236537, + "kl_loss": 0.06276450306177139, + "loss_ib": 0.00104577059391886, + "step": 2520 + }, + { + "ce_ib": 2.512144088745117, + "ce_orig": 0.6569667458534241, + "epoch": 0.7247106190236537, + "kl_loss": 0.04414936155080795, + "loss_ib": 0.0006927080103196204, + "step": 2520 + }, + { + "ce_ib": 2.2225496768951416, + "ce_orig": 0.5850231051445007, + "epoch": 0.7249982026026314, + "kl_loss": 0.04321721941232681, + "loss_ib": 0.0006544271018356085, + "step": 2521 + }, + { + "ce_ib": 4.270439624786377, + "ce_orig": 0.9706498384475708, + "epoch": 0.7249982026026314, + "kl_loss": 0.07591305673122406, + "loss_ib": 0.001186174456961453, + "step": 2521 + }, + { + "ce_ib": 1.6533586978912354, + "ce_orig": 0.4497601389884949, + "epoch": 0.7249982026026314, + "kl_loss": 0.03532896563410759, + "loss_ib": 0.0005186255439184606, + "step": 2521 + }, + { + "ce_ib": 3.5175983905792236, + "ce_orig": 0.7735888957977295, + "epoch": 0.7249982026026314, + "kl_loss": 0.06268094480037689, + "loss_ib": 0.0009785692673176527, + "step": 2521 + }, + { + "ce_ib": 3.466916561126709, + "ce_orig": 0.9748359322547913, + "epoch": 0.725285786181609, + "kl_loss": 0.04635559022426605, + "loss_ib": 0.000810247496701777, + "step": 2522 + }, + { + "ce_ib": 2.699125051498413, + "ce_orig": 0.49413660168647766, + "epoch": 0.725285786181609, + "kl_loss": 0.044700827449560165, + "loss_ib": 0.0007169207674451172, + "step": 2522 + }, + { + "ce_ib": 3.2563416957855225, + "ce_orig": 0.7995749115943909, + "epoch": 0.725285786181609, + "kl_loss": 0.05717437341809273, + "loss_ib": 0.0008973778458312154, + "step": 2522 + }, + { + "ce_ib": 2.891904354095459, + "ce_orig": 0.7050593495368958, + "epoch": 0.725285786181609, + "kl_loss": 0.07658790051937103, + "loss_ib": 0.0010550693841651082, + "step": 2522 + }, + { + "ce_ib": 4.821568489074707, + "ce_orig": 1.2601996660232544, + "epoch": 0.7255733697605866, + "kl_loss": 0.06235979124903679, + "loss_ib": 0.0011057547526434064, + "step": 2523 + }, + { + "ce_ib": 3.3660483360290527, + "ce_orig": 0.7489072680473328, + "epoch": 0.7255733697605866, + "kl_loss": 0.06708566844463348, + "loss_ib": 0.0010074615711346269, + "step": 2523 + }, + { + "ce_ib": 4.21556282043457, + "ce_orig": 0.9245425462722778, + "epoch": 0.7255733697605866, + "kl_loss": 0.07491608709096909, + "loss_ib": 0.001170717179775238, + "step": 2523 + }, + { + "ce_ib": 2.655243158340454, + "ce_orig": 0.7059034705162048, + "epoch": 0.7255733697605866, + "kl_loss": 0.05609574541449547, + "loss_ib": 0.0008264817297458649, + "step": 2523 + }, + { + "ce_ib": 4.864908695220947, + "ce_orig": 0.9895210862159729, + "epoch": 0.7258609533395644, + "kl_loss": 0.07100045680999756, + "loss_ib": 0.0011964953737333417, + "step": 2524 + }, + { + "ce_ib": 2.1631767749786377, + "ce_orig": 0.5655300617218018, + "epoch": 0.7258609533395644, + "kl_loss": 0.033205725252628326, + "loss_ib": 0.0005483749555423856, + "step": 2524 + }, + { + "ce_ib": 4.6160969734191895, + "ce_orig": 1.1999176740646362, + "epoch": 0.7258609533395644, + "kl_loss": 0.08460991829633713, + "loss_ib": 0.0013077089097350836, + "step": 2524 + }, + { + "ce_ib": 4.0561699867248535, + "ce_orig": 0.7270290851593018, + "epoch": 0.7258609533395644, + "kl_loss": 0.10962356626987457, + "loss_ib": 0.001501852530054748, + "step": 2524 + }, + { + "epoch": 0.726148536918542, + "grad_norm": 0.1101323813199997, + "learning_rate": 4.4368270017752135e-05, + "loss": 0.7702, + "step": 2525 + }, + { + "ce_ib": 5.104349613189697, + "ce_orig": 0.4348459541797638, + "epoch": 0.726148536918542, + "kl_loss": 0.1379815936088562, + "loss_ib": 0.0018902508309111, + "step": 2525 + }, + { + "ce_ib": 5.528597831726074, + "ce_orig": 1.4528030157089233, + "epoch": 0.726148536918542, + "kl_loss": 0.0732806995511055, + "loss_ib": 0.0012856668327003717, + "step": 2525 + }, + { + "ce_ib": 4.587219715118408, + "ce_orig": 0.8700019717216492, + "epoch": 0.726148536918542, + "kl_loss": 0.07719434797763824, + "loss_ib": 0.00123066536616534, + "step": 2525 + }, + { + "ce_ib": 5.861127853393555, + "ce_orig": 1.1546661853790283, + "epoch": 0.726148536918542, + "kl_loss": 0.0770147368311882, + "loss_ib": 0.0013562601525336504, + "step": 2525 + }, + { + "ce_ib": 4.295505523681641, + "ce_orig": 0.7510903477668762, + "epoch": 0.7264361204975196, + "kl_loss": 0.05862804874777794, + "loss_ib": 0.0010158310178667307, + "step": 2526 + }, + { + "ce_ib": 7.84440803527832, + "ce_orig": 1.7178151607513428, + "epoch": 0.7264361204975196, + "kl_loss": 0.0688193216919899, + "loss_ib": 0.0014726340305060148, + "step": 2526 + }, + { + "ce_ib": 4.474917888641357, + "ce_orig": 0.571538507938385, + "epoch": 0.7264361204975196, + "kl_loss": 0.09217777103185654, + "loss_ib": 0.0013692694483324885, + "step": 2526 + }, + { + "ce_ib": 4.317047119140625, + "ce_orig": 0.6481124758720398, + "epoch": 0.7264361204975196, + "kl_loss": 0.08461485803127289, + "loss_ib": 0.0012778532691299915, + "step": 2526 + }, + { + "ce_ib": 5.13293981552124, + "ce_orig": 1.2331348657608032, + "epoch": 0.7267237040764972, + "kl_loss": 0.08923782408237457, + "loss_ib": 0.0014056720538064837, + "step": 2527 + }, + { + "ce_ib": 5.964245319366455, + "ce_orig": 0.7374433279037476, + "epoch": 0.7267237040764972, + "kl_loss": 0.06728055328130722, + "loss_ib": 0.001269230036996305, + "step": 2527 + }, + { + "ce_ib": 2.768512010574341, + "ce_orig": 0.869515061378479, + "epoch": 0.7267237040764972, + "kl_loss": 0.03656191751360893, + "loss_ib": 0.0006424703169614077, + "step": 2527 + }, + { + "ce_ib": 2.6809909343719482, + "ce_orig": 0.48262709379196167, + "epoch": 0.7267237040764972, + "kl_loss": 0.06387209892272949, + "loss_ib": 0.0009068200597539544, + "step": 2527 + }, + { + "ce_ib": 3.6606104373931885, + "ce_orig": 0.6870585680007935, + "epoch": 0.7270112876554748, + "kl_loss": 0.073047935962677, + "loss_ib": 0.0010965403635054827, + "step": 2528 + }, + { + "ce_ib": 2.629187822341919, + "ce_orig": 0.4782581627368927, + "epoch": 0.7270112876554748, + "kl_loss": 0.06778986752033234, + "loss_ib": 0.0009408174082636833, + "step": 2528 + }, + { + "ce_ib": 4.336165428161621, + "ce_orig": 0.8352712988853455, + "epoch": 0.7270112876554748, + "kl_loss": 0.05918899178504944, + "loss_ib": 0.001025506411679089, + "step": 2528 + }, + { + "ce_ib": 4.2921671867370605, + "ce_orig": 0.9507220387458801, + "epoch": 0.7270112876554748, + "kl_loss": 0.07670994102954865, + "loss_ib": 0.0011963160941377282, + "step": 2528 + }, + { + "ce_ib": 3.3260574340820312, + "ce_orig": 0.5243785977363586, + "epoch": 0.7272988712344525, + "kl_loss": 0.07880212366580963, + "loss_ib": 0.0011206269264221191, + "step": 2529 + }, + { + "ce_ib": 4.328843593597412, + "ce_orig": 0.8712429404258728, + "epoch": 0.7272988712344525, + "kl_loss": 0.05414075776934624, + "loss_ib": 0.0009742919355630875, + "step": 2529 + }, + { + "ce_ib": 3.333247661590576, + "ce_orig": 0.7451984286308289, + "epoch": 0.7272988712344525, + "kl_loss": 0.05152883380651474, + "loss_ib": 0.000848613039124757, + "step": 2529 + }, + { + "ce_ib": 4.0343852043151855, + "ce_orig": 0.7900019288063049, + "epoch": 0.7272988712344525, + "kl_loss": 0.09045347571372986, + "loss_ib": 0.0013079732889309525, + "step": 2529 + }, + { + "epoch": 0.7275864548134302, + "grad_norm": 0.08829555660486221, + "learning_rate": 4.434371110523583e-05, + "loss": 0.8471, + "step": 2530 + }, + { + "ce_ib": 6.702322483062744, + "ce_orig": 1.3327499628067017, + "epoch": 0.7275864548134302, + "kl_loss": 0.06598884612321854, + "loss_ib": 0.0013301207218319178, + "step": 2530 + }, + { + "ce_ib": 2.660832405090332, + "ce_orig": 0.45379671454429626, + "epoch": 0.7275864548134302, + "kl_loss": 0.059382885694503784, + "loss_ib": 0.0008599120774306357, + "step": 2530 + }, + { + "ce_ib": 4.6245856285095215, + "ce_orig": 0.6456936597824097, + "epoch": 0.7275864548134302, + "kl_loss": 0.07173187285661697, + "loss_ib": 0.0011797772021964192, + "step": 2530 + }, + { + "ce_ib": 5.820184230804443, + "ce_orig": 1.4345626831054688, + "epoch": 0.7275864548134302, + "kl_loss": 0.0579664446413517, + "loss_ib": 0.0011616828851401806, + "step": 2530 + }, + { + "ce_ib": 3.0675413608551025, + "ce_orig": 0.7975252866744995, + "epoch": 0.7278740383924078, + "kl_loss": 0.04311671853065491, + "loss_ib": 0.0007379212765954435, + "step": 2531 + }, + { + "ce_ib": 4.338830471038818, + "ce_orig": 0.8074547648429871, + "epoch": 0.7278740383924078, + "kl_loss": 0.09138748794794083, + "loss_ib": 0.001347757875919342, + "step": 2531 + }, + { + "ce_ib": 4.863844871520996, + "ce_orig": 0.8249049186706543, + "epoch": 0.7278740383924078, + "kl_loss": 0.06638094782829285, + "loss_ib": 0.0011501939734444022, + "step": 2531 + }, + { + "ce_ib": 2.772394895553589, + "ce_orig": 0.6670551896095276, + "epoch": 0.7278740383924078, + "kl_loss": 0.045714445412158966, + "loss_ib": 0.0007343838806264102, + "step": 2531 + }, + { + "ce_ib": 2.4099769592285156, + "ce_orig": 0.5363471508026123, + "epoch": 0.7281616219713855, + "kl_loss": 0.05210151523351669, + "loss_ib": 0.0007620127871632576, + "step": 2532 + }, + { + "ce_ib": 5.784844875335693, + "ce_orig": 1.18984055519104, + "epoch": 0.7281616219713855, + "kl_loss": 0.05229605361819267, + "loss_ib": 0.0011014449410140514, + "step": 2532 + }, + { + "ce_ib": 4.243745803833008, + "ce_orig": 0.7178491353988647, + "epoch": 0.7281616219713855, + "kl_loss": 0.09161851555109024, + "loss_ib": 0.001340559683740139, + "step": 2532 + }, + { + "ce_ib": 4.054720401763916, + "ce_orig": 0.9719099998474121, + "epoch": 0.7281616219713855, + "kl_loss": 0.04773734509944916, + "loss_ib": 0.0008828454883769155, + "step": 2532 + }, + { + "ce_ib": 3.9456727504730225, + "ce_orig": 0.7013164758682251, + "epoch": 0.7284492055503631, + "kl_loss": 0.035317495465278625, + "loss_ib": 0.0007477421895600855, + "step": 2533 + }, + { + "ce_ib": 3.042792320251465, + "ce_orig": 0.515903890132904, + "epoch": 0.7284492055503631, + "kl_loss": 0.09842934459447861, + "loss_ib": 0.0012885726755484939, + "step": 2533 + }, + { + "ce_ib": 2.8259127140045166, + "ce_orig": 0.663155734539032, + "epoch": 0.7284492055503631, + "kl_loss": 0.045302994549274445, + "loss_ib": 0.0007356212008744478, + "step": 2533 + }, + { + "ce_ib": 3.2024145126342773, + "ce_orig": 0.5893147587776184, + "epoch": 0.7284492055503631, + "kl_loss": 0.08049768954515457, + "loss_ib": 0.0011252183467149734, + "step": 2533 + }, + { + "ce_ib": 4.435690879821777, + "ce_orig": 0.9404907822608948, + "epoch": 0.7287367891293407, + "kl_loss": 0.07608331739902496, + "loss_ib": 0.001204402302391827, + "step": 2534 + }, + { + "ce_ib": 2.8048362731933594, + "ce_orig": 0.4600967764854431, + "epoch": 0.7287367891293407, + "kl_loss": 0.058985307812690735, + "loss_ib": 0.0008703367202542722, + "step": 2534 + }, + { + "ce_ib": 3.7578999996185303, + "ce_orig": 0.7756895422935486, + "epoch": 0.7287367891293407, + "kl_loss": 0.08893326669931412, + "loss_ib": 0.0012651225551962852, + "step": 2534 + }, + { + "ce_ib": 3.798828363418579, + "ce_orig": 0.7111980319023132, + "epoch": 0.7287367891293407, + "kl_loss": 0.05286325886845589, + "loss_ib": 0.0009085154160857201, + "step": 2534 + }, + { + "epoch": 0.7290243727083183, + "grad_norm": 0.09133889526128769, + "learning_rate": 4.43191055892297e-05, + "loss": 0.845, + "step": 2535 + }, + { + "ce_ib": 3.9590554237365723, + "ce_orig": 0.6205008029937744, + "epoch": 0.7290243727083183, + "kl_loss": 0.07269398868083954, + "loss_ib": 0.0011228453367948532, + "step": 2535 + }, + { + "ce_ib": 4.699456214904785, + "ce_orig": 0.6837276220321655, + "epoch": 0.7290243727083183, + "kl_loss": 0.09944184124469757, + "loss_ib": 0.0014643638860434294, + "step": 2535 + }, + { + "ce_ib": 3.4719719886779785, + "ce_orig": 0.7924101948738098, + "epoch": 0.7290243727083183, + "kl_loss": 0.07662612199783325, + "loss_ib": 0.001113458420149982, + "step": 2535 + }, + { + "ce_ib": 6.43671178817749, + "ce_orig": 1.314465880393982, + "epoch": 0.7290243727083183, + "kl_loss": 0.09748245775699615, + "loss_ib": 0.0016184956766664982, + "step": 2535 + }, + { + "ce_ib": 5.21309232711792, + "ce_orig": 0.9384906888008118, + "epoch": 0.729311956287296, + "kl_loss": 0.0718073844909668, + "loss_ib": 0.0012393830111250281, + "step": 2536 + }, + { + "ce_ib": 4.865029811859131, + "ce_orig": 1.1014511585235596, + "epoch": 0.729311956287296, + "kl_loss": 0.08708865940570831, + "loss_ib": 0.0013573894975706935, + "step": 2536 + }, + { + "ce_ib": 3.990086555480957, + "ce_orig": 0.9231894016265869, + "epoch": 0.729311956287296, + "kl_loss": 0.06327144801616669, + "loss_ib": 0.0010317231062799692, + "step": 2536 + }, + { + "ce_ib": 3.4488210678100586, + "ce_orig": 0.6533098220825195, + "epoch": 0.729311956287296, + "kl_loss": 0.06530895829200745, + "loss_ib": 0.0009979716269299388, + "step": 2536 + }, + { + "ce_ib": 4.593130588531494, + "ce_orig": 0.6464447379112244, + "epoch": 0.7295995398662737, + "kl_loss": 0.08192870020866394, + "loss_ib": 0.0012786000734195113, + "step": 2537 + }, + { + "ce_ib": 3.9237773418426514, + "ce_orig": 0.9564934372901917, + "epoch": 0.7295995398662737, + "kl_loss": 0.08504457771778107, + "loss_ib": 0.0012428234331309795, + "step": 2537 + }, + { + "ce_ib": 5.050119400024414, + "ce_orig": 1.4060823917388916, + "epoch": 0.7295995398662737, + "kl_loss": 0.05913498252630234, + "loss_ib": 0.0010963616659864783, + "step": 2537 + }, + { + "ce_ib": 3.043843984603882, + "ce_orig": 0.5319644212722778, + "epoch": 0.7295995398662737, + "kl_loss": 0.061446353793144226, + "loss_ib": 0.0009188479161821306, + "step": 2537 + }, + { + "ce_ib": 5.308557510375977, + "ce_orig": 0.8266493082046509, + "epoch": 0.7298871234452513, + "kl_loss": 0.12333327531814575, + "loss_ib": 0.0017641884041950107, + "step": 2538 + }, + { + "ce_ib": 2.2671682834625244, + "ce_orig": 0.5579190254211426, + "epoch": 0.7298871234452513, + "kl_loss": 0.040175147354602814, + "loss_ib": 0.0006284682895056903, + "step": 2538 + }, + { + "ce_ib": 3.82171893119812, + "ce_orig": 0.6734261512756348, + "epoch": 0.7298871234452513, + "kl_loss": 0.06664562225341797, + "loss_ib": 0.001048628124408424, + "step": 2538 + }, + { + "ce_ib": 4.2188029289245605, + "ce_orig": 0.9157139658927917, + "epoch": 0.7298871234452513, + "kl_loss": 0.05669383332133293, + "loss_ib": 0.0009888185886666179, + "step": 2538 + }, + { + "ce_ib": 5.726754188537598, + "ce_orig": 1.5385526418685913, + "epoch": 0.7301747070242289, + "kl_loss": 0.06384187936782837, + "loss_ib": 0.0012110942043364048, + "step": 2539 + }, + { + "ce_ib": 4.232954502105713, + "ce_orig": 0.8337367177009583, + "epoch": 0.7301747070242289, + "kl_loss": 0.08517486602067947, + "loss_ib": 0.0012750440509989858, + "step": 2539 + }, + { + "ce_ib": 5.488724231719971, + "ce_orig": 1.0431056022644043, + "epoch": 0.7301747070242289, + "kl_loss": 0.09109634906053543, + "loss_ib": 0.0014598359121009707, + "step": 2539 + }, + { + "ce_ib": 3.2237555980682373, + "ce_orig": 0.6148270964622498, + "epoch": 0.7301747070242289, + "kl_loss": 0.04560668393969536, + "loss_ib": 0.0007784423651173711, + "step": 2539 + }, + { + "epoch": 0.7304622906032066, + "grad_norm": 0.09160952270030975, + "learning_rate": 4.429445352901415e-05, + "loss": 0.8451, + "step": 2540 + }, + { + "ce_ib": 3.1587767601013184, + "ce_orig": 0.5733940005302429, + "epoch": 0.7304622906032066, + "kl_loss": 0.08378744125366211, + "loss_ib": 0.0011537519749253988, + "step": 2540 + }, + { + "ce_ib": 2.6767990589141846, + "ce_orig": 0.5855844020843506, + "epoch": 0.7304622906032066, + "kl_loss": 0.08607249706983566, + "loss_ib": 0.0011284048669040203, + "step": 2540 + }, + { + "ce_ib": 3.576653242111206, + "ce_orig": 0.725315511226654, + "epoch": 0.7304622906032066, + "kl_loss": 0.05203034728765488, + "loss_ib": 0.0008779687923379242, + "step": 2540 + }, + { + "ce_ib": 3.287815570831299, + "ce_orig": 0.7614700198173523, + "epoch": 0.7304622906032066, + "kl_loss": 0.061164479702711105, + "loss_ib": 0.0009404263691976666, + "step": 2540 + }, + { + "ce_ib": 4.1499810218811035, + "ce_orig": 0.6641122102737427, + "epoch": 0.7307498741821842, + "kl_loss": 0.07551529258489609, + "loss_ib": 0.0011701509356498718, + "step": 2541 + }, + { + "ce_ib": 8.751667022705078, + "ce_orig": 2.0677733421325684, + "epoch": 0.7307498741821842, + "kl_loss": 0.0753607526421547, + "loss_ib": 0.00162877410184592, + "step": 2541 + }, + { + "ce_ib": 2.5627872943878174, + "ce_orig": 0.3020722270011902, + "epoch": 0.7307498741821842, + "kl_loss": 0.19681793451309204, + "loss_ib": 0.0022244579158723354, + "step": 2541 + }, + { + "ce_ib": 6.615450382232666, + "ce_orig": 1.3636186122894287, + "epoch": 0.7307498741821842, + "kl_loss": 0.07389692217111588, + "loss_ib": 0.0014005141565576196, + "step": 2541 + }, + { + "ce_ib": 7.278303146362305, + "ce_orig": 1.4624578952789307, + "epoch": 0.7310374577611618, + "kl_loss": 0.0896192342042923, + "loss_ib": 0.0016240227269008756, + "step": 2542 + }, + { + "ce_ib": 3.1657445430755615, + "ce_orig": 0.7816922068595886, + "epoch": 0.7310374577611618, + "kl_loss": 0.06258879601955414, + "loss_ib": 0.0009424623567610979, + "step": 2542 + }, + { + "ce_ib": 5.270100116729736, + "ce_orig": 1.0970340967178345, + "epoch": 0.7310374577611618, + "kl_loss": 0.07337362319231033, + "loss_ib": 0.0012607462704181671, + "step": 2542 + }, + { + "ce_ib": 4.901740550994873, + "ce_orig": 1.1273388862609863, + "epoch": 0.7310374577611618, + "kl_loss": 0.059101901948451996, + "loss_ib": 0.0010811930987983942, + "step": 2542 + }, + { + "ce_ib": 4.030134201049805, + "ce_orig": 0.9962092638015747, + "epoch": 0.7313250413401394, + "kl_loss": 0.048031821846961975, + "loss_ib": 0.0008833315805532038, + "step": 2543 + }, + { + "ce_ib": 5.651214599609375, + "ce_orig": 1.0323526859283447, + "epoch": 0.7313250413401394, + "kl_loss": 0.07755300402641296, + "loss_ib": 0.0013406515354290605, + "step": 2543 + }, + { + "ce_ib": 3.0062978267669678, + "ce_orig": 0.5879145860671997, + "epoch": 0.7313250413401394, + "kl_loss": 0.05917463079094887, + "loss_ib": 0.0008923760615289211, + "step": 2543 + }, + { + "ce_ib": 3.8631248474121094, + "ce_orig": 1.0137938261032104, + "epoch": 0.7313250413401394, + "kl_loss": 0.05446445941925049, + "loss_ib": 0.0009309570305049419, + "step": 2543 + }, + { + "ce_ib": 2.6432442665100098, + "ce_orig": 0.7073929905891418, + "epoch": 0.7316126249191172, + "kl_loss": 0.04050665721297264, + "loss_ib": 0.0006693910108879209, + "step": 2544 + }, + { + "ce_ib": 1.7110036611557007, + "ce_orig": 0.4046548008918762, + "epoch": 0.7316126249191172, + "kl_loss": 0.06754639744758606, + "loss_ib": 0.0008465643040835857, + "step": 2544 + }, + { + "ce_ib": 2.008786678314209, + "ce_orig": 0.36904439330101013, + "epoch": 0.7316126249191172, + "kl_loss": 0.03545650094747543, + "loss_ib": 0.0005554436938837171, + "step": 2544 + }, + { + "ce_ib": 3.720088481903076, + "ce_orig": 0.6521130204200745, + "epoch": 0.7316126249191172, + "kl_loss": 0.04659423604607582, + "loss_ib": 0.0008379511418752372, + "step": 2544 + }, + { + "epoch": 0.7319002084980948, + "grad_norm": 0.09929618239402771, + "learning_rate": 4.4269754983981703e-05, + "loss": 0.824, + "step": 2545 + }, + { + "ce_ib": 3.2564616203308105, + "ce_orig": 0.4623085558414459, + "epoch": 0.7319002084980948, + "kl_loss": 0.0466461256146431, + "loss_ib": 0.0007921074284240603, + "step": 2545 + }, + { + "ce_ib": 3.1405532360076904, + "ce_orig": 0.527510941028595, + "epoch": 0.7319002084980948, + "kl_loss": 0.07201611995697021, + "loss_ib": 0.0010342164896428585, + "step": 2545 + }, + { + "ce_ib": 4.333597183227539, + "ce_orig": 1.0804507732391357, + "epoch": 0.7319002084980948, + "kl_loss": 0.08598098158836365, + "loss_ib": 0.001293169567361474, + "step": 2545 + }, + { + "ce_ib": 7.329817771911621, + "ce_orig": 1.6696662902832031, + "epoch": 0.7319002084980948, + "kl_loss": 0.09173433482646942, + "loss_ib": 0.001650325022637844, + "step": 2545 + }, + { + "ce_ib": 3.996683120727539, + "ce_orig": 1.2160428762435913, + "epoch": 0.7321877920770724, + "kl_loss": 0.05477648228406906, + "loss_ib": 0.0009474331163801253, + "step": 2546 + }, + { + "ce_ib": 5.588686943054199, + "ce_orig": 1.0167205333709717, + "epoch": 0.7321877920770724, + "kl_loss": 0.06957874447107315, + "loss_ib": 0.0012546561192721128, + "step": 2546 + }, + { + "ce_ib": 4.288455009460449, + "ce_orig": 0.8143596649169922, + "epoch": 0.7321877920770724, + "kl_loss": 0.0386275053024292, + "loss_ib": 0.0008151205256581306, + "step": 2546 + }, + { + "ce_ib": 5.984123229980469, + "ce_orig": 1.473110556602478, + "epoch": 0.7321877920770724, + "kl_loss": 0.08851215988397598, + "loss_ib": 0.0014835339970886707, + "step": 2546 + }, + { + "ce_ib": 2.7125332355499268, + "ce_orig": 0.6408149003982544, + "epoch": 0.73247537565605, + "kl_loss": 0.04711627587676048, + "loss_ib": 0.0007424160721711814, + "step": 2547 + }, + { + "ce_ib": 4.070252418518066, + "ce_orig": 0.7372927069664001, + "epoch": 0.73247537565605, + "kl_loss": 0.06026851013302803, + "loss_ib": 0.001009710249491036, + "step": 2547 + }, + { + "ce_ib": 2.7643821239471436, + "ce_orig": 0.5099959373474121, + "epoch": 0.73247537565605, + "kl_loss": 0.05726715922355652, + "loss_ib": 0.0008491097833029926, + "step": 2547 + }, + { + "ce_ib": 4.513518810272217, + "ce_orig": 1.0415478944778442, + "epoch": 0.73247537565605, + "kl_loss": 0.06817103177309036, + "loss_ib": 0.0011330621782690287, + "step": 2547 + }, + { + "ce_ib": 5.840067386627197, + "ce_orig": 1.5950075387954712, + "epoch": 0.7327629592350277, + "kl_loss": 0.07351769506931305, + "loss_ib": 0.0013191837351769209, + "step": 2548 + }, + { + "ce_ib": 2.9431662559509277, + "ce_orig": 0.4615962505340576, + "epoch": 0.7327629592350277, + "kl_loss": 0.05576659366488457, + "loss_ib": 0.0008519825059920549, + "step": 2548 + }, + { + "ce_ib": 3.9676501750946045, + "ce_orig": 0.7848939299583435, + "epoch": 0.7327629592350277, + "kl_loss": 0.08531592786312103, + "loss_ib": 0.0012499243021011353, + "step": 2548 + }, + { + "ce_ib": 3.079287052154541, + "ce_orig": 0.7816784381866455, + "epoch": 0.7327629592350277, + "kl_loss": 0.060674503445625305, + "loss_ib": 0.000914673728402704, + "step": 2548 + }, + { + "ce_ib": 5.973537921905518, + "ce_orig": 0.7702679634094238, + "epoch": 0.7330505428140053, + "kl_loss": 0.14056363701820374, + "loss_ib": 0.0020029901061207056, + "step": 2549 + }, + { + "ce_ib": 3.8791184425354004, + "ce_orig": 1.0739803314208984, + "epoch": 0.7330505428140053, + "kl_loss": 0.24936170876026154, + "loss_ib": 0.0028815290424972773, + "step": 2549 + }, + { + "ce_ib": 4.772655963897705, + "ce_orig": 1.0986201763153076, + "epoch": 0.7330505428140053, + "kl_loss": 0.0701749324798584, + "loss_ib": 0.0011790149146690965, + "step": 2549 + }, + { + "ce_ib": 3.941453456878662, + "ce_orig": 0.9522497057914734, + "epoch": 0.7330505428140053, + "kl_loss": 0.056413814425468445, + "loss_ib": 0.000958283431828022, + "step": 2549 + }, + { + "epoch": 0.733338126392983, + "grad_norm": 0.09787895530462265, + "learning_rate": 4.42450100136369e-05, + "loss": 0.8713, + "step": 2550 + }, + { + "ce_ib": 2.8441720008850098, + "ce_orig": 0.6472594141960144, + "epoch": 0.733338126392983, + "kl_loss": 0.05808237940073013, + "loss_ib": 0.0008652409887872636, + "step": 2550 + }, + { + "ce_ib": 1.6997954845428467, + "ce_orig": 0.43152570724487305, + "epoch": 0.733338126392983, + "kl_loss": 0.23404395580291748, + "loss_ib": 0.002510419115424156, + "step": 2550 + }, + { + "ce_ib": 6.555705547332764, + "ce_orig": 0.9530043601989746, + "epoch": 0.733338126392983, + "kl_loss": 0.17079365253448486, + "loss_ib": 0.0023635071702301502, + "step": 2550 + }, + { + "ce_ib": 4.246199131011963, + "ce_orig": 0.9307243824005127, + "epoch": 0.733338126392983, + "kl_loss": 0.0677059218287468, + "loss_ib": 0.0011016790522262454, + "step": 2550 + }, + { + "ce_ib": 5.912776947021484, + "ce_orig": 1.4946506023406982, + "epoch": 0.7336257099719606, + "kl_loss": 0.05873428285121918, + "loss_ib": 0.001178620383143425, + "step": 2551 + }, + { + "ce_ib": 4.31003475189209, + "ce_orig": 0.6804960370063782, + "epoch": 0.7336257099719606, + "kl_loss": 0.04811407998204231, + "loss_ib": 0.0009121442562900484, + "step": 2551 + }, + { + "ce_ib": 3.842656373977661, + "ce_orig": 0.6807820200920105, + "epoch": 0.7336257099719606, + "kl_loss": 0.04172481223940849, + "loss_ib": 0.0008015137864276767, + "step": 2551 + }, + { + "ce_ib": 3.1441309452056885, + "ce_orig": 0.6264840960502625, + "epoch": 0.7336257099719606, + "kl_loss": 0.07925839722156525, + "loss_ib": 0.0011069970205426216, + "step": 2551 + }, + { + "ce_ib": 7.677999496459961, + "ce_orig": 1.3504799604415894, + "epoch": 0.7339132935509383, + "kl_loss": 0.08213654160499573, + "loss_ib": 0.0015891653019934893, + "step": 2552 + }, + { + "ce_ib": 4.517632007598877, + "ce_orig": 0.8050701022148132, + "epoch": 0.7339132935509383, + "kl_loss": 0.09229297935962677, + "loss_ib": 0.00137469288893044, + "step": 2552 + }, + { + "ce_ib": 4.303158760070801, + "ce_orig": 1.0559715032577515, + "epoch": 0.7339132935509383, + "kl_loss": 0.05268782377243042, + "loss_ib": 0.0009571940754540265, + "step": 2552 + }, + { + "ce_ib": 4.631587982177734, + "ce_orig": 0.9159388542175293, + "epoch": 0.7339132935509383, + "kl_loss": 0.05625476315617561, + "loss_ib": 0.0010257064132019877, + "step": 2552 + }, + { + "ce_ib": 3.7719757556915283, + "ce_orig": 0.5006332993507385, + "epoch": 0.7342008771299159, + "kl_loss": 0.08303138613700867, + "loss_ib": 0.001207511406391859, + "step": 2553 + }, + { + "ce_ib": 4.950904846191406, + "ce_orig": 1.0260454416275024, + "epoch": 0.7342008771299159, + "kl_loss": 0.16311413049697876, + "loss_ib": 0.0021262317895889282, + "step": 2553 + }, + { + "ce_ib": 5.1417083740234375, + "ce_orig": 0.9811510443687439, + "epoch": 0.7342008771299159, + "kl_loss": 0.0547034926712513, + "loss_ib": 0.001061205635778606, + "step": 2553 + }, + { + "ce_ib": 4.528010845184326, + "ce_orig": 0.8814241290092468, + "epoch": 0.7342008771299159, + "kl_loss": 0.042763009667396545, + "loss_ib": 0.0008804311510175467, + "step": 2553 + }, + { + "ce_ib": 4.0052809715271, + "ce_orig": 1.1545820236206055, + "epoch": 0.7344884607088935, + "kl_loss": 0.08012805879116058, + "loss_ib": 0.0012018086854368448, + "step": 2554 + }, + { + "ce_ib": 2.8548014163970947, + "ce_orig": 0.7047145962715149, + "epoch": 0.7344884607088935, + "kl_loss": 0.04570944607257843, + "loss_ib": 0.0007425746298395097, + "step": 2554 + }, + { + "ce_ib": 5.8232741355896, + "ce_orig": 1.1542556285858154, + "epoch": 0.7344884607088935, + "kl_loss": 0.10706064105033875, + "loss_ib": 0.0016529337735846639, + "step": 2554 + }, + { + "ce_ib": 2.5376627445220947, + "ce_orig": 0.23005487024784088, + "epoch": 0.7344884607088935, + "kl_loss": 0.04527690261602402, + "loss_ib": 0.0007065352401696146, + "step": 2554 + }, + { + "epoch": 0.7347760442878711, + "grad_norm": 0.09861506521701813, + "learning_rate": 4.422021867759611e-05, + "loss": 0.8424, + "step": 2555 + }, + { + "ce_ib": 8.227801322937012, + "ce_orig": 2.0940463542938232, + "epoch": 0.7347760442878711, + "kl_loss": 0.08830320835113525, + "loss_ib": 0.0017058121738955379, + "step": 2555 + }, + { + "ce_ib": 3.8228583335876465, + "ce_orig": 0.9579325318336487, + "epoch": 0.7347760442878711, + "kl_loss": 0.07221651077270508, + "loss_ib": 0.0011044509010389447, + "step": 2555 + }, + { + "ce_ib": 3.923677682876587, + "ce_orig": 0.9961575269699097, + "epoch": 0.7347760442878711, + "kl_loss": 0.058890555053949356, + "loss_ib": 0.0009812733624130487, + "step": 2555 + }, + { + "ce_ib": 3.6888082027435303, + "ce_orig": 0.5747889280319214, + "epoch": 0.7347760442878711, + "kl_loss": 0.12291692197322845, + "loss_ib": 0.001598050002939999, + "step": 2555 + }, + { + "ce_ib": 8.319635391235352, + "ce_orig": 2.0619654655456543, + "epoch": 0.7350636278668488, + "kl_loss": 0.06655091792345047, + "loss_ib": 0.0014974726364016533, + "step": 2556 + }, + { + "ce_ib": 1.317051649093628, + "ce_orig": 0.2455514818429947, + "epoch": 0.7350636278668488, + "kl_loss": 0.12527576088905334, + "loss_ib": 0.0013844626955688, + "step": 2556 + }, + { + "ce_ib": 2.75348162651062, + "ce_orig": 0.7281994223594666, + "epoch": 0.7350636278668488, + "kl_loss": 0.034132808446884155, + "loss_ib": 0.0006166762323118746, + "step": 2556 + }, + { + "ce_ib": 4.277116298675537, + "ce_orig": 0.6569865345954895, + "epoch": 0.7350636278668488, + "kl_loss": 0.055891282856464386, + "loss_ib": 0.0009866243926808238, + "step": 2556 + }, + { + "ce_ib": 2.6081125736236572, + "ce_orig": 0.5122495889663696, + "epoch": 0.7353512114458265, + "kl_loss": 0.05083637312054634, + "loss_ib": 0.0007691749488003552, + "step": 2557 + }, + { + "ce_ib": 3.503298282623291, + "ce_orig": 0.5880540609359741, + "epoch": 0.7353512114458265, + "kl_loss": 0.054999593645334244, + "loss_ib": 0.0009003257728181779, + "step": 2557 + }, + { + "ce_ib": 3.4818220138549805, + "ce_orig": 0.8023570775985718, + "epoch": 0.7353512114458265, + "kl_loss": 0.05696990340948105, + "loss_ib": 0.0009178812615573406, + "step": 2557 + }, + { + "ce_ib": 4.931515693664551, + "ce_orig": 1.4178873300552368, + "epoch": 0.7353512114458265, + "kl_loss": 0.061086829751729965, + "loss_ib": 0.0011040198151022196, + "step": 2557 + }, + { + "ce_ib": 3.265125274658203, + "ce_orig": 0.34762704372406006, + "epoch": 0.7356387950248041, + "kl_loss": 0.17680853605270386, + "loss_ib": 0.002094597788527608, + "step": 2558 + }, + { + "ce_ib": 4.012164115905762, + "ce_orig": 0.6525203585624695, + "epoch": 0.7356387950248041, + "kl_loss": 0.06207481771707535, + "loss_ib": 0.0010219644755125046, + "step": 2558 + }, + { + "ce_ib": 1.522469162940979, + "ce_orig": 0.37318751215934753, + "epoch": 0.7356387950248041, + "kl_loss": 0.033336102962493896, + "loss_ib": 0.0004856079467572272, + "step": 2558 + }, + { + "ce_ib": 5.909492492675781, + "ce_orig": 0.8291534185409546, + "epoch": 0.7356387950248041, + "kl_loss": 0.08296653628349304, + "loss_ib": 0.00142061454243958, + "step": 2558 + }, + { + "ce_ib": 4.431573867797852, + "ce_orig": 0.9499626159667969, + "epoch": 0.7359263786037817, + "kl_loss": 0.12914985418319702, + "loss_ib": 0.0017346559325233102, + "step": 2559 + }, + { + "ce_ib": 4.5401153564453125, + "ce_orig": 1.130836009979248, + "epoch": 0.7359263786037817, + "kl_loss": 0.06199537590146065, + "loss_ib": 0.0010739652207121253, + "step": 2559 + }, + { + "ce_ib": 5.864817142486572, + "ce_orig": 1.1889883279800415, + "epoch": 0.7359263786037817, + "kl_loss": 0.07133369892835617, + "loss_ib": 0.0012998187448829412, + "step": 2559 + }, + { + "ce_ib": 4.476242542266846, + "ce_orig": 0.9124060869216919, + "epoch": 0.7359263786037817, + "kl_loss": 0.037031665444374084, + "loss_ib": 0.0008179408614523709, + "step": 2559 + }, + { + "epoch": 0.7362139621827594, + "grad_norm": 0.11245033890008926, + "learning_rate": 4.419538103558742e-05, + "loss": 0.8403, + "step": 2560 + }, + { + "ce_ib": 5.7571492195129395, + "ce_orig": 1.244217038154602, + "epoch": 0.7362139621827594, + "kl_loss": 0.09752306342124939, + "loss_ib": 0.0015509454533457756, + "step": 2560 + }, + { + "ce_ib": 2.7707252502441406, + "ce_orig": 0.7412793040275574, + "epoch": 0.7362139621827594, + "kl_loss": 0.13195906579494476, + "loss_ib": 0.0015966631472110748, + "step": 2560 + }, + { + "ce_ib": 6.769991874694824, + "ce_orig": 1.515411615371704, + "epoch": 0.7362139621827594, + "kl_loss": 0.08191555738449097, + "loss_ib": 0.0014961545821279287, + "step": 2560 + }, + { + "ce_ib": 2.7577216625213623, + "ce_orig": 0.6849924325942993, + "epoch": 0.7362139621827594, + "kl_loss": 0.0532384067773819, + "loss_ib": 0.0008081562118604779, + "step": 2560 + }, + { + "ce_ib": 5.537027359008789, + "ce_orig": 1.0563477277755737, + "epoch": 0.736501545761737, + "kl_loss": 0.17213504016399384, + "loss_ib": 0.0022750531788915396, + "step": 2561 + }, + { + "ce_ib": 4.857758522033691, + "ce_orig": 1.032212495803833, + "epoch": 0.736501545761737, + "kl_loss": 0.07222697138786316, + "loss_ib": 0.001208045519888401, + "step": 2561 + }, + { + "ce_ib": 3.7976951599121094, + "ce_orig": 0.7961548566818237, + "epoch": 0.736501545761737, + "kl_loss": 0.0969441682100296, + "loss_ib": 0.0013492112047970295, + "step": 2561 + }, + { + "ce_ib": 2.8113341331481934, + "ce_orig": 0.28618955612182617, + "epoch": 0.736501545761737, + "kl_loss": 0.07314255833625793, + "loss_ib": 0.001012558932416141, + "step": 2561 + }, + { + "ce_ib": 3.5820844173431396, + "ce_orig": 0.41708695888519287, + "epoch": 0.7367891293407146, + "kl_loss": 0.08633735775947571, + "loss_ib": 0.0012215819442644715, + "step": 2562 + }, + { + "ce_ib": 2.447361707687378, + "ce_orig": 0.2716906666755676, + "epoch": 0.7367891293407146, + "kl_loss": 0.057870104908943176, + "loss_ib": 0.000823437178041786, + "step": 2562 + }, + { + "ce_ib": 3.892979860305786, + "ce_orig": 0.7424241900444031, + "epoch": 0.7367891293407146, + "kl_loss": 0.06859511137008667, + "loss_ib": 0.0010752490488812327, + "step": 2562 + }, + { + "ce_ib": 2.5121355056762695, + "ce_orig": 0.540684700012207, + "epoch": 0.7367891293407146, + "kl_loss": 0.05600199103355408, + "loss_ib": 0.0008112334180623293, + "step": 2562 + }, + { + "ce_ib": 3.4656693935394287, + "ce_orig": 0.8735074996948242, + "epoch": 0.7370767129196922, + "kl_loss": 0.058187589049339294, + "loss_ib": 0.0009284427505917847, + "step": 2563 + }, + { + "ce_ib": 6.377230167388916, + "ce_orig": 1.6977349519729614, + "epoch": 0.7370767129196922, + "kl_loss": 0.08055483549833298, + "loss_ib": 0.001443271292373538, + "step": 2563 + }, + { + "ce_ib": 2.9206583499908447, + "ce_orig": 0.5117824673652649, + "epoch": 0.7370767129196922, + "kl_loss": 0.07159797847270966, + "loss_ib": 0.0010080456268042326, + "step": 2563 + }, + { + "ce_ib": 2.8847920894622803, + "ce_orig": 0.41876980662345886, + "epoch": 0.7370767129196922, + "kl_loss": 0.07366097718477249, + "loss_ib": 0.0010250889463350177, + "step": 2563 + }, + { + "ce_ib": 3.2115819454193115, + "ce_orig": 0.7020355463027954, + "epoch": 0.73736429649867, + "kl_loss": 0.06432952731847763, + "loss_ib": 0.0009644534438848495, + "step": 2564 + }, + { + "ce_ib": 4.622864246368408, + "ce_orig": 1.2939435243606567, + "epoch": 0.73736429649867, + "kl_loss": 0.05433636158704758, + "loss_ib": 0.001005650032311678, + "step": 2564 + }, + { + "ce_ib": 5.136824131011963, + "ce_orig": 0.7429656982421875, + "epoch": 0.73736429649867, + "kl_loss": 0.07212339341640472, + "loss_ib": 0.0012349162716418505, + "step": 2564 + }, + { + "ce_ib": 5.215304851531982, + "ce_orig": 0.7838786840438843, + "epoch": 0.73736429649867, + "kl_loss": 0.07147246599197388, + "loss_ib": 0.001236255164258182, + "step": 2564 + }, + { + "epoch": 0.7376518800776476, + "grad_norm": 0.1103733628988266, + "learning_rate": 4.417049714745047e-05, + "loss": 0.8205, + "step": 2565 + }, + { + "ce_ib": 4.084223747253418, + "ce_orig": 0.8694925904273987, + "epoch": 0.7376518800776476, + "kl_loss": 0.08693473041057587, + "loss_ib": 0.0012777696829289198, + "step": 2565 + }, + { + "ce_ib": 5.941091537475586, + "ce_orig": 1.087989091873169, + "epoch": 0.7376518800776476, + "kl_loss": 0.05627686530351639, + "loss_ib": 0.001156877726316452, + "step": 2565 + }, + { + "ce_ib": 3.302229166030884, + "ce_orig": 0.5513265132904053, + "epoch": 0.7376518800776476, + "kl_loss": 0.06972073018550873, + "loss_ib": 0.0010274301748722792, + "step": 2565 + }, + { + "ce_ib": 2.305845260620117, + "ce_orig": 0.5409442782402039, + "epoch": 0.7376518800776476, + "kl_loss": 0.04515703022480011, + "loss_ib": 0.000682154786773026, + "step": 2565 + }, + { + "ce_ib": 4.1865973472595215, + "ce_orig": 1.0048584938049316, + "epoch": 0.7379394636566252, + "kl_loss": 0.08558057993650436, + "loss_ib": 0.001274465466849506, + "step": 2566 + }, + { + "ce_ib": 2.730767250061035, + "ce_orig": 0.488731324672699, + "epoch": 0.7379394636566252, + "kl_loss": 0.06614499539136887, + "loss_ib": 0.0009345266735181212, + "step": 2566 + }, + { + "ce_ib": 2.3672356605529785, + "ce_orig": 0.609944224357605, + "epoch": 0.7379394636566252, + "kl_loss": 0.04035130515694618, + "loss_ib": 0.0006402365979738533, + "step": 2566 + }, + { + "ce_ib": 4.1651787757873535, + "ce_orig": 0.9854905605316162, + "epoch": 0.7379394636566252, + "kl_loss": 0.07628787308931351, + "loss_ib": 0.0011793965240940452, + "step": 2566 + }, + { + "ce_ib": 4.445311546325684, + "ce_orig": 0.4307897686958313, + "epoch": 0.7382270472356028, + "kl_loss": 0.07912839949131012, + "loss_ib": 0.0012358151143416762, + "step": 2567 + }, + { + "ce_ib": 3.0400032997131348, + "ce_orig": 0.8194936513900757, + "epoch": 0.7382270472356028, + "kl_loss": 0.06948184221982956, + "loss_ib": 0.0009988186648115516, + "step": 2567 + }, + { + "ce_ib": 5.74213171005249, + "ce_orig": 1.024618148803711, + "epoch": 0.7382270472356028, + "kl_loss": 0.1232074648141861, + "loss_ib": 0.001806287677027285, + "step": 2567 + }, + { + "ce_ib": 3.556917667388916, + "ce_orig": 1.0424631834030151, + "epoch": 0.7382270472356028, + "kl_loss": 0.0525040328502655, + "loss_ib": 0.0008807320846244693, + "step": 2567 + }, + { + "ce_ib": 4.570929050445557, + "ce_orig": 1.2142177820205688, + "epoch": 0.7385146308145805, + "kl_loss": 0.057305578142404556, + "loss_ib": 0.0010301487054675817, + "step": 2568 + }, + { + "ce_ib": 4.238239288330078, + "ce_orig": 0.6969324350357056, + "epoch": 0.7385146308145805, + "kl_loss": 0.07041943073272705, + "loss_ib": 0.0011280181352049112, + "step": 2568 + }, + { + "ce_ib": 4.418795585632324, + "ce_orig": 0.8112379908561707, + "epoch": 0.7385146308145805, + "kl_loss": 0.059337861835956573, + "loss_ib": 0.001035258173942566, + "step": 2568 + }, + { + "ce_ib": 4.479321479797363, + "ce_orig": 0.7419564127922058, + "epoch": 0.7385146308145805, + "kl_loss": 0.09061938524246216, + "loss_ib": 0.001354126026853919, + "step": 2568 + }, + { + "ce_ib": 4.315573692321777, + "ce_orig": 0.8469096422195435, + "epoch": 0.7388022143935581, + "kl_loss": 0.05162046477198601, + "loss_ib": 0.0009477619896642864, + "step": 2569 + }, + { + "ce_ib": 3.4812068939208984, + "ce_orig": 0.6418944001197815, + "epoch": 0.7388022143935581, + "kl_loss": 0.04930641129612923, + "loss_ib": 0.0008411847520619631, + "step": 2569 + }, + { + "ce_ib": 4.878547668457031, + "ce_orig": 1.046449899673462, + "epoch": 0.7388022143935581, + "kl_loss": 0.06787554919719696, + "loss_ib": 0.0011666102800518274, + "step": 2569 + }, + { + "ce_ib": 2.636530876159668, + "ce_orig": 0.6198428869247437, + "epoch": 0.7388022143935581, + "kl_loss": 0.04823814332485199, + "loss_ib": 0.0007460344932042062, + "step": 2569 + }, + { + "epoch": 0.7390897979725358, + "grad_norm": 0.09944455325603485, + "learning_rate": 4.4145567073136324e-05, + "loss": 0.8119, + "step": 2570 + }, + { + "ce_ib": 6.547268390655518, + "ce_orig": 1.3930197954177856, + "epoch": 0.7390897979725358, + "kl_loss": 0.10683184117078781, + "loss_ib": 0.0017230452504009008, + "step": 2570 + }, + { + "ce_ib": 3.558889865875244, + "ce_orig": 0.47801288962364197, + "epoch": 0.7390897979725358, + "kl_loss": 0.0892530232667923, + "loss_ib": 0.0012484191684052348, + "step": 2570 + }, + { + "ce_ib": 4.967440605163574, + "ce_orig": 1.0234252214431763, + "epoch": 0.7390897979725358, + "kl_loss": 0.037432536482810974, + "loss_ib": 0.000871069438289851, + "step": 2570 + }, + { + "ce_ib": 4.00338888168335, + "ce_orig": 0.9321321845054626, + "epoch": 0.7390897979725358, + "kl_loss": 0.07252911478281021, + "loss_ib": 0.0011256299912929535, + "step": 2570 + }, + { + "ce_ib": 2.5859503746032715, + "ce_orig": 0.7844471335411072, + "epoch": 0.7393773815515134, + "kl_loss": 0.044905394315719604, + "loss_ib": 0.0007076489855535328, + "step": 2571 + }, + { + "ce_ib": 2.6647121906280518, + "ce_orig": 0.4469383656978607, + "epoch": 0.7393773815515134, + "kl_loss": 0.05427014082670212, + "loss_ib": 0.000809172575827688, + "step": 2571 + }, + { + "ce_ib": 4.9980316162109375, + "ce_orig": 1.25248122215271, + "epoch": 0.7393773815515134, + "kl_loss": 0.08325157314538956, + "loss_ib": 0.001332318875938654, + "step": 2571 + }, + { + "ce_ib": 3.030717134475708, + "ce_orig": 0.49566319584846497, + "epoch": 0.7393773815515134, + "kl_loss": 0.04390109330415726, + "loss_ib": 0.0007420826004818082, + "step": 2571 + }, + { + "ce_ib": 4.032922744750977, + "ce_orig": 0.9610474705696106, + "epoch": 0.7396649651304911, + "kl_loss": 0.08107848465442657, + "loss_ib": 0.0012140771141275764, + "step": 2572 + }, + { + "ce_ib": 5.178061485290527, + "ce_orig": 1.2563475370407104, + "epoch": 0.7396649651304911, + "kl_loss": 0.06172291934490204, + "loss_ib": 0.0011350353015586734, + "step": 2572 + }, + { + "ce_ib": 5.858159065246582, + "ce_orig": 0.9335759878158569, + "epoch": 0.7396649651304911, + "kl_loss": 0.08625059574842453, + "loss_ib": 0.0014483218546956778, + "step": 2572 + }, + { + "ce_ib": 3.545172929763794, + "ce_orig": 0.40578415989875793, + "epoch": 0.7396649651304911, + "kl_loss": 0.06368565559387207, + "loss_ib": 0.0009913737885653973, + "step": 2572 + }, + { + "ce_ib": 7.312714576721191, + "ce_orig": 1.7574609518051147, + "epoch": 0.7399525487094687, + "kl_loss": 0.08803218603134155, + "loss_ib": 0.0016115932958200574, + "step": 2573 + }, + { + "ce_ib": 3.248394250869751, + "ce_orig": 0.545518696308136, + "epoch": 0.7399525487094687, + "kl_loss": 0.07260354608297348, + "loss_ib": 0.001050874823704362, + "step": 2573 + }, + { + "ce_ib": 4.671828746795654, + "ce_orig": 0.8100747466087341, + "epoch": 0.7399525487094687, + "kl_loss": 0.05996207892894745, + "loss_ib": 0.001066803582943976, + "step": 2573 + }, + { + "ce_ib": 4.250085353851318, + "ce_orig": 0.8881317973136902, + "epoch": 0.7399525487094687, + "kl_loss": 0.060156822204589844, + "loss_ib": 0.001026576734147966, + "step": 2573 + }, + { + "ce_ib": 4.550487995147705, + "ce_orig": 1.1256704330444336, + "epoch": 0.7402401322884463, + "kl_loss": 0.06329309195280075, + "loss_ib": 0.0010879796463996172, + "step": 2574 + }, + { + "ce_ib": 5.72885799407959, + "ce_orig": 1.3765617609024048, + "epoch": 0.7402401322884463, + "kl_loss": 0.07648453116416931, + "loss_ib": 0.00133773114066571, + "step": 2574 + }, + { + "ce_ib": 2.6893646717071533, + "ce_orig": 0.6395198106765747, + "epoch": 0.7402401322884463, + "kl_loss": 0.07181580364704132, + "loss_ib": 0.000987094477750361, + "step": 2574 + }, + { + "ce_ib": 5.057989597320557, + "ce_orig": 1.0695439577102661, + "epoch": 0.7402401322884463, + "kl_loss": 0.08865334093570709, + "loss_ib": 0.001392332254908979, + "step": 2574 + }, + { + "epoch": 0.7405277158674239, + "grad_norm": 0.09706132113933563, + "learning_rate": 4.412059087270732e-05, + "loss": 0.8644, + "step": 2575 + }, + { + "ce_ib": 7.072035789489746, + "ce_orig": 1.3882551193237305, + "epoch": 0.7405277158674239, + "kl_loss": 0.061442017555236816, + "loss_ib": 0.0013216238003224134, + "step": 2575 + }, + { + "ce_ib": 3.028895378112793, + "ce_orig": 0.8933044075965881, + "epoch": 0.7405277158674239, + "kl_loss": 0.040329694747924805, + "loss_ib": 0.0007061864016577601, + "step": 2575 + }, + { + "ce_ib": 4.459721565246582, + "ce_orig": 0.45581740140914917, + "epoch": 0.7405277158674239, + "kl_loss": 0.08267170190811157, + "loss_ib": 0.0012726890854537487, + "step": 2575 + }, + { + "ce_ib": 3.543400287628174, + "ce_orig": 0.6595332622528076, + "epoch": 0.7405277158674239, + "kl_loss": 0.07624059915542603, + "loss_ib": 0.0011167459888383746, + "step": 2575 + }, + { + "ce_ib": 1.8442020416259766, + "ce_orig": 0.31498345732688904, + "epoch": 0.7408152994464016, + "kl_loss": 0.08847532421350479, + "loss_ib": 0.0010691734496504068, + "step": 2576 + }, + { + "ce_ib": 2.7261159420013428, + "ce_orig": 0.6391726732254028, + "epoch": 0.7408152994464016, + "kl_loss": 0.05965297669172287, + "loss_ib": 0.0008691413677297533, + "step": 2576 + }, + { + "ce_ib": 3.8739564418792725, + "ce_orig": 0.4357936978340149, + "epoch": 0.7408152994464016, + "kl_loss": 0.10875240713357925, + "loss_ib": 0.001474919612519443, + "step": 2576 + }, + { + "ce_ib": 4.431300640106201, + "ce_orig": 1.3889539241790771, + "epoch": 0.7408152994464016, + "kl_loss": 0.0767272561788559, + "loss_ib": 0.001210402580909431, + "step": 2576 + }, + { + "ce_ib": 3.3789772987365723, + "ce_orig": 0.5575844049453735, + "epoch": 0.7411028830253793, + "kl_loss": 0.06805503368377686, + "loss_ib": 0.0010184480343014002, + "step": 2577 + }, + { + "ce_ib": 2.547501802444458, + "ce_orig": 0.5416218638420105, + "epoch": 0.7411028830253793, + "kl_loss": 0.06306931376457214, + "loss_ib": 0.0008854432962834835, + "step": 2577 + }, + { + "ce_ib": 4.57963228225708, + "ce_orig": 1.1833187341690063, + "epoch": 0.7411028830253793, + "kl_loss": 0.04787059873342514, + "loss_ib": 0.0009366692393086851, + "step": 2577 + }, + { + "ce_ib": 3.6324610710144043, + "ce_orig": 0.7624838352203369, + "epoch": 0.7411028830253793, + "kl_loss": 0.04670936241745949, + "loss_ib": 0.0008303396753035486, + "step": 2577 + }, + { + "ce_ib": 3.9461944103240967, + "ce_orig": 0.9357620477676392, + "epoch": 0.7413904666043569, + "kl_loss": 0.04995229095220566, + "loss_ib": 0.0008941423729993403, + "step": 2578 + }, + { + "ce_ib": 3.008882999420166, + "ce_orig": 0.5856472849845886, + "epoch": 0.7413904666043569, + "kl_loss": 0.07247543334960938, + "loss_ib": 0.0010256426176056266, + "step": 2578 + }, + { + "ce_ib": 3.922748565673828, + "ce_orig": 0.865063488483429, + "epoch": 0.7413904666043569, + "kl_loss": 0.050635240972042084, + "loss_ib": 0.000898627273272723, + "step": 2578 + }, + { + "ce_ib": 2.9783823490142822, + "ce_orig": 0.6468735933303833, + "epoch": 0.7413904666043569, + "kl_loss": 0.049869243055582047, + "loss_ib": 0.0007965306867845356, + "step": 2578 + }, + { + "ce_ib": 2.860308885574341, + "ce_orig": 0.6965271830558777, + "epoch": 0.7416780501833345, + "kl_loss": 0.07002636045217514, + "loss_ib": 0.0009862944716587663, + "step": 2579 + }, + { + "ce_ib": 3.302278995513916, + "ce_orig": 0.6081038117408752, + "epoch": 0.7416780501833345, + "kl_loss": 0.0667794942855835, + "loss_ib": 0.0009980228496715426, + "step": 2579 + }, + { + "ce_ib": 2.956879138946533, + "ce_orig": 0.5213910341262817, + "epoch": 0.7416780501833345, + "kl_loss": 0.06289583444595337, + "loss_ib": 0.0009246462141163647, + "step": 2579 + }, + { + "ce_ib": 3.2453157901763916, + "ce_orig": 0.939166784286499, + "epoch": 0.7416780501833345, + "kl_loss": 0.04900698363780975, + "loss_ib": 0.0008146013715304434, + "step": 2579 + }, + { + "epoch": 0.7419656337623122, + "grad_norm": 0.09891051054000854, + "learning_rate": 4.409556860633692e-05, + "loss": 0.8378, + "step": 2580 + }, + { + "ce_ib": 4.379268169403076, + "ce_orig": 0.8012250661849976, + "epoch": 0.7419656337623122, + "kl_loss": 0.07369227707386017, + "loss_ib": 0.0011748495744541287, + "step": 2580 + }, + { + "ce_ib": 3.56805682182312, + "ce_orig": 0.9024236798286438, + "epoch": 0.7419656337623122, + "kl_loss": 0.111238494515419, + "loss_ib": 0.001469190581701696, + "step": 2580 + }, + { + "ce_ib": 2.836031913757324, + "ce_orig": 0.585896372795105, + "epoch": 0.7419656337623122, + "kl_loss": 0.04480822756886482, + "loss_ib": 0.0007316854316741228, + "step": 2580 + }, + { + "ce_ib": 2.950778007507324, + "ce_orig": 0.6733917593955994, + "epoch": 0.7419656337623122, + "kl_loss": 0.02699289843440056, + "loss_ib": 0.0005650067469105124, + "step": 2580 + }, + { + "ce_ib": 2.5744376182556152, + "ce_orig": 0.4790117144584656, + "epoch": 0.7422532173412898, + "kl_loss": 0.03391452133655548, + "loss_ib": 0.0005965889431536198, + "step": 2581 + }, + { + "ce_ib": 5.160916805267334, + "ce_orig": 0.8047636151313782, + "epoch": 0.7422532173412898, + "kl_loss": 0.05981285870075226, + "loss_ib": 0.0011142202420160174, + "step": 2581 + }, + { + "ce_ib": 2.68749737739563, + "ce_orig": 0.5233400464057922, + "epoch": 0.7422532173412898, + "kl_loss": 0.06911662220954895, + "loss_ib": 0.0009599159238860011, + "step": 2581 + }, + { + "ce_ib": 4.383358955383301, + "ce_orig": 1.0090528726577759, + "epoch": 0.7422532173412898, + "kl_loss": 0.054264698177576065, + "loss_ib": 0.0009809827897697687, + "step": 2581 + }, + { + "ce_ib": 3.449718475341797, + "ce_orig": 0.8678279519081116, + "epoch": 0.7425408009202674, + "kl_loss": 0.06426624208688736, + "loss_ib": 0.0009876341791823506, + "step": 2582 + }, + { + "ce_ib": 4.131200313568115, + "ce_orig": 0.9741604328155518, + "epoch": 0.7425408009202674, + "kl_loss": 0.08313065767288208, + "loss_ib": 0.0012444265885278583, + "step": 2582 + }, + { + "ce_ib": 4.9473748207092285, + "ce_orig": 1.3995301723480225, + "epoch": 0.7425408009202674, + "kl_loss": 0.06486578285694122, + "loss_ib": 0.0011433953186497092, + "step": 2582 + }, + { + "ce_ib": 4.608459949493408, + "ce_orig": 0.7929960489273071, + "epoch": 0.7425408009202674, + "kl_loss": 0.0867910161614418, + "loss_ib": 0.0013287562178447843, + "step": 2582 + }, + { + "ce_ib": 3.743453025817871, + "ce_orig": 0.9927791953086853, + "epoch": 0.742828384499245, + "kl_loss": 0.048653166741132736, + "loss_ib": 0.0008608769276179373, + "step": 2583 + }, + { + "ce_ib": 4.174836158752441, + "ce_orig": 0.9403161406517029, + "epoch": 0.742828384499245, + "kl_loss": 0.07702706754207611, + "loss_ib": 0.0011877542128786445, + "step": 2583 + }, + { + "ce_ib": 2.768383264541626, + "ce_orig": 0.7504225373268127, + "epoch": 0.742828384499245, + "kl_loss": 0.044195547699928284, + "loss_ib": 0.0007187937735579908, + "step": 2583 + }, + { + "ce_ib": 3.3006691932678223, + "ce_orig": 0.6968424320220947, + "epoch": 0.742828384499245, + "kl_loss": 0.08904240280389786, + "loss_ib": 0.0012204908998683095, + "step": 2583 + }, + { + "ce_ib": 6.438364028930664, + "ce_orig": 1.5981535911560059, + "epoch": 0.7431159680782228, + "kl_loss": 0.06375338137149811, + "loss_ib": 0.0012813701760023832, + "step": 2584 + }, + { + "ce_ib": 4.549433708190918, + "ce_orig": 1.1969043016433716, + "epoch": 0.7431159680782228, + "kl_loss": 0.07165991514921188, + "loss_ib": 0.001171542564406991, + "step": 2584 + }, + { + "ce_ib": 5.680286407470703, + "ce_orig": 1.2225241661071777, + "epoch": 0.7431159680782228, + "kl_loss": 0.09685512632131577, + "loss_ib": 0.0015365798026323318, + "step": 2584 + }, + { + "ce_ib": 4.324751853942871, + "ce_orig": 0.7711944580078125, + "epoch": 0.7431159680782228, + "kl_loss": 0.04753297194838524, + "loss_ib": 0.0009078048751689494, + "step": 2584 + }, + { + "epoch": 0.7434035516572004, + "grad_norm": 0.11297635734081268, + "learning_rate": 4.407050033430957e-05, + "loss": 0.8285, + "step": 2585 + }, + { + "ce_ib": 4.73291540145874, + "ce_orig": 0.9491531848907471, + "epoch": 0.7434035516572004, + "kl_loss": 0.05469028279185295, + "loss_ib": 0.001020194380544126, + "step": 2585 + }, + { + "ce_ib": 3.4928338527679443, + "ce_orig": 0.33206185698509216, + "epoch": 0.7434035516572004, + "kl_loss": 0.06565520912408829, + "loss_ib": 0.0010058354819193482, + "step": 2585 + }, + { + "ce_ib": 4.089582920074463, + "ce_orig": 1.0478966236114502, + "epoch": 0.7434035516572004, + "kl_loss": 0.07225595414638519, + "loss_ib": 0.0011315178126096725, + "step": 2585 + }, + { + "ce_ib": 3.95180344581604, + "ce_orig": 1.069475769996643, + "epoch": 0.7434035516572004, + "kl_loss": 0.041052743792533875, + "loss_ib": 0.0008057077066041529, + "step": 2585 + }, + { + "ce_ib": 2.811655282974243, + "ce_orig": 0.5392038822174072, + "epoch": 0.743691135236178, + "kl_loss": 0.054515816271305084, + "loss_ib": 0.0008263236959464848, + "step": 2586 + }, + { + "ce_ib": 4.984159469604492, + "ce_orig": 0.6378013491630554, + "epoch": 0.743691135236178, + "kl_loss": 0.06238000467419624, + "loss_ib": 0.0011222159955650568, + "step": 2586 + }, + { + "ce_ib": 4.0284342765808105, + "ce_orig": 0.6446165442466736, + "epoch": 0.743691135236178, + "kl_loss": 0.09347113966941833, + "loss_ib": 0.0013375547714531422, + "step": 2586 + }, + { + "ce_ib": 3.080892562866211, + "ce_orig": 0.7576041221618652, + "epoch": 0.743691135236178, + "kl_loss": 0.06637796759605408, + "loss_ib": 0.0009718689252622426, + "step": 2586 + }, + { + "ce_ib": 4.095834732055664, + "ce_orig": 0.6964684724807739, + "epoch": 0.7439787188151556, + "kl_loss": 0.04757712781429291, + "loss_ib": 0.0008853547042235732, + "step": 2587 + }, + { + "ce_ib": 2.097806692123413, + "ce_orig": 0.6310796141624451, + "epoch": 0.7439787188151556, + "kl_loss": 0.034966617822647095, + "loss_ib": 0.000559446809347719, + "step": 2587 + }, + { + "ce_ib": 3.783611297607422, + "ce_orig": 0.6643132567405701, + "epoch": 0.7439787188151556, + "kl_loss": 0.041876889765262604, + "loss_ib": 0.0007971299928613007, + "step": 2587 + }, + { + "ce_ib": 3.71995210647583, + "ce_orig": 0.6676472425460815, + "epoch": 0.7439787188151556, + "kl_loss": 0.07816995680332184, + "loss_ib": 0.00115369469858706, + "step": 2587 + }, + { + "ce_ib": 3.5494015216827393, + "ce_orig": 0.6072925925254822, + "epoch": 0.7442663023941333, + "kl_loss": 0.06659725308418274, + "loss_ib": 0.0010209126630797982, + "step": 2588 + }, + { + "ce_ib": 2.990497589111328, + "ce_orig": 0.7995051145553589, + "epoch": 0.7442663023941333, + "kl_loss": 0.04403121396899223, + "loss_ib": 0.0007393618579953909, + "step": 2588 + }, + { + "ce_ib": 5.408304691314697, + "ce_orig": 0.7285920977592468, + "epoch": 0.7442663023941333, + "kl_loss": 0.09705479443073273, + "loss_ib": 0.0015113784465938807, + "step": 2588 + }, + { + "ce_ib": 3.6048712730407715, + "ce_orig": 0.6303426027297974, + "epoch": 0.7442663023941333, + "kl_loss": 0.0869026631116867, + "loss_ib": 0.001229513669386506, + "step": 2588 + }, + { + "ce_ib": 4.097700595855713, + "ce_orig": 1.0316451787948608, + "epoch": 0.7445538859731109, + "kl_loss": 0.05404011532664299, + "loss_ib": 0.0009501712047494948, + "step": 2589 + }, + { + "ce_ib": 3.1966567039489746, + "ce_orig": 0.6761201024055481, + "epoch": 0.7445538859731109, + "kl_loss": 0.06773994863033295, + "loss_ib": 0.0009970651008188725, + "step": 2589 + }, + { + "ce_ib": 2.5886831283569336, + "ce_orig": 0.7045834064483643, + "epoch": 0.7445538859731109, + "kl_loss": 0.07596857845783234, + "loss_ib": 0.0010185540886595845, + "step": 2589 + }, + { + "ce_ib": 5.34528112411499, + "ce_orig": 0.8017042875289917, + "epoch": 0.7445538859731109, + "kl_loss": 0.11683738231658936, + "loss_ib": 0.0017029017908498645, + "step": 2589 + }, + { + "epoch": 0.7448414695520886, + "grad_norm": 0.09200417995452881, + "learning_rate": 4.404538611702055e-05, + "loss": 0.8458, + "step": 2590 + }, + { + "ce_ib": 3.882032871246338, + "ce_orig": 1.0715233087539673, + "epoch": 0.7448414695520886, + "kl_loss": 0.053743306547403336, + "loss_ib": 0.0009256362682208419, + "step": 2590 + }, + { + "ce_ib": 5.002346992492676, + "ce_orig": 0.9990749955177307, + "epoch": 0.7448414695520886, + "kl_loss": 0.07104042917490005, + "loss_ib": 0.0012106390204280615, + "step": 2590 + }, + { + "ce_ib": 2.6215364933013916, + "ce_orig": 0.38888999819755554, + "epoch": 0.7448414695520886, + "kl_loss": 0.07574401795864105, + "loss_ib": 0.0010195937938988209, + "step": 2590 + }, + { + "ce_ib": 3.8946969509124756, + "ce_orig": 0.7936232686042786, + "epoch": 0.7448414695520886, + "kl_loss": 0.029540935531258583, + "loss_ib": 0.0006848790217190981, + "step": 2590 + }, + { + "ce_ib": 3.4122397899627686, + "ce_orig": 0.936580240726471, + "epoch": 0.7451290531310663, + "kl_loss": 0.0739307552576065, + "loss_ib": 0.0010805315105244517, + "step": 2591 + }, + { + "ce_ib": 2.3038318157196045, + "ce_orig": 0.6360535025596619, + "epoch": 0.7451290531310663, + "kl_loss": 0.0326407253742218, + "loss_ib": 0.000556790444534272, + "step": 2591 + }, + { + "ce_ib": 5.024445056915283, + "ce_orig": 1.1636579036712646, + "epoch": 0.7451290531310663, + "kl_loss": 0.0632898360490799, + "loss_ib": 0.0011353428708389401, + "step": 2591 + }, + { + "ce_ib": 3.271307945251465, + "ce_orig": 0.4929085969924927, + "epoch": 0.7451290531310663, + "kl_loss": 0.04569963365793228, + "loss_ib": 0.000784127099905163, + "step": 2591 + }, + { + "ce_ib": 2.0254228115081787, + "ce_orig": 0.24506555497646332, + "epoch": 0.7454166367100439, + "kl_loss": 0.07669173181056976, + "loss_ib": 0.0009694595355540514, + "step": 2592 + }, + { + "ce_ib": 4.831639289855957, + "ce_orig": 1.210736870765686, + "epoch": 0.7454166367100439, + "kl_loss": 0.07173870503902435, + "loss_ib": 0.0012005509342998266, + "step": 2592 + }, + { + "ce_ib": 2.781217098236084, + "ce_orig": 0.6672853827476501, + "epoch": 0.7454166367100439, + "kl_loss": 0.09384464472532272, + "loss_ib": 0.0012165680527687073, + "step": 2592 + }, + { + "ce_ib": 3.6011455059051514, + "ce_orig": 0.7196161150932312, + "epoch": 0.7454166367100439, + "kl_loss": 0.06563127040863037, + "loss_ib": 0.0010164272971451283, + "step": 2592 + }, + { + "ce_ib": 3.954333543777466, + "ce_orig": 0.9575192928314209, + "epoch": 0.7457042202890215, + "kl_loss": 0.07370633631944656, + "loss_ib": 0.0011324967490509152, + "step": 2593 + }, + { + "ce_ib": 2.624098062515259, + "ce_orig": 0.7732236981391907, + "epoch": 0.7457042202890215, + "kl_loss": 0.03341679647564888, + "loss_ib": 0.0005965777090750635, + "step": 2593 + }, + { + "ce_ib": 4.7814531326293945, + "ce_orig": 1.1523326635360718, + "epoch": 0.7457042202890215, + "kl_loss": 0.0602932907640934, + "loss_ib": 0.001081078196875751, + "step": 2593 + }, + { + "ce_ib": 4.168055057525635, + "ce_orig": 1.0284149646759033, + "epoch": 0.7457042202890215, + "kl_loss": 0.10809239745140076, + "loss_ib": 0.0014977293321862817, + "step": 2593 + }, + { + "ce_ib": 4.506146430969238, + "ce_orig": 0.7714839577674866, + "epoch": 0.7459918038679991, + "kl_loss": 0.12286544591188431, + "loss_ib": 0.0016792690148577094, + "step": 2594 + }, + { + "ce_ib": 4.715756416320801, + "ce_orig": 1.2692714929580688, + "epoch": 0.7459918038679991, + "kl_loss": 0.061877861618995667, + "loss_ib": 0.0010903541697189212, + "step": 2594 + }, + { + "ce_ib": 4.88641357421875, + "ce_orig": 1.171509027481079, + "epoch": 0.7459918038679991, + "kl_loss": 0.0562155619263649, + "loss_ib": 0.0010507969418540597, + "step": 2594 + }, + { + "ce_ib": 2.8465781211853027, + "ce_orig": 0.5048832297325134, + "epoch": 0.7459918038679991, + "kl_loss": 0.07600749284029007, + "loss_ib": 0.0010447327513247728, + "step": 2594 + }, + { + "epoch": 0.7462793874469767, + "grad_norm": 0.07936331629753113, + "learning_rate": 4.4020226014975854e-05, + "loss": 0.8585, + "step": 2595 + }, + { + "ce_ib": 3.5158519744873047, + "ce_orig": 0.7277178764343262, + "epoch": 0.7462793874469767, + "kl_loss": 0.04770027473568916, + "loss_ib": 0.0008285879157483578, + "step": 2595 + }, + { + "ce_ib": 4.57207727432251, + "ce_orig": 0.95831698179245, + "epoch": 0.7462793874469767, + "kl_loss": 0.14505349099636078, + "loss_ib": 0.0019077425822615623, + "step": 2595 + }, + { + "ce_ib": 3.448648691177368, + "ce_orig": 0.4866870045661926, + "epoch": 0.7462793874469767, + "kl_loss": 0.09246155619621277, + "loss_ib": 0.0012694804463535547, + "step": 2595 + }, + { + "ce_ib": 4.498739242553711, + "ce_orig": 1.0777403116226196, + "epoch": 0.7462793874469767, + "kl_loss": 0.07526026666164398, + "loss_ib": 0.001202476560138166, + "step": 2595 + }, + { + "ce_ib": 5.589422225952148, + "ce_orig": 1.3688710927963257, + "epoch": 0.7465669710259544, + "kl_loss": 0.10575936734676361, + "loss_ib": 0.0016165359411388636, + "step": 2596 + }, + { + "ce_ib": 5.802870273590088, + "ce_orig": 1.33457612991333, + "epoch": 0.7465669710259544, + "kl_loss": 0.06283240020275116, + "loss_ib": 0.0012086109491065145, + "step": 2596 + }, + { + "ce_ib": 4.8877854347229, + "ce_orig": 0.7255083322525024, + "epoch": 0.7465669710259544, + "kl_loss": 0.06389065831899643, + "loss_ib": 0.0011276851873844862, + "step": 2596 + }, + { + "ce_ib": 5.430732727050781, + "ce_orig": 1.455398678779602, + "epoch": 0.7465669710259544, + "kl_loss": 0.07589921355247498, + "loss_ib": 0.0013020653277635574, + "step": 2596 + }, + { + "ce_ib": 4.899883270263672, + "ce_orig": 1.3858762979507446, + "epoch": 0.7468545546049321, + "kl_loss": 0.06349992752075195, + "loss_ib": 0.0011249876115471125, + "step": 2597 + }, + { + "ce_ib": 3.827234983444214, + "ce_orig": 1.0242801904678345, + "epoch": 0.7468545546049321, + "kl_loss": 0.035586223006248474, + "loss_ib": 0.0007385857170447707, + "step": 2597 + }, + { + "ce_ib": 2.034520387649536, + "ce_orig": 0.3020414113998413, + "epoch": 0.7468545546049321, + "kl_loss": 0.05058005452156067, + "loss_ib": 0.000709252548404038, + "step": 2597 + }, + { + "ce_ib": 3.5984888076782227, + "ce_orig": 0.5280361771583557, + "epoch": 0.7468545546049321, + "kl_loss": 0.08656114339828491, + "loss_ib": 0.0012254603207111359, + "step": 2597 + }, + { + "ce_ib": 4.993515491485596, + "ce_orig": 0.5173854827880859, + "epoch": 0.7471421381839097, + "kl_loss": 0.26123201847076416, + "loss_ib": 0.003111671656370163, + "step": 2598 + }, + { + "ce_ib": 5.149080276489258, + "ce_orig": 1.282194972038269, + "epoch": 0.7471421381839097, + "kl_loss": 0.08549994230270386, + "loss_ib": 0.0013699074042961001, + "step": 2598 + }, + { + "ce_ib": 5.225446701049805, + "ce_orig": 0.9509761929512024, + "epoch": 0.7471421381839097, + "kl_loss": 0.05922668054699898, + "loss_ib": 0.0011148113990202546, + "step": 2598 + }, + { + "ce_ib": 4.720577716827393, + "ce_orig": 0.9915254712104797, + "epoch": 0.7471421381839097, + "kl_loss": 0.05367963761091232, + "loss_ib": 0.0010088541312143207, + "step": 2598 + }, + { + "ce_ib": 4.7411394119262695, + "ce_orig": 1.3172940015792847, + "epoch": 0.7474297217628874, + "kl_loss": 0.05003967881202698, + "loss_ib": 0.0009745107381604612, + "step": 2599 + }, + { + "ce_ib": 1.7016873359680176, + "ce_orig": 0.25735703110694885, + "epoch": 0.7474297217628874, + "kl_loss": 0.13538548350334167, + "loss_ib": 0.0015240234788507223, + "step": 2599 + }, + { + "ce_ib": 2.4660797119140625, + "ce_orig": 0.5630449652671814, + "epoch": 0.7474297217628874, + "kl_loss": 0.04939427971839905, + "loss_ib": 0.0007405507494695485, + "step": 2599 + }, + { + "ce_ib": 4.1353349685668945, + "ce_orig": 0.5576004385948181, + "epoch": 0.7474297217628874, + "kl_loss": 0.06614483892917633, + "loss_ib": 0.001074981759302318, + "step": 2599 + }, + { + "epoch": 0.747717305341865, + "grad_norm": 0.08992118388414383, + "learning_rate": 4.3995020088792e-05, + "loss": 0.8799, + "step": 2600 + }, + { + "ce_ib": 3.4333436489105225, + "ce_orig": 0.7552141547203064, + "epoch": 0.747717305341865, + "kl_loss": 0.05490860342979431, + "loss_ib": 0.0008924204157665372, + "step": 2600 + }, + { + "ce_ib": 4.483781337738037, + "ce_orig": 0.7321637868881226, + "epoch": 0.747717305341865, + "kl_loss": 0.08318322151899338, + "loss_ib": 0.0012802103301510215, + "step": 2600 + }, + { + "ce_ib": 4.480503082275391, + "ce_orig": 0.8110916614532471, + "epoch": 0.747717305341865, + "kl_loss": 0.06513393670320511, + "loss_ib": 0.001099389628507197, + "step": 2600 + }, + { + "ce_ib": 2.639897108078003, + "ce_orig": 0.5544835925102234, + "epoch": 0.747717305341865, + "kl_loss": 0.03665342181921005, + "loss_ib": 0.0006305238930508494, + "step": 2600 + }, + { + "ce_ib": 1.8997756242752075, + "ce_orig": 0.5850474834442139, + "epoch": 0.7480048889208426, + "kl_loss": 0.025231575593352318, + "loss_ib": 0.0004422932688612491, + "step": 2601 + }, + { + "ce_ib": 4.4821929931640625, + "ce_orig": 0.9159391522407532, + "epoch": 0.7480048889208426, + "kl_loss": 0.0526246652007103, + "loss_ib": 0.0009744658600538969, + "step": 2601 + }, + { + "ce_ib": 6.049164295196533, + "ce_orig": 0.6094668507575989, + "epoch": 0.7480048889208426, + "kl_loss": 0.08713231980800629, + "loss_ib": 0.0014762395294383168, + "step": 2601 + }, + { + "ce_ib": 4.979468822479248, + "ce_orig": 1.1237781047821045, + "epoch": 0.7480048889208426, + "kl_loss": 0.05107580125331879, + "loss_ib": 0.0010087048867717385, + "step": 2601 + }, + { + "ce_ib": 4.589431285858154, + "ce_orig": 0.8710101842880249, + "epoch": 0.7482924724998202, + "kl_loss": 0.07488340139389038, + "loss_ib": 0.0012077770661562681, + "step": 2602 + }, + { + "ce_ib": 7.147107124328613, + "ce_orig": 1.9577276706695557, + "epoch": 0.7482924724998202, + "kl_loss": 0.11918140947818756, + "loss_ib": 0.0019065248779952526, + "step": 2602 + }, + { + "ce_ib": 6.476070404052734, + "ce_orig": 1.0752394199371338, + "epoch": 0.7482924724998202, + "kl_loss": 0.061946626752614975, + "loss_ib": 0.0012670733267441392, + "step": 2602 + }, + { + "ce_ib": 6.04497766494751, + "ce_orig": 0.9597272276878357, + "epoch": 0.7482924724998202, + "kl_loss": 0.05106658488512039, + "loss_ib": 0.001115163555368781, + "step": 2602 + }, + { + "ce_ib": 4.253410816192627, + "ce_orig": 0.5042362213134766, + "epoch": 0.7485800560787979, + "kl_loss": 0.06845272332429886, + "loss_ib": 0.0011098682880401611, + "step": 2603 + }, + { + "ce_ib": 6.390913963317871, + "ce_orig": 0.8368533849716187, + "epoch": 0.7485800560787979, + "kl_loss": 0.07555709034204483, + "loss_ib": 0.0013946621911600232, + "step": 2603 + }, + { + "ce_ib": 1.2362782955169678, + "ce_orig": 0.16032838821411133, + "epoch": 0.7485800560787979, + "kl_loss": 0.1414971947669983, + "loss_ib": 0.001538599724881351, + "step": 2603 + }, + { + "ce_ib": 4.463633060455322, + "ce_orig": 0.9657441973686218, + "epoch": 0.7485800560787979, + "kl_loss": 0.03825679421424866, + "loss_ib": 0.0008289311663247645, + "step": 2603 + }, + { + "ce_ib": 2.496349334716797, + "ce_orig": 0.484628289937973, + "epoch": 0.7488676396577756, + "kl_loss": 0.08891230076551437, + "loss_ib": 0.0011387579143047333, + "step": 2604 + }, + { + "ce_ib": 5.165247917175293, + "ce_orig": 0.7103233933448792, + "epoch": 0.7488676396577756, + "kl_loss": 0.09887310117483139, + "loss_ib": 0.0015052556991577148, + "step": 2604 + }, + { + "ce_ib": 3.9432530403137207, + "ce_orig": 0.8037404417991638, + "epoch": 0.7488676396577756, + "kl_loss": 0.05362536013126373, + "loss_ib": 0.0009305788553319871, + "step": 2604 + }, + { + "ce_ib": 4.621462821960449, + "ce_orig": 0.8773047924041748, + "epoch": 0.7488676396577756, + "kl_loss": 0.04656460881233215, + "loss_ib": 0.0009277923381887376, + "step": 2604 + }, + { + "epoch": 0.7491552232367532, + "grad_norm": 0.11463533341884613, + "learning_rate": 4.396976839919591e-05, + "loss": 0.8533, + "step": 2605 + }, + { + "ce_ib": 3.4730374813079834, + "ce_orig": 0.7471719980239868, + "epoch": 0.7491552232367532, + "kl_loss": 0.03520294651389122, + "loss_ib": 0.0006993332062847912, + "step": 2605 + }, + { + "ce_ib": 2.1781840324401855, + "ce_orig": 0.4124497175216675, + "epoch": 0.7491552232367532, + "kl_loss": 0.07622547447681427, + "loss_ib": 0.000980073120445013, + "step": 2605 + }, + { + "ce_ib": 3.7198879718780518, + "ce_orig": 0.6507270336151123, + "epoch": 0.7491552232367532, + "kl_loss": 0.05370865389704704, + "loss_ib": 0.0009090752573683858, + "step": 2605 + }, + { + "ce_ib": 5.696014404296875, + "ce_orig": 1.3747234344482422, + "epoch": 0.7491552232367532, + "kl_loss": 0.07643292844295502, + "loss_ib": 0.0013339307624846697, + "step": 2605 + }, + { + "ce_ib": 4.954375267028809, + "ce_orig": 1.0684945583343506, + "epoch": 0.7494428068157308, + "kl_loss": 0.10058516263961792, + "loss_ib": 0.0015012890798971057, + "step": 2606 + }, + { + "ce_ib": 3.4100053310394287, + "ce_orig": 0.6830905675888062, + "epoch": 0.7494428068157308, + "kl_loss": 0.09409552812576294, + "loss_ib": 0.0012819558614864945, + "step": 2606 + }, + { + "ce_ib": 3.313035249710083, + "ce_orig": 0.38132327795028687, + "epoch": 0.7494428068157308, + "kl_loss": 0.11825090646743774, + "loss_ib": 0.0015138124581426382, + "step": 2606 + }, + { + "ce_ib": 3.2601771354675293, + "ce_orig": 0.6747615933418274, + "epoch": 0.7494428068157308, + "kl_loss": 0.07113973051309586, + "loss_ib": 0.0010374150006100535, + "step": 2606 + }, + { + "ce_ib": 3.7183120250701904, + "ce_orig": 0.5650266408920288, + "epoch": 0.7497303903947085, + "kl_loss": 0.09947086870670319, + "loss_ib": 0.0013665398582816124, + "step": 2607 + }, + { + "ce_ib": 5.079640865325928, + "ce_orig": 0.9711543917655945, + "epoch": 0.7497303903947085, + "kl_loss": 0.09330737590789795, + "loss_ib": 0.0014410377480089664, + "step": 2607 + }, + { + "ce_ib": 6.151634216308594, + "ce_orig": 1.5629843473434448, + "epoch": 0.7497303903947085, + "kl_loss": 0.06764810532331467, + "loss_ib": 0.0012916444102302194, + "step": 2607 + }, + { + "ce_ib": 1.974952220916748, + "ce_orig": 0.2538401782512665, + "epoch": 0.7497303903947085, + "kl_loss": 0.09679515659809113, + "loss_ib": 0.0011654467089101672, + "step": 2607 + }, + { + "ce_ib": 7.821817874908447, + "ce_orig": 2.0028114318847656, + "epoch": 0.7500179739736861, + "kl_loss": 0.07003205269575119, + "loss_ib": 0.0014825022080913186, + "step": 2608 + }, + { + "ce_ib": 5.094433307647705, + "ce_orig": 1.0424219369888306, + "epoch": 0.7500179739736861, + "kl_loss": 0.05971131846308708, + "loss_ib": 0.0011065565049648285, + "step": 2608 + }, + { + "ce_ib": 5.174319744110107, + "ce_orig": 1.3551918268203735, + "epoch": 0.7500179739736861, + "kl_loss": 0.07183866947889328, + "loss_ib": 0.001235818606801331, + "step": 2608 + }, + { + "ce_ib": 3.107639789581299, + "ce_orig": 0.6840874552726746, + "epoch": 0.7500179739736861, + "kl_loss": 0.08887024223804474, + "loss_ib": 0.001199466409161687, + "step": 2608 + }, + { + "ce_ib": 5.455689430236816, + "ce_orig": 1.3934221267700195, + "epoch": 0.7503055575526637, + "kl_loss": 0.07695671916007996, + "loss_ib": 0.0013151360908523202, + "step": 2609 + }, + { + "ce_ib": 3.3523898124694824, + "ce_orig": 0.6643742322921753, + "epoch": 0.7503055575526637, + "kl_loss": 0.0533929318189621, + "loss_ib": 0.0008691683178767562, + "step": 2609 + }, + { + "ce_ib": 3.697941303253174, + "ce_orig": 0.7137842774391174, + "epoch": 0.7503055575526637, + "kl_loss": 0.04720352217555046, + "loss_ib": 0.0008418292854912579, + "step": 2609 + }, + { + "ce_ib": 2.5946109294891357, + "ce_orig": 0.7812269926071167, + "epoch": 0.7503055575526637, + "kl_loss": 0.06006157398223877, + "loss_ib": 0.0008600768051110208, + "step": 2609 + }, + { + "epoch": 0.7505931411316413, + "grad_norm": 0.09291582554578781, + "learning_rate": 4.3944471007024776e-05, + "loss": 0.8992, + "step": 2610 + }, + { + "ce_ib": 6.18729305267334, + "ce_orig": 1.0990869998931885, + "epoch": 0.7505931411316413, + "kl_loss": 0.0423511266708374, + "loss_ib": 0.0010422405321151018, + "step": 2610 + }, + { + "ce_ib": 2.5096747875213623, + "ce_orig": 0.5836231708526611, + "epoch": 0.7505931411316413, + "kl_loss": 0.07412125915288925, + "loss_ib": 0.0009921800810843706, + "step": 2610 + }, + { + "ce_ib": 3.3205442428588867, + "ce_orig": 0.742074191570282, + "epoch": 0.7505931411316413, + "kl_loss": 0.05250366032123566, + "loss_ib": 0.0008570909849368036, + "step": 2610 + }, + { + "ce_ib": 4.452269554138184, + "ce_orig": 1.0416412353515625, + "epoch": 0.7505931411316413, + "kl_loss": 0.06127874180674553, + "loss_ib": 0.0010580143425613642, + "step": 2610 + }, + { + "ce_ib": 5.384433269500732, + "ce_orig": 1.3201409578323364, + "epoch": 0.7508807247106191, + "kl_loss": 0.07957085967063904, + "loss_ib": 0.0013341519515961409, + "step": 2611 + }, + { + "ce_ib": 1.427058458328247, + "ce_orig": 0.21558207273483276, + "epoch": 0.7508807247106191, + "kl_loss": 0.1622059941291809, + "loss_ib": 0.0017647658241912723, + "step": 2611 + }, + { + "ce_ib": 1.8591203689575195, + "ce_orig": 0.5273790955543518, + "epoch": 0.7508807247106191, + "kl_loss": 0.05364081263542175, + "loss_ib": 0.0007223201100714505, + "step": 2611 + }, + { + "ce_ib": 2.7398505210876465, + "ce_orig": 0.5468233227729797, + "epoch": 0.7508807247106191, + "kl_loss": 0.04929041489958763, + "loss_ib": 0.0007668891339562833, + "step": 2611 + }, + { + "ce_ib": 3.316898822784424, + "ce_orig": 0.9485487937927246, + "epoch": 0.7511683082895967, + "kl_loss": 0.040797360241413116, + "loss_ib": 0.0007396634318865836, + "step": 2612 + }, + { + "ce_ib": 3.027635335922241, + "ce_orig": 0.5045971870422363, + "epoch": 0.7511683082895967, + "kl_loss": 0.04895342141389847, + "loss_ib": 0.0007922977092675865, + "step": 2612 + }, + { + "ce_ib": 4.269455432891846, + "ce_orig": 1.2389638423919678, + "epoch": 0.7511683082895967, + "kl_loss": 0.05529668927192688, + "loss_ib": 0.00097991235088557, + "step": 2612 + }, + { + "ce_ib": 2.9806039333343506, + "ce_orig": 0.6711074709892273, + "epoch": 0.7511683082895967, + "kl_loss": 0.08014722168445587, + "loss_ib": 0.0010995325865224004, + "step": 2612 + }, + { + "ce_ib": 5.354071617126465, + "ce_orig": 1.1772825717926025, + "epoch": 0.7514558918685743, + "kl_loss": 0.107040636241436, + "loss_ib": 0.0016058135079219937, + "step": 2613 + }, + { + "ce_ib": 4.5768842697143555, + "ce_orig": 1.1909679174423218, + "epoch": 0.7514558918685743, + "kl_loss": 0.062093980610370636, + "loss_ib": 0.0010786282364279032, + "step": 2613 + }, + { + "ce_ib": 3.3114824295043945, + "ce_orig": 0.736298680305481, + "epoch": 0.7514558918685743, + "kl_loss": 0.03919680416584015, + "loss_ib": 0.000723116216249764, + "step": 2613 + }, + { + "ce_ib": 3.463364362716675, + "ce_orig": 0.7861091494560242, + "epoch": 0.7514558918685743, + "kl_loss": 0.07951848953962326, + "loss_ib": 0.0011415212647989392, + "step": 2613 + }, + { + "ce_ib": 1.9018367528915405, + "ce_orig": 0.6017552614212036, + "epoch": 0.7517434754475519, + "kl_loss": 0.039619527757167816, + "loss_ib": 0.0005863789119757712, + "step": 2614 + }, + { + "ce_ib": 3.747687578201294, + "ce_orig": 0.8999239206314087, + "epoch": 0.7517434754475519, + "kl_loss": 0.0460980050265789, + "loss_ib": 0.0008357487386092544, + "step": 2614 + }, + { + "ce_ib": 3.358915090560913, + "ce_orig": 0.6887758374214172, + "epoch": 0.7517434754475519, + "kl_loss": 0.05897276848554611, + "loss_ib": 0.0009256191551685333, + "step": 2614 + }, + { + "ce_ib": 4.505159854888916, + "ce_orig": 1.031092882156372, + "epoch": 0.7517434754475519, + "kl_loss": 0.07742500305175781, + "loss_ib": 0.00122476601973176, + "step": 2614 + }, + { + "epoch": 0.7520310590265296, + "grad_norm": 0.09959781169891357, + "learning_rate": 4.391912797322587e-05, + "loss": 0.8508, + "step": 2615 + }, + { + "ce_ib": 3.1903345584869385, + "ce_orig": 0.43705275654792786, + "epoch": 0.7520310590265296, + "kl_loss": 0.11812029778957367, + "loss_ib": 0.0015002363361418247, + "step": 2615 + }, + { + "ce_ib": 4.734034538269043, + "ce_orig": 0.7834950089454651, + "epoch": 0.7520310590265296, + "kl_loss": 0.08060532808303833, + "loss_ib": 0.0012794567737728357, + "step": 2615 + }, + { + "ce_ib": 3.402663230895996, + "ce_orig": 0.4480191171169281, + "epoch": 0.7520310590265296, + "kl_loss": 0.028855111449956894, + "loss_ib": 0.0006288173608481884, + "step": 2615 + }, + { + "ce_ib": 5.885380744934082, + "ce_orig": 1.3666232824325562, + "epoch": 0.7520310590265296, + "kl_loss": 0.08595432341098785, + "loss_ib": 0.0014480812242254615, + "step": 2615 + }, + { + "ce_ib": 3.8798303604125977, + "ce_orig": 0.8524845242500305, + "epoch": 0.7523186426055072, + "kl_loss": 0.06394045054912567, + "loss_ib": 0.0010273874504491687, + "step": 2616 + }, + { + "ce_ib": 5.495110034942627, + "ce_orig": 1.0427242517471313, + "epoch": 0.7523186426055072, + "kl_loss": 0.09992431104183197, + "loss_ib": 0.0015487540513277054, + "step": 2616 + }, + { + "ce_ib": 6.908403396606445, + "ce_orig": 1.411257028579712, + "epoch": 0.7523186426055072, + "kl_loss": 0.07754915952682495, + "loss_ib": 0.0014663318870589137, + "step": 2616 + }, + { + "ce_ib": 3.4040474891662598, + "ce_orig": 0.8140004873275757, + "epoch": 0.7523186426055072, + "kl_loss": 0.05376093462109566, + "loss_ib": 0.0008780140778981149, + "step": 2616 + }, + { + "ce_ib": 2.162879228591919, + "ce_orig": 0.48361626267433167, + "epoch": 0.7526062261844849, + "kl_loss": 0.06654819846153259, + "loss_ib": 0.0008817698690108955, + "step": 2617 + }, + { + "ce_ib": 5.4273481369018555, + "ce_orig": 1.2855018377304077, + "epoch": 0.7526062261844849, + "kl_loss": 0.0651089996099472, + "loss_ib": 0.0011938248062506318, + "step": 2617 + }, + { + "ce_ib": 2.8519182205200195, + "ce_orig": 0.637963593006134, + "epoch": 0.7526062261844849, + "kl_loss": 0.07720184326171875, + "loss_ib": 0.0010572102619335055, + "step": 2617 + }, + { + "ce_ib": 5.638017177581787, + "ce_orig": 0.8545359969139099, + "epoch": 0.7526062261844849, + "kl_loss": 0.11512409150600433, + "loss_ib": 0.0017150425119325519, + "step": 2617 + }, + { + "ce_ib": 3.8505823612213135, + "ce_orig": 0.6346133947372437, + "epoch": 0.7528938097634625, + "kl_loss": 0.064302958548069, + "loss_ib": 0.0010280878050252795, + "step": 2618 + }, + { + "ce_ib": 4.292762756347656, + "ce_orig": 0.6578108072280884, + "epoch": 0.7528938097634625, + "kl_loss": 0.07063129544258118, + "loss_ib": 0.001135589205659926, + "step": 2618 + }, + { + "ce_ib": 6.190606594085693, + "ce_orig": 1.4974735975265503, + "epoch": 0.7528938097634625, + "kl_loss": 0.07114700227975845, + "loss_ib": 0.0013305306201800704, + "step": 2618 + }, + { + "ce_ib": 4.649906158447266, + "ce_orig": 0.7834607362747192, + "epoch": 0.7528938097634625, + "kl_loss": 0.07616277784109116, + "loss_ib": 0.0012266184203326702, + "step": 2618 + }, + { + "ce_ib": 4.040887355804443, + "ce_orig": 1.144699215888977, + "epoch": 0.7531813933424402, + "kl_loss": 0.05367632582783699, + "loss_ib": 0.0009408519836142659, + "step": 2619 + }, + { + "ce_ib": 4.438510417938232, + "ce_orig": 0.8493675589561462, + "epoch": 0.7531813933424402, + "kl_loss": 0.057596076279878616, + "loss_ib": 0.001019811723381281, + "step": 2619 + }, + { + "ce_ib": 4.3931660652160645, + "ce_orig": 0.6530018448829651, + "epoch": 0.7531813933424402, + "kl_loss": 0.06470464915037155, + "loss_ib": 0.0010863629868254066, + "step": 2619 + }, + { + "ce_ib": 3.3037986755371094, + "ce_orig": 0.6725051403045654, + "epoch": 0.7531813933424402, + "kl_loss": 0.05573200434446335, + "loss_ib": 0.0008876998326741159, + "step": 2619 + }, + { + "epoch": 0.7534689769214178, + "grad_norm": 0.09154371917247772, + "learning_rate": 4.389373935885646e-05, + "loss": 0.7932, + "step": 2620 + }, + { + "ce_ib": 5.19724178314209, + "ce_orig": 1.0252013206481934, + "epoch": 0.7534689769214178, + "kl_loss": 0.056747812777757645, + "loss_ib": 0.001087202224880457, + "step": 2620 + }, + { + "ce_ib": 2.684117078781128, + "ce_orig": 0.781640887260437, + "epoch": 0.7534689769214178, + "kl_loss": 0.04391787201166153, + "loss_ib": 0.0007075904286466539, + "step": 2620 + }, + { + "ce_ib": 6.741369247436523, + "ce_orig": 1.3771573305130005, + "epoch": 0.7534689769214178, + "kl_loss": 0.07107600569725037, + "loss_ib": 0.0013848969247192144, + "step": 2620 + }, + { + "ce_ib": 4.823448657989502, + "ce_orig": 1.0184777975082397, + "epoch": 0.7534689769214178, + "kl_loss": 0.08912801742553711, + "loss_ib": 0.0013736250111833215, + "step": 2620 + }, + { + "ce_ib": 3.3833606243133545, + "ce_orig": 0.7389194965362549, + "epoch": 0.7537565605003954, + "kl_loss": 0.08194311708211899, + "loss_ib": 0.0011577671393752098, + "step": 2621 + }, + { + "ce_ib": 5.101074695587158, + "ce_orig": 1.2532116174697876, + "epoch": 0.7537565605003954, + "kl_loss": 0.07681893557310104, + "loss_ib": 0.0012782968115061522, + "step": 2621 + }, + { + "ce_ib": 3.4867770671844482, + "ce_orig": 0.6309884190559387, + "epoch": 0.7537565605003954, + "kl_loss": 0.05517487972974777, + "loss_ib": 0.0009004264720715582, + "step": 2621 + }, + { + "ce_ib": 4.297452926635742, + "ce_orig": 0.9210222959518433, + "epoch": 0.7537565605003954, + "kl_loss": 0.046702612191438675, + "loss_ib": 0.000896771380212158, + "step": 2621 + }, + { + "ce_ib": 4.161874294281006, + "ce_orig": 0.7432108521461487, + "epoch": 0.754044144079373, + "kl_loss": 0.05952414870262146, + "loss_ib": 0.001011428888887167, + "step": 2622 + }, + { + "ce_ib": 2.8876144886016846, + "ce_orig": 0.6723262071609497, + "epoch": 0.754044144079373, + "kl_loss": 0.03890150412917137, + "loss_ib": 0.0006777764647267759, + "step": 2622 + }, + { + "ce_ib": 2.205756902694702, + "ce_orig": 0.4545075297355652, + "epoch": 0.754044144079373, + "kl_loss": 0.03061750717461109, + "loss_ib": 0.0005267507513053715, + "step": 2622 + }, + { + "ce_ib": 5.690247535705566, + "ce_orig": 1.170934796333313, + "epoch": 0.754044144079373, + "kl_loss": 0.07714737951755524, + "loss_ib": 0.0013404985656961799, + "step": 2622 + }, + { + "ce_ib": 4.197359561920166, + "ce_orig": 0.8099672198295593, + "epoch": 0.7543317276583507, + "kl_loss": 0.056998029351234436, + "loss_ib": 0.0009897161507979035, + "step": 2623 + }, + { + "ce_ib": 4.249442100524902, + "ce_orig": 0.817466676235199, + "epoch": 0.7543317276583507, + "kl_loss": 0.0679914578795433, + "loss_ib": 0.0011048588203266263, + "step": 2623 + }, + { + "ce_ib": 2.296978712081909, + "ce_orig": 0.43309086561203003, + "epoch": 0.7543317276583507, + "kl_loss": 0.06198880448937416, + "loss_ib": 0.0008495858637616038, + "step": 2623 + }, + { + "ce_ib": 3.3133251667022705, + "ce_orig": 0.5211515426635742, + "epoch": 0.7543317276583507, + "kl_loss": 0.04304755479097366, + "loss_ib": 0.000761808012612164, + "step": 2623 + }, + { + "ce_ib": 5.2385969161987305, + "ce_orig": 1.4612138271331787, + "epoch": 0.7546193112373284, + "kl_loss": 0.06759048253297806, + "loss_ib": 0.0011997644323855639, + "step": 2624 + }, + { + "ce_ib": 3.602349281311035, + "ce_orig": 0.7348685264587402, + "epoch": 0.7546193112373284, + "kl_loss": 0.06517118960618973, + "loss_ib": 0.001011946820653975, + "step": 2624 + }, + { + "ce_ib": 3.7685532569885254, + "ce_orig": 0.5115917921066284, + "epoch": 0.7546193112373284, + "kl_loss": 0.03655165433883667, + "loss_ib": 0.0007423718343488872, + "step": 2624 + }, + { + "ce_ib": 3.9359233379364014, + "ce_orig": 0.9104540348052979, + "epoch": 0.7546193112373284, + "kl_loss": 0.06558070331811905, + "loss_ib": 0.0010493993759155273, + "step": 2624 + }, + { + "epoch": 0.754906894816306, + "grad_norm": 0.10602030903100967, + "learning_rate": 4.3868305225083605e-05, + "loss": 0.8434, + "step": 2625 + }, + { + "ce_ib": 6.435441493988037, + "ce_orig": 1.0113154649734497, + "epoch": 0.754906894816306, + "kl_loss": 0.06679558753967285, + "loss_ib": 0.0013115000911056995, + "step": 2625 + }, + { + "ce_ib": 3.2206835746765137, + "ce_orig": 0.5536072850227356, + "epoch": 0.754906894816306, + "kl_loss": 0.05099589377641678, + "loss_ib": 0.0008320272318087518, + "step": 2625 + }, + { + "ce_ib": 4.7402215003967285, + "ce_orig": 1.1320427656173706, + "epoch": 0.754906894816306, + "kl_loss": 0.05609910935163498, + "loss_ib": 0.0010350131196901202, + "step": 2625 + }, + { + "ce_ib": 4.14284086227417, + "ce_orig": 0.7821236252784729, + "epoch": 0.754906894816306, + "kl_loss": 0.07668974995613098, + "loss_ib": 0.0011811815202236176, + "step": 2625 + }, + { + "ce_ib": 3.611907958984375, + "ce_orig": 0.9093359112739563, + "epoch": 0.7551944783952836, + "kl_loss": 0.08736608922481537, + "loss_ib": 0.0012348515447229147, + "step": 2626 + }, + { + "ce_ib": 2.87048602104187, + "ce_orig": 0.6203920245170593, + "epoch": 0.7551944783952836, + "kl_loss": 0.04362864792346954, + "loss_ib": 0.0007233350770547986, + "step": 2626 + }, + { + "ce_ib": 2.158963441848755, + "ce_orig": 0.27142202854156494, + "epoch": 0.7551944783952836, + "kl_loss": 0.03415003418922424, + "loss_ib": 0.0005573966773226857, + "step": 2626 + }, + { + "ce_ib": 3.5055482387542725, + "ce_orig": 0.45874229073524475, + "epoch": 0.7551944783952836, + "kl_loss": 0.09596718102693558, + "loss_ib": 0.0013102266239002347, + "step": 2626 + }, + { + "ce_ib": 5.579212188720703, + "ce_orig": 1.5982755422592163, + "epoch": 0.7554820619742613, + "kl_loss": 0.044104818254709244, + "loss_ib": 0.0009989693062379956, + "step": 2627 + }, + { + "ce_ib": 2.7854862213134766, + "ce_orig": 0.6807829141616821, + "epoch": 0.7554820619742613, + "kl_loss": 0.03469720482826233, + "loss_ib": 0.0006255206535570323, + "step": 2627 + }, + { + "ce_ib": 2.1648287773132324, + "ce_orig": 0.38064077496528625, + "epoch": 0.7554820619742613, + "kl_loss": 0.04496981203556061, + "loss_ib": 0.000666180974803865, + "step": 2627 + }, + { + "ce_ib": 2.962691068649292, + "ce_orig": 0.6837733387947083, + "epoch": 0.7554820619742613, + "kl_loss": 0.29641270637512207, + "loss_ib": 0.0032603959552943707, + "step": 2627 + }, + { + "ce_ib": 4.67193603515625, + "ce_orig": 1.0710597038269043, + "epoch": 0.7557696455532389, + "kl_loss": 0.04862070083618164, + "loss_ib": 0.000953400565776974, + "step": 2628 + }, + { + "ce_ib": 2.2577779293060303, + "ce_orig": 0.43792951107025146, + "epoch": 0.7557696455532389, + "kl_loss": 0.07634082436561584, + "loss_ib": 0.0009891859954223037, + "step": 2628 + }, + { + "ce_ib": 4.772175312042236, + "ce_orig": 1.1861381530761719, + "epoch": 0.7557696455532389, + "kl_loss": 0.07094854861497879, + "loss_ib": 0.0011867029825225472, + "step": 2628 + }, + { + "ce_ib": 3.32299542427063, + "ce_orig": 0.8933327794075012, + "epoch": 0.7557696455532389, + "kl_loss": 0.05435850843787193, + "loss_ib": 0.0008758845506235957, + "step": 2628 + }, + { + "ce_ib": 3.903562068939209, + "ce_orig": 0.8030595183372498, + "epoch": 0.7560572291322165, + "kl_loss": 0.1991681158542633, + "loss_ib": 0.002382037229835987, + "step": 2629 + }, + { + "ce_ib": 3.889604330062866, + "ce_orig": 0.8502317070960999, + "epoch": 0.7560572291322165, + "kl_loss": 0.110245481133461, + "loss_ib": 0.0014914151979610324, + "step": 2629 + }, + { + "ce_ib": 3.451765537261963, + "ce_orig": 0.9141132235527039, + "epoch": 0.7560572291322165, + "kl_loss": 0.037794798612594604, + "loss_ib": 0.0007231244817376137, + "step": 2629 + }, + { + "ce_ib": 3.1924173831939697, + "ce_orig": 0.9732596278190613, + "epoch": 0.7560572291322165, + "kl_loss": 0.039063889533281326, + "loss_ib": 0.0007098805508576334, + "step": 2629 + }, + { + "epoch": 0.7563448127111941, + "grad_norm": 0.10353568941354752, + "learning_rate": 4.384282563318403e-05, + "loss": 0.8425, + "step": 2630 + }, + { + "ce_ib": 5.469336986541748, + "ce_orig": 1.1587589979171753, + "epoch": 0.7563448127111941, + "kl_loss": 0.08208349347114563, + "loss_ib": 0.0013677686220034957, + "step": 2630 + }, + { + "ce_ib": 2.490868091583252, + "ce_orig": 0.5242978930473328, + "epoch": 0.7563448127111941, + "kl_loss": 0.059253402054309845, + "loss_ib": 0.0008416208438575268, + "step": 2630 + }, + { + "ce_ib": 5.842584133148193, + "ce_orig": 1.4747180938720703, + "epoch": 0.7563448127111941, + "kl_loss": 0.07110612094402313, + "loss_ib": 0.0012953196419402957, + "step": 2630 + }, + { + "ce_ib": 6.424262046813965, + "ce_orig": 1.4622681140899658, + "epoch": 0.7563448127111941, + "kl_loss": 0.07329615205526352, + "loss_ib": 0.0013753875391557813, + "step": 2630 + }, + { + "ce_ib": 5.079166412353516, + "ce_orig": 1.0549761056900024, + "epoch": 0.7566323962901719, + "kl_loss": 0.057973090559244156, + "loss_ib": 0.001087647513486445, + "step": 2631 + }, + { + "ce_ib": 5.431291103363037, + "ce_orig": 1.1875042915344238, + "epoch": 0.7566323962901719, + "kl_loss": 0.05985283851623535, + "loss_ib": 0.0011416574707254767, + "step": 2631 + }, + { + "ce_ib": 1.9083951711654663, + "ce_orig": 0.10706739127635956, + "epoch": 0.7566323962901719, + "kl_loss": 0.08660662174224854, + "loss_ib": 0.0010569057194516063, + "step": 2631 + }, + { + "ce_ib": 3.9621496200561523, + "ce_orig": 0.954402506351471, + "epoch": 0.7566323962901719, + "kl_loss": 0.05127166211605072, + "loss_ib": 0.0009089315426535904, + "step": 2631 + }, + { + "ce_ib": 2.653337240219116, + "ce_orig": 0.5713689923286438, + "epoch": 0.7569199798691495, + "kl_loss": 0.05183995142579079, + "loss_ib": 0.0007837332668714225, + "step": 2632 + }, + { + "ce_ib": 4.147755146026611, + "ce_orig": 0.9449085593223572, + "epoch": 0.7569199798691495, + "kl_loss": 0.06778568029403687, + "loss_ib": 0.0010926321847364306, + "step": 2632 + }, + { + "ce_ib": 3.960407018661499, + "ce_orig": 0.6830273866653442, + "epoch": 0.7569199798691495, + "kl_loss": 0.07744049280881882, + "loss_ib": 0.0011704455828294158, + "step": 2632 + }, + { + "ce_ib": 3.771969795227051, + "ce_orig": 0.9334812164306641, + "epoch": 0.7569199798691495, + "kl_loss": 0.05336169898509979, + "loss_ib": 0.0009108139201998711, + "step": 2632 + }, + { + "ce_ib": 3.2596356868743896, + "ce_orig": 0.43960657715797424, + "epoch": 0.7572075634481271, + "kl_loss": 0.08279009163379669, + "loss_ib": 0.0011538644321262836, + "step": 2633 + }, + { + "ce_ib": 2.24955677986145, + "ce_orig": 0.527242124080658, + "epoch": 0.7572075634481271, + "kl_loss": 0.04387005418539047, + "loss_ib": 0.0006636562175117433, + "step": 2633 + }, + { + "ce_ib": 3.6242330074310303, + "ce_orig": 0.8652840852737427, + "epoch": 0.7572075634481271, + "kl_loss": 0.07602746784687042, + "loss_ib": 0.0011226979549974203, + "step": 2633 + }, + { + "ce_ib": 6.232001304626465, + "ce_orig": 1.4338079690933228, + "epoch": 0.7572075634481271, + "kl_loss": 0.09077566862106323, + "loss_ib": 0.0015309567097574472, + "step": 2633 + }, + { + "ce_ib": 2.203709840774536, + "ce_orig": 0.2787202298641205, + "epoch": 0.7574951470271047, + "kl_loss": 0.0758105218410492, + "loss_ib": 0.0009784761350601912, + "step": 2634 + }, + { + "ce_ib": 4.182518005371094, + "ce_orig": 0.8343692421913147, + "epoch": 0.7574951470271047, + "kl_loss": 0.06442281603813171, + "loss_ib": 0.0010624799178913236, + "step": 2634 + }, + { + "ce_ib": 2.692591667175293, + "ce_orig": 0.5059154033660889, + "epoch": 0.7574951470271047, + "kl_loss": 0.0630151703953743, + "loss_ib": 0.0008994108065962791, + "step": 2634 + }, + { + "ce_ib": 5.196767330169678, + "ce_orig": 1.2116948366165161, + "epoch": 0.7574951470271047, + "kl_loss": 0.10674800723791122, + "loss_ib": 0.0015871567884460092, + "step": 2634 + }, + { + "epoch": 0.7577827306060824, + "grad_norm": 0.0884164422750473, + "learning_rate": 4.381730064454399e-05, + "loss": 0.8586, + "step": 2635 + }, + { + "ce_ib": 4.279022216796875, + "ce_orig": 0.870453417301178, + "epoch": 0.7577827306060824, + "kl_loss": 0.04989595711231232, + "loss_ib": 0.0009268617141060531, + "step": 2635 + }, + { + "ce_ib": 3.629657745361328, + "ce_orig": 0.8935202956199646, + "epoch": 0.7577827306060824, + "kl_loss": 0.032643869519233704, + "loss_ib": 0.0006894044345244765, + "step": 2635 + }, + { + "ce_ib": 3.9661471843719482, + "ce_orig": 0.924900233745575, + "epoch": 0.7577827306060824, + "kl_loss": 0.03883983939886093, + "loss_ib": 0.0007850131369195879, + "step": 2635 + }, + { + "ce_ib": 3.480229616165161, + "ce_orig": 0.7660970091819763, + "epoch": 0.7577827306060824, + "kl_loss": 0.04502885043621063, + "loss_ib": 0.0007983114337548614, + "step": 2635 + }, + { + "ce_ib": 3.514986276626587, + "ce_orig": 0.6756960153579712, + "epoch": 0.75807031418506, + "kl_loss": 0.05279986932873726, + "loss_ib": 0.000879497267305851, + "step": 2636 + }, + { + "ce_ib": 5.372417449951172, + "ce_orig": 0.7388408780097961, + "epoch": 0.75807031418506, + "kl_loss": 0.0676126778125763, + "loss_ib": 0.0012133684940636158, + "step": 2636 + }, + { + "ce_ib": 3.013855218887329, + "ce_orig": 0.6892964243888855, + "epoch": 0.75807031418506, + "kl_loss": 0.040659334510564804, + "loss_ib": 0.0007079789065755904, + "step": 2636 + }, + { + "ce_ib": 2.8436453342437744, + "ce_orig": 0.663540244102478, + "epoch": 0.75807031418506, + "kl_loss": 0.048726700246334076, + "loss_ib": 0.0007716314867138863, + "step": 2636 + }, + { + "ce_ib": 2.8361477851867676, + "ce_orig": 0.6111149191856384, + "epoch": 0.7583578977640377, + "kl_loss": 0.03813888132572174, + "loss_ib": 0.0006650035502389073, + "step": 2637 + }, + { + "ce_ib": 2.276116132736206, + "ce_orig": 0.3548533618450165, + "epoch": 0.7583578977640377, + "kl_loss": 0.03371089696884155, + "loss_ib": 0.0005647205398418009, + "step": 2637 + }, + { + "ce_ib": 5.46007776260376, + "ce_orig": 1.208402156829834, + "epoch": 0.7583578977640377, + "kl_loss": 0.08474823832511902, + "loss_ib": 0.0013934901216998696, + "step": 2637 + }, + { + "ce_ib": 2.689534902572632, + "ce_orig": 0.7108384370803833, + "epoch": 0.7583578977640377, + "kl_loss": 0.04731745272874832, + "loss_ib": 0.0007421280024573207, + "step": 2637 + }, + { + "ce_ib": 3.5096817016601562, + "ce_orig": 0.8135960102081299, + "epoch": 0.7586454813430153, + "kl_loss": 0.059343740344047546, + "loss_ib": 0.0009444055613130331, + "step": 2638 + }, + { + "ce_ib": 3.166877031326294, + "ce_orig": 0.29955777525901794, + "epoch": 0.7586454813430153, + "kl_loss": 0.04736865311861038, + "loss_ib": 0.0007903742371127009, + "step": 2638 + }, + { + "ce_ib": 2.8276937007904053, + "ce_orig": 0.6735939383506775, + "epoch": 0.7586454813430153, + "kl_loss": 0.07173176109790802, + "loss_ib": 0.0010000868933275342, + "step": 2638 + }, + { + "ce_ib": 4.31399393081665, + "ce_orig": 0.7311835289001465, + "epoch": 0.7586454813430153, + "kl_loss": 0.09464013576507568, + "loss_ib": 0.0013778007123619318, + "step": 2638 + }, + { + "ce_ib": 2.271056890487671, + "ce_orig": 0.3424254357814789, + "epoch": 0.758933064921993, + "kl_loss": 0.09436360746622086, + "loss_ib": 0.0011707417434081435, + "step": 2639 + }, + { + "ce_ib": 2.9299070835113525, + "ce_orig": 0.6384568810462952, + "epoch": 0.758933064921993, + "kl_loss": 0.038522444665431976, + "loss_ib": 0.0006782151758670807, + "step": 2639 + }, + { + "ce_ib": 3.571728229522705, + "ce_orig": 0.6528748869895935, + "epoch": 0.758933064921993, + "kl_loss": 0.08004459738731384, + "loss_ib": 0.0011576187098398805, + "step": 2639 + }, + { + "ce_ib": 3.9285888671875, + "ce_orig": 0.47073596715927124, + "epoch": 0.758933064921993, + "kl_loss": 0.10402826219797134, + "loss_ib": 0.0014331415295600891, + "step": 2639 + }, + { + "epoch": 0.7592206485009706, + "grad_norm": 0.08987714350223541, + "learning_rate": 4.379173032065912e-05, + "loss": 0.7845, + "step": 2640 + }, + { + "ce_ib": 2.222796678543091, + "ce_orig": 0.5049963593482971, + "epoch": 0.7592206485009706, + "kl_loss": 0.047754399478435516, + "loss_ib": 0.0006998236058279872, + "step": 2640 + }, + { + "ce_ib": 4.071335792541504, + "ce_orig": 1.135736107826233, + "epoch": 0.7592206485009706, + "kl_loss": 0.05761566013097763, + "loss_ib": 0.0009832902578637004, + "step": 2640 + }, + { + "ce_ib": 6.860150337219238, + "ce_orig": 1.6746727228164673, + "epoch": 0.7592206485009706, + "kl_loss": 0.0631849616765976, + "loss_ib": 0.0013178646331653, + "step": 2640 + }, + { + "ce_ib": 3.631319284439087, + "ce_orig": 0.8174636363983154, + "epoch": 0.7592206485009706, + "kl_loss": 0.05440428480505943, + "loss_ib": 0.0009071747772395611, + "step": 2640 + }, + { + "ce_ib": 4.127086162567139, + "ce_orig": 0.9167801737785339, + "epoch": 0.7595082320799482, + "kl_loss": 0.06759397685527802, + "loss_ib": 0.0010886483360081911, + "step": 2641 + }, + { + "ce_ib": 4.62824010848999, + "ce_orig": 0.8987078070640564, + "epoch": 0.7595082320799482, + "kl_loss": 0.06379778683185577, + "loss_ib": 0.0011008017463609576, + "step": 2641 + }, + { + "ce_ib": 2.2175920009613037, + "ce_orig": 0.35309916734695435, + "epoch": 0.7595082320799482, + "kl_loss": 0.22942419350147247, + "loss_ib": 0.002516000997275114, + "step": 2641 + }, + { + "ce_ib": 4.794754505157471, + "ce_orig": 1.0994141101837158, + "epoch": 0.7595082320799482, + "kl_loss": 0.05011329799890518, + "loss_ib": 0.0009806083980947733, + "step": 2641 + }, + { + "ce_ib": 2.649714708328247, + "ce_orig": 0.5392195582389832, + "epoch": 0.7597958156589258, + "kl_loss": 0.04814182221889496, + "loss_ib": 0.0007463896763511002, + "step": 2642 + }, + { + "ce_ib": 4.334882736206055, + "ce_orig": 0.8670239448547363, + "epoch": 0.7597958156589258, + "kl_loss": 0.06485514342784882, + "loss_ib": 0.0010820396710187197, + "step": 2642 + }, + { + "ce_ib": 2.8753304481506348, + "ce_orig": 0.4663042426109314, + "epoch": 0.7597958156589258, + "kl_loss": 0.11590809375047684, + "loss_ib": 0.0014466139255091548, + "step": 2642 + }, + { + "ce_ib": 5.901447296142578, + "ce_orig": 1.432969570159912, + "epoch": 0.7597958156589258, + "kl_loss": 0.08764243125915527, + "loss_ib": 0.0014665690250694752, + "step": 2642 + }, + { + "ce_ib": 4.318301677703857, + "ce_orig": 1.1880065202713013, + "epoch": 0.7600833992379035, + "kl_loss": 0.1081082746386528, + "loss_ib": 0.0015129129169508815, + "step": 2643 + }, + { + "ce_ib": 1.5475528240203857, + "ce_orig": 0.41385138034820557, + "epoch": 0.7600833992379035, + "kl_loss": 0.023380136117339134, + "loss_ib": 0.0003885566256940365, + "step": 2643 + }, + { + "ce_ib": 3.4355416297912598, + "ce_orig": 0.7015425562858582, + "epoch": 0.7600833992379035, + "kl_loss": 0.0705086812376976, + "loss_ib": 0.0010486409300938249, + "step": 2643 + }, + { + "ce_ib": 2.450108766555786, + "ce_orig": 0.31757646799087524, + "epoch": 0.7600833992379035, + "kl_loss": 0.06962068378925323, + "loss_ib": 0.0009412176441401243, + "step": 2643 + }, + { + "ce_ib": 3.6846346855163574, + "ce_orig": 1.089355230331421, + "epoch": 0.7603709828168812, + "kl_loss": 0.06423334777355194, + "loss_ib": 0.0010107968701049685, + "step": 2644 + }, + { + "ce_ib": 3.1024348735809326, + "ce_orig": 0.7094189524650574, + "epoch": 0.7603709828168812, + "kl_loss": 0.04412081465125084, + "loss_ib": 0.0007514515891671181, + "step": 2644 + }, + { + "ce_ib": 3.0670528411865234, + "ce_orig": 0.7954482436180115, + "epoch": 0.7603709828168812, + "kl_loss": 0.05871396139264107, + "loss_ib": 0.0008938448736444116, + "step": 2644 + }, + { + "ce_ib": 3.8454971313476562, + "ce_orig": 0.7793039679527283, + "epoch": 0.7603709828168812, + "kl_loss": 0.07394479215145111, + "loss_ib": 0.0011239976156502962, + "step": 2644 + }, + { + "epoch": 0.7606585663958588, + "grad_norm": 0.08998209983110428, + "learning_rate": 4.3766114723134246e-05, + "loss": 0.8931, + "step": 2645 + }, + { + "ce_ib": 2.8093934059143066, + "ce_orig": 0.6237976551055908, + "epoch": 0.7606585663958588, + "kl_loss": 0.09429745376110077, + "loss_ib": 0.0012239138595759869, + "step": 2645 + }, + { + "ce_ib": 3.1015169620513916, + "ce_orig": 0.642147421836853, + "epoch": 0.7606585663958588, + "kl_loss": 0.08659566938877106, + "loss_ib": 0.0011761083733290434, + "step": 2645 + }, + { + "ce_ib": 3.1891303062438965, + "ce_orig": 0.42319145798683167, + "epoch": 0.7606585663958588, + "kl_loss": 0.03365815058350563, + "loss_ib": 0.0006554944557137787, + "step": 2645 + }, + { + "ce_ib": 3.0265631675720215, + "ce_orig": 0.6537096500396729, + "epoch": 0.7606585663958588, + "kl_loss": 0.049928031861782074, + "loss_ib": 0.0008019366068765521, + "step": 2645 + }, + { + "ce_ib": 4.477949619293213, + "ce_orig": 0.9313303828239441, + "epoch": 0.7609461499748364, + "kl_loss": 0.07016407698392868, + "loss_ib": 0.0011494356440380216, + "step": 2646 + }, + { + "ce_ib": 2.6542181968688965, + "ce_orig": 0.6388009190559387, + "epoch": 0.7609461499748364, + "kl_loss": 0.04947223514318466, + "loss_ib": 0.0007601441466249526, + "step": 2646 + }, + { + "ce_ib": 3.5934431552886963, + "ce_orig": 0.8119797706604004, + "epoch": 0.7609461499748364, + "kl_loss": 0.05030295252799988, + "loss_ib": 0.000862373854033649, + "step": 2646 + }, + { + "ce_ib": 3.3750407695770264, + "ce_orig": 0.7814184427261353, + "epoch": 0.7609461499748364, + "kl_loss": 0.07781437039375305, + "loss_ib": 0.0011156477266922593, + "step": 2646 + }, + { + "ce_ib": 3.739736318588257, + "ce_orig": 0.5817291736602783, + "epoch": 0.7612337335538141, + "kl_loss": 0.04859371855854988, + "loss_ib": 0.0008599108550697565, + "step": 2647 + }, + { + "ce_ib": 2.5921261310577393, + "ce_orig": 0.5705950260162354, + "epoch": 0.7612337335538141, + "kl_loss": 0.048979904502630234, + "loss_ib": 0.0007490116404369473, + "step": 2647 + }, + { + "ce_ib": 3.117783784866333, + "ce_orig": 0.5326222777366638, + "epoch": 0.7612337335538141, + "kl_loss": 0.0579976886510849, + "loss_ib": 0.0008917552186176181, + "step": 2647 + }, + { + "ce_ib": 3.9243052005767822, + "ce_orig": 0.7473167181015015, + "epoch": 0.7612337335538141, + "kl_loss": 0.049623049795627594, + "loss_ib": 0.0008886609575711191, + "step": 2647 + }, + { + "ce_ib": 5.128884315490723, + "ce_orig": 1.1960241794586182, + "epoch": 0.7615213171327917, + "kl_loss": 0.06159691885113716, + "loss_ib": 0.0011288576060906053, + "step": 2648 + }, + { + "ce_ib": 1.569833517074585, + "ce_orig": 0.2487729787826538, + "epoch": 0.7615213171327917, + "kl_loss": 0.08642256259918213, + "loss_ib": 0.0010212089400738478, + "step": 2648 + }, + { + "ce_ib": 4.665679931640625, + "ce_orig": 0.6937192678451538, + "epoch": 0.7615213171327917, + "kl_loss": 0.07074253261089325, + "loss_ib": 0.0011739933397620916, + "step": 2648 + }, + { + "ce_ib": 4.930827617645264, + "ce_orig": 0.9730837345123291, + "epoch": 0.7615213171327917, + "kl_loss": 0.05611778795719147, + "loss_ib": 0.0010542605305090547, + "step": 2648 + }, + { + "ce_ib": 5.944694995880127, + "ce_orig": 1.381577491760254, + "epoch": 0.7618089007117693, + "kl_loss": 0.0552610345184803, + "loss_ib": 0.0011470798635855317, + "step": 2649 + }, + { + "ce_ib": 4.508270263671875, + "ce_orig": 0.670365571975708, + "epoch": 0.7618089007117693, + "kl_loss": 0.07709869742393494, + "loss_ib": 0.0012218139600008726, + "step": 2649 + }, + { + "ce_ib": 2.9199399948120117, + "ce_orig": 0.6518254280090332, + "epoch": 0.7618089007117693, + "kl_loss": 0.0792820006608963, + "loss_ib": 0.0010848139645531774, + "step": 2649 + }, + { + "ce_ib": 5.10945463180542, + "ce_orig": 0.6930752992630005, + "epoch": 0.7618089007117693, + "kl_loss": 0.11876596510410309, + "loss_ib": 0.001698605134151876, + "step": 2649 + }, + { + "epoch": 0.762096484290747, + "grad_norm": 0.0908840149641037, + "learning_rate": 4.37404539136833e-05, + "loss": 0.7861, + "step": 2650 + }, + { + "ce_ib": 5.788754940032959, + "ce_orig": 1.3941874504089355, + "epoch": 0.762096484290747, + "kl_loss": 0.08359399437904358, + "loss_ib": 0.001414815429598093, + "step": 2650 + }, + { + "ce_ib": 2.822307825088501, + "ce_orig": 0.48468446731567383, + "epoch": 0.762096484290747, + "kl_loss": 0.06987225264310837, + "loss_ib": 0.0009809532202780247, + "step": 2650 + }, + { + "ce_ib": 4.15388822555542, + "ce_orig": 0.9111103415489197, + "epoch": 0.762096484290747, + "kl_loss": 0.057039253413677216, + "loss_ib": 0.000985781429335475, + "step": 2650 + }, + { + "ce_ib": 4.898234844207764, + "ce_orig": 1.3278990983963013, + "epoch": 0.762096484290747, + "kl_loss": 0.13215351104736328, + "loss_ib": 0.0018113586120307446, + "step": 2650 + }, + { + "ce_ib": 4.039705276489258, + "ce_orig": 1.0816882848739624, + "epoch": 0.7623840678697247, + "kl_loss": 0.08659584820270538, + "loss_ib": 0.001269928878173232, + "step": 2651 + }, + { + "ce_ib": 1.522288203239441, + "ce_orig": 0.3022635579109192, + "epoch": 0.7623840678697247, + "kl_loss": 0.11615653336048126, + "loss_ib": 0.0013137940550222993, + "step": 2651 + }, + { + "ce_ib": 2.556077003479004, + "ce_orig": 0.5651264190673828, + "epoch": 0.7623840678697247, + "kl_loss": 0.04413461685180664, + "loss_ib": 0.0006969537935219705, + "step": 2651 + }, + { + "ce_ib": 3.237834930419922, + "ce_orig": 0.7725080847740173, + "epoch": 0.7623840678697247, + "kl_loss": 0.05637947842478752, + "loss_ib": 0.0008875782368704677, + "step": 2651 + }, + { + "ce_ib": 4.813643455505371, + "ce_orig": 0.5590178370475769, + "epoch": 0.7626716514487023, + "kl_loss": 0.05746662616729736, + "loss_ib": 0.0010560305090621114, + "step": 2652 + }, + { + "ce_ib": 1.7405531406402588, + "ce_orig": 0.4901607632637024, + "epoch": 0.7626716514487023, + "kl_loss": 0.025681722909212112, + "loss_ib": 0.0004308725183364004, + "step": 2652 + }, + { + "ce_ib": 5.151076316833496, + "ce_orig": 1.1446435451507568, + "epoch": 0.7626716514487023, + "kl_loss": 0.08792750537395477, + "loss_ib": 0.0013943826779723167, + "step": 2652 + }, + { + "ce_ib": 4.428952693939209, + "ce_orig": 0.9669773578643799, + "epoch": 0.7626716514487023, + "kl_loss": 0.04067005217075348, + "loss_ib": 0.00084959581727162, + "step": 2652 + }, + { + "ce_ib": 2.6410956382751465, + "ce_orig": 0.503361165523529, + "epoch": 0.7629592350276799, + "kl_loss": 0.04655015468597412, + "loss_ib": 0.0007296110270544887, + "step": 2653 + }, + { + "ce_ib": 4.117646217346191, + "ce_orig": 0.9071342349052429, + "epoch": 0.7629592350276799, + "kl_loss": 0.07293781638145447, + "loss_ib": 0.0011411427985876799, + "step": 2653 + }, + { + "ce_ib": 3.2702534198760986, + "ce_orig": 0.669751763343811, + "epoch": 0.7629592350276799, + "kl_loss": 0.039144366979599, + "loss_ib": 0.0007184690330177546, + "step": 2653 + }, + { + "ce_ib": 3.5989718437194824, + "ce_orig": 0.7683852314949036, + "epoch": 0.7629592350276799, + "kl_loss": 0.0568254292011261, + "loss_ib": 0.0009281514212489128, + "step": 2653 + }, + { + "ce_ib": 2.584961175918579, + "ce_orig": 0.5587707161903381, + "epoch": 0.7632468186066576, + "kl_loss": 0.07263801246881485, + "loss_ib": 0.0009848761837929487, + "step": 2654 + }, + { + "ce_ib": 4.591379642486572, + "ce_orig": 0.9629166722297668, + "epoch": 0.7632468186066576, + "kl_loss": 0.08665378391742706, + "loss_ib": 0.0013256758684292436, + "step": 2654 + }, + { + "ce_ib": 4.856884479522705, + "ce_orig": 0.9665273427963257, + "epoch": 0.7632468186066576, + "kl_loss": 0.0861344188451767, + "loss_ib": 0.0013470326084643602, + "step": 2654 + }, + { + "ce_ib": 4.1149749755859375, + "ce_orig": 0.5770421028137207, + "epoch": 0.7632468186066576, + "kl_loss": 0.10263442248106003, + "loss_ib": 0.001437841565348208, + "step": 2654 + }, + { + "epoch": 0.7635344021856352, + "grad_norm": 0.0858294740319252, + "learning_rate": 4.371474795412912e-05, + "loss": 0.8165, + "step": 2655 + }, + { + "ce_ib": 4.2853288650512695, + "ce_orig": 0.6375936269760132, + "epoch": 0.7635344021856352, + "kl_loss": 0.06773468852043152, + "loss_ib": 0.0011058797826990485, + "step": 2655 + }, + { + "ce_ib": 2.9780337810516357, + "ce_orig": 0.7424360513687134, + "epoch": 0.7635344021856352, + "kl_loss": 0.05932029336690903, + "loss_ib": 0.0008910062606446445, + "step": 2655 + }, + { + "ce_ib": 6.160101890563965, + "ce_orig": 1.3431720733642578, + "epoch": 0.7635344021856352, + "kl_loss": 0.08549672365188599, + "loss_ib": 0.0014709773240610957, + "step": 2655 + }, + { + "ce_ib": 4.32119607925415, + "ce_orig": 0.5477121472358704, + "epoch": 0.7635344021856352, + "kl_loss": 0.07663172483444214, + "loss_ib": 0.0011984368320554495, + "step": 2655 + }, + { + "ce_ib": 4.699697017669678, + "ce_orig": 0.8163239359855652, + "epoch": 0.7638219857646128, + "kl_loss": 0.09647032618522644, + "loss_ib": 0.0014346728567034006, + "step": 2656 + }, + { + "ce_ib": 6.033963680267334, + "ce_orig": 1.3642327785491943, + "epoch": 0.7638219857646128, + "kl_loss": 0.08824765682220459, + "loss_ib": 0.0014858727809041739, + "step": 2656 + }, + { + "ce_ib": 3.065603494644165, + "ce_orig": 0.6196361780166626, + "epoch": 0.7638219857646128, + "kl_loss": 0.04016050323843956, + "loss_ib": 0.0007081654039211571, + "step": 2656 + }, + { + "ce_ib": 2.7626962661743164, + "ce_orig": 0.6607279181480408, + "epoch": 0.7638219857646128, + "kl_loss": 0.04699118435382843, + "loss_ib": 0.0007461814675480127, + "step": 2656 + }, + { + "ce_ib": 4.874968528747559, + "ce_orig": 1.274665355682373, + "epoch": 0.7641095693435905, + "kl_loss": 0.06458456814289093, + "loss_ib": 0.001133342506363988, + "step": 2657 + }, + { + "ce_ib": 3.133589744567871, + "ce_orig": 0.7066977620124817, + "epoch": 0.7641095693435905, + "kl_loss": 0.05267305672168732, + "loss_ib": 0.0008400895167142153, + "step": 2657 + }, + { + "ce_ib": 4.832355976104736, + "ce_orig": 0.9129353165626526, + "epoch": 0.7641095693435905, + "kl_loss": 0.07676731050014496, + "loss_ib": 0.001250908593647182, + "step": 2657 + }, + { + "ce_ib": 3.424562931060791, + "ce_orig": 0.760797917842865, + "epoch": 0.7641095693435905, + "kl_loss": 0.0764864832162857, + "loss_ib": 0.001107321004383266, + "step": 2657 + }, + { + "ce_ib": 4.666360855102539, + "ce_orig": 1.0853055715560913, + "epoch": 0.7643971529225682, + "kl_loss": 0.07479951530694962, + "loss_ib": 0.0012146311346441507, + "step": 2658 + }, + { + "ce_ib": 2.5572562217712402, + "ce_orig": 0.3609181344509125, + "epoch": 0.7643971529225682, + "kl_loss": 0.05184665694832802, + "loss_ib": 0.0007741922163404524, + "step": 2658 + }, + { + "ce_ib": 3.0558836460113525, + "ce_orig": 0.7437742948532104, + "epoch": 0.7643971529225682, + "kl_loss": 0.0410805270075798, + "loss_ib": 0.000716393580660224, + "step": 2658 + }, + { + "ce_ib": 3.430769920349121, + "ce_orig": 0.7594967484474182, + "epoch": 0.7643971529225682, + "kl_loss": 0.05354149639606476, + "loss_ib": 0.0008784919627942145, + "step": 2658 + }, + { + "ce_ib": 4.513545036315918, + "ce_orig": 0.7426491975784302, + "epoch": 0.7646847365015458, + "kl_loss": 0.13240161538124084, + "loss_ib": 0.0017753706779330969, + "step": 2659 + }, + { + "ce_ib": 3.110649347305298, + "ce_orig": 0.6796711087226868, + "epoch": 0.7646847365015458, + "kl_loss": 0.06307050585746765, + "loss_ib": 0.0009417699766345322, + "step": 2659 + }, + { + "ce_ib": 2.4281187057495117, + "ce_orig": 0.6544910073280334, + "epoch": 0.7646847365015458, + "kl_loss": 0.0476977564394474, + "loss_ib": 0.0007197894155979156, + "step": 2659 + }, + { + "ce_ib": 3.3140439987182617, + "ce_orig": 0.7755199074745178, + "epoch": 0.7646847365015458, + "kl_loss": 0.04464269429445267, + "loss_ib": 0.0007778313593007624, + "step": 2659 + }, + { + "epoch": 0.7649723200805234, + "grad_norm": 0.11033149063587189, + "learning_rate": 4.3688996906403333e-05, + "loss": 0.7885, + "step": 2660 + }, + { + "ce_ib": 3.385693311691284, + "ce_orig": 0.7375624775886536, + "epoch": 0.7649723200805234, + "kl_loss": 0.07164613157510757, + "loss_ib": 0.0010550306178629398, + "step": 2660 + }, + { + "ce_ib": 3.973598003387451, + "ce_orig": 0.7210139036178589, + "epoch": 0.7649723200805234, + "kl_loss": 0.07119564712047577, + "loss_ib": 0.0011093162465840578, + "step": 2660 + }, + { + "ce_ib": 3.508446216583252, + "ce_orig": 0.42958083748817444, + "epoch": 0.7649723200805234, + "kl_loss": 0.08974499255418777, + "loss_ib": 0.0012482944875955582, + "step": 2660 + }, + { + "ce_ib": 2.4458014965057373, + "ce_orig": 0.5421603918075562, + "epoch": 0.7649723200805234, + "kl_loss": 0.10467085242271423, + "loss_ib": 0.001291288761422038, + "step": 2660 + }, + { + "ce_ib": 4.8723602294921875, + "ce_orig": 1.0635120868682861, + "epoch": 0.765259903659501, + "kl_loss": 0.10572224855422974, + "loss_ib": 0.0015444585587829351, + "step": 2661 + }, + { + "ce_ib": 3.551351308822632, + "ce_orig": 0.9552726745605469, + "epoch": 0.765259903659501, + "kl_loss": 0.07498995214700699, + "loss_ib": 0.001105034607462585, + "step": 2661 + }, + { + "ce_ib": 3.51096248626709, + "ce_orig": 0.809448778629303, + "epoch": 0.765259903659501, + "kl_loss": 0.05231986194849014, + "loss_ib": 0.000874294841196388, + "step": 2661 + }, + { + "ce_ib": 5.137720584869385, + "ce_orig": 1.3979519605636597, + "epoch": 0.765259903659501, + "kl_loss": 0.10106750577688217, + "loss_ib": 0.0015244469977915287, + "step": 2661 + }, + { + "ce_ib": 4.175265312194824, + "ce_orig": 0.45381197333335876, + "epoch": 0.7655474872384787, + "kl_loss": 0.07474331557750702, + "loss_ib": 0.0011649596272036433, + "step": 2662 + }, + { + "ce_ib": 5.970282077789307, + "ce_orig": 1.4422149658203125, + "epoch": 0.7655474872384787, + "kl_loss": 0.06044568866491318, + "loss_ib": 0.001201485050842166, + "step": 2662 + }, + { + "ce_ib": 4.10727071762085, + "ce_orig": 0.8597448468208313, + "epoch": 0.7655474872384787, + "kl_loss": 0.04692631587386131, + "loss_ib": 0.0008799902279861271, + "step": 2662 + }, + { + "ce_ib": 4.126307010650635, + "ce_orig": 0.9295262694358826, + "epoch": 0.7655474872384787, + "kl_loss": 0.13418403267860413, + "loss_ib": 0.0017544709844514728, + "step": 2662 + }, + { + "ce_ib": 3.045562267303467, + "ce_orig": 0.4604310095310211, + "epoch": 0.7658350708174563, + "kl_loss": 0.05062480643391609, + "loss_ib": 0.0008108042529784143, + "step": 2663 + }, + { + "ce_ib": 4.887362957000732, + "ce_orig": 0.8189067840576172, + "epoch": 0.7658350708174563, + "kl_loss": 0.084291011095047, + "loss_ib": 0.0013316464610397816, + "step": 2663 + }, + { + "ce_ib": 2.471510648727417, + "ce_orig": 0.599950909614563, + "epoch": 0.7658350708174563, + "kl_loss": 0.0676039308309555, + "loss_ib": 0.0009231903241015971, + "step": 2663 + }, + { + "ce_ib": 3.0256009101867676, + "ce_orig": 0.8546024560928345, + "epoch": 0.7658350708174563, + "kl_loss": 0.046612780541181564, + "loss_ib": 0.0007686878670938313, + "step": 2663 + }, + { + "ce_ib": 4.843890190124512, + "ce_orig": 0.8840936422348022, + "epoch": 0.766122654396434, + "kl_loss": 0.07361326366662979, + "loss_ib": 0.0012205216335132718, + "step": 2664 + }, + { + "ce_ib": 6.2611212730407715, + "ce_orig": 1.8641233444213867, + "epoch": 0.766122654396434, + "kl_loss": 0.08124842494726181, + "loss_ib": 0.0014385964022949338, + "step": 2664 + }, + { + "ce_ib": 5.093046188354492, + "ce_orig": 1.3757519721984863, + "epoch": 0.766122654396434, + "kl_loss": 0.063059501349926, + "loss_ib": 0.00113989959936589, + "step": 2664 + }, + { + "ce_ib": 4.189239501953125, + "ce_orig": 0.5774773955345154, + "epoch": 0.766122654396434, + "kl_loss": 0.08628067374229431, + "loss_ib": 0.0012817305978387594, + "step": 2664 + }, + { + "epoch": 0.7664102379754116, + "grad_norm": 0.09529706835746765, + "learning_rate": 4.366320083254619e-05, + "loss": 0.8453, + "step": 2665 + }, + { + "ce_ib": 2.3781676292419434, + "ce_orig": 0.4015434980392456, + "epoch": 0.7664102379754116, + "kl_loss": 0.04896277189254761, + "loss_ib": 0.0007274444214999676, + "step": 2665 + }, + { + "ce_ib": 3.5491371154785156, + "ce_orig": 0.7832902073860168, + "epoch": 0.7664102379754116, + "kl_loss": 0.049749426543712616, + "loss_ib": 0.0008524079457856715, + "step": 2665 + }, + { + "ce_ib": 3.436155319213867, + "ce_orig": 0.5772687196731567, + "epoch": 0.7664102379754116, + "kl_loss": 0.04619018733501434, + "loss_ib": 0.0008055174257606268, + "step": 2665 + }, + { + "ce_ib": 5.320281505584717, + "ce_orig": 0.9888310432434082, + "epoch": 0.7664102379754116, + "kl_loss": 0.07437177747488022, + "loss_ib": 0.0012757459189742804, + "step": 2665 + }, + { + "ce_ib": 4.581792831420898, + "ce_orig": 0.732937216758728, + "epoch": 0.7666978215543893, + "kl_loss": 0.04211517795920372, + "loss_ib": 0.000879331084433943, + "step": 2666 + }, + { + "ce_ib": 6.1903204917907715, + "ce_orig": 1.4769855737686157, + "epoch": 0.7666978215543893, + "kl_loss": 0.061441924422979355, + "loss_ib": 0.0012334513012319803, + "step": 2666 + }, + { + "ce_ib": 3.0434932708740234, + "ce_orig": 0.69410240650177, + "epoch": 0.7666978215543893, + "kl_loss": 0.07467298209667206, + "loss_ib": 0.0010510791325941682, + "step": 2666 + }, + { + "ce_ib": 4.9908552169799805, + "ce_orig": 1.1498583555221558, + "epoch": 0.7666978215543893, + "kl_loss": 0.05772031843662262, + "loss_ib": 0.0010762886377051473, + "step": 2666 + }, + { + "ce_ib": 4.893829822540283, + "ce_orig": 1.0241419076919556, + "epoch": 0.7669854051333669, + "kl_loss": 0.05977199226617813, + "loss_ib": 0.0010871029226109385, + "step": 2667 + }, + { + "ce_ib": 3.5127787590026855, + "ce_orig": 0.69636070728302, + "epoch": 0.7669854051333669, + "kl_loss": 0.07468084990978241, + "loss_ib": 0.0010980863589793444, + "step": 2667 + }, + { + "ce_ib": 3.061094284057617, + "ce_orig": 0.7909853458404541, + "epoch": 0.7669854051333669, + "kl_loss": 0.03897964209318161, + "loss_ib": 0.0006959058227948844, + "step": 2667 + }, + { + "ce_ib": 4.728803634643555, + "ce_orig": 1.0462234020233154, + "epoch": 0.7669854051333669, + "kl_loss": 0.055767446756362915, + "loss_ib": 0.001030554762110114, + "step": 2667 + }, + { + "ce_ib": 2.9952142238616943, + "ce_orig": 0.8163288831710815, + "epoch": 0.7672729887123445, + "kl_loss": 0.0603916272521019, + "loss_ib": 0.0009034377289935946, + "step": 2668 + }, + { + "ce_ib": 2.41902232170105, + "ce_orig": 0.6557413339614868, + "epoch": 0.7672729887123445, + "kl_loss": 0.041897088289260864, + "loss_ib": 0.0006608731346204877, + "step": 2668 + }, + { + "ce_ib": 4.137685775756836, + "ce_orig": 1.201201319694519, + "epoch": 0.7672729887123445, + "kl_loss": 0.057452715933322906, + "loss_ib": 0.0009882956510409713, + "step": 2668 + }, + { + "ce_ib": 2.9479668140411377, + "ce_orig": 0.7846546173095703, + "epoch": 0.7672729887123445, + "kl_loss": 0.05781757831573486, + "loss_ib": 0.0008729724213480949, + "step": 2668 + }, + { + "ce_ib": 2.2827084064483643, + "ce_orig": 0.44292017817497253, + "epoch": 0.7675605722913221, + "kl_loss": 0.06473837047815323, + "loss_ib": 0.0008756545139476657, + "step": 2669 + }, + { + "ce_ib": 2.8645999431610107, + "ce_orig": 0.5910277962684631, + "epoch": 0.7675605722913221, + "kl_loss": 0.07588384300470352, + "loss_ib": 0.00104529841337353, + "step": 2669 + }, + { + "ce_ib": 2.4348397254943848, + "ce_orig": 0.6510829329490662, + "epoch": 0.7675605722913221, + "kl_loss": 0.0717591792345047, + "loss_ib": 0.000961075711529702, + "step": 2669 + }, + { + "ce_ib": 4.976774215698242, + "ce_orig": 1.059247374534607, + "epoch": 0.7675605722913221, + "kl_loss": 0.05772826820611954, + "loss_ib": 0.0010749601060524583, + "step": 2669 + }, + { + "epoch": 0.7678481558702998, + "grad_norm": 0.08013013750314713, + "learning_rate": 4.363735979470642e-05, + "loss": 0.8214, + "step": 2670 + }, + { + "ce_ib": 3.912621259689331, + "ce_orig": 0.8661234974861145, + "epoch": 0.7678481558702998, + "kl_loss": 0.06754951924085617, + "loss_ib": 0.0010667572496458888, + "step": 2670 + }, + { + "ce_ib": 3.8724539279937744, + "ce_orig": 0.7514461278915405, + "epoch": 0.7678481558702998, + "kl_loss": 0.0708712786436081, + "loss_ib": 0.0010959581704810262, + "step": 2670 + }, + { + "ce_ib": 2.4806275367736816, + "ce_orig": 0.697605311870575, + "epoch": 0.7678481558702998, + "kl_loss": 0.03458184748888016, + "loss_ib": 0.0005938812391832471, + "step": 2670 + }, + { + "ce_ib": 4.773003101348877, + "ce_orig": 0.544187068939209, + "epoch": 0.7678481558702998, + "kl_loss": 0.0655667632818222, + "loss_ib": 0.0011329678818583488, + "step": 2670 + }, + { + "ce_ib": 4.663131237030029, + "ce_orig": 1.098256230354309, + "epoch": 0.7681357394492775, + "kl_loss": 0.05589701235294342, + "loss_ib": 0.001025283127091825, + "step": 2671 + }, + { + "ce_ib": 3.249436378479004, + "ce_orig": 0.21718548238277435, + "epoch": 0.7681357394492775, + "kl_loss": 0.05960426479578018, + "loss_ib": 0.0009209862328134477, + "step": 2671 + }, + { + "ce_ib": 4.199544906616211, + "ce_orig": 0.9783641695976257, + "epoch": 0.7681357394492775, + "kl_loss": 0.06127206236124039, + "loss_ib": 0.001032675034366548, + "step": 2671 + }, + { + "ce_ib": 4.641138076782227, + "ce_orig": 1.235436201095581, + "epoch": 0.7681357394492775, + "kl_loss": 0.05176083743572235, + "loss_ib": 0.0009817221434786916, + "step": 2671 + }, + { + "ce_ib": 5.112195014953613, + "ce_orig": 0.6378281116485596, + "epoch": 0.7684233230282551, + "kl_loss": 0.0625590831041336, + "loss_ib": 0.0011368102859705687, + "step": 2672 + }, + { + "ce_ib": 4.1467390060424805, + "ce_orig": 1.1578782796859741, + "epoch": 0.7684233230282551, + "kl_loss": 0.05772370100021362, + "loss_ib": 0.0009919109288603067, + "step": 2672 + }, + { + "ce_ib": 3.657349109649658, + "ce_orig": 0.30583176016807556, + "epoch": 0.7684233230282551, + "kl_loss": 0.09125969558954239, + "loss_ib": 0.001278331852518022, + "step": 2672 + }, + { + "ce_ib": 4.71720027923584, + "ce_orig": 0.899252712726593, + "epoch": 0.7684233230282551, + "kl_loss": 0.059569597244262695, + "loss_ib": 0.0010674159275367856, + "step": 2672 + }, + { + "ce_ib": 4.079118251800537, + "ce_orig": 0.6595386862754822, + "epoch": 0.7687109066072327, + "kl_loss": 0.10806769877672195, + "loss_ib": 0.0014885887503623962, + "step": 2673 + }, + { + "ce_ib": 3.5402417182922363, + "ce_orig": 0.8373548984527588, + "epoch": 0.7687109066072327, + "kl_loss": 0.04066375643014908, + "loss_ib": 0.0007606617291457951, + "step": 2673 + }, + { + "ce_ib": 6.583059310913086, + "ce_orig": 1.7069867849349976, + "epoch": 0.7687109066072327, + "kl_loss": 0.0719638466835022, + "loss_ib": 0.0013779443688690662, + "step": 2673 + }, + { + "ce_ib": 2.5013229846954346, + "ce_orig": 0.5382527709007263, + "epoch": 0.7687109066072327, + "kl_loss": 0.05511628836393356, + "loss_ib": 0.0008012951584532857, + "step": 2673 + }, + { + "ce_ib": 3.8572447299957275, + "ce_orig": 0.8472180962562561, + "epoch": 0.7689984901862104, + "kl_loss": 0.08874229341745377, + "loss_ib": 0.0012731474125757813, + "step": 2674 + }, + { + "ce_ib": 6.045554161071777, + "ce_orig": 1.5712743997573853, + "epoch": 0.7689984901862104, + "kl_loss": 0.038204293698072433, + "loss_ib": 0.0009865983156487346, + "step": 2674 + }, + { + "ce_ib": 1.1338969469070435, + "ce_orig": 0.18106473982334137, + "epoch": 0.7689984901862104, + "kl_loss": 0.13656312227249146, + "loss_ib": 0.0014790208078920841, + "step": 2674 + }, + { + "ce_ib": 4.320705413818359, + "ce_orig": 0.869045615196228, + "epoch": 0.7689984901862104, + "kl_loss": 0.07069321721792221, + "loss_ib": 0.0011390027357265353, + "step": 2674 + }, + { + "epoch": 0.769286073765188, + "grad_norm": 0.09350224584341049, + "learning_rate": 4.3611473855141086e-05, + "loss": 0.863, + "step": 2675 + }, + { + "ce_ib": 4.263289928436279, + "ce_orig": 0.6115321516990662, + "epoch": 0.769286073765188, + "kl_loss": 0.0684366300702095, + "loss_ib": 0.0011106953024864197, + "step": 2675 + }, + { + "ce_ib": 3.583608388900757, + "ce_orig": 1.0329934358596802, + "epoch": 0.769286073765188, + "kl_loss": 0.04280628263950348, + "loss_ib": 0.0007864236831665039, + "step": 2675 + }, + { + "ce_ib": 4.153503894805908, + "ce_orig": 0.6289424896240234, + "epoch": 0.769286073765188, + "kl_loss": 0.1524023711681366, + "loss_ib": 0.0019393740221858025, + "step": 2675 + }, + { + "ce_ib": 5.101752281188965, + "ce_orig": 1.234647274017334, + "epoch": 0.769286073765188, + "kl_loss": 0.07824903726577759, + "loss_ib": 0.0012926656054332852, + "step": 2675 + }, + { + "ce_ib": 6.700021266937256, + "ce_orig": 1.6438345909118652, + "epoch": 0.7695736573441656, + "kl_loss": 0.04540220648050308, + "loss_ib": 0.0011240241583436728, + "step": 2676 + }, + { + "ce_ib": 2.97586727142334, + "ce_orig": 0.6569598317146301, + "epoch": 0.7695736573441656, + "kl_loss": 0.07243466377258301, + "loss_ib": 0.001021933276206255, + "step": 2676 + }, + { + "ce_ib": 2.660367488861084, + "ce_orig": 0.701774001121521, + "epoch": 0.7695736573441656, + "kl_loss": 0.03715798258781433, + "loss_ib": 0.0006376165547408164, + "step": 2676 + }, + { + "ce_ib": 5.0230889320373535, + "ce_orig": 1.2233846187591553, + "epoch": 0.7695736573441656, + "kl_loss": 0.09530115872621536, + "loss_ib": 0.0014553203945979476, + "step": 2676 + }, + { + "ce_ib": 4.852808952331543, + "ce_orig": 1.1068065166473389, + "epoch": 0.7698612409231433, + "kl_loss": 0.07345584034919739, + "loss_ib": 0.0012198393233120441, + "step": 2677 + }, + { + "ce_ib": 5.612350940704346, + "ce_orig": 0.9942357540130615, + "epoch": 0.7698612409231433, + "kl_loss": 0.08976572751998901, + "loss_ib": 0.0014588923659175634, + "step": 2677 + }, + { + "ce_ib": 3.8299427032470703, + "ce_orig": 0.8155774474143982, + "epoch": 0.7698612409231433, + "kl_loss": 0.036609236150979996, + "loss_ib": 0.000749086553696543, + "step": 2677 + }, + { + "ce_ib": 3.8590235710144043, + "ce_orig": 0.5254529118537903, + "epoch": 0.7698612409231433, + "kl_loss": 0.06483644992113113, + "loss_ib": 0.0010342667810618877, + "step": 2677 + }, + { + "ce_ib": 3.3708155155181885, + "ce_orig": 0.8372495770454407, + "epoch": 0.770148824502121, + "kl_loss": 0.0613618828356266, + "loss_ib": 0.0009507003123871982, + "step": 2678 + }, + { + "ce_ib": 4.284058094024658, + "ce_orig": 0.8306896686553955, + "epoch": 0.770148824502121, + "kl_loss": 0.06765543669462204, + "loss_ib": 0.0011049601016566157, + "step": 2678 + }, + { + "ce_ib": 3.988218069076538, + "ce_orig": 0.9262073040008545, + "epoch": 0.770148824502121, + "kl_loss": 0.047220878303050995, + "loss_ib": 0.0008710304973646998, + "step": 2678 + }, + { + "ce_ib": 3.545645236968994, + "ce_orig": 0.483866810798645, + "epoch": 0.770148824502121, + "kl_loss": 0.09148328006267548, + "loss_ib": 0.0012693972093984485, + "step": 2678 + }, + { + "ce_ib": 3.792813777923584, + "ce_orig": 0.7466480731964111, + "epoch": 0.7704364080810986, + "kl_loss": 0.044365815818309784, + "loss_ib": 0.0008229395607486367, + "step": 2679 + }, + { + "ce_ib": 2.247027635574341, + "ce_orig": 0.5492655634880066, + "epoch": 0.7704364080810986, + "kl_loss": 0.07280533760786057, + "loss_ib": 0.00095275609055534, + "step": 2679 + }, + { + "ce_ib": 2.9356765747070312, + "ce_orig": 0.5498831868171692, + "epoch": 0.7704364080810986, + "kl_loss": 0.05331674963235855, + "loss_ib": 0.0008267351076938212, + "step": 2679 + }, + { + "ce_ib": 4.434048652648926, + "ce_orig": 1.0043386220932007, + "epoch": 0.7704364080810986, + "kl_loss": 0.09821680188179016, + "loss_ib": 0.0014255729038268328, + "step": 2679 + }, + { + "epoch": 0.7707239916600762, + "grad_norm": 0.10095174610614777, + "learning_rate": 4.358554307621541e-05, + "loss": 0.82, + "step": 2680 + }, + { + "ce_ib": 3.8088901042938232, + "ce_orig": 0.8299369215965271, + "epoch": 0.7707239916600762, + "kl_loss": 0.03342370316386223, + "loss_ib": 0.0007151259924285114, + "step": 2680 + }, + { + "ce_ib": 3.4676430225372314, + "ce_orig": 0.8695896863937378, + "epoch": 0.7707239916600762, + "kl_loss": 0.05469026789069176, + "loss_ib": 0.0008936669328249991, + "step": 2680 + }, + { + "ce_ib": 5.265885829925537, + "ce_orig": 1.1472878456115723, + "epoch": 0.7707239916600762, + "kl_loss": 0.12689870595932007, + "loss_ib": 0.0017955756047740579, + "step": 2680 + }, + { + "ce_ib": 2.799077033996582, + "ce_orig": 0.6912546753883362, + "epoch": 0.7707239916600762, + "kl_loss": 0.039382632821798325, + "loss_ib": 0.0006737340008839965, + "step": 2680 + }, + { + "ce_ib": 3.4998462200164795, + "ce_orig": 0.8834567666053772, + "epoch": 0.7710115752390538, + "kl_loss": 0.07022921741008759, + "loss_ib": 0.0010522768134251237, + "step": 2681 + }, + { + "ce_ib": 3.1519811153411865, + "ce_orig": 0.4092645049095154, + "epoch": 0.7710115752390538, + "kl_loss": 0.0801154375076294, + "loss_ib": 0.0011163525050505996, + "step": 2681 + }, + { + "ce_ib": 2.663156270980835, + "ce_orig": 0.39848726987838745, + "epoch": 0.7710115752390538, + "kl_loss": 0.055335089564323425, + "loss_ib": 0.0008196665439754725, + "step": 2681 + }, + { + "ce_ib": 4.33894681930542, + "ce_orig": 0.602165937423706, + "epoch": 0.7710115752390538, + "kl_loss": 0.0877770334482193, + "loss_ib": 0.0013116650516167283, + "step": 2681 + }, + { + "ce_ib": 3.1149282455444336, + "ce_orig": 0.5536927580833435, + "epoch": 0.7712991588180315, + "kl_loss": 0.06319096684455872, + "loss_ib": 0.0009434024686925113, + "step": 2682 + }, + { + "ce_ib": 3.5823047161102295, + "ce_orig": 0.5034530162811279, + "epoch": 0.7712991588180315, + "kl_loss": 0.10557833313941956, + "loss_ib": 0.0014140137936919928, + "step": 2682 + }, + { + "ce_ib": 4.261070728302002, + "ce_orig": 0.7261365056037903, + "epoch": 0.7712991588180315, + "kl_loss": 0.07223857939243317, + "loss_ib": 0.0011484927963465452, + "step": 2682 + }, + { + "ce_ib": 4.235668659210205, + "ce_orig": 0.9825583696365356, + "epoch": 0.7712991588180315, + "kl_loss": 0.08399713039398193, + "loss_ib": 0.0012635381426662207, + "step": 2682 + }, + { + "ce_ib": 5.3982930183410645, + "ce_orig": 1.3204900026321411, + "epoch": 0.7715867423970091, + "kl_loss": 0.0723784863948822, + "loss_ib": 0.0012636141618713737, + "step": 2683 + }, + { + "ce_ib": 3.0760338306427, + "ce_orig": 0.7135428786277771, + "epoch": 0.7715867423970091, + "kl_loss": 0.04623468220233917, + "loss_ib": 0.0007699502166360617, + "step": 2683 + }, + { + "ce_ib": 6.132364273071289, + "ce_orig": 1.8222633600234985, + "epoch": 0.7715867423970091, + "kl_loss": 0.07616171985864639, + "loss_ib": 0.0013748535420745611, + "step": 2683 + }, + { + "ce_ib": 3.7197446823120117, + "ce_orig": 0.38520240783691406, + "epoch": 0.7715867423970091, + "kl_loss": 0.0788557380437851, + "loss_ib": 0.0011605317704379559, + "step": 2683 + }, + { + "ce_ib": 5.0992350578308105, + "ce_orig": 1.4153923988342285, + "epoch": 0.7718743259759868, + "kl_loss": 0.08674097061157227, + "loss_ib": 0.0013773331884294748, + "step": 2684 + }, + { + "ce_ib": 4.0548858642578125, + "ce_orig": 0.894388735294342, + "epoch": 0.7718743259759868, + "kl_loss": 0.05886102095246315, + "loss_ib": 0.0009940987220034003, + "step": 2684 + }, + { + "ce_ib": 4.05594539642334, + "ce_orig": 0.6592414379119873, + "epoch": 0.7718743259759868, + "kl_loss": 0.06491027772426605, + "loss_ib": 0.0010546973207965493, + "step": 2684 + }, + { + "ce_ib": 4.0885748863220215, + "ce_orig": 0.6294820308685303, + "epoch": 0.7718743259759868, + "kl_loss": 0.04943123087286949, + "loss_ib": 0.00090316979913041, + "step": 2684 + }, + { + "epoch": 0.7721619095549644, + "grad_norm": 0.09937409311532974, + "learning_rate": 4.355956752040267e-05, + "loss": 0.8412, + "step": 2685 + }, + { + "ce_ib": 2.8449454307556152, + "ce_orig": 0.5899273157119751, + "epoch": 0.7721619095549644, + "kl_loss": 0.04182390123605728, + "loss_ib": 0.0007027335232123733, + "step": 2685 + }, + { + "ce_ib": 3.9434292316436768, + "ce_orig": 1.050053358078003, + "epoch": 0.7721619095549644, + "kl_loss": 0.07170945405960083, + "loss_ib": 0.0011114374501630664, + "step": 2685 + }, + { + "ce_ib": 3.9754161834716797, + "ce_orig": 0.9839081764221191, + "epoch": 0.7721619095549644, + "kl_loss": 0.07479889690876007, + "loss_ib": 0.001145530492067337, + "step": 2685 + }, + { + "ce_ib": 5.390632629394531, + "ce_orig": 1.1842120885849, + "epoch": 0.7721619095549644, + "kl_loss": 0.08430466800928116, + "loss_ib": 0.0013821099419146776, + "step": 2685 + }, + { + "ce_ib": 4.523552894592285, + "ce_orig": 0.7012823224067688, + "epoch": 0.7724494931339421, + "kl_loss": 0.11859620362520218, + "loss_ib": 0.001638317364268005, + "step": 2686 + }, + { + "ce_ib": 2.346705436706543, + "ce_orig": 0.6819307208061218, + "epoch": 0.7724494931339421, + "kl_loss": 0.04104466736316681, + "loss_ib": 0.0006451172521337867, + "step": 2686 + }, + { + "ce_ib": 5.3681769371032715, + "ce_orig": 0.8470500707626343, + "epoch": 0.7724494931339421, + "kl_loss": 0.07011391222476959, + "loss_ib": 0.0012379568070173264, + "step": 2686 + }, + { + "ce_ib": 3.704535484313965, + "ce_orig": 0.6432211995124817, + "epoch": 0.7724494931339421, + "kl_loss": 0.06982417404651642, + "loss_ib": 0.0010686952155083418, + "step": 2686 + }, + { + "ce_ib": 2.56727933883667, + "ce_orig": 0.42534756660461426, + "epoch": 0.7727370767129197, + "kl_loss": 0.17486047744750977, + "loss_ib": 0.002005332615226507, + "step": 2687 + }, + { + "ce_ib": 3.8441145420074463, + "ce_orig": 0.8488746881484985, + "epoch": 0.7727370767129197, + "kl_loss": 0.07772470265626907, + "loss_ib": 0.001161658437922597, + "step": 2687 + }, + { + "ce_ib": 4.283561706542969, + "ce_orig": 0.5395776033401489, + "epoch": 0.7727370767129197, + "kl_loss": 0.20562857389450073, + "loss_ib": 0.002484641969203949, + "step": 2687 + }, + { + "ce_ib": 5.0827813148498535, + "ce_orig": 1.3138093948364258, + "epoch": 0.7727370767129197, + "kl_loss": 0.05482550710439682, + "loss_ib": 0.00105653319042176, + "step": 2687 + }, + { + "ce_ib": 2.6199584007263184, + "ce_orig": 0.5959343314170837, + "epoch": 0.7730246602918973, + "kl_loss": 0.04012247547507286, + "loss_ib": 0.0006632205913774669, + "step": 2688 + }, + { + "ce_ib": 3.157959222793579, + "ce_orig": 0.7640300393104553, + "epoch": 0.7730246602918973, + "kl_loss": 0.046776045113801956, + "loss_ib": 0.0007835563737899065, + "step": 2688 + }, + { + "ce_ib": 3.322484016418457, + "ce_orig": 0.7187991738319397, + "epoch": 0.7730246602918973, + "kl_loss": 0.06799851357936859, + "loss_ib": 0.001012233435176313, + "step": 2688 + }, + { + "ce_ib": 4.377959728240967, + "ce_orig": 0.9766048789024353, + "epoch": 0.7730246602918973, + "kl_loss": 0.0684041827917099, + "loss_ib": 0.001121837762184441, + "step": 2688 + }, + { + "ce_ib": 3.137425661087036, + "ce_orig": 0.6960673928260803, + "epoch": 0.7733122438708749, + "kl_loss": 0.0847204327583313, + "loss_ib": 0.0011609469074755907, + "step": 2689 + }, + { + "ce_ib": 4.143620014190674, + "ce_orig": 0.974551796913147, + "epoch": 0.7733122438708749, + "kl_loss": 0.04044609144330025, + "loss_ib": 0.0008188228821381927, + "step": 2689 + }, + { + "ce_ib": 4.059276580810547, + "ce_orig": 0.7283102869987488, + "epoch": 0.7733122438708749, + "kl_loss": 0.07978832721710205, + "loss_ib": 0.0012038107961416245, + "step": 2689 + }, + { + "ce_ib": 5.279824733734131, + "ce_orig": 0.7170791625976562, + "epoch": 0.7733122438708749, + "kl_loss": 0.0598740354180336, + "loss_ib": 0.001126722781918943, + "step": 2689 + }, + { + "epoch": 0.7735998274498526, + "grad_norm": 0.1058819517493248, + "learning_rate": 4.3533547250284014e-05, + "loss": 0.8291, + "step": 2690 + }, + { + "ce_ib": 5.485904216766357, + "ce_orig": 1.019028663635254, + "epoch": 0.7735998274498526, + "kl_loss": 0.07415051013231277, + "loss_ib": 0.0012900953879579902, + "step": 2690 + }, + { + "ce_ib": 4.9982218742370605, + "ce_orig": 1.072984218597412, + "epoch": 0.7735998274498526, + "kl_loss": 0.06624048948287964, + "loss_ib": 0.0011622270103543997, + "step": 2690 + }, + { + "ce_ib": 4.437283515930176, + "ce_orig": 1.0993883609771729, + "epoch": 0.7735998274498526, + "kl_loss": 0.045159369707107544, + "loss_ib": 0.0008953220094554126, + "step": 2690 + }, + { + "ce_ib": 3.182481050491333, + "ce_orig": 0.5517765283584595, + "epoch": 0.7735998274498526, + "kl_loss": 0.054315660148859024, + "loss_ib": 0.0008614046964794397, + "step": 2690 + }, + { + "ce_ib": 2.7956881523132324, + "ce_orig": 0.695983350276947, + "epoch": 0.7738874110288303, + "kl_loss": 0.04648059606552124, + "loss_ib": 0.0007443747599609196, + "step": 2691 + }, + { + "ce_ib": 4.8451714515686035, + "ce_orig": 0.8044176697731018, + "epoch": 0.7738874110288303, + "kl_loss": 0.07897879183292389, + "loss_ib": 0.0012743049301207066, + "step": 2691 + }, + { + "ce_ib": 3.0893118381500244, + "ce_orig": 0.6374213695526123, + "epoch": 0.7738874110288303, + "kl_loss": 0.05494837835431099, + "loss_ib": 0.0008584149181842804, + "step": 2691 + }, + { + "ce_ib": 4.8626508712768555, + "ce_orig": 0.5446386933326721, + "epoch": 0.7738874110288303, + "kl_loss": 0.1322024166584015, + "loss_ib": 0.0018082892056554556, + "step": 2691 + }, + { + "ce_ib": 3.9884660243988037, + "ce_orig": 1.1898043155670166, + "epoch": 0.7741749946078079, + "kl_loss": 0.03903613239526749, + "loss_ib": 0.0007892079302109778, + "step": 2692 + }, + { + "ce_ib": 5.175694465637207, + "ce_orig": 1.0206552743911743, + "epoch": 0.7741749946078079, + "kl_loss": 0.08018241822719574, + "loss_ib": 0.0013193936320021749, + "step": 2692 + }, + { + "ce_ib": 3.0268657207489014, + "ce_orig": 0.6775096654891968, + "epoch": 0.7741749946078079, + "kl_loss": 0.03807976841926575, + "loss_ib": 0.0006834841915406287, + "step": 2692 + }, + { + "ce_ib": 2.882685899734497, + "ce_orig": 0.5006338357925415, + "epoch": 0.7741749946078079, + "kl_loss": 0.07463032752275467, + "loss_ib": 0.0010345717892050743, + "step": 2692 + }, + { + "ce_ib": 2.52984881401062, + "ce_orig": 0.6682056784629822, + "epoch": 0.7744625781867855, + "kl_loss": 0.03269201144576073, + "loss_ib": 0.0005799049977213144, + "step": 2693 + }, + { + "ce_ib": 3.8552157878875732, + "ce_orig": 0.6886082887649536, + "epoch": 0.7744625781867855, + "kl_loss": 0.1254425048828125, + "loss_ib": 0.0016399467131122947, + "step": 2693 + }, + { + "ce_ib": 4.728900909423828, + "ce_orig": 0.9783046841621399, + "epoch": 0.7744625781867855, + "kl_loss": 0.07267141342163086, + "loss_ib": 0.0011996041284874082, + "step": 2693 + }, + { + "ce_ib": 3.708261013031006, + "ce_orig": 0.8044725656509399, + "epoch": 0.7744625781867855, + "kl_loss": 0.040013328194618225, + "loss_ib": 0.0007709593628533185, + "step": 2693 + }, + { + "ce_ib": 2.6659657955169678, + "ce_orig": 0.6917432546615601, + "epoch": 0.7747501617657632, + "kl_loss": 0.03770031780004501, + "loss_ib": 0.0006435997784137726, + "step": 2694 + }, + { + "ce_ib": 3.6241753101348877, + "ce_orig": 0.6521391868591309, + "epoch": 0.7747501617657632, + "kl_loss": 0.07036956399679184, + "loss_ib": 0.0010661131236702204, + "step": 2694 + }, + { + "ce_ib": 3.876103639602661, + "ce_orig": 0.8698974847793579, + "epoch": 0.7747501617657632, + "kl_loss": 0.06489616632461548, + "loss_ib": 0.0010365720372647047, + "step": 2694 + }, + { + "ce_ib": 3.350219964981079, + "ce_orig": 0.8968338966369629, + "epoch": 0.7747501617657632, + "kl_loss": 0.03569311648607254, + "loss_ib": 0.0006919531151652336, + "step": 2694 + }, + { + "epoch": 0.7750377453447408, + "grad_norm": 0.08588659018278122, + "learning_rate": 4.350748232854829e-05, + "loss": 0.8489, + "step": 2695 + }, + { + "ce_ib": 2.949345827102661, + "ce_orig": 0.40665531158447266, + "epoch": 0.7750377453447408, + "kl_loss": 0.07444869726896286, + "loss_ib": 0.0010394215350970626, + "step": 2695 + }, + { + "ce_ib": 5.055398941040039, + "ce_orig": 1.2751542329788208, + "epoch": 0.7750377453447408, + "kl_loss": 0.06485392898321152, + "loss_ib": 0.0011540791019797325, + "step": 2695 + }, + { + "ce_ib": 4.112067699432373, + "ce_orig": 1.2006117105484009, + "epoch": 0.7750377453447408, + "kl_loss": 0.06263314187526703, + "loss_ib": 0.0010375381680205464, + "step": 2695 + }, + { + "ce_ib": 3.3675954341888428, + "ce_orig": 0.46262192726135254, + "epoch": 0.7750377453447408, + "kl_loss": 0.10342809557914734, + "loss_ib": 0.0013710404746234417, + "step": 2695 + }, + { + "ce_ib": 6.1901164054870605, + "ce_orig": 1.0151340961456299, + "epoch": 0.7753253289237184, + "kl_loss": 0.05958195775747299, + "loss_ib": 0.0012148311361670494, + "step": 2696 + }, + { + "ce_ib": 3.3143861293792725, + "ce_orig": 0.8519404530525208, + "epoch": 0.7753253289237184, + "kl_loss": 0.0690869688987732, + "loss_ib": 0.0010223082499578595, + "step": 2696 + }, + { + "ce_ib": 7.050264835357666, + "ce_orig": 1.3737925291061401, + "epoch": 0.7753253289237184, + "kl_loss": 0.09292074292898178, + "loss_ib": 0.0016342338640242815, + "step": 2696 + }, + { + "ce_ib": 3.89614200592041, + "ce_orig": 0.6963666677474976, + "epoch": 0.7753253289237184, + "kl_loss": 0.05780697613954544, + "loss_ib": 0.0009676839108578861, + "step": 2696 + }, + { + "ce_ib": 4.314846515655518, + "ce_orig": 0.8336844444274902, + "epoch": 0.7756129125026962, + "kl_loss": 0.13123147189617157, + "loss_ib": 0.0017437994247302413, + "step": 2697 + }, + { + "ce_ib": 2.830646514892578, + "ce_orig": 0.814231276512146, + "epoch": 0.7756129125026962, + "kl_loss": 0.056650031358003616, + "loss_ib": 0.0008495649672113359, + "step": 2697 + }, + { + "ce_ib": 5.403866767883301, + "ce_orig": 0.8081135749816895, + "epoch": 0.7756129125026962, + "kl_loss": 0.11034174263477325, + "loss_ib": 0.0016438040183857083, + "step": 2697 + }, + { + "ce_ib": 2.797408103942871, + "ce_orig": 0.6154947280883789, + "epoch": 0.7756129125026962, + "kl_loss": 0.029047982767224312, + "loss_ib": 0.0005702206399291754, + "step": 2697 + }, + { + "ce_ib": 2.8409335613250732, + "ce_orig": 0.41365736722946167, + "epoch": 0.7759004960816738, + "kl_loss": 0.0604868121445179, + "loss_ib": 0.0008889614837244153, + "step": 2698 + }, + { + "ce_ib": 3.4423797130584717, + "ce_orig": 0.7693657279014587, + "epoch": 0.7759004960816738, + "kl_loss": 0.06019386649131775, + "loss_ib": 0.0009461766458116472, + "step": 2698 + }, + { + "ce_ib": 4.11994743347168, + "ce_orig": 0.8553478717803955, + "epoch": 0.7759004960816738, + "kl_loss": 0.04336543008685112, + "loss_ib": 0.0008456489886157215, + "step": 2698 + }, + { + "ce_ib": 3.604775905609131, + "ce_orig": 0.6963568329811096, + "epoch": 0.7759004960816738, + "kl_loss": 0.054945703595876694, + "loss_ib": 0.0009099346352741122, + "step": 2698 + }, + { + "ce_ib": 2.7421555519104004, + "ce_orig": 0.6225867867469788, + "epoch": 0.7761880796606514, + "kl_loss": 0.04600502550601959, + "loss_ib": 0.0007342657772824168, + "step": 2699 + }, + { + "ce_ib": 6.470999717712402, + "ce_orig": 1.6601991653442383, + "epoch": 0.7761880796606514, + "kl_loss": 0.07516741752624512, + "loss_ib": 0.0013987740967422724, + "step": 2699 + }, + { + "ce_ib": 4.343318939208984, + "ce_orig": 0.6032227873802185, + "epoch": 0.7761880796606514, + "kl_loss": 0.07065984606742859, + "loss_ib": 0.0011409303406253457, + "step": 2699 + }, + { + "ce_ib": 5.9386820793151855, + "ce_orig": 1.2323752641677856, + "epoch": 0.7761880796606514, + "kl_loss": 0.06777714937925339, + "loss_ib": 0.0012716397177428007, + "step": 2699 + }, + { + "epoch": 0.776475663239629, + "grad_norm": 0.09297773987054825, + "learning_rate": 4.348137281799197e-05, + "loss": 0.8596, + "step": 2700 + }, + { + "ce_ib": 3.0050783157348633, + "ce_orig": 0.44388875365257263, + "epoch": 0.776475663239629, + "kl_loss": 0.0654965341091156, + "loss_ib": 0.0009554731659591198, + "step": 2700 + }, + { + "ce_ib": 3.6815919876098633, + "ce_orig": 0.7440159320831299, + "epoch": 0.776475663239629, + "kl_loss": 0.06021527200937271, + "loss_ib": 0.0009703118703328073, + "step": 2700 + }, + { + "ce_ib": 3.1557438373565674, + "ce_orig": 0.9160170555114746, + "epoch": 0.776475663239629, + "kl_loss": 0.07215739786624908, + "loss_ib": 0.0010371484095230699, + "step": 2700 + }, + { + "ce_ib": 2.7569541931152344, + "ce_orig": 0.7335131764411926, + "epoch": 0.776475663239629, + "kl_loss": 0.07651776075363159, + "loss_ib": 0.0010408730013296008, + "step": 2700 + }, + { + "ce_ib": 2.820852279663086, + "ce_orig": 0.3410119116306305, + "epoch": 0.7767632468186066, + "kl_loss": 0.0664106160402298, + "loss_ib": 0.0009461913723498583, + "step": 2701 + }, + { + "ce_ib": 4.606405258178711, + "ce_orig": 1.0208802223205566, + "epoch": 0.7767632468186066, + "kl_loss": 0.04068133234977722, + "loss_ib": 0.00086745381122455, + "step": 2701 + }, + { + "ce_ib": 4.732493877410889, + "ce_orig": 0.904913604259491, + "epoch": 0.7767632468186066, + "kl_loss": 0.06282617151737213, + "loss_ib": 0.0011015110649168491, + "step": 2701 + }, + { + "ce_ib": 5.871071815490723, + "ce_orig": 0.8655644655227661, + "epoch": 0.7767632468186066, + "kl_loss": 0.07683870196342468, + "loss_ib": 0.001355494256131351, + "step": 2701 + }, + { + "ce_ib": 4.809519290924072, + "ce_orig": 0.7737732529640198, + "epoch": 0.7770508303975843, + "kl_loss": 0.06397856771945953, + "loss_ib": 0.0011207376373931766, + "step": 2702 + }, + { + "ce_ib": 2.536261796951294, + "ce_orig": 0.5921071171760559, + "epoch": 0.7770508303975843, + "kl_loss": 0.04668164253234863, + "loss_ib": 0.0007204425637610257, + "step": 2702 + }, + { + "ce_ib": 2.232619047164917, + "ce_orig": 0.4323355555534363, + "epoch": 0.7770508303975843, + "kl_loss": 0.042084790766239166, + "loss_ib": 0.0006441098521463573, + "step": 2702 + }, + { + "ce_ib": 4.995136260986328, + "ce_orig": 1.1932203769683838, + "epoch": 0.7770508303975843, + "kl_loss": 0.06726048141717911, + "loss_ib": 0.0011721184710040689, + "step": 2702 + }, + { + "ce_ib": 3.4370222091674805, + "ce_orig": 0.6986569166183472, + "epoch": 0.7773384139765619, + "kl_loss": 0.03516159951686859, + "loss_ib": 0.000695318216457963, + "step": 2703 + }, + { + "ce_ib": 3.3844878673553467, + "ce_orig": 0.491985559463501, + "epoch": 0.7773384139765619, + "kl_loss": 0.053058922290802, + "loss_ib": 0.00086903793271631, + "step": 2703 + }, + { + "ce_ib": 5.331131458282471, + "ce_orig": 1.2424042224884033, + "epoch": 0.7773384139765619, + "kl_loss": 0.051387373358011246, + "loss_ib": 0.0010469869012013078, + "step": 2703 + }, + { + "ce_ib": 5.9206318855285645, + "ce_orig": 1.3163235187530518, + "epoch": 0.7773384139765619, + "kl_loss": 0.04022793471813202, + "loss_ib": 0.000994342495687306, + "step": 2703 + }, + { + "ce_ib": 3.9150171279907227, + "ce_orig": 0.5432799458503723, + "epoch": 0.7776259975555396, + "kl_loss": 0.0533704049885273, + "loss_ib": 0.0009252057643607259, + "step": 2704 + }, + { + "ce_ib": 5.209825038909912, + "ce_orig": 1.3694642782211304, + "epoch": 0.7776259975555396, + "kl_loss": 0.05632878467440605, + "loss_ib": 0.0010842703050002456, + "step": 2704 + }, + { + "ce_ib": 1.8517910242080688, + "ce_orig": 0.309320330619812, + "epoch": 0.7776259975555396, + "kl_loss": 0.09250050038099289, + "loss_ib": 0.0011101841228082776, + "step": 2704 + }, + { + "ce_ib": 4.600992679595947, + "ce_orig": 1.020167350769043, + "epoch": 0.7776259975555396, + "kl_loss": 0.05751311406493187, + "loss_ib": 0.0010352303506806493, + "step": 2704 + }, + { + "epoch": 0.7779135811345173, + "grad_norm": 0.09557105600833893, + "learning_rate": 4.345521878151891e-05, + "loss": 0.7518, + "step": 2705 + }, + { + "ce_ib": 3.3313276767730713, + "ce_orig": 0.834682285785675, + "epoch": 0.7779135811345173, + "kl_loss": 0.07982174307107925, + "loss_ib": 0.0011313501745462418, + "step": 2705 + }, + { + "ce_ib": 2.981689214706421, + "ce_orig": 0.5640284419059753, + "epoch": 0.7779135811345173, + "kl_loss": 0.031956978142261505, + "loss_ib": 0.0006177386967465281, + "step": 2705 + }, + { + "ce_ib": 3.2106375694274902, + "ce_orig": 0.7453392148017883, + "epoch": 0.7779135811345173, + "kl_loss": 0.12282001227140427, + "loss_ib": 0.0015492638340219855, + "step": 2705 + }, + { + "ce_ib": 4.050711631774902, + "ce_orig": 0.6930451393127441, + "epoch": 0.7779135811345173, + "kl_loss": 0.06742741167545319, + "loss_ib": 0.0010793452383950353, + "step": 2705 + }, + { + "ce_ib": 3.834749698638916, + "ce_orig": 0.43804359436035156, + "epoch": 0.7782011647134949, + "kl_loss": 0.11519961059093475, + "loss_ib": 0.0015354710631072521, + "step": 2706 + }, + { + "ce_ib": 5.0085296630859375, + "ce_orig": 1.0081876516342163, + "epoch": 0.7782011647134949, + "kl_loss": 0.06075047329068184, + "loss_ib": 0.0011083576828241348, + "step": 2706 + }, + { + "ce_ib": 4.8293843269348145, + "ce_orig": 1.0674020051956177, + "epoch": 0.7782011647134949, + "kl_loss": 0.06054159626364708, + "loss_ib": 0.001088354387320578, + "step": 2706 + }, + { + "ce_ib": 4.213943004608154, + "ce_orig": 0.6000770926475525, + "epoch": 0.7782011647134949, + "kl_loss": 0.062476880848407745, + "loss_ib": 0.0010461631463840604, + "step": 2706 + }, + { + "ce_ib": 4.592446327209473, + "ce_orig": 1.2237629890441895, + "epoch": 0.7784887482924725, + "kl_loss": 0.06150708347558975, + "loss_ib": 0.0010743153980001807, + "step": 2707 + }, + { + "ce_ib": 2.7840662002563477, + "ce_orig": 0.6731988787651062, + "epoch": 0.7784887482924725, + "kl_loss": 0.058779723942279816, + "loss_ib": 0.0008662038017064333, + "step": 2707 + }, + { + "ce_ib": 3.482534885406494, + "ce_orig": 0.5671296715736389, + "epoch": 0.7784887482924725, + "kl_loss": 0.07356081157922745, + "loss_ib": 0.0010838615708053112, + "step": 2707 + }, + { + "ce_ib": 3.063281536102295, + "ce_orig": 0.5934745073318481, + "epoch": 0.7784887482924725, + "kl_loss": 0.05946114659309387, + "loss_ib": 0.0009009396308101714, + "step": 2707 + }, + { + "ce_ib": 4.397923469543457, + "ce_orig": 0.9737852811813354, + "epoch": 0.7787763318714501, + "kl_loss": 0.04944682493805885, + "loss_ib": 0.0009342606063000858, + "step": 2708 + }, + { + "ce_ib": 3.275404930114746, + "ce_orig": 0.691648006439209, + "epoch": 0.7787763318714501, + "kl_loss": 0.036702126264572144, + "loss_ib": 0.0006945617496967316, + "step": 2708 + }, + { + "ce_ib": 2.8841793537139893, + "ce_orig": 0.5388064384460449, + "epoch": 0.7787763318714501, + "kl_loss": 0.06148168444633484, + "loss_ib": 0.0009032347588799894, + "step": 2708 + }, + { + "ce_ib": 2.8489789962768555, + "ce_orig": 0.8048861026763916, + "epoch": 0.7787763318714501, + "kl_loss": 0.05512702837586403, + "loss_ib": 0.0008361681248061359, + "step": 2708 + }, + { + "ce_ib": 4.372595310211182, + "ce_orig": 0.8278238773345947, + "epoch": 0.7790639154504277, + "kl_loss": 0.0452781580388546, + "loss_ib": 0.0008900411194190383, + "step": 2709 + }, + { + "ce_ib": 5.787680149078369, + "ce_orig": 1.4823836088180542, + "epoch": 0.7790639154504277, + "kl_loss": 0.10812216997146606, + "loss_ib": 0.0016599895898252726, + "step": 2709 + }, + { + "ce_ib": 2.908842086791992, + "ce_orig": 0.6977288722991943, + "epoch": 0.7790639154504277, + "kl_loss": 0.048852063715457916, + "loss_ib": 0.0007794048287905753, + "step": 2709 + }, + { + "ce_ib": 3.2671945095062256, + "ce_orig": 0.6725302934646606, + "epoch": 0.7790639154504277, + "kl_loss": 0.03524677827954292, + "loss_ib": 0.0006791871855966747, + "step": 2709 + }, + { + "epoch": 0.7793514990294054, + "grad_norm": 0.09919004142284393, + "learning_rate": 4.342902028214025e-05, + "loss": 0.8387, + "step": 2710 + }, + { + "ce_ib": 3.302635908126831, + "ce_orig": 0.7955312728881836, + "epoch": 0.7793514990294054, + "kl_loss": 0.06011452153325081, + "loss_ib": 0.0009314088383689523, + "step": 2710 + }, + { + "ce_ib": 4.721094608306885, + "ce_orig": 1.0486160516738892, + "epoch": 0.7793514990294054, + "kl_loss": 0.04054345190525055, + "loss_ib": 0.0008775439928285778, + "step": 2710 + }, + { + "ce_ib": 4.611231803894043, + "ce_orig": 1.1466809511184692, + "epoch": 0.7793514990294054, + "kl_loss": 0.07032188773155212, + "loss_ib": 0.0011643420439213514, + "step": 2710 + }, + { + "ce_ib": 2.793827772140503, + "ce_orig": 0.7024866342544556, + "epoch": 0.7793514990294054, + "kl_loss": 0.07563923299312592, + "loss_ib": 0.0010357750579714775, + "step": 2710 + }, + { + "ce_ib": 1.9571126699447632, + "ce_orig": 0.3463415205478668, + "epoch": 0.7796390826083831, + "kl_loss": 0.04971877485513687, + "loss_ib": 0.0006928989896550775, + "step": 2711 + }, + { + "ce_ib": 6.605043888092041, + "ce_orig": 1.6802622079849243, + "epoch": 0.7796390826083831, + "kl_loss": 0.05547555536031723, + "loss_ib": 0.001215259893797338, + "step": 2711 + }, + { + "ce_ib": 2.81038761138916, + "ce_orig": 0.5370168685913086, + "epoch": 0.7796390826083831, + "kl_loss": 0.05820222198963165, + "loss_ib": 0.0008630609372630715, + "step": 2711 + }, + { + "ce_ib": 3.2852742671966553, + "ce_orig": 0.811819314956665, + "epoch": 0.7796390826083831, + "kl_loss": 0.09025093168020248, + "loss_ib": 0.0012310367310419679, + "step": 2711 + }, + { + "ce_ib": 2.769925355911255, + "ce_orig": 0.4974853992462158, + "epoch": 0.7799266661873607, + "kl_loss": 0.13231781125068665, + "loss_ib": 0.0016001705080270767, + "step": 2712 + }, + { + "ce_ib": 5.502012729644775, + "ce_orig": 1.3683048486709595, + "epoch": 0.7799266661873607, + "kl_loss": 0.07164211571216583, + "loss_ib": 0.0012666223337873816, + "step": 2712 + }, + { + "ce_ib": 5.290166854858398, + "ce_orig": 1.1290582418441772, + "epoch": 0.7799266661873607, + "kl_loss": 0.07155481725931168, + "loss_ib": 0.0012445647735148668, + "step": 2712 + }, + { + "ce_ib": 5.155043125152588, + "ce_orig": 1.0190174579620361, + "epoch": 0.7799266661873607, + "kl_loss": 0.06828175485134125, + "loss_ib": 0.0011983218137174845, + "step": 2712 + }, + { + "ce_ib": 2.9593706130981445, + "ce_orig": 0.5821625590324402, + "epoch": 0.7802142497663384, + "kl_loss": 0.05315530672669411, + "loss_ib": 0.0008274900610558689, + "step": 2713 + }, + { + "ce_ib": 5.56655216217041, + "ce_orig": 1.1470381021499634, + "epoch": 0.7802142497663384, + "kl_loss": 0.06862430274486542, + "loss_ib": 0.0012428981717675924, + "step": 2713 + }, + { + "ce_ib": 3.0534720420837402, + "ce_orig": 0.6209225058555603, + "epoch": 0.7802142497663384, + "kl_loss": 0.06195743381977081, + "loss_ib": 0.0009249215363524854, + "step": 2713 + }, + { + "ce_ib": 3.3692572116851807, + "ce_orig": 0.8269421458244324, + "epoch": 0.7802142497663384, + "kl_loss": 0.07366090267896652, + "loss_ib": 0.0010735347168520093, + "step": 2713 + }, + { + "ce_ib": 2.662938117980957, + "ce_orig": 0.6289739608764648, + "epoch": 0.780501833345316, + "kl_loss": 0.04302338510751724, + "loss_ib": 0.000696527655236423, + "step": 2714 + }, + { + "ce_ib": 4.57045841217041, + "ce_orig": 0.8251466155052185, + "epoch": 0.780501833345316, + "kl_loss": 0.0941077172756195, + "loss_ib": 0.0013981229858472943, + "step": 2714 + }, + { + "ce_ib": 4.611301898956299, + "ce_orig": 1.3861967325210571, + "epoch": 0.780501833345316, + "kl_loss": 0.05675474554300308, + "loss_ib": 0.0010286776814609766, + "step": 2714 + }, + { + "ce_ib": 5.31988000869751, + "ce_orig": 1.0971527099609375, + "epoch": 0.780501833345316, + "kl_loss": 0.10964396595954895, + "loss_ib": 0.0016284276498481631, + "step": 2714 + }, + { + "epoch": 0.7807894169242936, + "grad_norm": 0.10004914551973343, + "learning_rate": 4.340277738297428e-05, + "loss": 0.8372, + "step": 2715 + }, + { + "ce_ib": 2.438720464706421, + "ce_orig": 0.3615637421607971, + "epoch": 0.7807894169242936, + "kl_loss": 0.08452534675598145, + "loss_ib": 0.0010891255224123597, + "step": 2715 + }, + { + "ce_ib": 4.085265636444092, + "ce_orig": 0.6568682193756104, + "epoch": 0.7807894169242936, + "kl_loss": 0.05410324037075043, + "loss_ib": 0.000949558918364346, + "step": 2715 + }, + { + "ce_ib": 2.8669726848602295, + "ce_orig": 0.4186045825481415, + "epoch": 0.7807894169242936, + "kl_loss": 0.06667798012495041, + "loss_ib": 0.0009534769924357533, + "step": 2715 + }, + { + "ce_ib": 5.367565155029297, + "ce_orig": 0.995248556137085, + "epoch": 0.7807894169242936, + "kl_loss": 0.05830511078238487, + "loss_ib": 0.0011198075953871012, + "step": 2715 + }, + { + "ce_ib": 3.427338123321533, + "ce_orig": 0.745370626449585, + "epoch": 0.7810770005032712, + "kl_loss": 0.04702412709593773, + "loss_ib": 0.0008129750494845212, + "step": 2716 + }, + { + "ce_ib": 1.5499484539031982, + "ce_orig": 0.36121103167533875, + "epoch": 0.7810770005032712, + "kl_loss": 0.027134345844388008, + "loss_ib": 0.000426338316174224, + "step": 2716 + }, + { + "ce_ib": 4.019435882568359, + "ce_orig": 0.8446844220161438, + "epoch": 0.7810770005032712, + "kl_loss": 0.09866722673177719, + "loss_ib": 0.0013886158121749759, + "step": 2716 + }, + { + "ce_ib": 3.2233097553253174, + "ce_orig": 0.6421531438827515, + "epoch": 0.7810770005032712, + "kl_loss": 0.0728272795677185, + "loss_ib": 0.001050603692419827, + "step": 2716 + }, + { + "ce_ib": 5.112757205963135, + "ce_orig": 0.9121528267860413, + "epoch": 0.781364584082249, + "kl_loss": 0.08750636875629425, + "loss_ib": 0.0013863393105566502, + "step": 2717 + }, + { + "ce_ib": 2.2798566818237305, + "ce_orig": 0.5565314888954163, + "epoch": 0.781364584082249, + "kl_loss": 0.02786901965737343, + "loss_ib": 0.0005066758021712303, + "step": 2717 + }, + { + "ce_ib": 4.835561752319336, + "ce_orig": 0.9150921702384949, + "epoch": 0.781364584082249, + "kl_loss": 0.09239034354686737, + "loss_ib": 0.0014074596110731363, + "step": 2717 + }, + { + "ce_ib": 2.308563709259033, + "ce_orig": 0.5546181201934814, + "epoch": 0.781364584082249, + "kl_loss": 0.035089023411273956, + "loss_ib": 0.0005817466299049556, + "step": 2717 + }, + { + "ce_ib": 4.135668754577637, + "ce_orig": 0.6536887884140015, + "epoch": 0.7816521676612266, + "kl_loss": 0.053687743842601776, + "loss_ib": 0.0009504443150945008, + "step": 2718 + }, + { + "ce_ib": 3.6513054370880127, + "ce_orig": 0.8920561075210571, + "epoch": 0.7816521676612266, + "kl_loss": 0.03403504937887192, + "loss_ib": 0.0007054809830151498, + "step": 2718 + }, + { + "ce_ib": 4.829391956329346, + "ce_orig": 0.7981552481651306, + "epoch": 0.7816521676612266, + "kl_loss": 0.08096916973590851, + "loss_ib": 0.001292630797252059, + "step": 2718 + }, + { + "ce_ib": 3.968592643737793, + "ce_orig": 0.934312105178833, + "epoch": 0.7816521676612266, + "kl_loss": 0.0863257646560669, + "loss_ib": 0.0012601169291883707, + "step": 2718 + }, + { + "ce_ib": 3.401559591293335, + "ce_orig": 0.6223131418228149, + "epoch": 0.7819397512402042, + "kl_loss": 0.05759362876415253, + "loss_ib": 0.0009160922490991652, + "step": 2719 + }, + { + "ce_ib": 3.7428674697875977, + "ce_orig": 1.0484991073608398, + "epoch": 0.7819397512402042, + "kl_loss": 0.04801971837878227, + "loss_ib": 0.0008544839220121503, + "step": 2719 + }, + { + "ce_ib": 5.559881687164307, + "ce_orig": 1.217010736465454, + "epoch": 0.7819397512402042, + "kl_loss": 0.043361786752939224, + "loss_ib": 0.0009896060219034553, + "step": 2719 + }, + { + "ce_ib": 5.543523788452148, + "ce_orig": 1.357413649559021, + "epoch": 0.7819397512402042, + "kl_loss": 0.07701785117387772, + "loss_ib": 0.0013245308073237538, + "step": 2719 + }, + { + "epoch": 0.7822273348191818, + "grad_norm": 0.10675136744976044, + "learning_rate": 4.337649014724621e-05, + "loss": 0.8859, + "step": 2720 + }, + { + "ce_ib": 3.200746536254883, + "ce_orig": 0.6341713666915894, + "epoch": 0.7822273348191818, + "kl_loss": 0.061908502131700516, + "loss_ib": 0.0009391596540808678, + "step": 2720 + }, + { + "ce_ib": 4.99949836730957, + "ce_orig": 0.609032392501831, + "epoch": 0.7822273348191818, + "kl_loss": 0.056670717895030975, + "loss_ib": 0.0010666570160537958, + "step": 2720 + }, + { + "ce_ib": 5.1416802406311035, + "ce_orig": 1.3267744779586792, + "epoch": 0.7822273348191818, + "kl_loss": 0.07612587511539459, + "loss_ib": 0.001275426708161831, + "step": 2720 + }, + { + "ce_ib": 4.047115802764893, + "ce_orig": 0.9179235696792603, + "epoch": 0.7822273348191818, + "kl_loss": 0.08838619291782379, + "loss_ib": 0.0012885733740404248, + "step": 2720 + }, + { + "ce_ib": 2.883903980255127, + "ce_orig": 0.7646516561508179, + "epoch": 0.7825149183981595, + "kl_loss": 0.09816956520080566, + "loss_ib": 0.0012700860388576984, + "step": 2721 + }, + { + "ce_ib": 2.464317560195923, + "ce_orig": 0.3747950792312622, + "epoch": 0.7825149183981595, + "kl_loss": 0.0922236442565918, + "loss_ib": 0.0011686681536957622, + "step": 2721 + }, + { + "ce_ib": 3.7939488887786865, + "ce_orig": 0.8648269176483154, + "epoch": 0.7825149183981595, + "kl_loss": 0.10720193386077881, + "loss_ib": 0.0014514141948893666, + "step": 2721 + }, + { + "ce_ib": 2.1426737308502197, + "ce_orig": 0.37852486968040466, + "epoch": 0.7825149183981595, + "kl_loss": 0.0569000244140625, + "loss_ib": 0.0007832676055841148, + "step": 2721 + }, + { + "ce_ib": 4.487566947937012, + "ce_orig": 0.9434440732002258, + "epoch": 0.7828025019771371, + "kl_loss": 0.0636274516582489, + "loss_ib": 0.0010850311955437064, + "step": 2722 + }, + { + "ce_ib": 2.2192864418029785, + "ce_orig": 0.39757171273231506, + "epoch": 0.7828025019771371, + "kl_loss": 0.06969348341226578, + "loss_ib": 0.0009188634576275945, + "step": 2722 + }, + { + "ce_ib": 3.7308430671691895, + "ce_orig": 0.6486664414405823, + "epoch": 0.7828025019771371, + "kl_loss": 0.060631997883319855, + "loss_ib": 0.0009794043144211173, + "step": 2722 + }, + { + "ce_ib": 4.065412521362305, + "ce_orig": 0.9309715032577515, + "epoch": 0.7828025019771371, + "kl_loss": 0.0722053200006485, + "loss_ib": 0.0011285943910479546, + "step": 2722 + }, + { + "ce_ib": 2.4605095386505127, + "ce_orig": 0.5005959272384644, + "epoch": 0.7830900855561147, + "kl_loss": 0.05024545639753342, + "loss_ib": 0.0007485055248253047, + "step": 2723 + }, + { + "ce_ib": 2.233552932739258, + "ce_orig": 0.44329404830932617, + "epoch": 0.7830900855561147, + "kl_loss": 0.03300359472632408, + "loss_ib": 0.0005533912335522473, + "step": 2723 + }, + { + "ce_ib": 4.804069519042969, + "ce_orig": 1.2249064445495605, + "epoch": 0.7830900855561147, + "kl_loss": 0.050263069570064545, + "loss_ib": 0.000983037636615336, + "step": 2723 + }, + { + "ce_ib": 3.571455717086792, + "ce_orig": 0.4911627173423767, + "epoch": 0.7830900855561147, + "kl_loss": 0.09314107894897461, + "loss_ib": 0.0012885562609881163, + "step": 2723 + }, + { + "ce_ib": 2.830704689025879, + "ce_orig": 0.6663494110107422, + "epoch": 0.7833776691350924, + "kl_loss": 0.05235178396105766, + "loss_ib": 0.0008065882720984519, + "step": 2724 + }, + { + "ce_ib": 4.040375709533691, + "ce_orig": 1.0896952152252197, + "epoch": 0.7833776691350924, + "kl_loss": 0.06868134438991547, + "loss_ib": 0.0010908509138971567, + "step": 2724 + }, + { + "ce_ib": 2.989135503768921, + "ce_orig": 0.6269635558128357, + "epoch": 0.7833776691350924, + "kl_loss": 0.04570217430591583, + "loss_ib": 0.0007559352670796216, + "step": 2724 + }, + { + "ce_ib": 4.171360969543457, + "ce_orig": 0.3718877136707306, + "epoch": 0.7833776691350924, + "kl_loss": 0.05078680068254471, + "loss_ib": 0.0009250040748156607, + "step": 2724 + }, + { + "epoch": 0.7836652527140701, + "grad_norm": 0.09389005601406097, + "learning_rate": 4.33501586382881e-05, + "loss": 0.8017, + "step": 2725 + }, + { + "ce_ib": 4.932086944580078, + "ce_orig": 0.8618472814559937, + "epoch": 0.7836652527140701, + "kl_loss": 0.11458053439855576, + "loss_ib": 0.0016390139935538173, + "step": 2725 + }, + { + "ce_ib": 1.995977520942688, + "ce_orig": 0.4202042818069458, + "epoch": 0.7836652527140701, + "kl_loss": 0.06659115850925446, + "loss_ib": 0.0008655093261040747, + "step": 2725 + }, + { + "ce_ib": 3.268118381500244, + "ce_orig": 0.7336798906326294, + "epoch": 0.7836652527140701, + "kl_loss": 0.09293245524168015, + "loss_ib": 0.0012561362236738205, + "step": 2725 + }, + { + "ce_ib": 3.185106039047241, + "ce_orig": 0.5360062122344971, + "epoch": 0.7836652527140701, + "kl_loss": 0.07381497323513031, + "loss_ib": 0.001056660315953195, + "step": 2725 + }, + { + "ce_ib": 4.750252723693848, + "ce_orig": 0.8619513511657715, + "epoch": 0.7839528362930477, + "kl_loss": 0.08956834673881531, + "loss_ib": 0.001370708574540913, + "step": 2726 + }, + { + "ce_ib": 3.186599016189575, + "ce_orig": 0.6652609705924988, + "epoch": 0.7839528362930477, + "kl_loss": 0.062189389020204544, + "loss_ib": 0.0009405537275597453, + "step": 2726 + }, + { + "ce_ib": 4.4331254959106445, + "ce_orig": 0.562034010887146, + "epoch": 0.7839528362930477, + "kl_loss": 0.0660531297326088, + "loss_ib": 0.0011038437951356173, + "step": 2726 + }, + { + "ce_ib": 5.092161178588867, + "ce_orig": 1.3957841396331787, + "epoch": 0.7839528362930477, + "kl_loss": 0.05481603741645813, + "loss_ib": 0.0010573765030130744, + "step": 2726 + }, + { + "ce_ib": 3.672804117202759, + "ce_orig": 0.6384534239768982, + "epoch": 0.7842404198720253, + "kl_loss": 0.05781262367963791, + "loss_ib": 0.0009454066166654229, + "step": 2727 + }, + { + "ce_ib": 4.27412223815918, + "ce_orig": 0.7327466011047363, + "epoch": 0.7842404198720253, + "kl_loss": 0.05720003694295883, + "loss_ib": 0.0009994124993681908, + "step": 2727 + }, + { + "ce_ib": 5.428305625915527, + "ce_orig": 1.3752270936965942, + "epoch": 0.7842404198720253, + "kl_loss": 0.07303006201982498, + "loss_ib": 0.0012731312308460474, + "step": 2727 + }, + { + "ce_ib": 3.3900833129882812, + "ce_orig": 0.8343997001647949, + "epoch": 0.7842404198720253, + "kl_loss": 0.056699227541685104, + "loss_ib": 0.0009060005540959537, + "step": 2727 + }, + { + "ce_ib": 6.320311069488525, + "ce_orig": 1.6343975067138672, + "epoch": 0.7845280034510029, + "kl_loss": 0.09207102656364441, + "loss_ib": 0.0015527413925155997, + "step": 2728 + }, + { + "ce_ib": 4.649865627288818, + "ce_orig": 0.5891127586364746, + "epoch": 0.7845280034510029, + "kl_loss": 0.081109918653965, + "loss_ib": 0.0012760856188833714, + "step": 2728 + }, + { + "ce_ib": 5.244960308074951, + "ce_orig": 0.777604341506958, + "epoch": 0.7845280034510029, + "kl_loss": 0.04608764499425888, + "loss_ib": 0.000985372462309897, + "step": 2728 + }, + { + "ce_ib": 3.025590419769287, + "ce_orig": 0.5781903862953186, + "epoch": 0.7845280034510029, + "kl_loss": 0.09984168410301208, + "loss_ib": 0.0013009757967665792, + "step": 2728 + }, + { + "ce_ib": 4.640846252441406, + "ce_orig": 1.076772689819336, + "epoch": 0.7848155870299806, + "kl_loss": 0.06774312257766724, + "loss_ib": 0.0011415157932788134, + "step": 2729 + }, + { + "ce_ib": 4.949178695678711, + "ce_orig": 0.9244461059570312, + "epoch": 0.7848155870299806, + "kl_loss": 0.08653432875871658, + "loss_ib": 0.0013602611143141985, + "step": 2729 + }, + { + "ce_ib": 5.427274227142334, + "ce_orig": 1.1895617246627808, + "epoch": 0.7848155870299806, + "kl_loss": 0.08149591833353043, + "loss_ib": 0.001357686473056674, + "step": 2729 + }, + { + "ce_ib": 6.246286392211914, + "ce_orig": 1.2643390893936157, + "epoch": 0.7848155870299806, + "kl_loss": 0.0804218053817749, + "loss_ib": 0.0014288467355072498, + "step": 2729 + }, + { + "epoch": 0.7851031706089582, + "grad_norm": 0.09517823159694672, + "learning_rate": 4.332378291953866e-05, + "loss": 0.8793, + "step": 2730 + }, + { + "ce_ib": 2.724350929260254, + "ce_orig": 0.7784748077392578, + "epoch": 0.7851031706089582, + "kl_loss": 0.07596305012702942, + "loss_ib": 0.001032065600156784, + "step": 2730 + }, + { + "ce_ib": 3.395254611968994, + "ce_orig": 0.732062578201294, + "epoch": 0.7851031706089582, + "kl_loss": 0.05191110819578171, + "loss_ib": 0.000858636456541717, + "step": 2730 + }, + { + "ce_ib": 4.778668403625488, + "ce_orig": 1.2438135147094727, + "epoch": 0.7851031706089582, + "kl_loss": 0.0953834280371666, + "loss_ib": 0.0014317011227831244, + "step": 2730 + }, + { + "ce_ib": 4.354384899139404, + "ce_orig": 1.082000970840454, + "epoch": 0.7851031706089582, + "kl_loss": 0.06287068873643875, + "loss_ib": 0.0010641453554853797, + "step": 2730 + }, + { + "ce_ib": 4.833708763122559, + "ce_orig": 1.2503976821899414, + "epoch": 0.7853907541879359, + "kl_loss": 0.06443865597248077, + "loss_ib": 0.001127757364884019, + "step": 2731 + }, + { + "ce_ib": 5.7415947914123535, + "ce_orig": 1.3510074615478516, + "epoch": 0.7853907541879359, + "kl_loss": 0.07277929782867432, + "loss_ib": 0.0013019524049013853, + "step": 2731 + }, + { + "ce_ib": 4.749528408050537, + "ce_orig": 0.7305611968040466, + "epoch": 0.7853907541879359, + "kl_loss": 0.041502226144075394, + "loss_ib": 0.0008899751119315624, + "step": 2731 + }, + { + "ce_ib": 2.5543782711029053, + "ce_orig": 0.6189329028129578, + "epoch": 0.7853907541879359, + "kl_loss": 0.038988061249256134, + "loss_ib": 0.0006453184760175645, + "step": 2731 + }, + { + "ce_ib": 2.8486640453338623, + "ce_orig": 0.7577486634254456, + "epoch": 0.7856783377669135, + "kl_loss": 0.046821847558021545, + "loss_ib": 0.0007530848379246891, + "step": 2732 + }, + { + "ce_ib": 3.3456733226776123, + "ce_orig": 0.7550545930862427, + "epoch": 0.7856783377669135, + "kl_loss": 0.043742403388023376, + "loss_ib": 0.0007719912682659924, + "step": 2732 + }, + { + "ce_ib": 4.160478115081787, + "ce_orig": 0.8303720951080322, + "epoch": 0.7856783377669135, + "kl_loss": 0.03553861007094383, + "loss_ib": 0.000771433871705085, + "step": 2732 + }, + { + "ce_ib": 3.5426292419433594, + "ce_orig": 1.204329252243042, + "epoch": 0.7856783377669135, + "kl_loss": 0.04523802548646927, + "loss_ib": 0.0008066432201303542, + "step": 2732 + }, + { + "ce_ib": 5.7777204513549805, + "ce_orig": 1.3422445058822632, + "epoch": 0.7859659213458912, + "kl_loss": 0.07974323630332947, + "loss_ib": 0.0013752043014392257, + "step": 2733 + }, + { + "ce_ib": 4.110344886779785, + "ce_orig": 0.540892481803894, + "epoch": 0.7859659213458912, + "kl_loss": 0.08284900337457657, + "loss_ib": 0.0012395244557410479, + "step": 2733 + }, + { + "ce_ib": 3.702249050140381, + "ce_orig": 0.6565930247306824, + "epoch": 0.7859659213458912, + "kl_loss": 0.06352287530899048, + "loss_ib": 0.001005453639663756, + "step": 2733 + }, + { + "ce_ib": 2.411126136779785, + "ce_orig": 0.3433438837528229, + "epoch": 0.7859659213458912, + "kl_loss": 0.0895770788192749, + "loss_ib": 0.001136883394792676, + "step": 2733 + }, + { + "ce_ib": 4.669600486755371, + "ce_orig": 1.3954849243164062, + "epoch": 0.7862535049248688, + "kl_loss": 0.06250124424695969, + "loss_ib": 0.0010919724591076374, + "step": 2734 + }, + { + "ce_ib": 4.393505573272705, + "ce_orig": 0.663326621055603, + "epoch": 0.7862535049248688, + "kl_loss": 0.0938647910952568, + "loss_ib": 0.001377998385578394, + "step": 2734 + }, + { + "ce_ib": 2.1505050659179688, + "ce_orig": 0.25798535346984863, + "epoch": 0.7862535049248688, + "kl_loss": 0.19940605759620667, + "loss_ib": 0.0022091111168265343, + "step": 2734 + }, + { + "ce_ib": 3.959503412246704, + "ce_orig": 0.8667395114898682, + "epoch": 0.7862535049248688, + "kl_loss": 0.051583774387836456, + "loss_ib": 0.0009117880254052579, + "step": 2734 + }, + { + "epoch": 0.7865410885038464, + "grad_norm": 0.09822685271501541, + "learning_rate": 4.329736305454314e-05, + "loss": 0.8562, + "step": 2735 + }, + { + "ce_ib": 3.4240989685058594, + "ce_orig": 0.6454565525054932, + "epoch": 0.7865410885038464, + "kl_loss": 0.05643429607152939, + "loss_ib": 0.0009067527716979384, + "step": 2735 + }, + { + "ce_ib": 3.6881794929504395, + "ce_orig": 0.6289147734642029, + "epoch": 0.7865410885038464, + "kl_loss": 0.08642411231994629, + "loss_ib": 0.0012330589815974236, + "step": 2735 + }, + { + "ce_ib": 4.2804036140441895, + "ce_orig": 0.9687967896461487, + "epoch": 0.7865410885038464, + "kl_loss": 0.05861719697713852, + "loss_ib": 0.001014212379232049, + "step": 2735 + }, + { + "ce_ib": 3.965467691421509, + "ce_orig": 0.6337364315986633, + "epoch": 0.7865410885038464, + "kl_loss": 0.054683662950992584, + "loss_ib": 0.0009433833765797317, + "step": 2735 + }, + { + "ce_ib": 2.3113856315612793, + "ce_orig": 0.4491522014141083, + "epoch": 0.786828672082824, + "kl_loss": 0.04415664076805115, + "loss_ib": 0.0006727049476467073, + "step": 2736 + }, + { + "ce_ib": 3.9472923278808594, + "ce_orig": 0.5197857022285461, + "epoch": 0.786828672082824, + "kl_loss": 0.07478383183479309, + "loss_ib": 0.0011425674892961979, + "step": 2736 + }, + { + "ce_ib": 6.266112804412842, + "ce_orig": 1.6420507431030273, + "epoch": 0.786828672082824, + "kl_loss": 0.07838009297847748, + "loss_ib": 0.001410412136465311, + "step": 2736 + }, + { + "ce_ib": 2.8619394302368164, + "ce_orig": 0.44709208607673645, + "epoch": 0.786828672082824, + "kl_loss": 0.03876987099647522, + "loss_ib": 0.0006738926167599857, + "step": 2736 + }, + { + "ce_ib": 3.2469985485076904, + "ce_orig": 0.6773930788040161, + "epoch": 0.7871162556618017, + "kl_loss": 0.045255839824676514, + "loss_ib": 0.0007772582466714084, + "step": 2737 + }, + { + "ce_ib": 3.251753330230713, + "ce_orig": 0.8023939728736877, + "epoch": 0.7871162556618017, + "kl_loss": 0.07095052301883698, + "loss_ib": 0.0010346806375309825, + "step": 2737 + }, + { + "ce_ib": 3.3613600730895996, + "ce_orig": 0.8770491480827332, + "epoch": 0.7871162556618017, + "kl_loss": 0.031849414110183716, + "loss_ib": 0.0006546301301568747, + "step": 2737 + }, + { + "ce_ib": 4.267024993896484, + "ce_orig": 1.0206243991851807, + "epoch": 0.7871162556618017, + "kl_loss": 0.04636462405323982, + "loss_ib": 0.0008903486886993051, + "step": 2737 + }, + { + "ce_ib": 1.8469284772872925, + "ce_orig": 0.2532729208469391, + "epoch": 0.7874038392407794, + "kl_loss": 0.11585507541894913, + "loss_ib": 0.0013432435225695372, + "step": 2738 + }, + { + "ce_ib": 3.735264301300049, + "ce_orig": 0.9927113652229309, + "epoch": 0.7874038392407794, + "kl_loss": 0.05884728580713272, + "loss_ib": 0.0009619992924854159, + "step": 2738 + }, + { + "ce_ib": 4.683136463165283, + "ce_orig": 0.9051709175109863, + "epoch": 0.7874038392407794, + "kl_loss": 0.0528361052274704, + "loss_ib": 0.000996674643829465, + "step": 2738 + }, + { + "ce_ib": 5.616206169128418, + "ce_orig": 1.226413607597351, + "epoch": 0.7874038392407794, + "kl_loss": 0.12412263453006744, + "loss_ib": 0.0018028469057753682, + "step": 2738 + }, + { + "ce_ib": 2.8270647525787354, + "ce_orig": 0.5401044487953186, + "epoch": 0.787691422819757, + "kl_loss": 0.09539412707090378, + "loss_ib": 0.0012366477167233825, + "step": 2739 + }, + { + "ce_ib": 4.161950588226318, + "ce_orig": 0.8773499131202698, + "epoch": 0.787691422819757, + "kl_loss": 0.08790025115013123, + "loss_ib": 0.0012951975222676992, + "step": 2739 + }, + { + "ce_ib": 3.40925931930542, + "ce_orig": 0.7897133231163025, + "epoch": 0.787691422819757, + "kl_loss": 0.047730591148138046, + "loss_ib": 0.0008182318415492773, + "step": 2739 + }, + { + "ce_ib": 2.1815128326416016, + "ce_orig": 0.3424106240272522, + "epoch": 0.787691422819757, + "kl_loss": 0.043924249708652496, + "loss_ib": 0.0006573937716893852, + "step": 2739 + }, + { + "epoch": 0.7879790063987346, + "grad_norm": 0.08896587044000626, + "learning_rate": 4.3270899106953105e-05, + "loss": 0.8246, + "step": 2740 + }, + { + "ce_ib": 5.76030158996582, + "ce_orig": 1.4622979164123535, + "epoch": 0.7879790063987346, + "kl_loss": 0.08049134165048599, + "loss_ib": 0.0013809434603899717, + "step": 2740 + }, + { + "ce_ib": 4.559739589691162, + "ce_orig": 0.9879452586174011, + "epoch": 0.7879790063987346, + "kl_loss": 0.059297844767570496, + "loss_ib": 0.001048952341079712, + "step": 2740 + }, + { + "ce_ib": 2.136996269226074, + "ce_orig": 0.4787465035915375, + "epoch": 0.7879790063987346, + "kl_loss": 0.05147653818130493, + "loss_ib": 0.0007284649764187634, + "step": 2740 + }, + { + "ce_ib": 4.061817169189453, + "ce_orig": 0.8465638756752014, + "epoch": 0.7879790063987346, + "kl_loss": 0.08573289960622787, + "loss_ib": 0.0012635106686502695, + "step": 2740 + }, + { + "ce_ib": 2.803297519683838, + "ce_orig": 0.6405282616615295, + "epoch": 0.7882665899777123, + "kl_loss": 0.04443920776247978, + "loss_ib": 0.000724721816368401, + "step": 2741 + }, + { + "ce_ib": 4.292599678039551, + "ce_orig": 0.43706488609313965, + "epoch": 0.7882665899777123, + "kl_loss": 0.07094869017601013, + "loss_ib": 0.0011387468548491597, + "step": 2741 + }, + { + "ce_ib": 5.980231761932373, + "ce_orig": 1.3824700117111206, + "epoch": 0.7882665899777123, + "kl_loss": 0.09077349305152893, + "loss_ib": 0.00150575814768672, + "step": 2741 + }, + { + "ce_ib": 3.301262140274048, + "ce_orig": 0.5837685465812683, + "epoch": 0.7882665899777123, + "kl_loss": 0.05322376266121864, + "loss_ib": 0.0008623638423159719, + "step": 2741 + }, + { + "ce_ib": 3.936699628829956, + "ce_orig": 0.7380988001823425, + "epoch": 0.7885541735566899, + "kl_loss": 0.06736473739147186, + "loss_ib": 0.0010673172073438764, + "step": 2742 + }, + { + "ce_ib": 3.920851230621338, + "ce_orig": 0.7942270636558533, + "epoch": 0.7885541735566899, + "kl_loss": 0.060450442135334015, + "loss_ib": 0.0009965895442292094, + "step": 2742 + }, + { + "ce_ib": 3.7874879837036133, + "ce_orig": 0.5643869638442993, + "epoch": 0.7885541735566899, + "kl_loss": 0.08032043278217316, + "loss_ib": 0.0011819531209766865, + "step": 2742 + }, + { + "ce_ib": 3.07631516456604, + "ce_orig": 0.4720967710018158, + "epoch": 0.7885541735566899, + "kl_loss": 0.04125894606113434, + "loss_ib": 0.0007202209089882672, + "step": 2742 + }, + { + "ce_ib": 2.421128749847412, + "ce_orig": 0.3359372317790985, + "epoch": 0.7888417571356675, + "kl_loss": 0.05036686733365059, + "loss_ib": 0.0007457815227098763, + "step": 2743 + }, + { + "ce_ib": 3.656679630279541, + "ce_orig": 0.7563187479972839, + "epoch": 0.7888417571356675, + "kl_loss": 0.07136253267526627, + "loss_ib": 0.0010792932007461786, + "step": 2743 + }, + { + "ce_ib": 3.5923542976379395, + "ce_orig": 0.6917510032653809, + "epoch": 0.7888417571356675, + "kl_loss": 0.11372942477464676, + "loss_ib": 0.0014965295558795333, + "step": 2743 + }, + { + "ce_ib": 3.29542875289917, + "ce_orig": 1.026768445968628, + "epoch": 0.7888417571356675, + "kl_loss": 0.051191769540309906, + "loss_ib": 0.0008414604817517102, + "step": 2743 + }, + { + "ce_ib": 3.870608329772949, + "ce_orig": 0.5801063179969788, + "epoch": 0.7891293407146452, + "kl_loss": 0.10627251863479614, + "loss_ib": 0.0014497858937829733, + "step": 2744 + }, + { + "ce_ib": 3.6940925121307373, + "ce_orig": 0.93644779920578, + "epoch": 0.7891293407146452, + "kl_loss": 0.05837040767073631, + "loss_ib": 0.0009531132527627051, + "step": 2744 + }, + { + "ce_ib": 2.4127707481384277, + "ce_orig": 0.5675532817840576, + "epoch": 0.7891293407146452, + "kl_loss": 0.09978559613227844, + "loss_ib": 0.0012391329510137439, + "step": 2744 + }, + { + "ce_ib": 3.330704689025879, + "ce_orig": 0.5716238617897034, + "epoch": 0.7891293407146452, + "kl_loss": 0.07440638542175293, + "loss_ib": 0.0010771342786028981, + "step": 2744 + }, + { + "epoch": 0.7894169242936229, + "grad_norm": 0.09184665232896805, + "learning_rate": 4.3244391140526355e-05, + "loss": 0.7818, + "step": 2745 + }, + { + "ce_ib": 4.348439693450928, + "ce_orig": 0.6828053593635559, + "epoch": 0.7894169242936229, + "kl_loss": 0.07342614978551865, + "loss_ib": 0.0011691054096445441, + "step": 2745 + }, + { + "ce_ib": 2.990492820739746, + "ce_orig": 0.48712095618247986, + "epoch": 0.7894169242936229, + "kl_loss": 0.05898652598261833, + "loss_ib": 0.0008889145101420581, + "step": 2745 + }, + { + "ce_ib": 3.369838237762451, + "ce_orig": 0.6087616086006165, + "epoch": 0.7894169242936229, + "kl_loss": 0.040941111743450165, + "loss_ib": 0.0007463949150405824, + "step": 2745 + }, + { + "ce_ib": 3.564819574356079, + "ce_orig": 0.7624062299728394, + "epoch": 0.7894169242936229, + "kl_loss": 0.06526240706443787, + "loss_ib": 0.0010091060539707541, + "step": 2745 + }, + { + "ce_ib": 5.127408027648926, + "ce_orig": 0.7075109481811523, + "epoch": 0.7897045078726005, + "kl_loss": 0.04222084581851959, + "loss_ib": 0.0009349492029286921, + "step": 2746 + }, + { + "ce_ib": 6.274588108062744, + "ce_orig": 1.332920789718628, + "epoch": 0.7897045078726005, + "kl_loss": 0.07455567270517349, + "loss_ib": 0.0013730154605582356, + "step": 2746 + }, + { + "ce_ib": 3.5801639556884766, + "ce_orig": 0.7092388272285461, + "epoch": 0.7897045078726005, + "kl_loss": 0.11733580380678177, + "loss_ib": 0.0015313744079321623, + "step": 2746 + }, + { + "ce_ib": 6.462437152862549, + "ce_orig": 1.349181056022644, + "epoch": 0.7897045078726005, + "kl_loss": 0.08175374567508698, + "loss_ib": 0.0014637811109423637, + "step": 2746 + }, + { + "ce_ib": 3.2760369777679443, + "ce_orig": 0.7819387316703796, + "epoch": 0.7899920914515781, + "kl_loss": 0.06631068885326385, + "loss_ib": 0.0009907105704769492, + "step": 2747 + }, + { + "ce_ib": 3.668401002883911, + "ce_orig": 0.6712520122528076, + "epoch": 0.7899920914515781, + "kl_loss": 0.05960628390312195, + "loss_ib": 0.0009629029082134366, + "step": 2747 + }, + { + "ce_ib": 4.047397613525391, + "ce_orig": 1.0103588104248047, + "epoch": 0.7899920914515781, + "kl_loss": 0.055606529116630554, + "loss_ib": 0.0009608050459064543, + "step": 2747 + }, + { + "ce_ib": 6.48381233215332, + "ce_orig": 1.3587619066238403, + "epoch": 0.7899920914515781, + "kl_loss": 0.03698919713497162, + "loss_ib": 0.0010182731784880161, + "step": 2747 + }, + { + "ce_ib": 4.343432426452637, + "ce_orig": 0.9248653650283813, + "epoch": 0.7902796750305557, + "kl_loss": 0.07657218724489212, + "loss_ib": 0.001200065016746521, + "step": 2748 + }, + { + "ce_ib": 4.209934711456299, + "ce_orig": 0.8820658922195435, + "epoch": 0.7902796750305557, + "kl_loss": 0.11343471705913544, + "loss_ib": 0.0015553405974060297, + "step": 2748 + }, + { + "ce_ib": 2.6093130111694336, + "ce_orig": 0.3870077431201935, + "epoch": 0.7902796750305557, + "kl_loss": 0.05898419767618179, + "loss_ib": 0.0008507733000442386, + "step": 2748 + }, + { + "ce_ib": 3.054109573364258, + "ce_orig": 0.7065462470054626, + "epoch": 0.7902796750305557, + "kl_loss": 0.053485408425331116, + "loss_ib": 0.0008402650128118694, + "step": 2748 + }, + { + "ce_ib": 6.1381659507751465, + "ce_orig": 1.420028567314148, + "epoch": 0.7905672586095334, + "kl_loss": 0.05358273535966873, + "loss_ib": 0.00114964391104877, + "step": 2749 + }, + { + "ce_ib": 2.83512020111084, + "ce_orig": 0.705005407333374, + "epoch": 0.7905672586095334, + "kl_loss": 0.06264655292034149, + "loss_ib": 0.0009099775343202055, + "step": 2749 + }, + { + "ce_ib": 6.527388095855713, + "ce_orig": 1.6994904279708862, + "epoch": 0.7905672586095334, + "kl_loss": 0.05173903703689575, + "loss_ib": 0.0011701291659846902, + "step": 2749 + }, + { + "ce_ib": 4.931689262390137, + "ce_orig": 0.9146091341972351, + "epoch": 0.7905672586095334, + "kl_loss": 0.11857984960079193, + "loss_ib": 0.0016789673827588558, + "step": 2749 + }, + { + "epoch": 0.790854842188511, + "grad_norm": 0.09590111672878265, + "learning_rate": 4.321783921912674e-05, + "loss": 0.8659, + "step": 2750 + }, + { + "ce_ib": 6.953745365142822, + "ce_orig": 1.637960433959961, + "epoch": 0.790854842188511, + "kl_loss": 0.05412355065345764, + "loss_ib": 0.0012366099981591105, + "step": 2750 + }, + { + "ce_ib": 5.487181186676025, + "ce_orig": 0.7855862379074097, + "epoch": 0.790854842188511, + "kl_loss": 0.07122021168470383, + "loss_ib": 0.0012609201949089766, + "step": 2750 + }, + { + "ce_ib": 4.088080406188965, + "ce_orig": 0.7891085743904114, + "epoch": 0.790854842188511, + "kl_loss": 0.07932189106941223, + "loss_ib": 0.0012020268477499485, + "step": 2750 + }, + { + "ce_ib": 3.7955894470214844, + "ce_orig": 0.7341013550758362, + "epoch": 0.790854842188511, + "kl_loss": 0.06514409184455872, + "loss_ib": 0.0010309998178854585, + "step": 2750 + }, + { + "ce_ib": 2.913525104522705, + "ce_orig": 0.6941717267036438, + "epoch": 0.7911424257674887, + "kl_loss": 0.05824703723192215, + "loss_ib": 0.0008738228352740407, + "step": 2751 + }, + { + "ce_ib": 4.340704917907715, + "ce_orig": 0.5268121361732483, + "epoch": 0.7911424257674887, + "kl_loss": 0.08842450380325317, + "loss_ib": 0.0013183155097067356, + "step": 2751 + }, + { + "ce_ib": 5.55404806137085, + "ce_orig": 1.1661711931228638, + "epoch": 0.7911424257674887, + "kl_loss": 0.05733572319149971, + "loss_ib": 0.0011287620291113853, + "step": 2751 + }, + { + "ce_ib": 5.372424602508545, + "ce_orig": 1.0225350856781006, + "epoch": 0.7911424257674887, + "kl_loss": 0.07296031713485718, + "loss_ib": 0.0012668456183746457, + "step": 2751 + }, + { + "ce_ib": 3.699613571166992, + "ce_orig": 0.6887558698654175, + "epoch": 0.7914300093464663, + "kl_loss": 0.05062039569020271, + "loss_ib": 0.0008761652861721814, + "step": 2752 + }, + { + "ce_ib": 5.480358123779297, + "ce_orig": 1.1815898418426514, + "epoch": 0.7914300093464663, + "kl_loss": 0.12997084856033325, + "loss_ib": 0.001847744220867753, + "step": 2752 + }, + { + "ce_ib": 3.9921743869781494, + "ce_orig": 0.7539145946502686, + "epoch": 0.7914300093464663, + "kl_loss": 0.06835566461086273, + "loss_ib": 0.0010827741352841258, + "step": 2752 + }, + { + "ce_ib": 3.1781418323516846, + "ce_orig": 1.069401502609253, + "epoch": 0.7914300093464663, + "kl_loss": 0.04595430940389633, + "loss_ib": 0.0007773571996949613, + "step": 2752 + }, + { + "ce_ib": 3.0312154293060303, + "ce_orig": 0.7763067483901978, + "epoch": 0.791717592925444, + "kl_loss": 0.04544742405414581, + "loss_ib": 0.0007575957570225, + "step": 2753 + }, + { + "ce_ib": 4.262052536010742, + "ce_orig": 1.1950721740722656, + "epoch": 0.791717592925444, + "kl_loss": 0.05890587717294693, + "loss_ib": 0.0010152639588341117, + "step": 2753 + }, + { + "ce_ib": 5.1361236572265625, + "ce_orig": 1.282379388809204, + "epoch": 0.791717592925444, + "kl_loss": 0.03890034556388855, + "loss_ib": 0.0009026157786138356, + "step": 2753 + }, + { + "ce_ib": 5.629969120025635, + "ce_orig": 1.133082628250122, + "epoch": 0.791717592925444, + "kl_loss": 0.07830218970775604, + "loss_ib": 0.0013460187474265695, + "step": 2753 + }, + { + "ce_ib": 4.3510212898254395, + "ce_orig": 0.9870458245277405, + "epoch": 0.7920051765044216, + "kl_loss": 0.040968865156173706, + "loss_ib": 0.0008447907748632133, + "step": 2754 + }, + { + "ce_ib": 6.983900547027588, + "ce_orig": 1.7947471141815186, + "epoch": 0.7920051765044216, + "kl_loss": 0.062444187700748444, + "loss_ib": 0.0013228317257016897, + "step": 2754 + }, + { + "ce_ib": 4.075145721435547, + "ce_orig": 1.0458115339279175, + "epoch": 0.7920051765044216, + "kl_loss": 0.04500262439250946, + "loss_ib": 0.0008575408137403429, + "step": 2754 + }, + { + "ce_ib": 4.925284385681152, + "ce_orig": 1.0083918571472168, + "epoch": 0.7920051765044216, + "kl_loss": 0.06319817900657654, + "loss_ib": 0.0011245101923123002, + "step": 2754 + }, + { + "epoch": 0.7922927600833992, + "grad_norm": 0.10355600714683533, + "learning_rate": 4.319124340672399e-05, + "loss": 0.9259, + "step": 2755 + }, + { + "ce_ib": 3.1492109298706055, + "ce_orig": 0.7553141117095947, + "epoch": 0.7922927600833992, + "kl_loss": 0.05466741696000099, + "loss_ib": 0.0008615952683612704, + "step": 2755 + }, + { + "ce_ib": 5.370089530944824, + "ce_orig": 0.8771728873252869, + "epoch": 0.7922927600833992, + "kl_loss": 0.07999100536108017, + "loss_ib": 0.0013369190273806453, + "step": 2755 + }, + { + "ce_ib": 3.7593765258789062, + "ce_orig": 0.9102520942687988, + "epoch": 0.7922927600833992, + "kl_loss": 0.06564751267433167, + "loss_ib": 0.001032412750646472, + "step": 2755 + }, + { + "ce_ib": 3.861632823944092, + "ce_orig": 0.720816969871521, + "epoch": 0.7922927600833992, + "kl_loss": 0.07107077538967133, + "loss_ib": 0.0010968709830194712, + "step": 2755 + }, + { + "ce_ib": 3.9223198890686035, + "ce_orig": 0.8267251253128052, + "epoch": 0.7925803436623768, + "kl_loss": 0.06830713152885437, + "loss_ib": 0.0010753031820058823, + "step": 2756 + }, + { + "ce_ib": 7.068748474121094, + "ce_orig": 1.6082700490951538, + "epoch": 0.7925803436623768, + "kl_loss": 0.09093745797872543, + "loss_ib": 0.0016162493266165257, + "step": 2756 + }, + { + "ce_ib": 4.350669860839844, + "ce_orig": 0.9542949199676514, + "epoch": 0.7925803436623768, + "kl_loss": 0.05835181474685669, + "loss_ib": 0.0010185850551351905, + "step": 2756 + }, + { + "ce_ib": 2.3282346725463867, + "ce_orig": 0.4660985767841339, + "epoch": 0.7925803436623768, + "kl_loss": 0.06991097331047058, + "loss_ib": 0.0009319332311861217, + "step": 2756 + }, + { + "ce_ib": 2.340764284133911, + "ce_orig": 0.48778459429740906, + "epoch": 0.7928679272413545, + "kl_loss": 0.05168522894382477, + "loss_ib": 0.0007509286515414715, + "step": 2757 + }, + { + "ce_ib": 7.30706262588501, + "ce_orig": 1.3189022541046143, + "epoch": 0.7928679272413545, + "kl_loss": 0.05035366863012314, + "loss_ib": 0.0012342429254204035, + "step": 2757 + }, + { + "ce_ib": 6.71275520324707, + "ce_orig": 1.878403663635254, + "epoch": 0.7928679272413545, + "kl_loss": 0.06259334832429886, + "loss_ib": 0.001297208946198225, + "step": 2757 + }, + { + "ce_ib": 3.8529086112976074, + "ce_orig": 0.6116108298301697, + "epoch": 0.7928679272413545, + "kl_loss": 0.16047774255275726, + "loss_ib": 0.0019900682382285595, + "step": 2757 + }, + { + "ce_ib": 3.18149995803833, + "ce_orig": 0.4680381715297699, + "epoch": 0.7931555108203322, + "kl_loss": 0.05547931790351868, + "loss_ib": 0.0008729431428946555, + "step": 2758 + }, + { + "ce_ib": 4.158588409423828, + "ce_orig": 0.7663615345954895, + "epoch": 0.7931555108203322, + "kl_loss": 0.08684021234512329, + "loss_ib": 0.0012842610012739897, + "step": 2758 + }, + { + "ce_ib": 3.32790207862854, + "ce_orig": 0.7597768306732178, + "epoch": 0.7931555108203322, + "kl_loss": 0.03992663323879242, + "loss_ib": 0.0007320565055124462, + "step": 2758 + }, + { + "ce_ib": 4.596432209014893, + "ce_orig": 1.0644419193267822, + "epoch": 0.7931555108203322, + "kl_loss": 0.06525372713804245, + "loss_ib": 0.0011121805291622877, + "step": 2758 + }, + { + "ce_ib": 2.017477035522461, + "ce_orig": 0.31815633177757263, + "epoch": 0.7934430943993098, + "kl_loss": 0.09446027874946594, + "loss_ib": 0.001146350521594286, + "step": 2759 + }, + { + "ce_ib": 2.6998465061187744, + "ce_orig": 0.5317661762237549, + "epoch": 0.7934430943993098, + "kl_loss": 0.035415247082710266, + "loss_ib": 0.0006241371156647801, + "step": 2759 + }, + { + "ce_ib": 7.173394203186035, + "ce_orig": 2.025332450866699, + "epoch": 0.7934430943993098, + "kl_loss": 0.08490430563688278, + "loss_ib": 0.0015663824742659926, + "step": 2759 + }, + { + "ce_ib": 4.366785049438477, + "ce_orig": 1.1936438083648682, + "epoch": 0.7934430943993098, + "kl_loss": 0.04581868648529053, + "loss_ib": 0.0008948653703555465, + "step": 2759 + }, + { + "epoch": 0.7937306779782874, + "grad_norm": 0.09432385861873627, + "learning_rate": 4.31646037673936e-05, + "loss": 0.8438, + "step": 2760 + }, + { + "ce_ib": 2.0954325199127197, + "ce_orig": 0.49084532260894775, + "epoch": 0.7937306779782874, + "kl_loss": 0.029290594160556793, + "loss_ib": 0.0005024491692893207, + "step": 2760 + }, + { + "ce_ib": 2.127211809158325, + "ce_orig": 0.4410271644592285, + "epoch": 0.7937306779782874, + "kl_loss": 0.03610348328948021, + "loss_ib": 0.0005737559986300766, + "step": 2760 + }, + { + "ce_ib": 4.839667320251465, + "ce_orig": 0.9846034049987793, + "epoch": 0.7937306779782874, + "kl_loss": 0.06985898315906525, + "loss_ib": 0.0011825566180050373, + "step": 2760 + }, + { + "ce_ib": 4.935423851013184, + "ce_orig": 1.1656190156936646, + "epoch": 0.7937306779782874, + "kl_loss": 0.05024496465921402, + "loss_ib": 0.0009959919843822718, + "step": 2760 + }, + { + "ce_ib": 2.695044994354248, + "ce_orig": 0.7832509279251099, + "epoch": 0.7940182615572651, + "kl_loss": 0.02813231572508812, + "loss_ib": 0.0005508276517502964, + "step": 2761 + }, + { + "ce_ib": 4.679258823394775, + "ce_orig": 0.8344135880470276, + "epoch": 0.7940182615572651, + "kl_loss": 0.0700642466545105, + "loss_ib": 0.0011685682693496346, + "step": 2761 + }, + { + "ce_ib": 3.099696397781372, + "ce_orig": 0.8169165849685669, + "epoch": 0.7940182615572651, + "kl_loss": 0.03864956647157669, + "loss_ib": 0.0006964652566239238, + "step": 2761 + }, + { + "ce_ib": 4.389041423797607, + "ce_orig": 0.7147507667541504, + "epoch": 0.7940182615572651, + "kl_loss": 0.047761380672454834, + "loss_ib": 0.0009165179799310863, + "step": 2761 + }, + { + "ce_ib": 6.827963352203369, + "ce_orig": 1.7231067419052124, + "epoch": 0.7943058451362427, + "kl_loss": 0.09596817195415497, + "loss_ib": 0.0016424780478700995, + "step": 2762 + }, + { + "ce_ib": 2.5429928302764893, + "ce_orig": 0.6475708484649658, + "epoch": 0.7943058451362427, + "kl_loss": 0.029720624908804893, + "loss_ib": 0.0005515055381692946, + "step": 2762 + }, + { + "ce_ib": 5.27888822555542, + "ce_orig": 1.1225628852844238, + "epoch": 0.7943058451362427, + "kl_loss": 0.07136283814907074, + "loss_ib": 0.0012415171368047595, + "step": 2762 + }, + { + "ce_ib": 3.6103055477142334, + "ce_orig": 0.9060268402099609, + "epoch": 0.7943058451362427, + "kl_loss": 0.04692227393388748, + "loss_ib": 0.000830253295134753, + "step": 2762 + }, + { + "ce_ib": 5.1642985343933105, + "ce_orig": 0.6430925726890564, + "epoch": 0.7945934287152203, + "kl_loss": 0.10600654780864716, + "loss_ib": 0.0015764952404424548, + "step": 2763 + }, + { + "ce_ib": 2.6584386825561523, + "ce_orig": 0.3836961090564728, + "epoch": 0.7945934287152203, + "kl_loss": 0.06519553810358047, + "loss_ib": 0.0009177992469631135, + "step": 2763 + }, + { + "ce_ib": 4.325611114501953, + "ce_orig": 0.9868410229682922, + "epoch": 0.7945934287152203, + "kl_loss": 0.051150161772966385, + "loss_ib": 0.0009440627763979137, + "step": 2763 + }, + { + "ce_ib": 2.9945266246795654, + "ce_orig": 0.525716245174408, + "epoch": 0.7945934287152203, + "kl_loss": 0.08192148059606552, + "loss_ib": 0.0011186674237251282, + "step": 2763 + }, + { + "ce_ib": 5.306968688964844, + "ce_orig": 1.1321525573730469, + "epoch": 0.794881012294198, + "kl_loss": 0.048653263598680496, + "loss_ib": 0.0010172295151278377, + "step": 2764 + }, + { + "ce_ib": 6.512132167816162, + "ce_orig": 1.39835786819458, + "epoch": 0.794881012294198, + "kl_loss": 0.08595350384712219, + "loss_ib": 0.0015107482904568315, + "step": 2764 + }, + { + "ce_ib": 4.934677600860596, + "ce_orig": 1.12037992477417, + "epoch": 0.794881012294198, + "kl_loss": 0.06899519264698029, + "loss_ib": 0.0011834196047857404, + "step": 2764 + }, + { + "ce_ib": 3.94823956489563, + "ce_orig": 0.668172299861908, + "epoch": 0.794881012294198, + "kl_loss": 0.06185721978545189, + "loss_ib": 0.0010133961914107203, + "step": 2764 + }, + { + "epoch": 0.7951685958731757, + "grad_norm": 0.1030939370393753, + "learning_rate": 4.313792036531663e-05, + "loss": 0.8335, + "step": 2765 + }, + { + "ce_ib": 4.532108306884766, + "ce_orig": 1.317468285560608, + "epoch": 0.7951685958731757, + "kl_loss": 0.053210243582725525, + "loss_ib": 0.000985313206911087, + "step": 2765 + }, + { + "ce_ib": 4.416714191436768, + "ce_orig": 1.2016398906707764, + "epoch": 0.7951685958731757, + "kl_loss": 0.0697488859295845, + "loss_ib": 0.0011391602456569672, + "step": 2765 + }, + { + "ce_ib": 5.90742301940918, + "ce_orig": 1.3161423206329346, + "epoch": 0.7951685958731757, + "kl_loss": 0.05440212041139603, + "loss_ib": 0.0011347634717822075, + "step": 2765 + }, + { + "ce_ib": 2.9233665466308594, + "ce_orig": 0.7069799304008484, + "epoch": 0.7951685958731757, + "kl_loss": 0.04738835245370865, + "loss_ib": 0.0007662201533094049, + "step": 2765 + }, + { + "ce_ib": 3.6551756858825684, + "ce_orig": 0.9500622749328613, + "epoch": 0.7954561794521533, + "kl_loss": 0.0726170614361763, + "loss_ib": 0.0010916880564764142, + "step": 2766 + }, + { + "ce_ib": 3.9289538860321045, + "ce_orig": 0.7764008045196533, + "epoch": 0.7954561794521533, + "kl_loss": 0.07620692998170853, + "loss_ib": 0.00115496467333287, + "step": 2766 + }, + { + "ce_ib": 1.633004069328308, + "ce_orig": 0.38389402627944946, + "epoch": 0.7954561794521533, + "kl_loss": 0.04643578827381134, + "loss_ib": 0.0006276582716964185, + "step": 2766 + }, + { + "ce_ib": 2.349423408508301, + "ce_orig": 0.49087658524513245, + "epoch": 0.7954561794521533, + "kl_loss": 0.04046554118394852, + "loss_ib": 0.000639597768895328, + "step": 2766 + }, + { + "ce_ib": 4.755190849304199, + "ce_orig": 0.787322461605072, + "epoch": 0.7957437630311309, + "kl_loss": 0.09136956930160522, + "loss_ib": 0.0013892146525904536, + "step": 2767 + }, + { + "ce_ib": 2.1413235664367676, + "ce_orig": 0.36150726675987244, + "epoch": 0.7957437630311309, + "kl_loss": 0.08738203346729279, + "loss_ib": 0.0010879526380449533, + "step": 2767 + }, + { + "ce_ib": 3.0681872367858887, + "ce_orig": 1.0172761678695679, + "epoch": 0.7957437630311309, + "kl_loss": 0.033307842910289764, + "loss_ib": 0.000639897130895406, + "step": 2767 + }, + { + "ce_ib": 3.294232130050659, + "ce_orig": 0.8437440991401672, + "epoch": 0.7957437630311309, + "kl_loss": 0.04722520336508751, + "loss_ib": 0.0008016752544790506, + "step": 2767 + }, + { + "ce_ib": 2.520233154296875, + "ce_orig": 0.3941030502319336, + "epoch": 0.7960313466101085, + "kl_loss": 0.051477592438459396, + "loss_ib": 0.000766799203120172, + "step": 2768 + }, + { + "ce_ib": 5.205615043640137, + "ce_orig": 1.1582207679748535, + "epoch": 0.7960313466101085, + "kl_loss": 0.06379898637533188, + "loss_ib": 0.001158551312983036, + "step": 2768 + }, + { + "ce_ib": 3.9790492057800293, + "ce_orig": 1.0064198970794678, + "epoch": 0.7960313466101085, + "kl_loss": 0.10457710921764374, + "loss_ib": 0.0014436760684475303, + "step": 2768 + }, + { + "ce_ib": 2.3332202434539795, + "ce_orig": 0.5716707110404968, + "epoch": 0.7960313466101085, + "kl_loss": 0.07293830811977386, + "loss_ib": 0.0009627050603739917, + "step": 2768 + }, + { + "ce_ib": 6.005173206329346, + "ce_orig": 1.630732774734497, + "epoch": 0.7963189301890862, + "kl_loss": 0.07042819261550903, + "loss_ib": 0.001304799341596663, + "step": 2769 + }, + { + "ce_ib": 3.763165235519409, + "ce_orig": 0.5609664916992188, + "epoch": 0.7963189301890862, + "kl_loss": 0.05025746673345566, + "loss_ib": 0.00087889120914042, + "step": 2769 + }, + { + "ce_ib": 2.287609815597534, + "ce_orig": 0.562455952167511, + "epoch": 0.7963189301890862, + "kl_loss": 0.11958840489387512, + "loss_ib": 0.0014246449572965503, + "step": 2769 + }, + { + "ce_ib": 5.6962480545043945, + "ce_orig": 1.1295876502990723, + "epoch": 0.7963189301890862, + "kl_loss": 0.04830142855644226, + "loss_ib": 0.0010526389814913273, + "step": 2769 + }, + { + "epoch": 0.7966065137680638, + "grad_norm": 0.10149817168712616, + "learning_rate": 4.311119326477961e-05, + "loss": 0.8171, + "step": 2770 + }, + { + "ce_ib": 3.1677772998809814, + "ce_orig": 0.633888840675354, + "epoch": 0.7966065137680638, + "kl_loss": 0.05183924362063408, + "loss_ib": 0.0008351701544597745, + "step": 2770 + }, + { + "ce_ib": 3.6448564529418945, + "ce_orig": 0.9684200286865234, + "epoch": 0.7966065137680638, + "kl_loss": 0.07906095683574677, + "loss_ib": 0.001155095174908638, + "step": 2770 + }, + { + "ce_ib": 2.255154609680176, + "ce_orig": 0.3905044198036194, + "epoch": 0.7966065137680638, + "kl_loss": 0.03504202887415886, + "loss_ib": 0.0005759357591159642, + "step": 2770 + }, + { + "ce_ib": 4.819906234741211, + "ce_orig": 1.1920193433761597, + "epoch": 0.7966065137680638, + "kl_loss": 0.07089553028345108, + "loss_ib": 0.0011909459717571735, + "step": 2770 + }, + { + "ce_ib": 3.008746862411499, + "ce_orig": 0.7850493788719177, + "epoch": 0.7968940973470415, + "kl_loss": 0.04188846796751022, + "loss_ib": 0.0007197593222372234, + "step": 2771 + }, + { + "ce_ib": 4.709314346313477, + "ce_orig": 1.0718399286270142, + "epoch": 0.7968940973470415, + "kl_loss": 0.05263027548789978, + "loss_ib": 0.0009972341358661652, + "step": 2771 + }, + { + "ce_ib": 4.16617488861084, + "ce_orig": 0.7759472727775574, + "epoch": 0.7968940973470415, + "kl_loss": 0.07304506003856659, + "loss_ib": 0.0011470679892227054, + "step": 2771 + }, + { + "ce_ib": 5.651961803436279, + "ce_orig": 1.2886803150177002, + "epoch": 0.7968940973470415, + "kl_loss": 0.05794547125697136, + "loss_ib": 0.0011446508578956127, + "step": 2771 + }, + { + "ce_ib": 4.240289211273193, + "ce_orig": 0.9180211424827576, + "epoch": 0.7971816809260192, + "kl_loss": 0.07629922032356262, + "loss_ib": 0.0011870211455971003, + "step": 2772 + }, + { + "ce_ib": 3.697352647781372, + "ce_orig": 0.8534904718399048, + "epoch": 0.7971816809260192, + "kl_loss": 0.0861571654677391, + "loss_ib": 0.0012313069310039282, + "step": 2772 + }, + { + "ce_ib": 2.4790892601013184, + "ce_orig": 0.5717146992683411, + "epoch": 0.7971816809260192, + "kl_loss": 0.03729209303855896, + "loss_ib": 0.0006208298145793378, + "step": 2772 + }, + { + "ce_ib": 4.848996162414551, + "ce_orig": 0.7977452278137207, + "epoch": 0.7971816809260192, + "kl_loss": 0.07233273237943649, + "loss_ib": 0.0012082268949598074, + "step": 2772 + }, + { + "ce_ib": 4.417542934417725, + "ce_orig": 0.9395961165428162, + "epoch": 0.7974692645049968, + "kl_loss": 0.07109728455543518, + "loss_ib": 0.0011527271708473563, + "step": 2773 + }, + { + "ce_ib": 3.1380274295806885, + "ce_orig": 0.572593629360199, + "epoch": 0.7974692645049968, + "kl_loss": 0.07183440029621124, + "loss_ib": 0.0010321467416360974, + "step": 2773 + }, + { + "ce_ib": 3.911236524581909, + "ce_orig": 0.9436517953872681, + "epoch": 0.7974692645049968, + "kl_loss": 0.050632089376449585, + "loss_ib": 0.0008974444936029613, + "step": 2773 + }, + { + "ce_ib": 4.0717010498046875, + "ce_orig": 0.727439284324646, + "epoch": 0.7974692645049968, + "kl_loss": 0.26790377497673035, + "loss_ib": 0.00308620766736567, + "step": 2773 + }, + { + "ce_ib": 3.563072681427002, + "ce_orig": 0.6947557330131531, + "epoch": 0.7977568480839744, + "kl_loss": 0.057377275079488754, + "loss_ib": 0.0009300799574702978, + "step": 2774 + }, + { + "ce_ib": 3.436284303665161, + "ce_orig": 0.6574268341064453, + "epoch": 0.7977568480839744, + "kl_loss": 0.05091220885515213, + "loss_ib": 0.0008527504978701472, + "step": 2774 + }, + { + "ce_ib": 2.475055456161499, + "ce_orig": 0.6901848316192627, + "epoch": 0.7977568480839744, + "kl_loss": 0.050455592572689056, + "loss_ib": 0.0007520614308305085, + "step": 2774 + }, + { + "ce_ib": 3.577336311340332, + "ce_orig": 0.88409024477005, + "epoch": 0.7977568480839744, + "kl_loss": 0.07907520234584808, + "loss_ib": 0.0011484856950119138, + "step": 2774 + }, + { + "epoch": 0.798044431662952, + "grad_norm": 0.10018035769462585, + "learning_rate": 4.308442253017431e-05, + "loss": 0.8109, + "step": 2775 + }, + { + "ce_ib": 2.3133575916290283, + "ce_orig": 0.5752493739128113, + "epoch": 0.798044431662952, + "kl_loss": 0.03627917170524597, + "loss_ib": 0.0005941274575889111, + "step": 2775 + }, + { + "ce_ib": 5.31532096862793, + "ce_orig": 1.4549754858016968, + "epoch": 0.798044431662952, + "kl_loss": 0.06877376139163971, + "loss_ib": 0.001219269703142345, + "step": 2775 + }, + { + "ce_ib": 1.857119083404541, + "ce_orig": 0.49905726313591003, + "epoch": 0.798044431662952, + "kl_loss": 0.03498172387480736, + "loss_ib": 0.0005355291068553925, + "step": 2775 + }, + { + "ce_ib": 4.3752641677856445, + "ce_orig": 1.1369529962539673, + "epoch": 0.798044431662952, + "kl_loss": 0.05270763486623764, + "loss_ib": 0.0009646027465350926, + "step": 2775 + }, + { + "ce_ib": 3.454179525375366, + "ce_orig": 0.5370707511901855, + "epoch": 0.7983320152419296, + "kl_loss": 0.09126663208007812, + "loss_ib": 0.0012580842012539506, + "step": 2776 + }, + { + "ce_ib": 3.798530340194702, + "ce_orig": 0.921844482421875, + "epoch": 0.7983320152419296, + "kl_loss": 0.04724680259823799, + "loss_ib": 0.0008523210417479277, + "step": 2776 + }, + { + "ce_ib": 3.4793083667755127, + "ce_orig": 0.7239628434181213, + "epoch": 0.7983320152419296, + "kl_loss": 0.04170840606093407, + "loss_ib": 0.0007650148472748697, + "step": 2776 + }, + { + "ce_ib": 3.241266965866089, + "ce_orig": 0.5558717846870422, + "epoch": 0.7983320152419296, + "kl_loss": 0.050844188779592514, + "loss_ib": 0.0008325685630552471, + "step": 2776 + }, + { + "ce_ib": 3.792546510696411, + "ce_orig": 0.5279029607772827, + "epoch": 0.7986195988209073, + "kl_loss": 0.04453600198030472, + "loss_ib": 0.0008246146026067436, + "step": 2777 + }, + { + "ce_ib": 4.501153945922852, + "ce_orig": 0.8710455298423767, + "epoch": 0.7986195988209073, + "kl_loss": 0.14452627301216125, + "loss_ib": 0.0018953779945150018, + "step": 2777 + }, + { + "ce_ib": 5.273581504821777, + "ce_orig": 0.8747265934944153, + "epoch": 0.7986195988209073, + "kl_loss": 0.09018924832344055, + "loss_ib": 0.0014292504638433456, + "step": 2777 + }, + { + "ce_ib": 5.406040191650391, + "ce_orig": 0.9842308759689331, + "epoch": 0.7986195988209073, + "kl_loss": 0.08354675769805908, + "loss_ib": 0.0013760715955868363, + "step": 2777 + }, + { + "ce_ib": 4.276729106903076, + "ce_orig": 0.9597039818763733, + "epoch": 0.798907182399885, + "kl_loss": 0.04971292242407799, + "loss_ib": 0.000924802094232291, + "step": 2778 + }, + { + "ce_ib": 3.4685721397399902, + "ce_orig": 0.4150676131248474, + "epoch": 0.798907182399885, + "kl_loss": 0.05382636561989784, + "loss_ib": 0.0008851208258420229, + "step": 2778 + }, + { + "ce_ib": 5.13667106628418, + "ce_orig": 1.131181240081787, + "epoch": 0.798907182399885, + "kl_loss": 0.0713772177696228, + "loss_ib": 0.001227439264766872, + "step": 2778 + }, + { + "ce_ib": 2.7824392318725586, + "ce_orig": 0.5669775605201721, + "epoch": 0.798907182399885, + "kl_loss": 0.056725744158029556, + "loss_ib": 0.000845501315779984, + "step": 2778 + }, + { + "ce_ib": 4.17990255355835, + "ce_orig": 0.9949167966842651, + "epoch": 0.7991947659788626, + "kl_loss": 0.08205518126487732, + "loss_ib": 0.0012385420268401504, + "step": 2779 + }, + { + "ce_ib": 2.742875337600708, + "ce_orig": 0.4957374632358551, + "epoch": 0.7991947659788626, + "kl_loss": 0.05894085764884949, + "loss_ib": 0.0008636960992589593, + "step": 2779 + }, + { + "ce_ib": 3.727627992630005, + "ce_orig": 0.8093018531799316, + "epoch": 0.7991947659788626, + "kl_loss": 0.0576351061463356, + "loss_ib": 0.0009491138043813407, + "step": 2779 + }, + { + "ce_ib": 2.992847442626953, + "ce_orig": 0.3333796262741089, + "epoch": 0.7991947659788626, + "kl_loss": 0.04892199486494064, + "loss_ib": 0.0007885046652518213, + "step": 2779 + }, + { + "epoch": 0.7994823495578403, + "grad_norm": 0.1039729118347168, + "learning_rate": 4.305760822599766e-05, + "loss": 0.8737, + "step": 2780 + }, + { + "ce_ib": 4.5188398361206055, + "ce_orig": 0.7493094205856323, + "epoch": 0.7994823495578403, + "kl_loss": 0.06431175768375397, + "loss_ib": 0.0010950014693662524, + "step": 2780 + }, + { + "ce_ib": 5.252851963043213, + "ce_orig": 1.2405815124511719, + "epoch": 0.7994823495578403, + "kl_loss": 0.07021147757768631, + "loss_ib": 0.0012274000328034163, + "step": 2780 + }, + { + "ce_ib": 3.521284818649292, + "ce_orig": 0.5597873330116272, + "epoch": 0.7994823495578403, + "kl_loss": 0.07233621180057526, + "loss_ib": 0.0010754904942587018, + "step": 2780 + }, + { + "ce_ib": 4.789313316345215, + "ce_orig": 0.8685615062713623, + "epoch": 0.7994823495578403, + "kl_loss": 0.08651231974363327, + "loss_ib": 0.0013440544717013836, + "step": 2780 + }, + { + "ce_ib": 3.644822835922241, + "ce_orig": 0.8012219071388245, + "epoch": 0.7997699331368179, + "kl_loss": 0.06335708498954773, + "loss_ib": 0.0009980531176552176, + "step": 2781 + }, + { + "ce_ib": 4.163719177246094, + "ce_orig": 0.6543939113616943, + "epoch": 0.7997699331368179, + "kl_loss": 0.06999766826629639, + "loss_ib": 0.0011163485469296575, + "step": 2781 + }, + { + "ce_ib": 4.902627944946289, + "ce_orig": 1.192283034324646, + "epoch": 0.7997699331368179, + "kl_loss": 0.05714747682213783, + "loss_ib": 0.0010617375373840332, + "step": 2781 + }, + { + "ce_ib": 2.650094985961914, + "ce_orig": 0.6196550726890564, + "epoch": 0.7997699331368179, + "kl_loss": 0.039657533168792725, + "loss_ib": 0.0006615847814828157, + "step": 2781 + }, + { + "ce_ib": 2.8732712268829346, + "ce_orig": 0.327261358499527, + "epoch": 0.8000575167157955, + "kl_loss": 0.06257051229476929, + "loss_ib": 0.0009130322141572833, + "step": 2782 + }, + { + "ce_ib": 5.550877094268799, + "ce_orig": 1.4798474311828613, + "epoch": 0.8000575167157955, + "kl_loss": 0.06821580231189728, + "loss_ib": 0.0012372457422316074, + "step": 2782 + }, + { + "ce_ib": 3.1717989444732666, + "ce_orig": 0.6493034362792969, + "epoch": 0.8000575167157955, + "kl_loss": 0.03579232096672058, + "loss_ib": 0.0006751030450686812, + "step": 2782 + }, + { + "ce_ib": 3.9736249446868896, + "ce_orig": 0.36549803614616394, + "epoch": 0.8000575167157955, + "kl_loss": 0.09295276552438736, + "loss_ib": 0.0013268900802358985, + "step": 2782 + }, + { + "ce_ib": 4.003111839294434, + "ce_orig": 0.6158371567726135, + "epoch": 0.8003451002947731, + "kl_loss": 0.06916362792253494, + "loss_ib": 0.0010919474298134446, + "step": 2783 + }, + { + "ce_ib": 3.4221839904785156, + "ce_orig": 0.6582720875740051, + "epoch": 0.8003451002947731, + "kl_loss": 0.06134367361664772, + "loss_ib": 0.0009556551231071353, + "step": 2783 + }, + { + "ce_ib": 3.4667775630950928, + "ce_orig": 0.8682497143745422, + "epoch": 0.8003451002947731, + "kl_loss": 0.04719420522451401, + "loss_ib": 0.0008186197956092656, + "step": 2783 + }, + { + "ce_ib": 3.835693359375, + "ce_orig": 0.7507675886154175, + "epoch": 0.8003451002947731, + "kl_loss": 0.04382248967885971, + "loss_ib": 0.0008217942668125033, + "step": 2783 + }, + { + "ce_ib": 3.2683181762695312, + "ce_orig": 0.7520684003829956, + "epoch": 0.8006326838737509, + "kl_loss": 0.0772356390953064, + "loss_ib": 0.0010991881135851145, + "step": 2784 + }, + { + "ce_ib": 3.611927032470703, + "ce_orig": 0.7834255695343018, + "epoch": 0.8006326838737509, + "kl_loss": 0.08385944366455078, + "loss_ib": 0.00119978713337332, + "step": 2784 + }, + { + "ce_ib": 4.645783424377441, + "ce_orig": 0.7325895428657532, + "epoch": 0.8006326838737509, + "kl_loss": 0.07645636796951294, + "loss_ib": 0.0012291419552639127, + "step": 2784 + }, + { + "ce_ib": 2.435009241104126, + "ce_orig": 0.31921374797821045, + "epoch": 0.8006326838737509, + "kl_loss": 0.05426100268959999, + "loss_ib": 0.0007861108751967549, + "step": 2784 + }, + { + "epoch": 0.8009202674527285, + "grad_norm": 0.09394791722297668, + "learning_rate": 4.303075041685152e-05, + "loss": 0.7748, + "step": 2785 + }, + { + "ce_ib": 6.009460926055908, + "ce_orig": 1.263778567314148, + "epoch": 0.8009202674527285, + "kl_loss": 0.08873192965984344, + "loss_ib": 0.001488265348598361, + "step": 2785 + }, + { + "ce_ib": 5.212767601013184, + "ce_orig": 0.841185986995697, + "epoch": 0.8009202674527285, + "kl_loss": 0.06282983720302582, + "loss_ib": 0.0011495751095935702, + "step": 2785 + }, + { + "ce_ib": 2.1100070476531982, + "ce_orig": 0.46035587787628174, + "epoch": 0.8009202674527285, + "kl_loss": 0.04866867512464523, + "loss_ib": 0.0006976873846724629, + "step": 2785 + }, + { + "ce_ib": 3.7316389083862305, + "ce_orig": 0.5767799615859985, + "epoch": 0.8009202674527285, + "kl_loss": 0.06468038260936737, + "loss_ib": 0.001019967719912529, + "step": 2785 + }, + { + "ce_ib": 4.9479146003723145, + "ce_orig": 1.205575704574585, + "epoch": 0.8012078510317061, + "kl_loss": 0.05671490356326103, + "loss_ib": 0.0010619404492899776, + "step": 2786 + }, + { + "ce_ib": 2.923267364501953, + "ce_orig": 0.6963656544685364, + "epoch": 0.8012078510317061, + "kl_loss": 0.038322675973176956, + "loss_ib": 0.0006755535141564906, + "step": 2786 + }, + { + "ce_ib": 4.747457981109619, + "ce_orig": 1.3031419515609741, + "epoch": 0.8012078510317061, + "kl_loss": 0.05324758589267731, + "loss_ib": 0.0010072216391563416, + "step": 2786 + }, + { + "ce_ib": 2.7359533309936523, + "ce_orig": 0.6366972923278809, + "epoch": 0.8012078510317061, + "kl_loss": 0.0554719939827919, + "loss_ib": 0.0008283152128569782, + "step": 2786 + }, + { + "ce_ib": 7.022327423095703, + "ce_orig": 1.5986502170562744, + "epoch": 0.8014954346106837, + "kl_loss": 0.08910167217254639, + "loss_ib": 0.0015932493843138218, + "step": 2787 + }, + { + "ce_ib": 2.523120880126953, + "ce_orig": 0.46121251583099365, + "epoch": 0.8014954346106837, + "kl_loss": 0.062098607420921326, + "loss_ib": 0.0008732981514185667, + "step": 2787 + }, + { + "ce_ib": 3.412642002105713, + "ce_orig": 0.9363806247711182, + "epoch": 0.8014954346106837, + "kl_loss": 0.050120532512664795, + "loss_ib": 0.0008424694533459842, + "step": 2787 + }, + { + "ce_ib": 3.586941957473755, + "ce_orig": 0.7506765127182007, + "epoch": 0.8014954346106837, + "kl_loss": 0.04424023628234863, + "loss_ib": 0.0008010965539142489, + "step": 2787 + }, + { + "ce_ib": 3.286259889602661, + "ce_orig": 0.7491219639778137, + "epoch": 0.8017830181896614, + "kl_loss": 0.07386104762554169, + "loss_ib": 0.0010672365315258503, + "step": 2788 + }, + { + "ce_ib": 3.0912346839904785, + "ce_orig": 0.40968677401542664, + "epoch": 0.8017830181896614, + "kl_loss": 0.15189200639724731, + "loss_ib": 0.0018280433723703027, + "step": 2788 + }, + { + "ce_ib": 4.300029754638672, + "ce_orig": 0.8705939650535583, + "epoch": 0.8017830181896614, + "kl_loss": 0.10274089872837067, + "loss_ib": 0.0014574119122698903, + "step": 2788 + }, + { + "ce_ib": 6.129761219024658, + "ce_orig": 1.4742475748062134, + "epoch": 0.8017830181896614, + "kl_loss": 0.06922514736652374, + "loss_ib": 0.0013052275171503425, + "step": 2788 + }, + { + "ce_ib": 4.221517562866211, + "ce_orig": 0.5895676016807556, + "epoch": 0.802070601768639, + "kl_loss": 0.07881335914134979, + "loss_ib": 0.0012102853506803513, + "step": 2789 + }, + { + "ce_ib": 4.457080841064453, + "ce_orig": 0.9985543489456177, + "epoch": 0.802070601768639, + "kl_loss": 0.060325540602207184, + "loss_ib": 0.0010489635169506073, + "step": 2789 + }, + { + "ce_ib": 4.136157035827637, + "ce_orig": 0.6122975945472717, + "epoch": 0.802070601768639, + "kl_loss": 0.0467827171087265, + "loss_ib": 0.0008814428583718836, + "step": 2789 + }, + { + "ce_ib": 2.034712076187134, + "ce_orig": 0.5152589082717896, + "epoch": 0.802070601768639, + "kl_loss": 0.04351775348186493, + "loss_ib": 0.000638648692984134, + "step": 2789 + }, + { + "epoch": 0.8023581853476166, + "grad_norm": 0.09285304695367813, + "learning_rate": 4.300384916744261e-05, + "loss": 0.8677, + "step": 2790 + }, + { + "ce_ib": 3.259428024291992, + "ce_orig": 0.5918373465538025, + "epoch": 0.8023581853476166, + "kl_loss": 0.08050665259361267, + "loss_ib": 0.0011310093104839325, + "step": 2790 + }, + { + "ce_ib": 3.5718696117401123, + "ce_orig": 1.0117661952972412, + "epoch": 0.8023581853476166, + "kl_loss": 0.046871840953826904, + "loss_ib": 0.0008259053574874997, + "step": 2790 + }, + { + "ce_ib": 2.884453058242798, + "ce_orig": 0.6427131295204163, + "epoch": 0.8023581853476166, + "kl_loss": 0.06246233731508255, + "loss_ib": 0.0009130686521530151, + "step": 2790 + }, + { + "ce_ib": 4.308780670166016, + "ce_orig": 0.7729065418243408, + "epoch": 0.8023581853476166, + "kl_loss": 0.06038539111614227, + "loss_ib": 0.0010347319766879082, + "step": 2790 + }, + { + "ce_ib": 6.162014484405518, + "ce_orig": 1.5191571712493896, + "epoch": 0.8026457689265943, + "kl_loss": 0.05483422800898552, + "loss_ib": 0.0011645436752587557, + "step": 2791 + }, + { + "ce_ib": 2.736621618270874, + "ce_orig": 0.764778196811676, + "epoch": 0.8026457689265943, + "kl_loss": 0.07101018726825714, + "loss_ib": 0.0009837639518082142, + "step": 2791 + }, + { + "ce_ib": 1.989526629447937, + "ce_orig": 0.41304242610931396, + "epoch": 0.8026457689265943, + "kl_loss": 0.077919602394104, + "loss_ib": 0.000978148658759892, + "step": 2791 + }, + { + "ce_ib": 3.6890857219696045, + "ce_orig": 0.8108035922050476, + "epoch": 0.8026457689265943, + "kl_loss": 0.06487536430358887, + "loss_ib": 0.0010176622308790684, + "step": 2791 + }, + { + "ce_ib": 3.1518397331237793, + "ce_orig": 0.5796883702278137, + "epoch": 0.802933352505572, + "kl_loss": 0.1379975974559784, + "loss_ib": 0.0016951599391177297, + "step": 2792 + }, + { + "ce_ib": 3.566190242767334, + "ce_orig": 0.9894468188285828, + "epoch": 0.802933352505572, + "kl_loss": 0.05916464328765869, + "loss_ib": 0.0009482654859311879, + "step": 2792 + }, + { + "ce_ib": 5.081014156341553, + "ce_orig": 1.2000694274902344, + "epoch": 0.802933352505572, + "kl_loss": 0.041349831968545914, + "loss_ib": 0.0009215996833518147, + "step": 2792 + }, + { + "ce_ib": 3.797490358352661, + "ce_orig": 0.7724114656448364, + "epoch": 0.802933352505572, + "kl_loss": 0.04384986311197281, + "loss_ib": 0.0008182476158253849, + "step": 2792 + }, + { + "ce_ib": 3.856207847595215, + "ce_orig": 0.7611627578735352, + "epoch": 0.8032209360845496, + "kl_loss": 0.07841872423887253, + "loss_ib": 0.0011698079761117697, + "step": 2793 + }, + { + "ce_ib": 6.26647424697876, + "ce_orig": 1.5935899019241333, + "epoch": 0.8032209360845496, + "kl_loss": 0.09118853509426117, + "loss_ib": 0.0015385326696559787, + "step": 2793 + }, + { + "ce_ib": 4.166974067687988, + "ce_orig": 1.1007580757141113, + "epoch": 0.8032209360845496, + "kl_loss": 0.07116931676864624, + "loss_ib": 0.0011283905478194356, + "step": 2793 + }, + { + "ce_ib": 2.902090072631836, + "ce_orig": 0.875754177570343, + "epoch": 0.8032209360845496, + "kl_loss": 0.04508616030216217, + "loss_ib": 0.0007410705438815057, + "step": 2793 + }, + { + "ce_ib": 4.363460063934326, + "ce_orig": 0.805544376373291, + "epoch": 0.8035085196635272, + "kl_loss": 0.07036960124969482, + "loss_ib": 0.0011400419753044844, + "step": 2794 + }, + { + "ce_ib": 5.562314510345459, + "ce_orig": 0.9759882092475891, + "epoch": 0.8035085196635272, + "kl_loss": 0.05826704949140549, + "loss_ib": 0.0011389019200578332, + "step": 2794 + }, + { + "ce_ib": 3.930072546005249, + "ce_orig": 0.5354949235916138, + "epoch": 0.8035085196635272, + "kl_loss": 0.10107627511024475, + "loss_ib": 0.0014037699438631535, + "step": 2794 + }, + { + "ce_ib": 3.3556323051452637, + "ce_orig": 0.7980233430862427, + "epoch": 0.8035085196635272, + "kl_loss": 0.039998799562454224, + "loss_ib": 0.0007355512352660298, + "step": 2794 + }, + { + "epoch": 0.8037961032425048, + "grad_norm": 0.09024666249752045, + "learning_rate": 4.297690454258227e-05, + "loss": 0.903, + "step": 2795 + }, + { + "ce_ib": 3.580435276031494, + "ce_orig": 0.8540958762168884, + "epoch": 0.8037961032425048, + "kl_loss": 0.05750410631299019, + "loss_ib": 0.000933084636926651, + "step": 2795 + }, + { + "ce_ib": 4.4290313720703125, + "ce_orig": 0.715499758720398, + "epoch": 0.8037961032425048, + "kl_loss": 0.08717624843120575, + "loss_ib": 0.0013146656565368176, + "step": 2795 + }, + { + "ce_ib": 3.3991622924804688, + "ce_orig": 0.8790170550346375, + "epoch": 0.8037961032425048, + "kl_loss": 0.05381014943122864, + "loss_ib": 0.0008780176867730916, + "step": 2795 + }, + { + "ce_ib": 2.3698055744171143, + "ce_orig": 0.5552039742469788, + "epoch": 0.8037961032425048, + "kl_loss": 0.07288984954357147, + "loss_ib": 0.0009658790077082813, + "step": 2795 + }, + { + "ce_ib": 2.8376004695892334, + "ce_orig": 0.3569388687610626, + "epoch": 0.8040836868214825, + "kl_loss": 0.07954586297273636, + "loss_ib": 0.0010792186949402094, + "step": 2796 + }, + { + "ce_ib": 4.995804786682129, + "ce_orig": 0.6709532737731934, + "epoch": 0.8040836868214825, + "kl_loss": 0.08931293338537216, + "loss_ib": 0.0013927096733823419, + "step": 2796 + }, + { + "ce_ib": 4.480438709259033, + "ce_orig": 0.8262181282043457, + "epoch": 0.8040836868214825, + "kl_loss": 0.06536826491355896, + "loss_ib": 0.0011017265496775508, + "step": 2796 + }, + { + "ce_ib": 5.455348014831543, + "ce_orig": 1.0712203979492188, + "epoch": 0.8040836868214825, + "kl_loss": 0.0680001899600029, + "loss_ib": 0.0012255366891622543, + "step": 2796 + }, + { + "ce_ib": 2.058647632598877, + "ce_orig": 0.22299128770828247, + "epoch": 0.8043712704004601, + "kl_loss": 0.07397286593914032, + "loss_ib": 0.0009455934050492942, + "step": 2797 + }, + { + "ce_ib": 4.629581451416016, + "ce_orig": 0.710688591003418, + "epoch": 0.8043712704004601, + "kl_loss": 0.0645715743303299, + "loss_ib": 0.0011086738668382168, + "step": 2797 + }, + { + "ce_ib": 2.400752544403076, + "ce_orig": 0.6087847948074341, + "epoch": 0.8043712704004601, + "kl_loss": 0.05361686646938324, + "loss_ib": 0.0007762439199723303, + "step": 2797 + }, + { + "ce_ib": 2.4782607555389404, + "ce_orig": 0.41846537590026855, + "epoch": 0.8043712704004601, + "kl_loss": 0.05063673108816147, + "loss_ib": 0.0007541933446191251, + "step": 2797 + }, + { + "ce_ib": 3.221158981323242, + "ce_orig": 0.68253493309021, + "epoch": 0.8046588539794378, + "kl_loss": 0.04569082707166672, + "loss_ib": 0.0007790242088958621, + "step": 2798 + }, + { + "ce_ib": 3.132749319076538, + "ce_orig": 0.7105351686477661, + "epoch": 0.8046588539794378, + "kl_loss": 0.04634464159607887, + "loss_ib": 0.0007767213974148035, + "step": 2798 + }, + { + "ce_ib": 4.1053361892700195, + "ce_orig": 0.43537768721580505, + "epoch": 0.8046588539794378, + "kl_loss": 0.08119573444128036, + "loss_ib": 0.0012224909150972962, + "step": 2798 + }, + { + "ce_ib": 2.5565967559814453, + "ce_orig": 0.6688821315765381, + "epoch": 0.8046588539794378, + "kl_loss": 0.03824060410261154, + "loss_ib": 0.0006380656850524247, + "step": 2798 + }, + { + "ce_ib": 3.7040748596191406, + "ce_orig": 0.9526134133338928, + "epoch": 0.8049464375584154, + "kl_loss": 0.07288604974746704, + "loss_ib": 0.0010992679744958878, + "step": 2799 + }, + { + "ce_ib": 6.255610466003418, + "ce_orig": 1.7454771995544434, + "epoch": 0.8049464375584154, + "kl_loss": 0.05704963952302933, + "loss_ib": 0.0011960574192926288, + "step": 2799 + }, + { + "ce_ib": 5.6009087562561035, + "ce_orig": 0.7423248887062073, + "epoch": 0.8049464375584154, + "kl_loss": 0.09092587232589722, + "loss_ib": 0.0014693494886159897, + "step": 2799 + }, + { + "ce_ib": 1.4937676191329956, + "ce_orig": 0.2984970808029175, + "epoch": 0.8049464375584154, + "kl_loss": 0.14653505384922028, + "loss_ib": 0.0016147271962836385, + "step": 2799 + }, + { + "epoch": 0.8052340211373931, + "grad_norm": 0.08608611673116684, + "learning_rate": 4.2949916607186357e-05, + "loss": 0.8348, + "step": 2800 + }, + { + "ce_ib": 2.593160629272461, + "ce_orig": 0.5135070085525513, + "epoch": 0.8052340211373931, + "kl_loss": 0.06578093767166138, + "loss_ib": 0.0009171253768727183, + "step": 2800 + }, + { + "ce_ib": 3.5328757762908936, + "ce_orig": 0.9674763679504395, + "epoch": 0.8052340211373931, + "kl_loss": 0.04524284601211548, + "loss_ib": 0.0008057160302996635, + "step": 2800 + }, + { + "ce_ib": 4.573200225830078, + "ce_orig": 0.37906163930892944, + "epoch": 0.8052340211373931, + "kl_loss": 0.04652133584022522, + "loss_ib": 0.0009225333924405277, + "step": 2800 + }, + { + "ce_ib": 3.4726967811584473, + "ce_orig": 0.4164227843284607, + "epoch": 0.8052340211373931, + "kl_loss": 0.06189584359526634, + "loss_ib": 0.0009662280790507793, + "step": 2800 + }, + { + "ce_ib": 2.4074363708496094, + "ce_orig": 0.6796159744262695, + "epoch": 0.8055216047163707, + "kl_loss": 0.06222547963261604, + "loss_ib": 0.0008629984222352505, + "step": 2801 + }, + { + "ce_ib": 3.1676721572875977, + "ce_orig": 0.6591743230819702, + "epoch": 0.8055216047163707, + "kl_loss": 0.051517996937036514, + "loss_ib": 0.0008319471380673349, + "step": 2801 + }, + { + "ce_ib": 3.418274164199829, + "ce_orig": 0.49905142188072205, + "epoch": 0.8055216047163707, + "kl_loss": 0.06885931640863419, + "loss_ib": 0.0010304205352440476, + "step": 2801 + }, + { + "ce_ib": 5.5044426918029785, + "ce_orig": 1.2813369035720825, + "epoch": 0.8055216047163707, + "kl_loss": 0.07398340106010437, + "loss_ib": 0.0012902782764285803, + "step": 2801 + }, + { + "ce_ib": 3.982818841934204, + "ce_orig": 0.6316425204277039, + "epoch": 0.8058091882953483, + "kl_loss": 0.04551146179437637, + "loss_ib": 0.0008533964864909649, + "step": 2802 + }, + { + "ce_ib": 3.78408145904541, + "ce_orig": 1.200227975845337, + "epoch": 0.8058091882953483, + "kl_loss": 0.05486438050866127, + "loss_ib": 0.0009270518785342574, + "step": 2802 + }, + { + "ce_ib": 3.292829751968384, + "ce_orig": 0.6343132257461548, + "epoch": 0.8058091882953483, + "kl_loss": 0.09481840580701828, + "loss_ib": 0.0012774670030921698, + "step": 2802 + }, + { + "ce_ib": 2.7361879348754883, + "ce_orig": 0.6173849701881409, + "epoch": 0.8058091882953483, + "kl_loss": 0.034883204847574234, + "loss_ib": 0.000622450839728117, + "step": 2802 + }, + { + "ce_ib": 4.984631061553955, + "ce_orig": 1.0655900239944458, + "epoch": 0.8060967718743259, + "kl_loss": 0.05720861256122589, + "loss_ib": 0.0010705491295084357, + "step": 2803 + }, + { + "ce_ib": 3.8452277183532715, + "ce_orig": 0.6207940578460693, + "epoch": 0.8060967718743259, + "kl_loss": 0.05574939027428627, + "loss_ib": 0.0009420166607014835, + "step": 2803 + }, + { + "ce_ib": 2.739047050476074, + "ce_orig": 0.7275076508522034, + "epoch": 0.8060967718743259, + "kl_loss": 0.04175177961587906, + "loss_ib": 0.0006914224941283464, + "step": 2803 + }, + { + "ce_ib": 3.1908061504364014, + "ce_orig": 0.6435509324073792, + "epoch": 0.8060967718743259, + "kl_loss": 0.06308645755052567, + "loss_ib": 0.0009499451261945069, + "step": 2803 + }, + { + "ce_ib": 3.6722195148468018, + "ce_orig": 0.7271295785903931, + "epoch": 0.8063843554533037, + "kl_loss": 0.07591794431209564, + "loss_ib": 0.0011264013592153788, + "step": 2804 + }, + { + "ce_ib": 2.1958694458007812, + "ce_orig": 0.471758633852005, + "epoch": 0.8063843554533037, + "kl_loss": 0.06421157717704773, + "loss_ib": 0.0008617027197033167, + "step": 2804 + }, + { + "ce_ib": 3.0443007946014404, + "ce_orig": 0.6292927265167236, + "epoch": 0.8063843554533037, + "kl_loss": 0.068825364112854, + "loss_ib": 0.000992683693766594, + "step": 2804 + }, + { + "ce_ib": 5.09236478805542, + "ce_orig": 0.950176477432251, + "epoch": 0.8063843554533037, + "kl_loss": 0.07290607690811157, + "loss_ib": 0.0012382972054183483, + "step": 2804 + }, + { + "epoch": 0.8066719390322813, + "grad_norm": 0.09812460094690323, + "learning_rate": 4.292288542627507e-05, + "loss": 0.727, + "step": 2805 + }, + { + "ce_ib": 2.419449806213379, + "ce_orig": 0.5679433345794678, + "epoch": 0.8066719390322813, + "kl_loss": 0.04581543803215027, + "loss_ib": 0.0007000992773100734, + "step": 2805 + }, + { + "ce_ib": 4.5930280685424805, + "ce_orig": 0.8501792550086975, + "epoch": 0.8066719390322813, + "kl_loss": 0.06737855076789856, + "loss_ib": 0.001133088255301118, + "step": 2805 + }, + { + "ce_ib": 3.5067837238311768, + "ce_orig": 0.8572978973388672, + "epoch": 0.8066719390322813, + "kl_loss": 0.06298828125, + "loss_ib": 0.0009805612498894334, + "step": 2805 + }, + { + "ce_ib": 2.832054615020752, + "ce_orig": 0.27093416452407837, + "epoch": 0.8066719390322813, + "kl_loss": 0.06356309354305267, + "loss_ib": 0.0009188363328576088, + "step": 2805 + }, + { + "ce_ib": 2.7738027572631836, + "ce_orig": 0.7677273750305176, + "epoch": 0.8069595226112589, + "kl_loss": 0.044804543256759644, + "loss_ib": 0.0007254256634041667, + "step": 2806 + }, + { + "ce_ib": 4.847275733947754, + "ce_orig": 1.0889945030212402, + "epoch": 0.8069595226112589, + "kl_loss": 0.08558472245931625, + "loss_ib": 0.0013405747013166547, + "step": 2806 + }, + { + "ce_ib": 4.365029811859131, + "ce_orig": 1.0879672765731812, + "epoch": 0.8069595226112589, + "kl_loss": 0.046995386481285095, + "loss_ib": 0.0009064568439498544, + "step": 2806 + }, + { + "ce_ib": 2.9993855953216553, + "ce_orig": 0.5165055394172668, + "epoch": 0.8069595226112589, + "kl_loss": 0.05502494424581528, + "loss_ib": 0.0008501879638060927, + "step": 2806 + }, + { + "ce_ib": 3.6942124366760254, + "ce_orig": 0.811820924282074, + "epoch": 0.8072471061902365, + "kl_loss": 0.04262714833021164, + "loss_ib": 0.0007956927292980254, + "step": 2807 + }, + { + "ce_ib": 5.01615047454834, + "ce_orig": 0.8631246089935303, + "epoch": 0.8072471061902365, + "kl_loss": 0.0656929463148117, + "loss_ib": 0.0011585444444790483, + "step": 2807 + }, + { + "ce_ib": 4.700525283813477, + "ce_orig": 1.1559691429138184, + "epoch": 0.8072471061902365, + "kl_loss": 0.0614861324429512, + "loss_ib": 0.0010849138488993049, + "step": 2807 + }, + { + "ce_ib": 4.325393199920654, + "ce_orig": 1.182008147239685, + "epoch": 0.8072471061902365, + "kl_loss": 0.054596226662397385, + "loss_ib": 0.0009785015136003494, + "step": 2807 + }, + { + "ce_ib": 4.096922874450684, + "ce_orig": 0.6802369952201843, + "epoch": 0.8075346897692142, + "kl_loss": 0.07302109152078629, + "loss_ib": 0.0011399032082408667, + "step": 2808 + }, + { + "ce_ib": 2.214186906814575, + "ce_orig": 0.2996130883693695, + "epoch": 0.8075346897692142, + "kl_loss": 0.04212663322687149, + "loss_ib": 0.0006426849868148565, + "step": 2808 + }, + { + "ce_ib": 3.3543858528137207, + "ce_orig": 0.7814090251922607, + "epoch": 0.8075346897692142, + "kl_loss": 0.05552506074309349, + "loss_ib": 0.0008906891453079879, + "step": 2808 + }, + { + "ce_ib": 4.090487480163574, + "ce_orig": 0.995890200138092, + "epoch": 0.8075346897692142, + "kl_loss": 0.04939277470111847, + "loss_ib": 0.0009029764332808554, + "step": 2808 + }, + { + "ce_ib": 4.4842119216918945, + "ce_orig": 1.0814521312713623, + "epoch": 0.8078222733481918, + "kl_loss": 0.16907444596290588, + "loss_ib": 0.0021391657646745443, + "step": 2809 + }, + { + "ce_ib": 3.776352882385254, + "ce_orig": 0.6950590014457703, + "epoch": 0.8078222733481918, + "kl_loss": 0.05309908837080002, + "loss_ib": 0.0009086261270567775, + "step": 2809 + }, + { + "ce_ib": 4.024361610412598, + "ce_orig": 0.772256076335907, + "epoch": 0.8078222733481918, + "kl_loss": 0.03996829688549042, + "loss_ib": 0.0008021190296858549, + "step": 2809 + }, + { + "ce_ib": 4.771635055541992, + "ce_orig": 1.0742484331130981, + "epoch": 0.8078222733481918, + "kl_loss": 0.16187484562397003, + "loss_ib": 0.0020959118846803904, + "step": 2809 + }, + { + "epoch": 0.8081098569271694, + "grad_norm": 0.0921996459364891, + "learning_rate": 4.2895811064972814e-05, + "loss": 0.8392, + "step": 2810 + }, + { + "ce_ib": 5.6590704917907715, + "ce_orig": 1.4127057790756226, + "epoch": 0.8081098569271694, + "kl_loss": 0.044673092663288116, + "loss_ib": 0.0010126378620043397, + "step": 2810 + }, + { + "ce_ib": 3.8842318058013916, + "ce_orig": 0.6836390495300293, + "epoch": 0.8081098569271694, + "kl_loss": 0.0976065993309021, + "loss_ib": 0.001364489202387631, + "step": 2810 + }, + { + "ce_ib": 5.422473430633545, + "ce_orig": 1.461331844329834, + "epoch": 0.8081098569271694, + "kl_loss": 0.038755714893341064, + "loss_ib": 0.0009298044024035335, + "step": 2810 + }, + { + "ce_ib": 4.6796650886535645, + "ce_orig": 1.0863430500030518, + "epoch": 0.8081098569271694, + "kl_loss": 0.04579062759876251, + "loss_ib": 0.0009258727659471333, + "step": 2810 + }, + { + "ce_ib": 3.876755952835083, + "ce_orig": 0.8623917698860168, + "epoch": 0.8083974405061471, + "kl_loss": 0.09529426693916321, + "loss_ib": 0.001340618240647018, + "step": 2811 + }, + { + "ce_ib": 3.9431746006011963, + "ce_orig": 0.8634815216064453, + "epoch": 0.8083974405061471, + "kl_loss": 0.041207484900951385, + "loss_ib": 0.0008063922869041562, + "step": 2811 + }, + { + "ce_ib": 4.447483062744141, + "ce_orig": 0.9114997982978821, + "epoch": 0.8083974405061471, + "kl_loss": 0.07173293828964233, + "loss_ib": 0.0011620776494964957, + "step": 2811 + }, + { + "ce_ib": 2.7305495738983154, + "ce_orig": 0.791706919670105, + "epoch": 0.8083974405061471, + "kl_loss": 0.050517015159130096, + "loss_ib": 0.0007782250759191811, + "step": 2811 + }, + { + "ce_ib": 4.867639541625977, + "ce_orig": 1.2518091201782227, + "epoch": 0.8086850240851248, + "kl_loss": 0.03666654974222183, + "loss_ib": 0.0008534294320270419, + "step": 2812 + }, + { + "ce_ib": 5.150210857391357, + "ce_orig": 1.2680009603500366, + "epoch": 0.8086850240851248, + "kl_loss": 0.08136443793773651, + "loss_ib": 0.0013286654138937593, + "step": 2812 + }, + { + "ce_ib": 3.506246328353882, + "ce_orig": 0.5354413390159607, + "epoch": 0.8086850240851248, + "kl_loss": 0.04539526626467705, + "loss_ib": 0.0008045773138292134, + "step": 2812 + }, + { + "ce_ib": 4.033875465393066, + "ce_orig": 1.0537214279174805, + "epoch": 0.8086850240851248, + "kl_loss": 0.05030646175146103, + "loss_ib": 0.0009064521291293204, + "step": 2812 + }, + { + "ce_ib": 4.81425666809082, + "ce_orig": 1.045875072479248, + "epoch": 0.8089726076641024, + "kl_loss": 0.0709066241979599, + "loss_ib": 0.0011904918355867267, + "step": 2813 + }, + { + "ce_ib": 6.115577220916748, + "ce_orig": 1.348751425743103, + "epoch": 0.8089726076641024, + "kl_loss": 0.0784284844994545, + "loss_ib": 0.0013958425261080265, + "step": 2813 + }, + { + "ce_ib": 3.1965103149414062, + "ce_orig": 0.9129912853240967, + "epoch": 0.8089726076641024, + "kl_loss": 0.044353075325489044, + "loss_ib": 0.0007631817134097219, + "step": 2813 + }, + { + "ce_ib": 2.7476513385772705, + "ce_orig": 0.8694785833358765, + "epoch": 0.8089726076641024, + "kl_loss": 0.05379428341984749, + "loss_ib": 0.0008127079927362502, + "step": 2813 + }, + { + "ce_ib": 4.237269878387451, + "ce_orig": 1.0259171724319458, + "epoch": 0.80926019124308, + "kl_loss": 0.08213835954666138, + "loss_ib": 0.0012451105285435915, + "step": 2814 + }, + { + "ce_ib": 5.203212261199951, + "ce_orig": 1.4135150909423828, + "epoch": 0.80926019124308, + "kl_loss": 0.055420245975255966, + "loss_ib": 0.0010745235485956073, + "step": 2814 + }, + { + "ce_ib": 4.284892559051514, + "ce_orig": 0.9301359057426453, + "epoch": 0.80926019124308, + "kl_loss": 0.0628245621919632, + "loss_ib": 0.0010567349381744862, + "step": 2814 + }, + { + "ce_ib": 2.1781649589538574, + "ce_orig": 0.5858385562896729, + "epoch": 0.80926019124308, + "kl_loss": 0.05268348008394241, + "loss_ib": 0.0007446512463502586, + "step": 2814 + }, + { + "epoch": 0.8095477748220576, + "grad_norm": 0.12124304473400116, + "learning_rate": 4.286869358850798e-05, + "loss": 0.9318, + "step": 2815 + }, + { + "ce_ib": 3.075924873352051, + "ce_orig": 0.6109804511070251, + "epoch": 0.8095477748220576, + "kl_loss": 0.034983519464731216, + "loss_ib": 0.0006574276485480368, + "step": 2815 + }, + { + "ce_ib": 6.0060553550720215, + "ce_orig": 1.4830180406570435, + "epoch": 0.8095477748220576, + "kl_loss": 0.07062789797782898, + "loss_ib": 0.001306884572841227, + "step": 2815 + }, + { + "ce_ib": 4.193084716796875, + "ce_orig": 0.8828693628311157, + "epoch": 0.8095477748220576, + "kl_loss": 0.045152679085731506, + "loss_ib": 0.0008708352688699961, + "step": 2815 + }, + { + "ce_ib": 6.6834235191345215, + "ce_orig": 1.534148931503296, + "epoch": 0.8095477748220576, + "kl_loss": 0.06947401911020279, + "loss_ib": 0.0013630824396386743, + "step": 2815 + }, + { + "ce_ib": 8.707320213317871, + "ce_orig": 2.166839599609375, + "epoch": 0.8098353584010353, + "kl_loss": 0.058604300022125244, + "loss_ib": 0.0014567750040441751, + "step": 2816 + }, + { + "ce_ib": 4.239480972290039, + "ce_orig": 0.7554767727851868, + "epoch": 0.8098353584010353, + "kl_loss": 0.06784763932228088, + "loss_ib": 0.0011024244595319033, + "step": 2816 + }, + { + "ce_ib": 4.673598766326904, + "ce_orig": 1.0906518697738647, + "epoch": 0.8098353584010353, + "kl_loss": 0.06701909005641937, + "loss_ib": 0.0011375508038327098, + "step": 2816 + }, + { + "ce_ib": 2.7611286640167236, + "ce_orig": 0.5307312607765198, + "epoch": 0.8098353584010353, + "kl_loss": 0.06915511190891266, + "loss_ib": 0.0009676639456301928, + "step": 2816 + }, + { + "ce_ib": 2.6555004119873047, + "ce_orig": 0.4836732745170593, + "epoch": 0.8101229419800129, + "kl_loss": 0.0815264880657196, + "loss_ib": 0.0010808148654177785, + "step": 2817 + }, + { + "ce_ib": 4.366032600402832, + "ce_orig": 1.037082314491272, + "epoch": 0.8101229419800129, + "kl_loss": 0.08420774340629578, + "loss_ib": 0.0012786806328222156, + "step": 2817 + }, + { + "ce_ib": 3.970612049102783, + "ce_orig": 0.5126366019248962, + "epoch": 0.8101229419800129, + "kl_loss": 0.058252543210983276, + "loss_ib": 0.0009795866208150983, + "step": 2817 + }, + { + "ce_ib": 4.737979412078857, + "ce_orig": 1.0081287622451782, + "epoch": 0.8101229419800129, + "kl_loss": 0.07983285933732986, + "loss_ib": 0.0012721264502033591, + "step": 2817 + }, + { + "ce_ib": 4.534115791320801, + "ce_orig": 1.2641240358352661, + "epoch": 0.8104105255589906, + "kl_loss": 0.04403030499815941, + "loss_ib": 0.0008937145466916263, + "step": 2818 + }, + { + "ce_ib": 4.93393087387085, + "ce_orig": 1.0438241958618164, + "epoch": 0.8104105255589906, + "kl_loss": 0.07985500991344452, + "loss_ib": 0.0012919431319460273, + "step": 2818 + }, + { + "ce_ib": 3.1153955459594727, + "ce_orig": 0.780728816986084, + "epoch": 0.8104105255589906, + "kl_loss": 0.046541403979063034, + "loss_ib": 0.0007769535295665264, + "step": 2818 + }, + { + "ce_ib": 1.90758216381073, + "ce_orig": 0.46094757318496704, + "epoch": 0.8104105255589906, + "kl_loss": 0.0237630233168602, + "loss_ib": 0.00042838844819925725, + "step": 2818 + }, + { + "ce_ib": 2.814727306365967, + "ce_orig": 0.9427799582481384, + "epoch": 0.8106981091379682, + "kl_loss": 0.04188046604394913, + "loss_ib": 0.0007002773927524686, + "step": 2819 + }, + { + "ce_ib": 3.114901304244995, + "ce_orig": 0.4434991776943207, + "epoch": 0.8106981091379682, + "kl_loss": 0.05870252847671509, + "loss_ib": 0.0008985153981484473, + "step": 2819 + }, + { + "ce_ib": 3.786454200744629, + "ce_orig": 0.6229361295700073, + "epoch": 0.8106981091379682, + "kl_loss": 0.08028246462345123, + "loss_ib": 0.0011814699973911047, + "step": 2819 + }, + { + "ce_ib": 5.213230609893799, + "ce_orig": 1.251202940940857, + "epoch": 0.8106981091379682, + "kl_loss": 0.05523292347788811, + "loss_ib": 0.0010736522963270545, + "step": 2819 + }, + { + "epoch": 0.8109856927169459, + "grad_norm": 0.10648587346076965, + "learning_rate": 4.284153306221289e-05, + "loss": 0.8365, + "step": 2820 + }, + { + "ce_ib": 5.896533966064453, + "ce_orig": 1.248779296875, + "epoch": 0.8109856927169459, + "kl_loss": 0.09211656451225281, + "loss_ib": 0.0015108190709725022, + "step": 2820 + }, + { + "ce_ib": 3.8680455684661865, + "ce_orig": 0.741024374961853, + "epoch": 0.8109856927169459, + "kl_loss": 0.11095944046974182, + "loss_ib": 0.0014963990543037653, + "step": 2820 + }, + { + "ce_ib": 3.000241994857788, + "ce_orig": 0.351680725812912, + "epoch": 0.8109856927169459, + "kl_loss": 0.07788707315921783, + "loss_ib": 0.0010788948275148869, + "step": 2820 + }, + { + "ce_ib": 3.122934579849243, + "ce_orig": 0.6354974508285522, + "epoch": 0.8109856927169459, + "kl_loss": 0.06342283636331558, + "loss_ib": 0.000946521817240864, + "step": 2820 + }, + { + "ce_ib": 3.8547937870025635, + "ce_orig": 1.103771448135376, + "epoch": 0.8112732762959235, + "kl_loss": 0.07873371243476868, + "loss_ib": 0.0011728163808584213, + "step": 2821 + }, + { + "ce_ib": 2.4923079013824463, + "ce_orig": 0.611700713634491, + "epoch": 0.8112732762959235, + "kl_loss": 0.04029242694377899, + "loss_ib": 0.0006521550239995122, + "step": 2821 + }, + { + "ce_ib": 3.3449690341949463, + "ce_orig": 0.7858791351318359, + "epoch": 0.8112732762959235, + "kl_loss": 0.05659525468945503, + "loss_ib": 0.0009004494058899581, + "step": 2821 + }, + { + "ce_ib": 4.107063293457031, + "ce_orig": 0.6442622542381287, + "epoch": 0.8112732762959235, + "kl_loss": 0.05351804196834564, + "loss_ib": 0.0009458867134526372, + "step": 2821 + }, + { + "ce_ib": 2.9339118003845215, + "ce_orig": 0.45133236050605774, + "epoch": 0.8115608598749011, + "kl_loss": 0.08317485451698303, + "loss_ib": 0.0011251397663727403, + "step": 2822 + }, + { + "ce_ib": 3.9446723461151123, + "ce_orig": 0.8724271059036255, + "epoch": 0.8115608598749011, + "kl_loss": 0.03420388326048851, + "loss_ib": 0.0007365060155279934, + "step": 2822 + }, + { + "ce_ib": 3.591268539428711, + "ce_orig": 0.7701236009597778, + "epoch": 0.8115608598749011, + "kl_loss": 0.06951993703842163, + "loss_ib": 0.001054326188750565, + "step": 2822 + }, + { + "ce_ib": 5.5101518630981445, + "ce_orig": 1.149230718612671, + "epoch": 0.8115608598749011, + "kl_loss": 0.03574838489294052, + "loss_ib": 0.0009084990597330034, + "step": 2822 + }, + { + "ce_ib": 1.7254841327667236, + "ce_orig": 0.23637983202934265, + "epoch": 0.8118484434538787, + "kl_loss": 0.05136322230100632, + "loss_ib": 0.0006861806032247841, + "step": 2823 + }, + { + "ce_ib": 3.582291841506958, + "ce_orig": 0.6171096563339233, + "epoch": 0.8118484434538787, + "kl_loss": 0.08260811865329742, + "loss_ib": 0.0011843102984130383, + "step": 2823 + }, + { + "ce_ib": 3.289958953857422, + "ce_orig": 0.7721973657608032, + "epoch": 0.8118484434538787, + "kl_loss": 0.09509702026844025, + "loss_ib": 0.0012799660908058286, + "step": 2823 + }, + { + "ce_ib": 4.773255825042725, + "ce_orig": 1.3126112222671509, + "epoch": 0.8118484434538787, + "kl_loss": 0.039554912596940994, + "loss_ib": 0.0008728746906854212, + "step": 2823 + }, + { + "ce_ib": 2.135138511657715, + "ce_orig": 0.41056913137435913, + "epoch": 0.8121360270328565, + "kl_loss": 0.12198561429977417, + "loss_ib": 0.0014333699364215136, + "step": 2824 + }, + { + "ce_ib": 3.7915070056915283, + "ce_orig": 0.7426280379295349, + "epoch": 0.8121360270328565, + "kl_loss": 0.10773339122533798, + "loss_ib": 0.0014564846642315388, + "step": 2824 + }, + { + "ce_ib": 4.988602638244629, + "ce_orig": 0.5987022519111633, + "epoch": 0.8121360270328565, + "kl_loss": 0.061046503484249115, + "loss_ib": 0.0011093252105638385, + "step": 2824 + }, + { + "ce_ib": 3.2920281887054443, + "ce_orig": 0.5005414485931396, + "epoch": 0.8121360270328565, + "kl_loss": 0.19244667887687683, + "loss_ib": 0.002253669546917081, + "step": 2824 + }, + { + "epoch": 0.8124236106118341, + "grad_norm": 0.09520366042852402, + "learning_rate": 4.281432955152354e-05, + "loss": 0.7945, + "step": 2825 + }, + { + "ce_ib": 3.6384847164154053, + "ce_orig": 0.8386361598968506, + "epoch": 0.8124236106118341, + "kl_loss": 0.057286348193883896, + "loss_ib": 0.0009367119055241346, + "step": 2825 + }, + { + "ce_ib": 4.356637477874756, + "ce_orig": 0.9467763900756836, + "epoch": 0.8124236106118341, + "kl_loss": 0.07791094481945038, + "loss_ib": 0.0012147731613367796, + "step": 2825 + }, + { + "ce_ib": 3.4998371601104736, + "ce_orig": 0.8794094920158386, + "epoch": 0.8124236106118341, + "kl_loss": 0.04370966553688049, + "loss_ib": 0.0007870803819969296, + "step": 2825 + }, + { + "ce_ib": 6.506490230560303, + "ce_orig": 1.707002878189087, + "epoch": 0.8124236106118341, + "kl_loss": 0.0865507572889328, + "loss_ib": 0.0015161564806476235, + "step": 2825 + }, + { + "ce_ib": 4.512806415557861, + "ce_orig": 1.1337380409240723, + "epoch": 0.8127111941908117, + "kl_loss": 0.06614077091217041, + "loss_ib": 0.0011126883327960968, + "step": 2826 + }, + { + "ce_ib": 4.035088539123535, + "ce_orig": 1.0300800800323486, + "epoch": 0.8127111941908117, + "kl_loss": 0.06378300487995148, + "loss_ib": 0.0010413388954475522, + "step": 2826 + }, + { + "ce_ib": 6.561336994171143, + "ce_orig": 1.0866763591766357, + "epoch": 0.8127111941908117, + "kl_loss": 0.06935682892799377, + "loss_ib": 0.00134970189537853, + "step": 2826 + }, + { + "ce_ib": 3.6268646717071533, + "ce_orig": 0.8324140310287476, + "epoch": 0.8127111941908117, + "kl_loss": 0.042181629687547684, + "loss_ib": 0.0007845027721486986, + "step": 2826 + }, + { + "ce_ib": 3.9499664306640625, + "ce_orig": 0.7886189818382263, + "epoch": 0.8129987777697893, + "kl_loss": 0.0764208734035492, + "loss_ib": 0.0011592053342610598, + "step": 2827 + }, + { + "ce_ib": 3.500645160675049, + "ce_orig": 0.9924100637435913, + "epoch": 0.8129987777697893, + "kl_loss": 0.03651813417673111, + "loss_ib": 0.0007152459002099931, + "step": 2827 + }, + { + "ce_ib": 2.45164155960083, + "ce_orig": 0.3820023238658905, + "epoch": 0.8129987777697893, + "kl_loss": 0.06326758861541748, + "loss_ib": 0.0008778400369919837, + "step": 2827 + }, + { + "ce_ib": 2.7421789169311523, + "ce_orig": 0.6093149185180664, + "epoch": 0.8129987777697893, + "kl_loss": 0.07160329818725586, + "loss_ib": 0.0009902508463710546, + "step": 2827 + }, + { + "ce_ib": 2.5146682262420654, + "ce_orig": 0.317629873752594, + "epoch": 0.813286361348767, + "kl_loss": 0.07590235769748688, + "loss_ib": 0.0010104903485625982, + "step": 2828 + }, + { + "ce_ib": 3.055420160293579, + "ce_orig": 0.741834819316864, + "epoch": 0.813286361348767, + "kl_loss": 0.05687567591667175, + "loss_ib": 0.0008742987993173301, + "step": 2828 + }, + { + "ce_ib": 3.5266916751861572, + "ce_orig": 0.774075448513031, + "epoch": 0.813286361348767, + "kl_loss": 0.05860632658004761, + "loss_ib": 0.0009387324680574238, + "step": 2828 + }, + { + "ce_ib": 3.636784315109253, + "ce_orig": 0.7079830169677734, + "epoch": 0.813286361348767, + "kl_loss": 0.06633751839399338, + "loss_ib": 0.001027053571306169, + "step": 2828 + }, + { + "ce_ib": 3.5866644382476807, + "ce_orig": 0.7439398169517517, + "epoch": 0.8135739449277446, + "kl_loss": 0.09080234169960022, + "loss_ib": 0.0012666898546740413, + "step": 2829 + }, + { + "ce_ib": 4.785107612609863, + "ce_orig": 0.811502993106842, + "epoch": 0.8135739449277446, + "kl_loss": 0.09389708936214447, + "loss_ib": 0.0014174815732985735, + "step": 2829 + }, + { + "ce_ib": 5.616039752960205, + "ce_orig": 1.3710849285125732, + "epoch": 0.8135739449277446, + "kl_loss": 0.09302426874637604, + "loss_ib": 0.0014918467495590448, + "step": 2829 + }, + { + "ce_ib": 3.8492136001586914, + "ce_orig": 0.8684499859809875, + "epoch": 0.8135739449277446, + "kl_loss": 0.05905665084719658, + "loss_ib": 0.0009754878119565547, + "step": 2829 + }, + { + "epoch": 0.8138615285067222, + "grad_norm": 0.09455235302448273, + "learning_rate": 4.27870831219795e-05, + "loss": 0.8918, + "step": 2830 + }, + { + "ce_ib": 4.149899482727051, + "ce_orig": 0.7772961258888245, + "epoch": 0.8138615285067222, + "kl_loss": 0.08706504851579666, + "loss_ib": 0.001285640406422317, + "step": 2830 + }, + { + "ce_ib": 5.76322078704834, + "ce_orig": 1.3417019844055176, + "epoch": 0.8138615285067222, + "kl_loss": 0.06136152520775795, + "loss_ib": 0.0011899372329935431, + "step": 2830 + }, + { + "ce_ib": 2.480985403060913, + "ce_orig": 0.6829887628555298, + "epoch": 0.8138615285067222, + "kl_loss": 0.06119714304804802, + "loss_ib": 0.0008600699948146939, + "step": 2830 + }, + { + "ce_ib": 3.8665385246276855, + "ce_orig": 0.9787777662277222, + "epoch": 0.8138615285067222, + "kl_loss": 0.0661868005990982, + "loss_ib": 0.001048521837219596, + "step": 2830 + }, + { + "ce_ib": 2.1731784343719482, + "ce_orig": 0.5568891763687134, + "epoch": 0.8141491120857, + "kl_loss": 0.044678427278995514, + "loss_ib": 0.0006641021464020014, + "step": 2831 + }, + { + "ce_ib": 5.017473220825195, + "ce_orig": 1.383500576019287, + "epoch": 0.8141491120857, + "kl_loss": 0.04386095330119133, + "loss_ib": 0.000940356811042875, + "step": 2831 + }, + { + "ce_ib": 2.9854111671447754, + "ce_orig": 0.7299017310142517, + "epoch": 0.8141491120857, + "kl_loss": 0.08723090589046478, + "loss_ib": 0.0011708501260727644, + "step": 2831 + }, + { + "ce_ib": 3.6285996437072754, + "ce_orig": 0.7165617942810059, + "epoch": 0.8141491120857, + "kl_loss": 0.053437598049640656, + "loss_ib": 0.0008972359355539083, + "step": 2831 + }, + { + "ce_ib": 3.3569881916046143, + "ce_orig": 0.7360665798187256, + "epoch": 0.8144366956646776, + "kl_loss": 0.10138247907161713, + "loss_ib": 0.001349523663520813, + "step": 2832 + }, + { + "ce_ib": 3.6452486515045166, + "ce_orig": 0.4517959952354431, + "epoch": 0.8144366956646776, + "kl_loss": 0.07257949560880661, + "loss_ib": 0.0010903198271989822, + "step": 2832 + }, + { + "ce_ib": 5.382143020629883, + "ce_orig": 0.5606733560562134, + "epoch": 0.8144366956646776, + "kl_loss": 0.053952787071466446, + "loss_ib": 0.0010777420829981565, + "step": 2832 + }, + { + "ce_ib": 4.435298919677734, + "ce_orig": 0.7445781826972961, + "epoch": 0.8144366956646776, + "kl_loss": 0.07718373090028763, + "loss_ib": 0.0012153671123087406, + "step": 2832 + }, + { + "ce_ib": 4.875594615936279, + "ce_orig": 0.37516626715660095, + "epoch": 0.8147242792436552, + "kl_loss": 0.09651698917150497, + "loss_ib": 0.0014527293387800455, + "step": 2833 + }, + { + "ce_ib": 3.2397239208221436, + "ce_orig": 0.723114013671875, + "epoch": 0.8147242792436552, + "kl_loss": 0.0861864909529686, + "loss_ib": 0.0011858372017741203, + "step": 2833 + }, + { + "ce_ib": 4.592997074127197, + "ce_orig": 0.7704910039901733, + "epoch": 0.8147242792436552, + "kl_loss": 0.09097403287887573, + "loss_ib": 0.0013690399937331676, + "step": 2833 + }, + { + "ce_ib": 6.117581844329834, + "ce_orig": 0.9548546075820923, + "epoch": 0.8147242792436552, + "kl_loss": 0.04960188642144203, + "loss_ib": 0.001107777003198862, + "step": 2833 + }, + { + "ce_ib": 4.127015113830566, + "ce_orig": 0.9055514335632324, + "epoch": 0.8150118628226328, + "kl_loss": 0.04335695505142212, + "loss_ib": 0.0008462710538879037, + "step": 2834 + }, + { + "ce_ib": 2.8645284175872803, + "ce_orig": 0.4959232807159424, + "epoch": 0.8150118628226328, + "kl_loss": 0.05001385882496834, + "loss_ib": 0.0007865913794375956, + "step": 2834 + }, + { + "ce_ib": 2.985888957977295, + "ce_orig": 0.7039154767990112, + "epoch": 0.8150118628226328, + "kl_loss": 0.05178355425596237, + "loss_ib": 0.0008164243772625923, + "step": 2834 + }, + { + "ce_ib": 3.9692001342773438, + "ce_orig": 0.7143813967704773, + "epoch": 0.8150118628226328, + "kl_loss": 0.03981299698352814, + "loss_ib": 0.000795049942098558, + "step": 2834 + }, + { + "epoch": 0.8152994464016105, + "grad_norm": 0.09764674305915833, + "learning_rate": 4.275979383922376e-05, + "loss": 0.8512, + "step": 2835 + }, + { + "ce_ib": 3.8583426475524902, + "ce_orig": 1.1529181003570557, + "epoch": 0.8152994464016105, + "kl_loss": 0.09015233814716339, + "loss_ib": 0.0012873576488345861, + "step": 2835 + }, + { + "ce_ib": 5.391717433929443, + "ce_orig": 1.2460886240005493, + "epoch": 0.8152994464016105, + "kl_loss": 0.06095389649271965, + "loss_ib": 0.0011487107258290052, + "step": 2835 + }, + { + "ce_ib": 4.9217071533203125, + "ce_orig": 1.2122355699539185, + "epoch": 0.8152994464016105, + "kl_loss": 0.04819486290216446, + "loss_ib": 0.0009741192916408181, + "step": 2835 + }, + { + "ce_ib": 3.8306121826171875, + "ce_orig": 0.6135891079902649, + "epoch": 0.8152994464016105, + "kl_loss": 0.0636972039937973, + "loss_ib": 0.0010200331453233957, + "step": 2835 + }, + { + "ce_ib": 2.497019052505493, + "ce_orig": 0.625745952129364, + "epoch": 0.8155870299805881, + "kl_loss": 0.03351334482431412, + "loss_ib": 0.000584835303016007, + "step": 2836 + }, + { + "ce_ib": 3.2104697227478027, + "ce_orig": 0.6976656317710876, + "epoch": 0.8155870299805881, + "kl_loss": 0.05994366481900215, + "loss_ib": 0.0009204836096614599, + "step": 2836 + }, + { + "ce_ib": 4.132018089294434, + "ce_orig": 0.6881366968154907, + "epoch": 0.8155870299805881, + "kl_loss": 0.06933949887752533, + "loss_ib": 0.0011065966682508588, + "step": 2836 + }, + { + "ce_ib": 4.655333042144775, + "ce_orig": 1.1081902980804443, + "epoch": 0.8155870299805881, + "kl_loss": 0.07087615132331848, + "loss_ib": 0.0011742947390303016, + "step": 2836 + }, + { + "ce_ib": 6.4372639656066895, + "ce_orig": 1.7822211980819702, + "epoch": 0.8158746135595657, + "kl_loss": 0.07168354839086533, + "loss_ib": 0.0013605618150904775, + "step": 2837 + }, + { + "ce_ib": 4.085530757904053, + "ce_orig": 0.7272170186042786, + "epoch": 0.8158746135595657, + "kl_loss": 0.051313407719135284, + "loss_ib": 0.0009216871694661677, + "step": 2837 + }, + { + "ce_ib": 2.74377179145813, + "ce_orig": 0.5720328092575073, + "epoch": 0.8158746135595657, + "kl_loss": 0.05347152054309845, + "loss_ib": 0.0008090923656709492, + "step": 2837 + }, + { + "ce_ib": 2.933422803878784, + "ce_orig": 0.7734019160270691, + "epoch": 0.8158746135595657, + "kl_loss": 0.08342989534139633, + "loss_ib": 0.0011276411823928356, + "step": 2837 + }, + { + "ce_ib": 3.607072114944458, + "ce_orig": 0.7266466617584229, + "epoch": 0.8161621971385434, + "kl_loss": 0.08155035227537155, + "loss_ib": 0.0011762107023969293, + "step": 2838 + }, + { + "ce_ib": 5.262489318847656, + "ce_orig": 0.929052472114563, + "epoch": 0.8161621971385434, + "kl_loss": 0.08134257793426514, + "loss_ib": 0.0013396746944636106, + "step": 2838 + }, + { + "ce_ib": 2.871669292449951, + "ce_orig": 0.6076202392578125, + "epoch": 0.8161621971385434, + "kl_loss": 0.041324637830257416, + "loss_ib": 0.0007004132494330406, + "step": 2838 + }, + { + "ce_ib": 3.2861368656158447, + "ce_orig": 0.6813513040542603, + "epoch": 0.8161621971385434, + "kl_loss": 0.04792745038866997, + "loss_ib": 0.0008078881655819714, + "step": 2838 + }, + { + "ce_ib": 3.4165894985198975, + "ce_orig": 0.5092839002609253, + "epoch": 0.8164497807175211, + "kl_loss": 0.08960998803377151, + "loss_ib": 0.0012377587845548987, + "step": 2839 + }, + { + "ce_ib": 3.4107820987701416, + "ce_orig": 0.8227537870407104, + "epoch": 0.8164497807175211, + "kl_loss": 0.10221736878156662, + "loss_ib": 0.0013632518239319324, + "step": 2839 + }, + { + "ce_ib": 3.4928576946258545, + "ce_orig": 0.6871152520179749, + "epoch": 0.8164497807175211, + "kl_loss": 0.0388384684920311, + "loss_ib": 0.0007376704015769064, + "step": 2839 + }, + { + "ce_ib": 3.1530652046203613, + "ce_orig": 0.7459583282470703, + "epoch": 0.8164497807175211, + "kl_loss": 0.028156397864222527, + "loss_ib": 0.0005968704936094582, + "step": 2839 + }, + { + "epoch": 0.8167373642964987, + "grad_norm": 0.09399058669805527, + "learning_rate": 4.273246176900252e-05, + "loss": 0.7796, + "step": 2840 + }, + { + "ce_ib": 3.2661850452423096, + "ce_orig": 1.0011752843856812, + "epoch": 0.8167373642964987, + "kl_loss": 0.04647888243198395, + "loss_ib": 0.0007914073648862541, + "step": 2840 + }, + { + "ce_ib": 3.3977277278900146, + "ce_orig": 0.7896098494529724, + "epoch": 0.8167373642964987, + "kl_loss": 0.05133587121963501, + "loss_ib": 0.0008531314670108259, + "step": 2840 + }, + { + "ce_ib": 2.3324637413024902, + "ce_orig": 0.5327751636505127, + "epoch": 0.8167373642964987, + "kl_loss": 0.048400238156318665, + "loss_ib": 0.0007172487676143646, + "step": 2840 + }, + { + "ce_ib": 4.091373443603516, + "ce_orig": 1.0150412321090698, + "epoch": 0.8167373642964987, + "kl_loss": 0.042321108281612396, + "loss_ib": 0.0008323484216816723, + "step": 2840 + }, + { + "ce_ib": 3.623133659362793, + "ce_orig": 0.9656203985214233, + "epoch": 0.8170249478754763, + "kl_loss": 0.07382352650165558, + "loss_ib": 0.0011005486594513059, + "step": 2841 + }, + { + "ce_ib": 4.864160537719727, + "ce_orig": 0.9752691388130188, + "epoch": 0.8170249478754763, + "kl_loss": 0.04061059653759003, + "loss_ib": 0.0008925219881348312, + "step": 2841 + }, + { + "ce_ib": 4.73557186126709, + "ce_orig": 0.9850118160247803, + "epoch": 0.8170249478754763, + "kl_loss": 0.06516137719154358, + "loss_ib": 0.0011251709656789899, + "step": 2841 + }, + { + "ce_ib": 4.022462368011475, + "ce_orig": 0.6899422407150269, + "epoch": 0.8170249478754763, + "kl_loss": 0.053338780999183655, + "loss_ib": 0.000935634074267, + "step": 2841 + }, + { + "ce_ib": 4.090367317199707, + "ce_orig": 0.8077825307846069, + "epoch": 0.8173125314544539, + "kl_loss": 0.08248399198055267, + "loss_ib": 0.0012338765664026141, + "step": 2842 + }, + { + "ce_ib": 5.866304397583008, + "ce_orig": 1.0542597770690918, + "epoch": 0.8173125314544539, + "kl_loss": 0.07373255491256714, + "loss_ib": 0.0013239559484645724, + "step": 2842 + }, + { + "ce_ib": 3.987138509750366, + "ce_orig": 1.0943986177444458, + "epoch": 0.8173125314544539, + "kl_loss": 0.07081608474254608, + "loss_ib": 0.0011068746680393815, + "step": 2842 + }, + { + "ce_ib": 2.682509422302246, + "ce_orig": 0.5869259834289551, + "epoch": 0.8173125314544539, + "kl_loss": 0.09678662568330765, + "loss_ib": 0.0012361172121018171, + "step": 2842 + }, + { + "ce_ib": 4.4676737785339355, + "ce_orig": 0.8830022811889648, + "epoch": 0.8176001150334316, + "kl_loss": 0.0752459168434143, + "loss_ib": 0.0011992264771834016, + "step": 2843 + }, + { + "ce_ib": 4.019378662109375, + "ce_orig": 1.1568957567214966, + "epoch": 0.8176001150334316, + "kl_loss": 0.059932731091976166, + "loss_ib": 0.0010012651327997446, + "step": 2843 + }, + { + "ce_ib": 3.3570337295532227, + "ce_orig": 0.8494413495063782, + "epoch": 0.8176001150334316, + "kl_loss": 0.0458628311753273, + "loss_ib": 0.0007943317177705467, + "step": 2843 + }, + { + "ce_ib": 2.1119935512542725, + "ce_orig": 0.5490738749504089, + "epoch": 0.8176001150334316, + "kl_loss": 0.02598380669951439, + "loss_ib": 0.000471037405077368, + "step": 2843 + }, + { + "ce_ib": 2.3603546619415283, + "ce_orig": 0.4603574872016907, + "epoch": 0.8178876986124093, + "kl_loss": 0.07464314997196198, + "loss_ib": 0.0009824669687077403, + "step": 2844 + }, + { + "ce_ib": 4.1058759689331055, + "ce_orig": 1.0971039533615112, + "epoch": 0.8178876986124093, + "kl_loss": 0.06196746230125427, + "loss_ib": 0.001030262210406363, + "step": 2844 + }, + { + "ce_ib": 3.0561747550964355, + "ce_orig": 0.4322145879268646, + "epoch": 0.8178876986124093, + "kl_loss": 0.05511259660124779, + "loss_ib": 0.0008567434269934893, + "step": 2844 + }, + { + "ce_ib": 2.776458740234375, + "ce_orig": 0.757347583770752, + "epoch": 0.8178876986124093, + "kl_loss": 0.0506574809551239, + "loss_ib": 0.0007842206978239119, + "step": 2844 + }, + { + "epoch": 0.8181752821913869, + "grad_norm": 0.10929224640130997, + "learning_rate": 4.2705086977165086e-05, + "loss": 0.8542, + "step": 2845 + }, + { + "ce_ib": 5.060543060302734, + "ce_orig": 1.2177118062973022, + "epoch": 0.8181752821913869, + "kl_loss": 0.07473769783973694, + "loss_ib": 0.0012534313136711717, + "step": 2845 + }, + { + "ce_ib": 4.225248336791992, + "ce_orig": 0.9742667078971863, + "epoch": 0.8181752821913869, + "kl_loss": 0.07058560103178024, + "loss_ib": 0.0011283807689324021, + "step": 2845 + }, + { + "ce_ib": 3.344569683074951, + "ce_orig": 0.6556500196456909, + "epoch": 0.8181752821913869, + "kl_loss": 0.03607427328824997, + "loss_ib": 0.0006951996474526823, + "step": 2845 + }, + { + "ce_ib": 3.5646326541900635, + "ce_orig": 0.7463329434394836, + "epoch": 0.8181752821913869, + "kl_loss": 0.06002812087535858, + "loss_ib": 0.0009567444794811308, + "step": 2845 + }, + { + "ce_ib": 4.578811168670654, + "ce_orig": 0.8977862596511841, + "epoch": 0.8184628657703645, + "kl_loss": 0.04737246409058571, + "loss_ib": 0.0009316056966781616, + "step": 2846 + }, + { + "ce_ib": 5.517589092254639, + "ce_orig": 1.5169930458068848, + "epoch": 0.8184628657703645, + "kl_loss": 0.06249328702688217, + "loss_ib": 0.0011766917305067182, + "step": 2846 + }, + { + "ce_ib": 6.982140064239502, + "ce_orig": 1.5725915431976318, + "epoch": 0.8184628657703645, + "kl_loss": 0.051538046449422836, + "loss_ib": 0.0012135944562032819, + "step": 2846 + }, + { + "ce_ib": 3.3869450092315674, + "ce_orig": 0.7923058271408081, + "epoch": 0.8184628657703645, + "kl_loss": 0.06983457505702972, + "loss_ib": 0.0010370401432737708, + "step": 2846 + }, + { + "ce_ib": 5.7431230545043945, + "ce_orig": 1.2836573123931885, + "epoch": 0.8187504493493422, + "kl_loss": 0.03993940353393555, + "loss_ib": 0.0009737063082866371, + "step": 2847 + }, + { + "ce_ib": 1.7474799156188965, + "ce_orig": 0.41625094413757324, + "epoch": 0.8187504493493422, + "kl_loss": 0.03151099383831024, + "loss_ib": 0.0004898579209111631, + "step": 2847 + }, + { + "ce_ib": 2.4820871353149414, + "ce_orig": 0.6304119825363159, + "epoch": 0.8187504493493422, + "kl_loss": 0.04571249708533287, + "loss_ib": 0.0007053336594253778, + "step": 2847 + }, + { + "ce_ib": 2.6804656982421875, + "ce_orig": 0.8055607676506042, + "epoch": 0.8187504493493422, + "kl_loss": 0.044189587235450745, + "loss_ib": 0.0007099424256011844, + "step": 2847 + }, + { + "ce_ib": 2.1231470108032227, + "ce_orig": 0.28169065713882446, + "epoch": 0.8190380329283198, + "kl_loss": 0.0503682866692543, + "loss_ib": 0.0007159974775277078, + "step": 2848 + }, + { + "ce_ib": 2.9816911220550537, + "ce_orig": 0.518082857131958, + "epoch": 0.8190380329283198, + "kl_loss": 0.07138494402170181, + "loss_ib": 0.0010120185324922204, + "step": 2848 + }, + { + "ce_ib": 5.030839920043945, + "ce_orig": 0.9989057183265686, + "epoch": 0.8190380329283198, + "kl_loss": 0.08469721674919128, + "loss_ib": 0.0013500561472028494, + "step": 2848 + }, + { + "ce_ib": 5.178757190704346, + "ce_orig": 0.846607506275177, + "epoch": 0.8190380329283198, + "kl_loss": 0.06378229707479477, + "loss_ib": 0.0011556986719369888, + "step": 2848 + }, + { + "ce_ib": 6.9627461433410645, + "ce_orig": 1.5485141277313232, + "epoch": 0.8193256165072974, + "kl_loss": 0.0405830442905426, + "loss_ib": 0.0011021050158888102, + "step": 2849 + }, + { + "ce_ib": 3.6069345474243164, + "ce_orig": 0.6174886226654053, + "epoch": 0.8193256165072974, + "kl_loss": 0.04826129227876663, + "loss_ib": 0.0008433063048869371, + "step": 2849 + }, + { + "ce_ib": 3.4872002601623535, + "ce_orig": 0.7897846698760986, + "epoch": 0.8193256165072974, + "kl_loss": 0.03361543267965317, + "loss_ib": 0.000684874365106225, + "step": 2849 + }, + { + "ce_ib": 3.338632822036743, + "ce_orig": 0.7208465933799744, + "epoch": 0.8193256165072974, + "kl_loss": 0.06388945877552032, + "loss_ib": 0.0009727578144520521, + "step": 2849 + }, + { + "epoch": 0.819613200086275, + "grad_norm": 0.10713863372802734, + "learning_rate": 4.267766952966369e-05, + "loss": 0.8396, + "step": 2850 + }, + { + "ce_ib": 4.206454753875732, + "ce_orig": 0.6724959015846252, + "epoch": 0.819613200086275, + "kl_loss": 0.06046919524669647, + "loss_ib": 0.0010253373766317964, + "step": 2850 + }, + { + "ce_ib": 6.00639533996582, + "ce_orig": 1.3912720680236816, + "epoch": 0.819613200086275, + "kl_loss": 0.04793987050652504, + "loss_ib": 0.001080038258805871, + "step": 2850 + }, + { + "ce_ib": 3.3386950492858887, + "ce_orig": 0.8754256963729858, + "epoch": 0.819613200086275, + "kl_loss": 0.04767925292253494, + "loss_ib": 0.0008106620516628027, + "step": 2850 + }, + { + "ce_ib": 2.8472607135772705, + "ce_orig": 0.7955624461174011, + "epoch": 0.819613200086275, + "kl_loss": 0.062486082315444946, + "loss_ib": 0.0009095868445001543, + "step": 2850 + }, + { + "ce_ib": 3.7034518718719482, + "ce_orig": 0.7545110583305359, + "epoch": 0.8199007836652528, + "kl_loss": 0.08130022883415222, + "loss_ib": 0.0011833474272862077, + "step": 2851 + }, + { + "ce_ib": 2.6668648719787598, + "ce_orig": 0.5896424055099487, + "epoch": 0.8199007836652528, + "kl_loss": 0.11364176869392395, + "loss_ib": 0.0014031041646376252, + "step": 2851 + }, + { + "ce_ib": 2.7997548580169678, + "ce_orig": 0.7142096161842346, + "epoch": 0.8199007836652528, + "kl_loss": 0.052398741245269775, + "loss_ib": 0.0008039628737606108, + "step": 2851 + }, + { + "ce_ib": 3.8528859615325928, + "ce_orig": 0.8102023005485535, + "epoch": 0.8199007836652528, + "kl_loss": 0.08177779614925385, + "loss_ib": 0.0012030665529891849, + "step": 2851 + }, + { + "ce_ib": 3.443795680999756, + "ce_orig": 0.6436088681221008, + "epoch": 0.8201883672442304, + "kl_loss": 0.05100645124912262, + "loss_ib": 0.0008544440497644246, + "step": 2852 + }, + { + "ce_ib": 4.657784938812256, + "ce_orig": 0.9319099187850952, + "epoch": 0.8201883672442304, + "kl_loss": 0.04030987247824669, + "loss_ib": 0.000868877163156867, + "step": 2852 + }, + { + "ce_ib": 3.8517494201660156, + "ce_orig": 0.9517545700073242, + "epoch": 0.8201883672442304, + "kl_loss": 0.04873547703027725, + "loss_ib": 0.000872529752086848, + "step": 2852 + }, + { + "ce_ib": 2.9286084175109863, + "ce_orig": 0.6351181864738464, + "epoch": 0.8201883672442304, + "kl_loss": 0.066372349858284, + "loss_ib": 0.0009565843502059579, + "step": 2852 + }, + { + "ce_ib": 3.692204475402832, + "ce_orig": 0.8051603436470032, + "epoch": 0.820475950823208, + "kl_loss": 0.10255910456180573, + "loss_ib": 0.0013948115520179272, + "step": 2853 + }, + { + "ce_ib": 3.350531816482544, + "ce_orig": 0.7835817337036133, + "epoch": 0.820475950823208, + "kl_loss": 0.05923532322049141, + "loss_ib": 0.0009274063631892204, + "step": 2853 + }, + { + "ce_ib": 2.050156831741333, + "ce_orig": 0.6026071310043335, + "epoch": 0.820475950823208, + "kl_loss": 0.035780176520347595, + "loss_ib": 0.0005628174403682351, + "step": 2853 + }, + { + "ce_ib": 5.236144065856934, + "ce_orig": 1.2404242753982544, + "epoch": 0.820475950823208, + "kl_loss": 0.07394623756408691, + "loss_ib": 0.0012630767887458205, + "step": 2853 + }, + { + "ce_ib": 6.4027605056762695, + "ce_orig": 1.488748550415039, + "epoch": 0.8207635344021856, + "kl_loss": 0.05392779782414436, + "loss_ib": 0.001179554034024477, + "step": 2854 + }, + { + "ce_ib": 3.278918981552124, + "ce_orig": 0.4658076763153076, + "epoch": 0.8207635344021856, + "kl_loss": 0.09455463290214539, + "loss_ib": 0.0012734381016343832, + "step": 2854 + }, + { + "ce_ib": 4.74031925201416, + "ce_orig": 0.893263578414917, + "epoch": 0.8207635344021856, + "kl_loss": 0.04573716223239899, + "loss_ib": 0.0009314034832641482, + "step": 2854 + }, + { + "ce_ib": 2.624307155609131, + "ce_orig": 0.48451003432273865, + "epoch": 0.8207635344021856, + "kl_loss": 0.06141785532236099, + "loss_ib": 0.0008766092360019684, + "step": 2854 + }, + { + "epoch": 0.8210511179811633, + "grad_norm": 0.09754510223865509, + "learning_rate": 4.2650209492553316e-05, + "loss": 0.839, + "step": 2855 + }, + { + "ce_ib": 3.815160036087036, + "ce_orig": 0.784087061882019, + "epoch": 0.8210511179811633, + "kl_loss": 0.05779178813099861, + "loss_ib": 0.0009594339062459767, + "step": 2855 + }, + { + "ce_ib": 3.673478126525879, + "ce_orig": 1.012693166732788, + "epoch": 0.8210511179811633, + "kl_loss": 0.05057092010974884, + "loss_ib": 0.0008730569970794022, + "step": 2855 + }, + { + "ce_ib": 4.343874454498291, + "ce_orig": 1.0658926963806152, + "epoch": 0.8210511179811633, + "kl_loss": 0.06122007966041565, + "loss_ib": 0.0010465882951393723, + "step": 2855 + }, + { + "ce_ib": 3.7340030670166016, + "ce_orig": 0.8196126222610474, + "epoch": 0.8210511179811633, + "kl_loss": 0.08916909247636795, + "loss_ib": 0.0012650912394747138, + "step": 2855 + }, + { + "ce_ib": 3.8041467666625977, + "ce_orig": 0.8778063058853149, + "epoch": 0.8213387015601409, + "kl_loss": 0.08133932948112488, + "loss_ib": 0.001193807926028967, + "step": 2856 + }, + { + "ce_ib": 4.231988906860352, + "ce_orig": 1.0542312860488892, + "epoch": 0.8213387015601409, + "kl_loss": 0.03814844787120819, + "loss_ib": 0.0008046833099797368, + "step": 2856 + }, + { + "ce_ib": 3.3711750507354736, + "ce_orig": 0.7397298216819763, + "epoch": 0.8213387015601409, + "kl_loss": 0.06851983070373535, + "loss_ib": 0.0010223157005384564, + "step": 2856 + }, + { + "ce_ib": 4.896501064300537, + "ce_orig": 1.509569525718689, + "epoch": 0.8213387015601409, + "kl_loss": 0.045291077345609665, + "loss_ib": 0.0009425608441233635, + "step": 2856 + }, + { + "ce_ib": 3.365787982940674, + "ce_orig": 0.8227017521858215, + "epoch": 0.8216262851391185, + "kl_loss": 0.06112423911690712, + "loss_ib": 0.0009478211286477745, + "step": 2857 + }, + { + "ce_ib": 2.7506721019744873, + "ce_orig": 0.392389178276062, + "epoch": 0.8216262851391185, + "kl_loss": 0.0481894388794899, + "loss_ib": 0.0007569615263491869, + "step": 2857 + }, + { + "ce_ib": 4.203797817230225, + "ce_orig": 0.8391342163085938, + "epoch": 0.8216262851391185, + "kl_loss": 0.05205219238996506, + "loss_ib": 0.000940901692956686, + "step": 2857 + }, + { + "ce_ib": 2.6402716636657715, + "ce_orig": 0.8186982274055481, + "epoch": 0.8216262851391185, + "kl_loss": 0.04200141131877899, + "loss_ib": 0.0006840412388555706, + "step": 2857 + }, + { + "ce_ib": 2.992237091064453, + "ce_orig": 0.4253234565258026, + "epoch": 0.8219138687180962, + "kl_loss": 0.05100169777870178, + "loss_ib": 0.0008092406205832958, + "step": 2858 + }, + { + "ce_ib": 5.4045305252075195, + "ce_orig": 1.3879927396774292, + "epoch": 0.8219138687180962, + "kl_loss": 0.08222661167383194, + "loss_ib": 0.0013627192238345742, + "step": 2858 + }, + { + "ce_ib": 3.197813034057617, + "ce_orig": 0.7447512149810791, + "epoch": 0.8219138687180962, + "kl_loss": 0.05278605967760086, + "loss_ib": 0.0008476419025100768, + "step": 2858 + }, + { + "ce_ib": 2.8515093326568604, + "ce_orig": 0.6977147459983826, + "epoch": 0.8219138687180962, + "kl_loss": 0.044316649436950684, + "loss_ib": 0.0007283174199983478, + "step": 2858 + }, + { + "ce_ib": 2.5394604206085205, + "ce_orig": 0.5159644484519958, + "epoch": 0.8222014522970739, + "kl_loss": 0.047934457659721375, + "loss_ib": 0.0007332906243391335, + "step": 2859 + }, + { + "ce_ib": 5.550010681152344, + "ce_orig": 1.0674904584884644, + "epoch": 0.8222014522970739, + "kl_loss": 0.08471108973026276, + "loss_ib": 0.0014021119568496943, + "step": 2859 + }, + { + "ce_ib": 5.083916187286377, + "ce_orig": 0.943641185760498, + "epoch": 0.8222014522970739, + "kl_loss": 0.06572627276182175, + "loss_ib": 0.0011656542774289846, + "step": 2859 + }, + { + "ce_ib": 4.878239631652832, + "ce_orig": 0.5580321550369263, + "epoch": 0.8222014522970739, + "kl_loss": 0.06516800820827484, + "loss_ib": 0.001139504020102322, + "step": 2859 + }, + { + "epoch": 0.8224890358760515, + "grad_norm": 0.09669454395771027, + "learning_rate": 4.262270693199158e-05, + "loss": 0.8173, + "step": 2860 + }, + { + "ce_ib": 3.8318183422088623, + "ce_orig": 0.8815252780914307, + "epoch": 0.8224890358760515, + "kl_loss": 0.06716644018888474, + "loss_ib": 0.001054846215993166, + "step": 2860 + }, + { + "ce_ib": 5.938762664794922, + "ce_orig": 1.187029480934143, + "epoch": 0.8224890358760515, + "kl_loss": 0.07753956317901611, + "loss_ib": 0.001369271893054247, + "step": 2860 + }, + { + "ce_ib": 3.844797134399414, + "ce_orig": 0.809782087802887, + "epoch": 0.8224890358760515, + "kl_loss": 0.044847141951322556, + "loss_ib": 0.0008329510455951095, + "step": 2860 + }, + { + "ce_ib": 2.913450002670288, + "ce_orig": 0.8288679718971252, + "epoch": 0.8224890358760515, + "kl_loss": 0.06453634798526764, + "loss_ib": 0.0009367084130644798, + "step": 2860 + }, + { + "ce_ib": 4.294488430023193, + "ce_orig": 0.8744193911552429, + "epoch": 0.8227766194550291, + "kl_loss": 0.05265940725803375, + "loss_ib": 0.0009560428443364799, + "step": 2861 + }, + { + "ce_ib": 4.442530155181885, + "ce_orig": 1.0518079996109009, + "epoch": 0.8227766194550291, + "kl_loss": 0.05968637764453888, + "loss_ib": 0.0010411167750135064, + "step": 2861 + }, + { + "ce_ib": 2.6201717853546143, + "ce_orig": 0.6052139401435852, + "epoch": 0.8227766194550291, + "kl_loss": 0.047609593719244, + "loss_ib": 0.0007381130708381534, + "step": 2861 + }, + { + "ce_ib": 3.3658955097198486, + "ce_orig": 0.8332252502441406, + "epoch": 0.8227766194550291, + "kl_loss": 0.056036002933979034, + "loss_ib": 0.000896949612069875, + "step": 2861 + }, + { + "ce_ib": 6.044628620147705, + "ce_orig": 1.1790955066680908, + "epoch": 0.8230642030340067, + "kl_loss": 0.09164687991142273, + "loss_ib": 0.0015209317207336426, + "step": 2862 + }, + { + "ce_ib": 2.4762749671936035, + "ce_orig": 0.5211288332939148, + "epoch": 0.8230642030340067, + "kl_loss": 0.0676228404045105, + "loss_ib": 0.0009238558704964817, + "step": 2862 + }, + { + "ce_ib": 3.739393949508667, + "ce_orig": 1.0213613510131836, + "epoch": 0.8230642030340067, + "kl_loss": 0.055478084832429886, + "loss_ib": 0.0009287202265113592, + "step": 2862 + }, + { + "ce_ib": 5.887117385864258, + "ce_orig": 1.2299120426177979, + "epoch": 0.8230642030340067, + "kl_loss": 0.06529497355222702, + "loss_ib": 0.001241661375388503, + "step": 2862 + }, + { + "ce_ib": 3.3095269203186035, + "ce_orig": 0.601777195930481, + "epoch": 0.8233517866129844, + "kl_loss": 0.06757734715938568, + "loss_ib": 0.0010067261755466461, + "step": 2863 + }, + { + "ce_ib": 2.542278528213501, + "ce_orig": 0.37801775336265564, + "epoch": 0.8233517866129844, + "kl_loss": 0.05825740844011307, + "loss_ib": 0.0008368019480258226, + "step": 2863 + }, + { + "ce_ib": 2.8804736137390137, + "ce_orig": 0.5961182713508606, + "epoch": 0.8233517866129844, + "kl_loss": 0.07744944840669632, + "loss_ib": 0.0010625418508425355, + "step": 2863 + }, + { + "ce_ib": 4.100618362426758, + "ce_orig": 0.4811749756336212, + "epoch": 0.8233517866129844, + "kl_loss": 0.1581065058708191, + "loss_ib": 0.00199112668633461, + "step": 2863 + }, + { + "ce_ib": 4.464682102203369, + "ce_orig": 1.2351504564285278, + "epoch": 0.823639370191962, + "kl_loss": 0.03830665349960327, + "loss_ib": 0.0008295346633531153, + "step": 2864 + }, + { + "ce_ib": 2.0230438709259033, + "ce_orig": 0.47082212567329407, + "epoch": 0.823639370191962, + "kl_loss": 0.11159265041351318, + "loss_ib": 0.0013182308757677674, + "step": 2864 + }, + { + "ce_ib": 2.0820472240448, + "ce_orig": 0.4096181094646454, + "epoch": 0.823639370191962, + "kl_loss": 0.03842414915561676, + "loss_ib": 0.0005924461875110865, + "step": 2864 + }, + { + "ce_ib": 3.574397563934326, + "ce_orig": 0.710187554359436, + "epoch": 0.823639370191962, + "kl_loss": 0.06443780660629272, + "loss_ib": 0.001001817756332457, + "step": 2864 + }, + { + "epoch": 0.8239269537709397, + "grad_norm": 0.1215810626745224, + "learning_rate": 4.259516191423852e-05, + "loss": 0.8361, + "step": 2865 + }, + { + "ce_ib": 2.7192680835723877, + "ce_orig": 0.47426027059555054, + "epoch": 0.8239269537709397, + "kl_loss": 0.04464519023895264, + "loss_ib": 0.0007183786947280169, + "step": 2865 + }, + { + "ce_ib": 4.354310512542725, + "ce_orig": 1.0846542119979858, + "epoch": 0.8239269537709397, + "kl_loss": 0.060086145997047424, + "loss_ib": 0.0010362924076616764, + "step": 2865 + }, + { + "ce_ib": 5.988035678863525, + "ce_orig": 1.4863221645355225, + "epoch": 0.8239269537709397, + "kl_loss": 0.08034850656986237, + "loss_ib": 0.0014022886753082275, + "step": 2865 + }, + { + "ce_ib": 3.8010692596435547, + "ce_orig": 0.9623076319694519, + "epoch": 0.8239269537709397, + "kl_loss": 0.04437200725078583, + "loss_ib": 0.0008238269365392625, + "step": 2865 + }, + { + "ce_ib": 3.996776580810547, + "ce_orig": 1.0105255842208862, + "epoch": 0.8242145373499173, + "kl_loss": 0.04443513602018356, + "loss_ib": 0.0008440290112048388, + "step": 2866 + }, + { + "ce_ib": 2.367702007293701, + "ce_orig": 0.49510136246681213, + "epoch": 0.8242145373499173, + "kl_loss": 0.05199606344103813, + "loss_ib": 0.0007567307911813259, + "step": 2866 + }, + { + "ce_ib": 2.628385543823242, + "ce_orig": 0.7548450827598572, + "epoch": 0.8242145373499173, + "kl_loss": 0.02789083868265152, + "loss_ib": 0.00054174690740183, + "step": 2866 + }, + { + "ce_ib": 2.947026491165161, + "ce_orig": 0.6825656890869141, + "epoch": 0.8242145373499173, + "kl_loss": 0.07642398774623871, + "loss_ib": 0.0010589425219222903, + "step": 2866 + }, + { + "ce_ib": 5.901373863220215, + "ce_orig": 1.5132733583450317, + "epoch": 0.824502120928895, + "kl_loss": 0.05494812875986099, + "loss_ib": 0.0011396185727789998, + "step": 2867 + }, + { + "ce_ib": 3.5615153312683105, + "ce_orig": 0.9184847474098206, + "epoch": 0.824502120928895, + "kl_loss": 0.03612537682056427, + "loss_ib": 0.0007174052880145609, + "step": 2867 + }, + { + "ce_ib": 3.859060049057007, + "ce_orig": 0.7495130300521851, + "epoch": 0.824502120928895, + "kl_loss": 0.04787198454141617, + "loss_ib": 0.0008646258502267301, + "step": 2867 + }, + { + "ce_ib": 1.7550132274627686, + "ce_orig": 0.596872866153717, + "epoch": 0.824502120928895, + "kl_loss": 0.03881646320223808, + "loss_ib": 0.0005636659334413707, + "step": 2867 + }, + { + "ce_ib": 3.568235158920288, + "ce_orig": 0.7422435283660889, + "epoch": 0.8247897045078726, + "kl_loss": 0.04186958819627762, + "loss_ib": 0.0007755193510092795, + "step": 2868 + }, + { + "ce_ib": 2.615142583847046, + "ce_orig": 0.2765530049800873, + "epoch": 0.8247897045078726, + "kl_loss": 0.07165522873401642, + "loss_ib": 0.0009780664695426822, + "step": 2868 + }, + { + "ce_ib": 4.403372764587402, + "ce_orig": 0.8551969528198242, + "epoch": 0.8247897045078726, + "kl_loss": 0.057033441960811615, + "loss_ib": 0.0010106717236340046, + "step": 2868 + }, + { + "ce_ib": 4.102797508239746, + "ce_orig": 0.9084195494651794, + "epoch": 0.8247897045078726, + "kl_loss": 0.08017656207084656, + "loss_ib": 0.0012120453175157309, + "step": 2868 + }, + { + "ce_ib": 2.9331748485565186, + "ce_orig": 0.5439095497131348, + "epoch": 0.8250772880868502, + "kl_loss": 0.052434660494327545, + "loss_ib": 0.0008176640840247273, + "step": 2869 + }, + { + "ce_ib": 3.3292362689971924, + "ce_orig": 0.8451746702194214, + "epoch": 0.8250772880868502, + "kl_loss": 0.05391588807106018, + "loss_ib": 0.0008720824844203889, + "step": 2869 + }, + { + "ce_ib": 3.452059030532837, + "ce_orig": 0.7232296466827393, + "epoch": 0.8250772880868502, + "kl_loss": 0.05031951144337654, + "loss_ib": 0.0008484010468237102, + "step": 2869 + }, + { + "ce_ib": 2.738264799118042, + "ce_orig": 0.45007041096687317, + "epoch": 0.8250772880868502, + "kl_loss": 0.06931742280721664, + "loss_ib": 0.0009670007275417447, + "step": 2869 + }, + { + "epoch": 0.8253648716658278, + "grad_norm": 0.09895429760217667, + "learning_rate": 4.2567574505656495e-05, + "loss": 0.8681, + "step": 2870 + }, + { + "ce_ib": 5.034543991088867, + "ce_orig": 1.4490875005722046, + "epoch": 0.8253648716658278, + "kl_loss": 0.042065076529979706, + "loss_ib": 0.000924105173908174, + "step": 2870 + }, + { + "ce_ib": 2.906524896621704, + "ce_orig": 0.5861116051673889, + "epoch": 0.8253648716658278, + "kl_loss": 0.048473432660102844, + "loss_ib": 0.0007753868121653795, + "step": 2870 + }, + { + "ce_ib": 2.5684659481048584, + "ce_orig": 0.6346572041511536, + "epoch": 0.8253648716658278, + "kl_loss": 0.029193712398409843, + "loss_ib": 0.00054878368973732, + "step": 2870 + }, + { + "ce_ib": 3.731553316116333, + "ce_orig": 0.8836603760719299, + "epoch": 0.8253648716658278, + "kl_loss": 0.060926295816898346, + "loss_ib": 0.0009824183071032166, + "step": 2870 + }, + { + "ce_ib": 3.946255922317505, + "ce_orig": 0.9045253992080688, + "epoch": 0.8256524552448056, + "kl_loss": 0.06419773399829865, + "loss_ib": 0.0010366028873249888, + "step": 2871 + }, + { + "ce_ib": 1.6765002012252808, + "ce_orig": 0.20466823875904083, + "epoch": 0.8256524552448056, + "kl_loss": 0.09819669276475906, + "loss_ib": 0.001149616902694106, + "step": 2871 + }, + { + "ce_ib": 2.9427032470703125, + "ce_orig": 0.5586912035942078, + "epoch": 0.8256524552448056, + "kl_loss": 0.020740773528814316, + "loss_ib": 0.0005016780341975391, + "step": 2871 + }, + { + "ce_ib": 5.1284589767456055, + "ce_orig": 0.9029795527458191, + "epoch": 0.8256524552448056, + "kl_loss": 0.0871124118566513, + "loss_ib": 0.0013839700259268284, + "step": 2871 + }, + { + "ce_ib": 2.78551983833313, + "ce_orig": 0.5186451077461243, + "epoch": 0.8259400388237832, + "kl_loss": 0.06832247972488403, + "loss_ib": 0.0009617767063900828, + "step": 2872 + }, + { + "ce_ib": 6.341606140136719, + "ce_orig": 1.5382546186447144, + "epoch": 0.8259400388237832, + "kl_loss": 0.059307947754859924, + "loss_ib": 0.0012272399617359042, + "step": 2872 + }, + { + "ce_ib": 4.079300880432129, + "ce_orig": 0.9742735028266907, + "epoch": 0.8259400388237832, + "kl_loss": 0.043510958552360535, + "loss_ib": 0.0008430396555922925, + "step": 2872 + }, + { + "ce_ib": 3.569075584411621, + "ce_orig": 0.6201762557029724, + "epoch": 0.8259400388237832, + "kl_loss": 0.04940737783908844, + "loss_ib": 0.0008509813342243433, + "step": 2872 + }, + { + "ce_ib": 6.130711555480957, + "ce_orig": 1.5882190465927124, + "epoch": 0.8262276224027608, + "kl_loss": 0.06807730346918106, + "loss_ib": 0.0012938440777361393, + "step": 2873 + }, + { + "ce_ib": 3.490363597869873, + "ce_orig": 0.558238685131073, + "epoch": 0.8262276224027608, + "kl_loss": 0.08383380621671677, + "loss_ib": 0.001187374466098845, + "step": 2873 + }, + { + "ce_ib": 2.332235813140869, + "ce_orig": 0.5356996059417725, + "epoch": 0.8262276224027608, + "kl_loss": 0.06865129619836807, + "loss_ib": 0.0009197365143336356, + "step": 2873 + }, + { + "ce_ib": 5.938739776611328, + "ce_orig": 1.4861986637115479, + "epoch": 0.8262276224027608, + "kl_loss": 0.06347882002592087, + "loss_ib": 0.001228662091307342, + "step": 2873 + }, + { + "ce_ib": 4.8307647705078125, + "ce_orig": 0.8478534817695618, + "epoch": 0.8265152059817384, + "kl_loss": 0.05089206248521805, + "loss_ib": 0.0009919970761984587, + "step": 2874 + }, + { + "ce_ib": 3.918139934539795, + "ce_orig": 0.4395225942134857, + "epoch": 0.8265152059817384, + "kl_loss": 0.059513483196496964, + "loss_ib": 0.0009869488421827555, + "step": 2874 + }, + { + "ce_ib": 2.4986560344696045, + "ce_orig": 0.5894647836685181, + "epoch": 0.8265152059817384, + "kl_loss": 0.0825781375169754, + "loss_ib": 0.0010756469564512372, + "step": 2874 + }, + { + "ce_ib": 3.7202835083007812, + "ce_orig": 0.624586820602417, + "epoch": 0.8265152059817384, + "kl_loss": 0.05208849534392357, + "loss_ib": 0.0008929133182391524, + "step": 2874 + }, + { + "epoch": 0.8268027895607161, + "grad_norm": 0.0946708470582962, + "learning_rate": 4.253994477270996e-05, + "loss": 0.8185, + "step": 2875 + }, + { + "ce_ib": 7.3700785636901855, + "ce_orig": 1.68136727809906, + "epoch": 0.8268027895607161, + "kl_loss": 0.084136962890625, + "loss_ib": 0.0015783774433657527, + "step": 2875 + }, + { + "ce_ib": 2.8946566581726074, + "ce_orig": 0.6535291075706482, + "epoch": 0.8268027895607161, + "kl_loss": 0.050266165286302567, + "loss_ib": 0.0007921273354440928, + "step": 2875 + }, + { + "ce_ib": 3.7368898391723633, + "ce_orig": 0.9918766021728516, + "epoch": 0.8268027895607161, + "kl_loss": 0.0369059294462204, + "loss_ib": 0.0007427482050843537, + "step": 2875 + }, + { + "ce_ib": 3.037860155105591, + "ce_orig": 0.4465171694755554, + "epoch": 0.8268027895607161, + "kl_loss": 0.09648104012012482, + "loss_ib": 0.0012685962719842792, + "step": 2875 + }, + { + "ce_ib": 2.9326441287994385, + "ce_orig": 0.49968260526657104, + "epoch": 0.8270903731396937, + "kl_loss": 0.06716080754995346, + "loss_ib": 0.0009648724226281047, + "step": 2876 + }, + { + "ce_ib": 3.2579092979431152, + "ce_orig": 0.6713523268699646, + "epoch": 0.8270903731396937, + "kl_loss": 0.10600875318050385, + "loss_ib": 0.0013858784222975373, + "step": 2876 + }, + { + "ce_ib": 4.724514961242676, + "ce_orig": 1.0677003860473633, + "epoch": 0.8270903731396937, + "kl_loss": 0.06972667574882507, + "loss_ib": 0.0011697182198986411, + "step": 2876 + }, + { + "ce_ib": 5.798907279968262, + "ce_orig": 1.0142945051193237, + "epoch": 0.8270903731396937, + "kl_loss": 0.06526602804660797, + "loss_ib": 0.0012325510615482926, + "step": 2876 + }, + { + "ce_ib": 2.091799736022949, + "ce_orig": 0.5025718212127686, + "epoch": 0.8273779567186713, + "kl_loss": 0.039439067244529724, + "loss_ib": 0.0006035706028342247, + "step": 2877 + }, + { + "ce_ib": 4.801750183105469, + "ce_orig": 1.1898285150527954, + "epoch": 0.8273779567186713, + "kl_loss": 0.04806862026453018, + "loss_ib": 0.0009608612162992358, + "step": 2877 + }, + { + "ce_ib": 2.338022470474243, + "ce_orig": 0.4006619453430176, + "epoch": 0.8273779567186713, + "kl_loss": 0.02525225654244423, + "loss_ib": 0.000486324803205207, + "step": 2877 + }, + { + "ce_ib": 4.42298698425293, + "ce_orig": 0.40989553928375244, + "epoch": 0.8273779567186713, + "kl_loss": 0.19956545531749725, + "loss_ib": 0.0024379531387239695, + "step": 2877 + }, + { + "ce_ib": 2.4714579582214355, + "ce_orig": 0.3736704885959625, + "epoch": 0.827665540297649, + "kl_loss": 0.06282263249158859, + "loss_ib": 0.0008753721485845745, + "step": 2878 + }, + { + "ce_ib": 2.917022943496704, + "ce_orig": 0.8350589871406555, + "epoch": 0.827665540297649, + "kl_loss": 0.029277872294187546, + "loss_ib": 0.0005844809929840267, + "step": 2878 + }, + { + "ce_ib": 2.498126983642578, + "ce_orig": 0.5294021964073181, + "epoch": 0.827665540297649, + "kl_loss": 0.060309141874313354, + "loss_ib": 0.000852904049679637, + "step": 2878 + }, + { + "ce_ib": 3.2760908603668213, + "ce_orig": 0.5328690409660339, + "epoch": 0.827665540297649, + "kl_loss": 0.10695585608482361, + "loss_ib": 0.0013971675653010607, + "step": 2878 + }, + { + "ce_ib": 1.6796883344650269, + "ce_orig": 0.4974634349346161, + "epoch": 0.8279531238766267, + "kl_loss": 0.03701581805944443, + "loss_ib": 0.0005381269729696214, + "step": 2879 + }, + { + "ce_ib": 3.120168924331665, + "ce_orig": 0.6142247915267944, + "epoch": 0.8279531238766267, + "kl_loss": 0.0357881523668766, + "loss_ib": 0.0006698983488604426, + "step": 2879 + }, + { + "ce_ib": 4.628814697265625, + "ce_orig": 0.7744136452674866, + "epoch": 0.8279531238766267, + "kl_loss": 0.08023516833782196, + "loss_ib": 0.0012652331497520208, + "step": 2879 + }, + { + "ce_ib": 5.564671516418457, + "ce_orig": 1.2061574459075928, + "epoch": 0.8279531238766267, + "kl_loss": 0.04395944997668266, + "loss_ib": 0.0009960616007447243, + "step": 2879 + }, + { + "epoch": 0.8282407074556043, + "grad_norm": 0.10050760954618454, + "learning_rate": 4.251227278196536e-05, + "loss": 0.8092, + "step": 2880 + }, + { + "ce_ib": 2.544820785522461, + "ce_orig": 0.5951195359230042, + "epoch": 0.8282407074556043, + "kl_loss": 0.04478111118078232, + "loss_ib": 0.0007022931822575629, + "step": 2880 + }, + { + "ce_ib": 6.2488484382629395, + "ce_orig": 1.606971263885498, + "epoch": 0.8282407074556043, + "kl_loss": 0.06102568656206131, + "loss_ib": 0.0012351416517049074, + "step": 2880 + }, + { + "ce_ib": 4.108672618865967, + "ce_orig": 1.24937105178833, + "epoch": 0.8282407074556043, + "kl_loss": 0.0773942768573761, + "loss_ib": 0.0011848099529743195, + "step": 2880 + }, + { + "ce_ib": 6.127164840698242, + "ce_orig": 1.1701219081878662, + "epoch": 0.8282407074556043, + "kl_loss": 0.0358416773378849, + "loss_ib": 0.0009711332386359572, + "step": 2880 + }, + { + "ce_ib": 3.953540563583374, + "ce_orig": 1.0144283771514893, + "epoch": 0.8285282910345819, + "kl_loss": 0.05551404878497124, + "loss_ib": 0.0009504945483058691, + "step": 2881 + }, + { + "ce_ib": 4.441999912261963, + "ce_orig": 1.003846526145935, + "epoch": 0.8285282910345819, + "kl_loss": 0.09603164345026016, + "loss_ib": 0.0014045163989067078, + "step": 2881 + }, + { + "ce_ib": 2.7008919715881348, + "ce_orig": 0.562048077583313, + "epoch": 0.8285282910345819, + "kl_loss": 0.038542795926332474, + "loss_ib": 0.0006555170984938741, + "step": 2881 + }, + { + "ce_ib": 3.7111034393310547, + "ce_orig": 0.9183014631271362, + "epoch": 0.8285282910345819, + "kl_loss": 0.047896891832351685, + "loss_ib": 0.0008500791736878455, + "step": 2881 + }, + { + "ce_ib": 4.155374050140381, + "ce_orig": 1.0805739164352417, + "epoch": 0.8288158746135595, + "kl_loss": 0.050363991409540176, + "loss_ib": 0.0009191773133352399, + "step": 2882 + }, + { + "ce_ib": 2.861518621444702, + "ce_orig": 0.7223523259162903, + "epoch": 0.8288158746135595, + "kl_loss": 0.05951717123389244, + "loss_ib": 0.0008813235908746719, + "step": 2882 + }, + { + "ce_ib": 3.9029345512390137, + "ce_orig": 0.7900390625, + "epoch": 0.8288158746135595, + "kl_loss": 0.05032516270875931, + "loss_ib": 0.0008935450459830463, + "step": 2882 + }, + { + "ce_ib": 3.157841444015503, + "ce_orig": 0.6504552364349365, + "epoch": 0.8288158746135595, + "kl_loss": 0.06831961870193481, + "loss_ib": 0.0009989802492782474, + "step": 2882 + }, + { + "ce_ib": 4.101802349090576, + "ce_orig": 1.1319085359573364, + "epoch": 0.8291034581925372, + "kl_loss": 0.05689012631773949, + "loss_ib": 0.0009790814947336912, + "step": 2883 + }, + { + "ce_ib": 3.978705644607544, + "ce_orig": 0.9003989100456238, + "epoch": 0.8291034581925372, + "kl_loss": 0.04126172512769699, + "loss_ib": 0.0008104877779260278, + "step": 2883 + }, + { + "ce_ib": 1.459426760673523, + "ce_orig": 0.2558237612247467, + "epoch": 0.8291034581925372, + "kl_loss": 0.10963763296604156, + "loss_ib": 0.0012423188891261816, + "step": 2883 + }, + { + "ce_ib": 3.663538694381714, + "ce_orig": 0.8758653998374939, + "epoch": 0.8291034581925372, + "kl_loss": 0.07826060801744461, + "loss_ib": 0.0011489599710330367, + "step": 2883 + }, + { + "ce_ib": 2.9695160388946533, + "ce_orig": 0.6447189450263977, + "epoch": 0.8293910417715148, + "kl_loss": 0.05991426855325699, + "loss_ib": 0.0008960942504927516, + "step": 2884 + }, + { + "ce_ib": 3.4706718921661377, + "ce_orig": 0.8201225399971008, + "epoch": 0.8293910417715148, + "kl_loss": 0.06931740790605545, + "loss_ib": 0.001040241215378046, + "step": 2884 + }, + { + "ce_ib": 3.897049903869629, + "ce_orig": 0.7116057276725769, + "epoch": 0.8293910417715148, + "kl_loss": 0.07477555423974991, + "loss_ib": 0.001137460465542972, + "step": 2884 + }, + { + "ce_ib": 3.7683517932891846, + "ce_orig": 1.0362043380737305, + "epoch": 0.8293910417715148, + "kl_loss": 0.06848171353340149, + "loss_ib": 0.0010616523213684559, + "step": 2884 + }, + { + "epoch": 0.8296786253504925, + "grad_norm": 0.09367582201957703, + "learning_rate": 4.248455860009094e-05, + "loss": 0.906, + "step": 2885 + }, + { + "ce_ib": 3.87099289894104, + "ce_orig": 0.7778834104537964, + "epoch": 0.8296786253504925, + "kl_loss": 0.08846011012792587, + "loss_ib": 0.0012717003701254725, + "step": 2885 + }, + { + "ce_ib": 3.47615909576416, + "ce_orig": 0.8090024590492249, + "epoch": 0.8296786253504925, + "kl_loss": 0.07003923505544662, + "loss_ib": 0.0010480082128196955, + "step": 2885 + }, + { + "ce_ib": 4.346395492553711, + "ce_orig": 0.9705674052238464, + "epoch": 0.8296786253504925, + "kl_loss": 0.0857279896736145, + "loss_ib": 0.0012919192668050528, + "step": 2885 + }, + { + "ce_ib": 2.939077615737915, + "ce_orig": 0.6332619190216064, + "epoch": 0.8296786253504925, + "kl_loss": 0.06522934138774872, + "loss_ib": 0.0009462011512368917, + "step": 2885 + }, + { + "ce_ib": 5.038772106170654, + "ce_orig": 0.5894158482551575, + "epoch": 0.8299662089294702, + "kl_loss": 0.062402281910181046, + "loss_ib": 0.001127899973653257, + "step": 2886 + }, + { + "ce_ib": 4.919326305389404, + "ce_orig": 1.0048742294311523, + "epoch": 0.8299662089294702, + "kl_loss": 0.06713343411684036, + "loss_ib": 0.0011632669484242797, + "step": 2886 + }, + { + "ce_ib": 4.2108635902404785, + "ce_orig": 0.9998424053192139, + "epoch": 0.8299662089294702, + "kl_loss": 0.07651814818382263, + "loss_ib": 0.0011862678220495582, + "step": 2886 + }, + { + "ce_ib": 4.750165939331055, + "ce_orig": 1.1170908212661743, + "epoch": 0.8299662089294702, + "kl_loss": 0.05979698523879051, + "loss_ib": 0.0010729864006862044, + "step": 2886 + }, + { + "ce_ib": 4.248290538787842, + "ce_orig": 0.8374379873275757, + "epoch": 0.8302537925084478, + "kl_loss": 0.09216742217540741, + "loss_ib": 0.0013465031515806913, + "step": 2887 + }, + { + "ce_ib": 4.208846569061279, + "ce_orig": 1.138475775718689, + "epoch": 0.8302537925084478, + "kl_loss": 0.053019434213638306, + "loss_ib": 0.0009510789532214403, + "step": 2887 + }, + { + "ce_ib": 3.709806203842163, + "ce_orig": 0.8801575303077698, + "epoch": 0.8302537925084478, + "kl_loss": 0.08439406007528305, + "loss_ib": 0.0012149211252108216, + "step": 2887 + }, + { + "ce_ib": 3.662853956222534, + "ce_orig": 0.8746320009231567, + "epoch": 0.8302537925084478, + "kl_loss": 0.06381455808877945, + "loss_ib": 0.0010044309310615063, + "step": 2887 + }, + { + "ce_ib": 4.0986809730529785, + "ce_orig": 0.9273536801338196, + "epoch": 0.8305413760874254, + "kl_loss": 0.03337770327925682, + "loss_ib": 0.0007436451269313693, + "step": 2888 + }, + { + "ce_ib": 3.448355197906494, + "ce_orig": 0.9326812624931335, + "epoch": 0.8305413760874254, + "kl_loss": 0.04150637611746788, + "loss_ib": 0.0007598993252031505, + "step": 2888 + }, + { + "ce_ib": 2.967881917953491, + "ce_orig": 0.5665206909179688, + "epoch": 0.8305413760874254, + "kl_loss": 0.052360184490680695, + "loss_ib": 0.000820389948785305, + "step": 2888 + }, + { + "ce_ib": 4.45109224319458, + "ce_orig": 1.0011415481567383, + "epoch": 0.8305413760874254, + "kl_loss": 0.03198971599340439, + "loss_ib": 0.0007650063489563763, + "step": 2888 + }, + { + "ce_ib": 3.958127498626709, + "ce_orig": 0.8200027346611023, + "epoch": 0.830828959666403, + "kl_loss": 0.049245066940784454, + "loss_ib": 0.0008882633992470801, + "step": 2889 + }, + { + "ce_ib": 4.32711935043335, + "ce_orig": 0.4896586239337921, + "epoch": 0.830828959666403, + "kl_loss": 0.07084789872169495, + "loss_ib": 0.0011411908781155944, + "step": 2889 + }, + { + "ce_ib": 2.9963266849517822, + "ce_orig": 0.6391838192939758, + "epoch": 0.830828959666403, + "kl_loss": 0.07309867441654205, + "loss_ib": 0.001030619372613728, + "step": 2889 + }, + { + "ce_ib": 2.787891387939453, + "ce_orig": 0.41210439801216125, + "epoch": 0.830828959666403, + "kl_loss": 0.08013094961643219, + "loss_ib": 0.0010800985619425774, + "step": 2889 + }, + { + "epoch": 0.8311165432453806, + "grad_norm": 0.09349442273378372, + "learning_rate": 4.24568022938566e-05, + "loss": 0.8825, + "step": 2890 + }, + { + "ce_ib": 2.0410666465759277, + "ce_orig": 0.4219886362552643, + "epoch": 0.8311165432453806, + "kl_loss": 0.040075793862342834, + "loss_ib": 0.0006048645591363311, + "step": 2890 + }, + { + "ce_ib": 2.8590121269226074, + "ce_orig": 0.3916804790496826, + "epoch": 0.8311165432453806, + "kl_loss": 0.08658125251531601, + "loss_ib": 0.001151713659055531, + "step": 2890 + }, + { + "ce_ib": 4.748854160308838, + "ce_orig": 0.971412718296051, + "epoch": 0.8311165432453806, + "kl_loss": 0.0842997133731842, + "loss_ib": 0.0013178825611248612, + "step": 2890 + }, + { + "ce_ib": 2.8528695106506348, + "ce_orig": 0.44106993079185486, + "epoch": 0.8311165432453806, + "kl_loss": 0.11685113608837128, + "loss_ib": 0.0014537982642650604, + "step": 2890 + }, + { + "ce_ib": 4.0870041847229, + "ce_orig": 1.1533517837524414, + "epoch": 0.8314041268243584, + "kl_loss": 0.04964830353856087, + "loss_ib": 0.0009051833767443895, + "step": 2891 + }, + { + "ce_ib": 2.7977731227874756, + "ce_orig": 0.6155841946601868, + "epoch": 0.8314041268243584, + "kl_loss": 0.04368962347507477, + "loss_ib": 0.0007166735595092177, + "step": 2891 + }, + { + "ce_ib": 2.1953372955322266, + "ce_orig": 0.47461292147636414, + "epoch": 0.8314041268243584, + "kl_loss": 0.06326285004615784, + "loss_ib": 0.0008521622512489557, + "step": 2891 + }, + { + "ce_ib": 4.167481422424316, + "ce_orig": 1.046128749847412, + "epoch": 0.8314041268243584, + "kl_loss": 0.042121585458517075, + "loss_ib": 0.0008379639475606382, + "step": 2891 + }, + { + "ce_ib": 2.1103005409240723, + "ce_orig": 0.27401745319366455, + "epoch": 0.831691710403336, + "kl_loss": 0.04715055972337723, + "loss_ib": 0.0006825355812907219, + "step": 2892 + }, + { + "ce_ib": 2.9459714889526367, + "ce_orig": 0.5553699135780334, + "epoch": 0.831691710403336, + "kl_loss": 0.036504633724689484, + "loss_ib": 0.0006596434977836907, + "step": 2892 + }, + { + "ce_ib": 6.688068389892578, + "ce_orig": 1.371295690536499, + "epoch": 0.831691710403336, + "kl_loss": 0.08106476068496704, + "loss_ib": 0.0014794543385505676, + "step": 2892 + }, + { + "ce_ib": 6.318736553192139, + "ce_orig": 1.4647051095962524, + "epoch": 0.831691710403336, + "kl_loss": 0.07012562453746796, + "loss_ib": 0.0013331298250705004, + "step": 2892 + }, + { + "ce_ib": 7.327885150909424, + "ce_orig": 1.5567176342010498, + "epoch": 0.8319792939823136, + "kl_loss": 0.07354054600000381, + "loss_ib": 0.0014681939501315355, + "step": 2893 + }, + { + "ce_ib": 2.907984972000122, + "ce_orig": 0.7426938414573669, + "epoch": 0.8319792939823136, + "kl_loss": 0.05421154946088791, + "loss_ib": 0.0008329139673151076, + "step": 2893 + }, + { + "ce_ib": 4.6287431716918945, + "ce_orig": 1.277734398841858, + "epoch": 0.8319792939823136, + "kl_loss": 0.09267805516719818, + "loss_ib": 0.0013896548189222813, + "step": 2893 + }, + { + "ce_ib": 4.114233016967773, + "ce_orig": 0.9662086963653564, + "epoch": 0.8319792939823136, + "kl_loss": 0.06993988156318665, + "loss_ib": 0.0011108220787718892, + "step": 2893 + }, + { + "ce_ib": 3.972726583480835, + "ce_orig": 1.1314526796340942, + "epoch": 0.8322668775612913, + "kl_loss": 0.06854704022407532, + "loss_ib": 0.001082743052393198, + "step": 2894 + }, + { + "ce_ib": 4.920707702636719, + "ce_orig": 1.128432273864746, + "epoch": 0.8322668775612913, + "kl_loss": 0.10280308127403259, + "loss_ib": 0.0015201015630736947, + "step": 2894 + }, + { + "ce_ib": 3.113340377807617, + "ce_orig": 0.8937186002731323, + "epoch": 0.8322668775612913, + "kl_loss": 0.04568275064229965, + "loss_ib": 0.0007681615534238517, + "step": 2894 + }, + { + "ce_ib": 3.2564074993133545, + "ce_orig": 0.47919172048568726, + "epoch": 0.8322668775612913, + "kl_loss": 0.06894297152757645, + "loss_ib": 0.0010150704765692353, + "step": 2894 + }, + { + "epoch": 0.8325544611402689, + "grad_norm": 0.09455300122499466, + "learning_rate": 4.242900393013373e-05, + "loss": 0.9052, + "step": 2895 + }, + { + "ce_ib": 2.7530319690704346, + "ce_orig": 0.6813746094703674, + "epoch": 0.8325544611402689, + "kl_loss": 0.05206789821386337, + "loss_ib": 0.0007959821377880871, + "step": 2895 + }, + { + "ce_ib": 6.196787357330322, + "ce_orig": 1.645922064781189, + "epoch": 0.8325544611402689, + "kl_loss": 0.07996752113103867, + "loss_ib": 0.001419353880919516, + "step": 2895 + }, + { + "ce_ib": 3.3805294036865234, + "ce_orig": 0.788504958152771, + "epoch": 0.8325544611402689, + "kl_loss": 0.05940394848585129, + "loss_ib": 0.0009320923709310591, + "step": 2895 + }, + { + "ce_ib": 3.1095523834228516, + "ce_orig": 0.7378770709037781, + "epoch": 0.8325544611402689, + "kl_loss": 0.07921691238880157, + "loss_ib": 0.0011031243484467268, + "step": 2895 + }, + { + "ce_ib": 5.004881381988525, + "ce_orig": 1.4502607583999634, + "epoch": 0.8328420447192465, + "kl_loss": 0.0433383509516716, + "loss_ib": 0.0009338716045022011, + "step": 2896 + }, + { + "ce_ib": 3.183093547821045, + "ce_orig": 0.6850222945213318, + "epoch": 0.8328420447192465, + "kl_loss": 0.038522880524396896, + "loss_ib": 0.0007035381277091801, + "step": 2896 + }, + { + "ce_ib": 4.541013717651367, + "ce_orig": 0.5387528538703918, + "epoch": 0.8328420447192465, + "kl_loss": 0.05269104242324829, + "loss_ib": 0.0009810117771849036, + "step": 2896 + }, + { + "ce_ib": 6.458069324493408, + "ce_orig": 1.0590711832046509, + "epoch": 0.8328420447192465, + "kl_loss": 0.07101559638977051, + "loss_ib": 0.0013559628278017044, + "step": 2896 + }, + { + "ce_ib": 4.107298851013184, + "ce_orig": 1.0599713325500488, + "epoch": 0.8331296282982241, + "kl_loss": 0.06429991871118546, + "loss_ib": 0.0010537289781495929, + "step": 2897 + }, + { + "ce_ib": 3.5443804264068604, + "ce_orig": 0.8912143707275391, + "epoch": 0.8331296282982241, + "kl_loss": 0.06006910651922226, + "loss_ib": 0.0009551291004754603, + "step": 2897 + }, + { + "ce_ib": 4.428806781768799, + "ce_orig": 1.070143461227417, + "epoch": 0.8331296282982241, + "kl_loss": 0.055616602301597595, + "loss_ib": 0.0009990467224270105, + "step": 2897 + }, + { + "ce_ib": 3.7166523933410645, + "ce_orig": 0.6743528842926025, + "epoch": 0.8331296282982241, + "kl_loss": 0.06770671904087067, + "loss_ib": 0.0010487324325367808, + "step": 2897 + }, + { + "ce_ib": 2.007737398147583, + "ce_orig": 0.4918227195739746, + "epoch": 0.8334172118772019, + "kl_loss": 0.023019608110189438, + "loss_ib": 0.00043096981244161725, + "step": 2898 + }, + { + "ce_ib": 3.1020896434783936, + "ce_orig": 0.43045514822006226, + "epoch": 0.8334172118772019, + "kl_loss": 0.040560975670814514, + "loss_ib": 0.0007158186635933816, + "step": 2898 + }, + { + "ce_ib": 4.8523430824279785, + "ce_orig": 0.9533727765083313, + "epoch": 0.8334172118772019, + "kl_loss": 0.06493958830833435, + "loss_ib": 0.0011346301762387156, + "step": 2898 + }, + { + "ce_ib": 5.0333147048950195, + "ce_orig": 1.2972581386566162, + "epoch": 0.8334172118772019, + "kl_loss": 0.04996078461408615, + "loss_ib": 0.0010029393015429378, + "step": 2898 + }, + { + "ce_ib": 3.427729606628418, + "ce_orig": 0.8898983597755432, + "epoch": 0.8337047954561795, + "kl_loss": 0.04584003984928131, + "loss_ib": 0.0008011732716113329, + "step": 2899 + }, + { + "ce_ib": 5.031983375549316, + "ce_orig": 0.7481300234794617, + "epoch": 0.8337047954561795, + "kl_loss": 0.0590338334441185, + "loss_ib": 0.001093536615371704, + "step": 2899 + }, + { + "ce_ib": 3.605795383453369, + "ce_orig": 0.9049514532089233, + "epoch": 0.8337047954561795, + "kl_loss": 0.0546158067882061, + "loss_ib": 0.00090673757949844, + "step": 2899 + }, + { + "ce_ib": 4.328200817108154, + "ce_orig": 0.8654047846794128, + "epoch": 0.8337047954561795, + "kl_loss": 0.09142553061246872, + "loss_ib": 0.0013470753328874707, + "step": 2899 + }, + { + "epoch": 0.8339923790351571, + "grad_norm": 0.09823144972324371, + "learning_rate": 4.240116357589502e-05, + "loss": 0.808, + "step": 2900 + }, + { + "ce_ib": 6.798651695251465, + "ce_orig": 1.5198628902435303, + "epoch": 0.8339923790351571, + "kl_loss": 0.05120144411921501, + "loss_ib": 0.0011918796226382256, + "step": 2900 + }, + { + "ce_ib": 3.2406535148620605, + "ce_orig": 0.9191260933876038, + "epoch": 0.8339923790351571, + "kl_loss": 0.06048537790775299, + "loss_ib": 0.0009289191220887005, + "step": 2900 + }, + { + "ce_ib": 4.001488208770752, + "ce_orig": 0.8583823442459106, + "epoch": 0.8339923790351571, + "kl_loss": 0.045866839587688446, + "loss_ib": 0.0008588172495365143, + "step": 2900 + }, + { + "ce_ib": 4.349786758422852, + "ce_orig": 0.9775081276893616, + "epoch": 0.8339923790351571, + "kl_loss": 0.052944932132959366, + "loss_ib": 0.0009644280071370304, + "step": 2900 + }, + { + "ce_ib": 3.6426541805267334, + "ce_orig": 0.8481990098953247, + "epoch": 0.8342799626141347, + "kl_loss": 0.06455530226230621, + "loss_ib": 0.0010098183993250132, + "step": 2901 + }, + { + "ce_ib": 2.0744311809539795, + "ce_orig": 0.4731428027153015, + "epoch": 0.8342799626141347, + "kl_loss": 0.04125893488526344, + "loss_ib": 0.0006200324860401452, + "step": 2901 + }, + { + "ce_ib": 2.893362045288086, + "ce_orig": 0.3996380567550659, + "epoch": 0.8342799626141347, + "kl_loss": 0.05742163211107254, + "loss_ib": 0.0008635525009594858, + "step": 2901 + }, + { + "ce_ib": 3.204561471939087, + "ce_orig": 0.7132078409194946, + "epoch": 0.8342799626141347, + "kl_loss": 0.05201432853937149, + "loss_ib": 0.0008405993576161563, + "step": 2901 + }, + { + "ce_ib": 3.989074945449829, + "ce_orig": 1.0535112619400024, + "epoch": 0.8345675461931124, + "kl_loss": 0.061356090009212494, + "loss_ib": 0.0010124683612957597, + "step": 2902 + }, + { + "ce_ib": 3.912721872329712, + "ce_orig": 0.8146249055862427, + "epoch": 0.8345675461931124, + "kl_loss": 0.06650593876838684, + "loss_ib": 0.0010563315590843558, + "step": 2902 + }, + { + "ce_ib": 6.390257358551025, + "ce_orig": 1.6330136060714722, + "epoch": 0.8345675461931124, + "kl_loss": 0.06464779376983643, + "loss_ib": 0.0012855036184191704, + "step": 2902 + }, + { + "ce_ib": 0.9789436459541321, + "ce_orig": 0.21227765083312988, + "epoch": 0.8345675461931124, + "kl_loss": 0.1384873241186142, + "loss_ib": 0.001482767635025084, + "step": 2902 + }, + { + "ce_ib": 5.069983005523682, + "ce_orig": 0.7868247032165527, + "epoch": 0.83485512977209, + "kl_loss": 0.060480955988168716, + "loss_ib": 0.0011118077673017979, + "step": 2903 + }, + { + "ce_ib": 2.5595428943634033, + "ce_orig": 0.6567975878715515, + "epoch": 0.83485512977209, + "kl_loss": 0.056590884923934937, + "loss_ib": 0.0008218630682677031, + "step": 2903 + }, + { + "ce_ib": 2.152836322784424, + "ce_orig": 0.3821345567703247, + "epoch": 0.83485512977209, + "kl_loss": 0.04898538440465927, + "loss_ib": 0.0007051374414004385, + "step": 2903 + }, + { + "ce_ib": 4.5904436111450195, + "ce_orig": 1.2128514051437378, + "epoch": 0.83485512977209, + "kl_loss": 0.06708469986915588, + "loss_ib": 0.0011298913741484284, + "step": 2903 + }, + { + "ce_ib": 5.028052806854248, + "ce_orig": 0.8949452042579651, + "epoch": 0.8351427133510676, + "kl_loss": 0.05704619362950325, + "loss_ib": 0.001073267194442451, + "step": 2904 + }, + { + "ce_ib": 5.737788200378418, + "ce_orig": 0.9022984504699707, + "epoch": 0.8351427133510676, + "kl_loss": 0.05700869485735893, + "loss_ib": 0.0011438657529652119, + "step": 2904 + }, + { + "ce_ib": 5.257167339324951, + "ce_orig": 1.1154850721359253, + "epoch": 0.8351427133510676, + "kl_loss": 0.05717183277010918, + "loss_ib": 0.0010974350152537227, + "step": 2904 + }, + { + "ce_ib": 3.9311187267303467, + "ce_orig": 1.0351940393447876, + "epoch": 0.8351427133510676, + "kl_loss": 0.05280759930610657, + "loss_ib": 0.000921187805943191, + "step": 2904 + }, + { + "epoch": 0.8354302969300453, + "grad_norm": 0.10196394473314285, + "learning_rate": 4.2373281298214366e-05, + "loss": 0.8219, + "step": 2905 + }, + { + "ce_ib": 3.439544916152954, + "ce_orig": 0.8275643587112427, + "epoch": 0.8354302969300453, + "kl_loss": 0.07300879061222076, + "loss_ib": 0.0010740424040704966, + "step": 2905 + }, + { + "ce_ib": 3.3787500858306885, + "ce_orig": 0.8239868879318237, + "epoch": 0.8354302969300453, + "kl_loss": 0.09609292447566986, + "loss_ib": 0.0012988043017685413, + "step": 2905 + }, + { + "ce_ib": 2.7781143188476562, + "ce_orig": 0.6821938157081604, + "epoch": 0.8354302969300453, + "kl_loss": 0.048771414905786514, + "loss_ib": 0.0007655255612917244, + "step": 2905 + }, + { + "ce_ib": 5.613955497741699, + "ce_orig": 1.3816407918930054, + "epoch": 0.8354302969300453, + "kl_loss": 0.07084432244300842, + "loss_ib": 0.001269838772714138, + "step": 2905 + }, + { + "ce_ib": 2.8031487464904785, + "ce_orig": 0.6314443945884705, + "epoch": 0.835717880509023, + "kl_loss": 0.06366433203220367, + "loss_ib": 0.0009169581462629139, + "step": 2906 + }, + { + "ce_ib": 7.104470729827881, + "ce_orig": 1.9108694791793823, + "epoch": 0.835717880509023, + "kl_loss": 0.05199076235294342, + "loss_ib": 0.0012303546536713839, + "step": 2906 + }, + { + "ce_ib": 4.4708709716796875, + "ce_orig": 0.9250718355178833, + "epoch": 0.835717880509023, + "kl_loss": 0.05760878324508667, + "loss_ib": 0.001023174962028861, + "step": 2906 + }, + { + "ce_ib": 4.373354434967041, + "ce_orig": 0.8252599835395813, + "epoch": 0.835717880509023, + "kl_loss": 0.07066160440444946, + "loss_ib": 0.0011439514346420765, + "step": 2906 + }, + { + "ce_ib": 3.4567172527313232, + "ce_orig": 0.910551130771637, + "epoch": 0.8360054640880006, + "kl_loss": 0.06401136517524719, + "loss_ib": 0.000985785387456417, + "step": 2907 + }, + { + "ce_ib": 2.7786669731140137, + "ce_orig": 0.7744088768959045, + "epoch": 0.8360054640880006, + "kl_loss": 0.04617522656917572, + "loss_ib": 0.0007396189612336457, + "step": 2907 + }, + { + "ce_ib": 4.445835113525391, + "ce_orig": 0.6774250864982605, + "epoch": 0.8360054640880006, + "kl_loss": 0.07396908104419708, + "loss_ib": 0.001184274209663272, + "step": 2907 + }, + { + "ce_ib": 5.795502185821533, + "ce_orig": 1.0472164154052734, + "epoch": 0.8360054640880006, + "kl_loss": 0.06494138389825821, + "loss_ib": 0.0012289639562368393, + "step": 2907 + }, + { + "ce_ib": 3.11003041267395, + "ce_orig": 0.5776458978652954, + "epoch": 0.8362930476669782, + "kl_loss": 0.03140895813703537, + "loss_ib": 0.0006250925944186747, + "step": 2908 + }, + { + "ce_ib": 3.8623692989349365, + "ce_orig": 1.114819884300232, + "epoch": 0.8362930476669782, + "kl_loss": 0.05551283806562424, + "loss_ib": 0.0009413652587682009, + "step": 2908 + }, + { + "ce_ib": 3.6976499557495117, + "ce_orig": 0.12681062519550323, + "epoch": 0.8362930476669782, + "kl_loss": 0.056377965956926346, + "loss_ib": 0.0009335445938631892, + "step": 2908 + }, + { + "ce_ib": 4.016857624053955, + "ce_orig": 0.8154986500740051, + "epoch": 0.8362930476669782, + "kl_loss": 0.07042495161294937, + "loss_ib": 0.0011059351963922381, + "step": 2908 + }, + { + "ce_ib": 4.022881507873535, + "ce_orig": 0.8243684768676758, + "epoch": 0.8365806312459558, + "kl_loss": 0.03989044576883316, + "loss_ib": 0.0008011925965547562, + "step": 2909 + }, + { + "ce_ib": 3.522686243057251, + "ce_orig": 0.7076656222343445, + "epoch": 0.8365806312459558, + "kl_loss": 0.06797508150339127, + "loss_ib": 0.0010320193832740188, + "step": 2909 + }, + { + "ce_ib": 2.6814279556274414, + "ce_orig": 0.5961374640464783, + "epoch": 0.8365806312459558, + "kl_loss": 0.04222884401679039, + "loss_ib": 0.0006904312176629901, + "step": 2909 + }, + { + "ce_ib": 2.577178955078125, + "ce_orig": 0.4782045781612396, + "epoch": 0.8365806312459558, + "kl_loss": 0.07849598675966263, + "loss_ib": 0.0010426777880638838, + "step": 2909 + }, + { + "epoch": 0.8368682148249335, + "grad_norm": 0.10536881536245346, + "learning_rate": 4.234535716426664e-05, + "loss": 0.8583, + "step": 2910 + }, + { + "ce_ib": 2.9921579360961914, + "ce_orig": 0.5505641102790833, + "epoch": 0.8368682148249335, + "kl_loss": 0.07284793257713318, + "loss_ib": 0.0010276950197294354, + "step": 2910 + }, + { + "ce_ib": 4.415637016296387, + "ce_orig": 0.7914740443229675, + "epoch": 0.8368682148249335, + "kl_loss": 0.06415395438671112, + "loss_ib": 0.0010831032413989305, + "step": 2910 + }, + { + "ce_ib": 3.0192973613739014, + "ce_orig": 0.6533714532852173, + "epoch": 0.8368682148249335, + "kl_loss": 0.041510775685310364, + "loss_ib": 0.0007170374738052487, + "step": 2910 + }, + { + "ce_ib": 3.191110134124756, + "ce_orig": 0.7597149014472961, + "epoch": 0.8368682148249335, + "kl_loss": 0.0441950187087059, + "loss_ib": 0.0007610611501149833, + "step": 2910 + }, + { + "ce_ib": 4.689417839050293, + "ce_orig": 1.0476467609405518, + "epoch": 0.8371557984039112, + "kl_loss": 0.031460732221603394, + "loss_ib": 0.0007835490978322923, + "step": 2911 + }, + { + "ce_ib": 4.084675312042236, + "ce_orig": 0.7957685589790344, + "epoch": 0.8371557984039112, + "kl_loss": 0.05532008409500122, + "loss_ib": 0.000961668323725462, + "step": 2911 + }, + { + "ce_ib": 3.145914077758789, + "ce_orig": 0.4564470052719116, + "epoch": 0.8371557984039112, + "kl_loss": 0.09141110628843307, + "loss_ib": 0.001228702487424016, + "step": 2911 + }, + { + "ce_ib": 5.2202839851379395, + "ce_orig": 1.3824926614761353, + "epoch": 0.8371557984039112, + "kl_loss": 0.04272403568029404, + "loss_ib": 0.0009492687531746924, + "step": 2911 + }, + { + "ce_ib": 5.26939058303833, + "ce_orig": 1.4069299697875977, + "epoch": 0.8374433819828888, + "kl_loss": 0.0694953203201294, + "loss_ib": 0.0012218921910971403, + "step": 2912 + }, + { + "ce_ib": 6.383814334869385, + "ce_orig": 1.5099841356277466, + "epoch": 0.8374433819828888, + "kl_loss": 0.0938137024641037, + "loss_ib": 0.0015765184070914984, + "step": 2912 + }, + { + "ce_ib": 6.5075788497924805, + "ce_orig": 1.819623589515686, + "epoch": 0.8374433819828888, + "kl_loss": 0.07735177129507065, + "loss_ib": 0.0014242755714803934, + "step": 2912 + }, + { + "ce_ib": 4.7238969802856445, + "ce_orig": 1.066870927810669, + "epoch": 0.8374433819828888, + "kl_loss": 0.044293008744716644, + "loss_ib": 0.0009153197752311826, + "step": 2912 + }, + { + "ce_ib": 4.473023414611816, + "ce_orig": 1.315665602684021, + "epoch": 0.8377309655618664, + "kl_loss": 0.059712231159210205, + "loss_ib": 0.001044424599967897, + "step": 2913 + }, + { + "ce_ib": 2.220292568206787, + "ce_orig": 0.5857694745063782, + "epoch": 0.8377309655618664, + "kl_loss": 0.20686006546020508, + "loss_ib": 0.002290629781782627, + "step": 2913 + }, + { + "ce_ib": 2.679579973220825, + "ce_orig": 0.7177382111549377, + "epoch": 0.8377309655618664, + "kl_loss": 0.03705231845378876, + "loss_ib": 0.0006384811131283641, + "step": 2913 + }, + { + "ce_ib": 4.962996006011963, + "ce_orig": 0.9590126276016235, + "epoch": 0.8377309655618664, + "kl_loss": 0.09266246110200882, + "loss_ib": 0.0014229242224246264, + "step": 2913 + }, + { + "ce_ib": 4.696095943450928, + "ce_orig": 0.8949576020240784, + "epoch": 0.8380185491408441, + "kl_loss": 0.08538531512022018, + "loss_ib": 0.0013234626967459917, + "step": 2914 + }, + { + "ce_ib": 2.3563880920410156, + "ce_orig": 0.5298552513122559, + "epoch": 0.8380185491408441, + "kl_loss": 0.036534249782562256, + "loss_ib": 0.00060098129324615, + "step": 2914 + }, + { + "ce_ib": 5.98453950881958, + "ce_orig": 1.4689152240753174, + "epoch": 0.8380185491408441, + "kl_loss": 0.06742970645427704, + "loss_ib": 0.0012727510184049606, + "step": 2914 + }, + { + "ce_ib": 4.114254474639893, + "ce_orig": 1.2414382696151733, + "epoch": 0.8380185491408441, + "kl_loss": 0.053418487310409546, + "loss_ib": 0.0009456102852709591, + "step": 2914 + }, + { + "epoch": 0.8383061327198217, + "grad_norm": 0.1142268106341362, + "learning_rate": 4.2317391241327565e-05, + "loss": 0.8763, + "step": 2915 + }, + { + "ce_ib": 3.4716453552246094, + "ce_orig": 0.6059183478355408, + "epoch": 0.8383061327198217, + "kl_loss": 0.048588596284389496, + "loss_ib": 0.0008330504642799497, + "step": 2915 + }, + { + "ce_ib": 3.5815374851226807, + "ce_orig": 1.0404253005981445, + "epoch": 0.8383061327198217, + "kl_loss": 0.05438753962516785, + "loss_ib": 0.0009020291035994887, + "step": 2915 + }, + { + "ce_ib": 3.4178688526153564, + "ce_orig": 0.7195425033569336, + "epoch": 0.8383061327198217, + "kl_loss": 0.056339140981435776, + "loss_ib": 0.0009051783126778901, + "step": 2915 + }, + { + "ce_ib": 3.618431806564331, + "ce_orig": 0.8157820105552673, + "epoch": 0.8383061327198217, + "kl_loss": 0.06512206792831421, + "loss_ib": 0.0010130638256669044, + "step": 2915 + }, + { + "ce_ib": 3.461843967437744, + "ce_orig": 1.0314109325408936, + "epoch": 0.8385937162987993, + "kl_loss": 0.048494406044483185, + "loss_ib": 0.0008311283891089261, + "step": 2916 + }, + { + "ce_ib": 4.81719446182251, + "ce_orig": 1.3695069551467896, + "epoch": 0.8385937162987993, + "kl_loss": 0.06606519222259521, + "loss_ib": 0.0011423713294789195, + "step": 2916 + }, + { + "ce_ib": 5.101425647735596, + "ce_orig": 1.3488664627075195, + "epoch": 0.8385937162987993, + "kl_loss": 0.04663751646876335, + "loss_ib": 0.0009765176801010966, + "step": 2916 + }, + { + "ce_ib": 2.9692060947418213, + "ce_orig": 0.8186429142951965, + "epoch": 0.8385937162987993, + "kl_loss": 0.036508671939373016, + "loss_ib": 0.0006620073108933866, + "step": 2916 + }, + { + "ce_ib": 3.314697027206421, + "ce_orig": 0.6244081258773804, + "epoch": 0.8388812998777769, + "kl_loss": 0.06098748743534088, + "loss_ib": 0.0009413445368409157, + "step": 2917 + }, + { + "ce_ib": 2.3902924060821533, + "ce_orig": 0.33128589391708374, + "epoch": 0.8388812998777769, + "kl_loss": 0.07036848366260529, + "loss_ib": 0.0009427140466868877, + "step": 2917 + }, + { + "ce_ib": 3.802217721939087, + "ce_orig": 0.5944423675537109, + "epoch": 0.8388812998777769, + "kl_loss": 0.05338297039270401, + "loss_ib": 0.0009140514302998781, + "step": 2917 + }, + { + "ce_ib": 4.533663749694824, + "ce_orig": 1.3301105499267578, + "epoch": 0.8388812998777769, + "kl_loss": 0.04825614020228386, + "loss_ib": 0.0009359277319163084, + "step": 2917 + }, + { + "ce_ib": 4.00959587097168, + "ce_orig": 0.74881911277771, + "epoch": 0.8391688834567547, + "kl_loss": 0.06144864857196808, + "loss_ib": 0.001015446032397449, + "step": 2918 + }, + { + "ce_ib": 4.707242488861084, + "ce_orig": 1.1788378953933716, + "epoch": 0.8391688834567547, + "kl_loss": 0.056170254945755005, + "loss_ib": 0.001032426836900413, + "step": 2918 + }, + { + "ce_ib": 4.437960624694824, + "ce_orig": 0.5013977885246277, + "epoch": 0.8391688834567547, + "kl_loss": 0.12938185036182404, + "loss_ib": 0.00173761451151222, + "step": 2918 + }, + { + "ce_ib": 4.076880931854248, + "ce_orig": 1.2103708982467651, + "epoch": 0.8391688834567547, + "kl_loss": 0.05890640616416931, + "loss_ib": 0.0009967521764338017, + "step": 2918 + }, + { + "ce_ib": 2.0914714336395264, + "ce_orig": 0.4595732092857361, + "epoch": 0.8394564670357323, + "kl_loss": 0.039707861840724945, + "loss_ib": 0.0006062257452867925, + "step": 2919 + }, + { + "ce_ib": 3.796091318130493, + "ce_orig": 1.0108205080032349, + "epoch": 0.8394564670357323, + "kl_loss": 0.05312454327940941, + "loss_ib": 0.0009108545491471887, + "step": 2919 + }, + { + "ce_ib": 5.694004058837891, + "ce_orig": 0.8307312726974487, + "epoch": 0.8394564670357323, + "kl_loss": 0.12966902554035187, + "loss_ib": 0.001866090577095747, + "step": 2919 + }, + { + "ce_ib": 6.242962837219238, + "ce_orig": 1.2963905334472656, + "epoch": 0.8394564670357323, + "kl_loss": 0.051252130419015884, + "loss_ib": 0.001136817503720522, + "step": 2919 + }, + { + "epoch": 0.8397440506147099, + "grad_norm": 0.09915119409561157, + "learning_rate": 4.228938359677354e-05, + "loss": 0.8915, + "step": 2920 + }, + { + "ce_ib": 2.842174768447876, + "ce_orig": 0.4010550081729889, + "epoch": 0.8397440506147099, + "kl_loss": 0.02906469628214836, + "loss_ib": 0.0005748644471168518, + "step": 2920 + }, + { + "ce_ib": 2.104522466659546, + "ce_orig": 0.5136969089508057, + "epoch": 0.8397440506147099, + "kl_loss": 0.04925776645541191, + "loss_ib": 0.0007030299166217446, + "step": 2920 + }, + { + "ce_ib": 3.495361089706421, + "ce_orig": 0.8865842819213867, + "epoch": 0.8397440506147099, + "kl_loss": 0.0697888657450676, + "loss_ib": 0.0010474247392266989, + "step": 2920 + }, + { + "ce_ib": 5.152044296264648, + "ce_orig": 1.294539451599121, + "epoch": 0.8397440506147099, + "kl_loss": 0.057831935584545135, + "loss_ib": 0.0010935238096863031, + "step": 2920 + }, + { + "ce_ib": 3.1142947673797607, + "ce_orig": 0.5637631416320801, + "epoch": 0.8400316341936875, + "kl_loss": 0.08995822817087173, + "loss_ib": 0.0012110116658732295, + "step": 2921 + }, + { + "ce_ib": 4.208439826965332, + "ce_orig": 1.094889521598816, + "epoch": 0.8400316341936875, + "kl_loss": 0.05790635570883751, + "loss_ib": 0.000999907497316599, + "step": 2921 + }, + { + "ce_ib": 2.731239080429077, + "ce_orig": 0.7414222955703735, + "epoch": 0.8400316341936875, + "kl_loss": 0.03201393783092499, + "loss_ib": 0.0005932632484473288, + "step": 2921 + }, + { + "ce_ib": 3.372668981552124, + "ce_orig": 0.5469934344291687, + "epoch": 0.8400316341936875, + "kl_loss": 0.08088292181491852, + "loss_ib": 0.0011460961541160941, + "step": 2921 + }, + { + "ce_ib": 4.334124565124512, + "ce_orig": 1.0084483623504639, + "epoch": 0.8403192177726652, + "kl_loss": 0.07582934945821762, + "loss_ib": 0.0011917059309780598, + "step": 2922 + }, + { + "ce_ib": 5.9336934089660645, + "ce_orig": 1.138704776763916, + "epoch": 0.8403192177726652, + "kl_loss": 0.07908177375793457, + "loss_ib": 0.0013841870240867138, + "step": 2922 + }, + { + "ce_ib": 5.579078197479248, + "ce_orig": 0.7403547763824463, + "epoch": 0.8403192177726652, + "kl_loss": 0.0806313082575798, + "loss_ib": 0.0013642209814861417, + "step": 2922 + }, + { + "ce_ib": 4.321743488311768, + "ce_orig": 1.1531833410263062, + "epoch": 0.8403192177726652, + "kl_loss": 0.045153528451919556, + "loss_ib": 0.0008837096393108368, + "step": 2922 + }, + { + "ce_ib": 2.948094367980957, + "ce_orig": 0.6476316452026367, + "epoch": 0.8406068013516428, + "kl_loss": 0.04744439572095871, + "loss_ib": 0.0007692533545196056, + "step": 2923 + }, + { + "ce_ib": 5.022059917449951, + "ce_orig": 0.8071534037590027, + "epoch": 0.8406068013516428, + "kl_loss": 0.06235374137759209, + "loss_ib": 0.001125743379816413, + "step": 2923 + }, + { + "ce_ib": 4.2395501136779785, + "ce_orig": 0.898064374923706, + "epoch": 0.8406068013516428, + "kl_loss": 0.05710224062204361, + "loss_ib": 0.0009949773084372282, + "step": 2923 + }, + { + "ce_ib": 3.767279624938965, + "ce_orig": 0.7671864628791809, + "epoch": 0.8406068013516428, + "kl_loss": 0.03696288913488388, + "loss_ib": 0.000746356847230345, + "step": 2923 + }, + { + "ce_ib": 3.9515581130981445, + "ce_orig": 0.6856030821800232, + "epoch": 0.8408943849306204, + "kl_loss": 0.03341159224510193, + "loss_ib": 0.0007292717345990241, + "step": 2924 + }, + { + "ce_ib": 5.904714107513428, + "ce_orig": 1.3871643543243408, + "epoch": 0.8408943849306204, + "kl_loss": 0.05807124078273773, + "loss_ib": 0.0011711837723851204, + "step": 2924 + }, + { + "ce_ib": 2.9252231121063232, + "ce_orig": 0.7045883536338806, + "epoch": 0.8408943849306204, + "kl_loss": 0.04730147868394852, + "loss_ib": 0.0007655370864085853, + "step": 2924 + }, + { + "ce_ib": 3.3842709064483643, + "ce_orig": 0.8251820206642151, + "epoch": 0.8408943849306204, + "kl_loss": 0.04921417310833931, + "loss_ib": 0.000830568780656904, + "step": 2924 + }, + { + "epoch": 0.8411819685095981, + "grad_norm": 0.14080312848091125, + "learning_rate": 4.226133429808148e-05, + "loss": 0.8708, + "step": 2925 + }, + { + "ce_ib": 2.862941265106201, + "ce_orig": 0.5309591293334961, + "epoch": 0.8411819685095981, + "kl_loss": 0.06979869306087494, + "loss_ib": 0.0009842810686677694, + "step": 2925 + }, + { + "ce_ib": 2.0263092517852783, + "ce_orig": 0.3916867971420288, + "epoch": 0.8411819685095981, + "kl_loss": 0.07313045859336853, + "loss_ib": 0.0009339354583062232, + "step": 2925 + }, + { + "ce_ib": 3.3768787384033203, + "ce_orig": 0.8291341662406921, + "epoch": 0.8411819685095981, + "kl_loss": 0.05476975813508034, + "loss_ib": 0.0008853853796608746, + "step": 2925 + }, + { + "ce_ib": 2.3749330043792725, + "ce_orig": 0.549461841583252, + "epoch": 0.8411819685095981, + "kl_loss": 0.051989905536174774, + "loss_ib": 0.0007573923212476075, + "step": 2925 + }, + { + "ce_ib": 2.053882360458374, + "ce_orig": 0.445073664188385, + "epoch": 0.8414695520885758, + "kl_loss": 0.03510299324989319, + "loss_ib": 0.0005564181483350694, + "step": 2926 + }, + { + "ce_ib": 4.620932102203369, + "ce_orig": 1.221630573272705, + "epoch": 0.8414695520885758, + "kl_loss": 0.05109596252441406, + "loss_ib": 0.0009730528108775616, + "step": 2926 + }, + { + "ce_ib": 3.9728689193725586, + "ce_orig": 0.5583850145339966, + "epoch": 0.8414695520885758, + "kl_loss": 0.0770334005355835, + "loss_ib": 0.001167620881460607, + "step": 2926 + }, + { + "ce_ib": 5.3914313316345215, + "ce_orig": 1.119436264038086, + "epoch": 0.8414695520885758, + "kl_loss": 0.09179956465959549, + "loss_ib": 0.0014571388019248843, + "step": 2926 + }, + { + "ce_ib": 2.774606704711914, + "ce_orig": 0.7325511574745178, + "epoch": 0.8417571356675534, + "kl_loss": 0.05796629935503006, + "loss_ib": 0.000857123639434576, + "step": 2927 + }, + { + "ce_ib": 5.272850513458252, + "ce_orig": 1.3447160720825195, + "epoch": 0.8417571356675534, + "kl_loss": 0.05968185141682625, + "loss_ib": 0.001124103437177837, + "step": 2927 + }, + { + "ce_ib": 2.3899238109588623, + "ce_orig": 0.6992630362510681, + "epoch": 0.8417571356675534, + "kl_loss": 0.030682628974318504, + "loss_ib": 0.0005458186496980488, + "step": 2927 + }, + { + "ce_ib": 3.7688419818878174, + "ce_orig": 0.6184819936752319, + "epoch": 0.8417571356675534, + "kl_loss": 0.08603186160326004, + "loss_ib": 0.0012372027849778533, + "step": 2927 + }, + { + "ce_ib": 3.9052789211273193, + "ce_orig": 0.9473676681518555, + "epoch": 0.842044719246531, + "kl_loss": 0.06344230473041534, + "loss_ib": 0.001024950877763331, + "step": 2928 + }, + { + "ce_ib": 3.750483751296997, + "ce_orig": 1.0527970790863037, + "epoch": 0.842044719246531, + "kl_loss": 0.07844418287277222, + "loss_ib": 0.0011594902025535703, + "step": 2928 + }, + { + "ce_ib": 3.4699227809906006, + "ce_orig": 0.8241795301437378, + "epoch": 0.842044719246531, + "kl_loss": 0.05488704890012741, + "loss_ib": 0.0008958627004176378, + "step": 2928 + }, + { + "ce_ib": 5.025858402252197, + "ce_orig": 1.040860652923584, + "epoch": 0.842044719246531, + "kl_loss": 0.060781627893447876, + "loss_ib": 0.0011104020522907376, + "step": 2928 + }, + { + "ce_ib": 2.2497482299804688, + "ce_orig": 0.447378933429718, + "epoch": 0.8423323028255086, + "kl_loss": 0.04303325340151787, + "loss_ib": 0.0006553073180839419, + "step": 2929 + }, + { + "ce_ib": 3.1611087322235107, + "ce_orig": 0.6193906664848328, + "epoch": 0.8423323028255086, + "kl_loss": 0.040428876876831055, + "loss_ib": 0.0007203996065072715, + "step": 2929 + }, + { + "ce_ib": 6.746562480926514, + "ce_orig": 1.7502481937408447, + "epoch": 0.8423323028255086, + "kl_loss": 0.059551794081926346, + "loss_ib": 0.0012701741652563214, + "step": 2929 + }, + { + "ce_ib": 2.4214210510253906, + "ce_orig": 0.521824061870575, + "epoch": 0.8423323028255086, + "kl_loss": 0.05286455899477005, + "loss_ib": 0.0007707876502536237, + "step": 2929 + }, + { + "epoch": 0.8426198864044863, + "grad_norm": 0.111411914229393, + "learning_rate": 4.223324341282865e-05, + "loss": 0.8581, + "step": 2930 + }, + { + "ce_ib": 2.2127158641815186, + "ce_orig": 0.6374592185020447, + "epoch": 0.8426198864044863, + "kl_loss": 0.028145547956228256, + "loss_ib": 0.0005027270526625216, + "step": 2930 + }, + { + "ce_ib": 2.418701410293579, + "ce_orig": 0.693716824054718, + "epoch": 0.8426198864044863, + "kl_loss": 0.04127029702067375, + "loss_ib": 0.0006545731448568404, + "step": 2930 + }, + { + "ce_ib": 2.9907021522521973, + "ce_orig": 0.653064489364624, + "epoch": 0.8426198864044863, + "kl_loss": 0.07618637382984161, + "loss_ib": 0.0010609339224174619, + "step": 2930 + }, + { + "ce_ib": 3.7353458404541016, + "ce_orig": 0.6470984816551208, + "epoch": 0.8426198864044863, + "kl_loss": 0.07629960775375366, + "loss_ib": 0.0011365306563675404, + "step": 2930 + }, + { + "ce_ib": 3.9909307956695557, + "ce_orig": 1.3182817697525024, + "epoch": 0.842907469983464, + "kl_loss": 0.03692757338285446, + "loss_ib": 0.0007683688309043646, + "step": 2931 + }, + { + "ce_ib": 3.29107666015625, + "ce_orig": 0.46591493487358093, + "epoch": 0.842907469983464, + "kl_loss": 0.06337740272283554, + "loss_ib": 0.0009628816624172032, + "step": 2931 + }, + { + "ce_ib": 2.985159397125244, + "ce_orig": 0.6118798851966858, + "epoch": 0.842907469983464, + "kl_loss": 0.06115040183067322, + "loss_ib": 0.0009100199094973505, + "step": 2931 + }, + { + "ce_ib": 4.216599941253662, + "ce_orig": 0.8756731748580933, + "epoch": 0.842907469983464, + "kl_loss": 0.07036427408456802, + "loss_ib": 0.001125302747823298, + "step": 2931 + }, + { + "ce_ib": 3.3854100704193115, + "ce_orig": 0.7336314916610718, + "epoch": 0.8431950535624416, + "kl_loss": 0.04952756315469742, + "loss_ib": 0.0008338166517205536, + "step": 2932 + }, + { + "ce_ib": 2.5369784832000732, + "ce_orig": 0.6294693946838379, + "epoch": 0.8431950535624416, + "kl_loss": 0.05221167951822281, + "loss_ib": 0.0007758146384730935, + "step": 2932 + }, + { + "ce_ib": 5.087923049926758, + "ce_orig": 0.9686965942382812, + "epoch": 0.8431950535624416, + "kl_loss": 0.10485928505659103, + "loss_ib": 0.0015573851997032762, + "step": 2932 + }, + { + "ce_ib": 3.277897834777832, + "ce_orig": 0.474202424287796, + "epoch": 0.8431950535624416, + "kl_loss": 0.03145468980073929, + "loss_ib": 0.0006423366721719503, + "step": 2932 + }, + { + "ce_ib": 4.202235221862793, + "ce_orig": 0.6217940449714661, + "epoch": 0.8434826371414192, + "kl_loss": 0.0710381492972374, + "loss_ib": 0.0011306050000712276, + "step": 2933 + }, + { + "ce_ib": 3.0814950466156006, + "ce_orig": 0.8327419757843018, + "epoch": 0.8434826371414192, + "kl_loss": 0.0432833731174469, + "loss_ib": 0.0007409831741824746, + "step": 2933 + }, + { + "ce_ib": 2.2718403339385986, + "ce_orig": 0.5754038691520691, + "epoch": 0.8434826371414192, + "kl_loss": 0.03650811314582825, + "loss_ib": 0.0005922651616856456, + "step": 2933 + }, + { + "ce_ib": 3.5703368186950684, + "ce_orig": 0.7474686503410339, + "epoch": 0.8434826371414192, + "kl_loss": 0.07642567157745361, + "loss_ib": 0.0011212903773412108, + "step": 2933 + }, + { + "ce_ib": 0.8698338866233826, + "ce_orig": 0.09080909192562103, + "epoch": 0.8437702207203969, + "kl_loss": 0.12425372749567032, + "loss_ib": 0.0013295206008479, + "step": 2934 + }, + { + "ce_ib": 2.870135545730591, + "ce_orig": 0.5390529036521912, + "epoch": 0.8437702207203969, + "kl_loss": 0.03964249789714813, + "loss_ib": 0.0006834385567344725, + "step": 2934 + }, + { + "ce_ib": 5.032948970794678, + "ce_orig": 1.4270052909851074, + "epoch": 0.8437702207203969, + "kl_loss": 0.05617234483361244, + "loss_ib": 0.0010650183539837599, + "step": 2934 + }, + { + "ce_ib": 3.677358627319336, + "ce_orig": 0.8301585912704468, + "epoch": 0.8437702207203969, + "kl_loss": 0.06013484299182892, + "loss_ib": 0.0009690842707641423, + "step": 2934 + }, + { + "epoch": 0.8440578042993745, + "grad_norm": 0.09168057143688202, + "learning_rate": 4.220511100869251e-05, + "loss": 0.8104, + "step": 2935 + }, + { + "ce_ib": 5.151701927185059, + "ce_orig": 1.3690011501312256, + "epoch": 0.8440578042993745, + "kl_loss": 0.05919463932514191, + "loss_ib": 0.0011071165790781379, + "step": 2935 + }, + { + "ce_ib": 4.207757949829102, + "ce_orig": 0.9936772584915161, + "epoch": 0.8440578042993745, + "kl_loss": 0.10788820683956146, + "loss_ib": 0.0014996578684076667, + "step": 2935 + }, + { + "ce_ib": 4.071832180023193, + "ce_orig": 1.2877119779586792, + "epoch": 0.8440578042993745, + "kl_loss": 0.05162578821182251, + "loss_ib": 0.0009234410244971514, + "step": 2935 + }, + { + "ce_ib": 3.2945141792297363, + "ce_orig": 0.7863078117370605, + "epoch": 0.8440578042993745, + "kl_loss": 0.05654609203338623, + "loss_ib": 0.0008949122857302427, + "step": 2935 + }, + { + "ce_ib": 3.6657650470733643, + "ce_orig": 0.5080509781837463, + "epoch": 0.8443453878783521, + "kl_loss": 0.07356306910514832, + "loss_ib": 0.0011022071121260524, + "step": 2936 + }, + { + "ce_ib": 5.273009777069092, + "ce_orig": 1.2068946361541748, + "epoch": 0.8443453878783521, + "kl_loss": 0.042984895408153534, + "loss_ib": 0.0009571498958393931, + "step": 2936 + }, + { + "ce_ib": 3.914961814880371, + "ce_orig": 0.6243366003036499, + "epoch": 0.8443453878783521, + "kl_loss": 0.05915416032075882, + "loss_ib": 0.0009830377530306578, + "step": 2936 + }, + { + "ce_ib": 4.47145414352417, + "ce_orig": 0.9324422478675842, + "epoch": 0.8443453878783521, + "kl_loss": 0.06878205388784409, + "loss_ib": 0.0011349659180268645, + "step": 2936 + }, + { + "ce_ib": 2.6241965293884277, + "ce_orig": 0.7666677236557007, + "epoch": 0.8446329714573297, + "kl_loss": 0.029653461650013924, + "loss_ib": 0.0005589542561210692, + "step": 2937 + }, + { + "ce_ib": 4.08616304397583, + "ce_orig": 0.872751772403717, + "epoch": 0.8446329714573297, + "kl_loss": 0.047979988157749176, + "loss_ib": 0.000888416136149317, + "step": 2937 + }, + { + "ce_ib": 3.8230981826782227, + "ce_orig": 0.741943359375, + "epoch": 0.8446329714573297, + "kl_loss": 0.0468381904065609, + "loss_ib": 0.0008506916929036379, + "step": 2937 + }, + { + "ce_ib": 6.471540451049805, + "ce_orig": 1.5965286493301392, + "epoch": 0.8446329714573297, + "kl_loss": 0.051083676517009735, + "loss_ib": 0.0011579907732084394, + "step": 2937 + }, + { + "ce_ib": 5.5517449378967285, + "ce_orig": 1.4483243227005005, + "epoch": 0.8449205550363075, + "kl_loss": 0.06896507740020752, + "loss_ib": 0.0012448251945897937, + "step": 2938 + }, + { + "ce_ib": 4.613162517547607, + "ce_orig": 0.9052286148071289, + "epoch": 0.8449205550363075, + "kl_loss": 0.05325504019856453, + "loss_ib": 0.0009938665898516774, + "step": 2938 + }, + { + "ce_ib": 3.4651100635528564, + "ce_orig": 0.6631564497947693, + "epoch": 0.8449205550363075, + "kl_loss": 0.05674723535776138, + "loss_ib": 0.0009139833273366094, + "step": 2938 + }, + { + "ce_ib": 2.4221158027648926, + "ce_orig": 0.6704754829406738, + "epoch": 0.8449205550363075, + "kl_loss": 0.04073965921998024, + "loss_ib": 0.0006496081477962434, + "step": 2938 + }, + { + "ce_ib": 2.5486202239990234, + "ce_orig": 0.6273974180221558, + "epoch": 0.8452081386152851, + "kl_loss": 0.03565948083996773, + "loss_ib": 0.0006114568095654249, + "step": 2939 + }, + { + "ce_ib": 2.710446834564209, + "ce_orig": 0.4978472888469696, + "epoch": 0.8452081386152851, + "kl_loss": 0.05504971370100975, + "loss_ib": 0.0008215418201871216, + "step": 2939 + }, + { + "ce_ib": 3.9780678749084473, + "ce_orig": 1.023729681968689, + "epoch": 0.8452081386152851, + "kl_loss": 0.061995506286621094, + "loss_ib": 0.0010177617659792304, + "step": 2939 + }, + { + "ce_ib": 2.68414044380188, + "ce_orig": 0.36201024055480957, + "epoch": 0.8452081386152851, + "kl_loss": 0.05075710266828537, + "loss_ib": 0.0007759850122965872, + "step": 2939 + }, + { + "epoch": 0.8454957221942627, + "grad_norm": 0.1013275757431984, + "learning_rate": 4.217693715345057e-05, + "loss": 0.849, + "step": 2940 + }, + { + "ce_ib": 3.090665578842163, + "ce_orig": 0.844394862651825, + "epoch": 0.8454957221942627, + "kl_loss": 0.03768853098154068, + "loss_ib": 0.0006859518471173942, + "step": 2940 + }, + { + "ce_ib": 5.227171897888184, + "ce_orig": 1.0657258033752441, + "epoch": 0.8454957221942627, + "kl_loss": 0.06397872418165207, + "loss_ib": 0.0011625044280663133, + "step": 2940 + }, + { + "ce_ib": 3.5344738960266113, + "ce_orig": 0.4883110225200653, + "epoch": 0.8454957221942627, + "kl_loss": 0.04174929857254028, + "loss_ib": 0.0007709403289481997, + "step": 2940 + }, + { + "ce_ib": 3.2624611854553223, + "ce_orig": 0.6044927835464478, + "epoch": 0.8454957221942627, + "kl_loss": 0.04796651005744934, + "loss_ib": 0.0008059112005867064, + "step": 2940 + }, + { + "ce_ib": 2.545891523361206, + "ce_orig": 0.5076866745948792, + "epoch": 0.8457833057732403, + "kl_loss": 0.08030631393194199, + "loss_ib": 0.0010576522909104824, + "step": 2941 + }, + { + "ce_ib": 4.787938594818115, + "ce_orig": 1.1910699605941772, + "epoch": 0.8457833057732403, + "kl_loss": 0.06583864241838455, + "loss_ib": 0.0011371802538633347, + "step": 2941 + }, + { + "ce_ib": 3.2480180263519287, + "ce_orig": 0.7783859372138977, + "epoch": 0.8457833057732403, + "kl_loss": 0.038848526775836945, + "loss_ib": 0.0007132870377972722, + "step": 2941 + }, + { + "ce_ib": 4.541635036468506, + "ce_orig": 1.1874545812606812, + "epoch": 0.8457833057732403, + "kl_loss": 0.030993428081274033, + "loss_ib": 0.0007640977273695171, + "step": 2941 + }, + { + "ce_ib": 5.553267002105713, + "ce_orig": 1.1468145847320557, + "epoch": 0.846070889352218, + "kl_loss": 0.07215562462806702, + "loss_ib": 0.0012768828310072422, + "step": 2942 + }, + { + "ce_ib": 3.308574914932251, + "ce_orig": 0.6351811289787292, + "epoch": 0.846070889352218, + "kl_loss": 0.07270658761262894, + "loss_ib": 0.0010579233057796955, + "step": 2942 + }, + { + "ce_ib": 6.142485618591309, + "ce_orig": 1.6075531244277954, + "epoch": 0.846070889352218, + "kl_loss": 0.07967346906661987, + "loss_ib": 0.0014109831536188722, + "step": 2942 + }, + { + "ce_ib": 5.786733150482178, + "ce_orig": 1.509534239768982, + "epoch": 0.846070889352218, + "kl_loss": 0.05164068937301636, + "loss_ib": 0.0010950801661238074, + "step": 2942 + }, + { + "ce_ib": 5.68151330947876, + "ce_orig": 1.414775013923645, + "epoch": 0.8463584729311956, + "kl_loss": 0.10047346353530884, + "loss_ib": 0.0015728858998045325, + "step": 2943 + }, + { + "ce_ib": 5.310988903045654, + "ce_orig": 1.0639182329177856, + "epoch": 0.8463584729311956, + "kl_loss": 0.08511857688426971, + "loss_ib": 0.001382284564897418, + "step": 2943 + }, + { + "ce_ib": 6.6186299324035645, + "ce_orig": 1.805030107498169, + "epoch": 0.8463584729311956, + "kl_loss": 0.05358628183603287, + "loss_ib": 0.0011977257672697306, + "step": 2943 + }, + { + "ce_ib": 2.392406702041626, + "ce_orig": 0.47949880361557007, + "epoch": 0.8463584729311956, + "kl_loss": 0.0912344679236412, + "loss_ib": 0.0011515853693708777, + "step": 2943 + }, + { + "ce_ib": 3.873394250869751, + "ce_orig": 0.817818820476532, + "epoch": 0.8466460565101732, + "kl_loss": 0.03752033784985542, + "loss_ib": 0.0007625427679158747, + "step": 2944 + }, + { + "ce_ib": 6.4872517585754395, + "ce_orig": 0.618857741355896, + "epoch": 0.8466460565101732, + "kl_loss": 0.16978800296783447, + "loss_ib": 0.002346605062484741, + "step": 2944 + }, + { + "ce_ib": 3.1130402088165283, + "ce_orig": 0.7114922404289246, + "epoch": 0.8466460565101732, + "kl_loss": 0.06508779525756836, + "loss_ib": 0.0009621819481253624, + "step": 2944 + }, + { + "ce_ib": 4.226995468139648, + "ce_orig": 1.2291069030761719, + "epoch": 0.8466460565101732, + "kl_loss": 0.024053353816270828, + "loss_ib": 0.0006632331060245633, + "step": 2944 + }, + { + "epoch": 0.846933640089151, + "grad_norm": 0.25389713048934937, + "learning_rate": 4.214872191498017e-05, + "loss": 0.8978, + "step": 2945 + }, + { + "ce_ib": 2.7637410163879395, + "ce_orig": 0.5026226043701172, + "epoch": 0.846933640089151, + "kl_loss": 0.025706758722662926, + "loss_ib": 0.0005334416637197137, + "step": 2945 + }, + { + "ce_ib": 2.433011054992676, + "ce_orig": 0.5669987797737122, + "epoch": 0.846933640089151, + "kl_loss": 0.04500839486718178, + "loss_ib": 0.000693384965416044, + "step": 2945 + }, + { + "ce_ib": 3.8831074237823486, + "ce_orig": 1.002677321434021, + "epoch": 0.846933640089151, + "kl_loss": 0.046944983303546906, + "loss_ib": 0.0008577606058679521, + "step": 2945 + }, + { + "ce_ib": 3.5759620666503906, + "ce_orig": 0.7352712154388428, + "epoch": 0.846933640089151, + "kl_loss": 0.047514550387859344, + "loss_ib": 0.0008327416726388037, + "step": 2945 + }, + { + "ce_ib": 4.2281293869018555, + "ce_orig": 0.591670572757721, + "epoch": 0.8472212236681286, + "kl_loss": 0.0836210772395134, + "loss_ib": 0.001259023672901094, + "step": 2946 + }, + { + "ce_ib": 2.5054333209991455, + "ce_orig": 0.6065483689308167, + "epoch": 0.8472212236681286, + "kl_loss": 0.03418310359120369, + "loss_ib": 0.0005923743592575192, + "step": 2946 + }, + { + "ce_ib": 5.5457282066345215, + "ce_orig": 1.4040706157684326, + "epoch": 0.8472212236681286, + "kl_loss": 0.07498837262392044, + "loss_ib": 0.0013044566148892045, + "step": 2946 + }, + { + "ce_ib": 3.4719932079315186, + "ce_orig": 0.8191347122192383, + "epoch": 0.8472212236681286, + "kl_loss": 0.08628492057323456, + "loss_ib": 0.0012100484455004334, + "step": 2946 + }, + { + "ce_ib": 4.475590705871582, + "ce_orig": 0.8888567090034485, + "epoch": 0.8475088072471062, + "kl_loss": 0.04952527955174446, + "loss_ib": 0.0009428117773495615, + "step": 2947 + }, + { + "ce_ib": 5.9659552574157715, + "ce_orig": 1.308870553970337, + "epoch": 0.8475088072471062, + "kl_loss": 0.05785106122493744, + "loss_ib": 0.0011751061538234353, + "step": 2947 + }, + { + "ce_ib": 4.325484752655029, + "ce_orig": 0.6595272421836853, + "epoch": 0.8475088072471062, + "kl_loss": 0.06295645236968994, + "loss_ib": 0.001062112976796925, + "step": 2947 + }, + { + "ce_ib": 6.908522605895996, + "ce_orig": 1.7729065418243408, + "epoch": 0.8475088072471062, + "kl_loss": 0.08983369171619415, + "loss_ib": 0.0015891891671344638, + "step": 2947 + }, + { + "ce_ib": 3.308590888977051, + "ce_orig": 0.4074186086654663, + "epoch": 0.8477963908260838, + "kl_loss": 0.07233060896396637, + "loss_ib": 0.0010541651863604784, + "step": 2948 + }, + { + "ce_ib": 3.021214008331299, + "ce_orig": 0.7762196660041809, + "epoch": 0.8477963908260838, + "kl_loss": 0.05254391208291054, + "loss_ib": 0.0008275604923255742, + "step": 2948 + }, + { + "ce_ib": 5.3293585777282715, + "ce_orig": 1.4345166683197021, + "epoch": 0.8477963908260838, + "kl_loss": 0.04468872398138046, + "loss_ib": 0.0009798230603337288, + "step": 2948 + }, + { + "ce_ib": 5.334582805633545, + "ce_orig": 1.1377980709075928, + "epoch": 0.8477963908260838, + "kl_loss": 0.060249749571084976, + "loss_ib": 0.0011359556810930371, + "step": 2948 + }, + { + "ce_ib": 2.5757102966308594, + "ce_orig": 0.3865382969379425, + "epoch": 0.8480839744050614, + "kl_loss": 0.07977117598056793, + "loss_ib": 0.001055282773450017, + "step": 2949 + }, + { + "ce_ib": 2.9749109745025635, + "ce_orig": 0.7196968197822571, + "epoch": 0.8480839744050614, + "kl_loss": 0.03888125717639923, + "loss_ib": 0.0006863036542199552, + "step": 2949 + }, + { + "ce_ib": 3.5601694583892822, + "ce_orig": 0.5643253326416016, + "epoch": 0.8480839744050614, + "kl_loss": 0.07121114432811737, + "loss_ib": 0.0010681282728910446, + "step": 2949 + }, + { + "ce_ib": 3.3081977367401123, + "ce_orig": 0.6402242183685303, + "epoch": 0.8480839744050614, + "kl_loss": 0.05569476634263992, + "loss_ib": 0.0008877673535607755, + "step": 2949 + }, + { + "epoch": 0.8483715579840391, + "grad_norm": 0.11194002628326416, + "learning_rate": 4.2120465361258375e-05, + "loss": 0.892, + "step": 2950 + }, + { + "ce_ib": 3.265932321548462, + "ce_orig": 0.5796489119529724, + "epoch": 0.8483715579840391, + "kl_loss": 0.04352051392197609, + "loss_ib": 0.0007617982919327915, + "step": 2950 + }, + { + "ce_ib": 4.153784275054932, + "ce_orig": 0.6563313007354736, + "epoch": 0.8483715579840391, + "kl_loss": 0.10559523850679398, + "loss_ib": 0.0014713307609781623, + "step": 2950 + }, + { + "ce_ib": 1.84724760055542, + "ce_orig": 0.3544124364852905, + "epoch": 0.8483715579840391, + "kl_loss": 0.043684277683496475, + "loss_ib": 0.0006215674802660942, + "step": 2950 + }, + { + "ce_ib": 3.058420419692993, + "ce_orig": 0.6136511564254761, + "epoch": 0.8483715579840391, + "kl_loss": 0.07981614768505096, + "loss_ib": 0.0011040035169571638, + "step": 2950 + }, + { + "ce_ib": 3.997619867324829, + "ce_orig": 1.0241334438323975, + "epoch": 0.8486591415630168, + "kl_loss": 0.05539637804031372, + "loss_ib": 0.0009537257137708366, + "step": 2951 + }, + { + "ce_ib": 4.648843765258789, + "ce_orig": 0.9662114381790161, + "epoch": 0.8486591415630168, + "kl_loss": 0.05804300308227539, + "loss_ib": 0.0010453143622726202, + "step": 2951 + }, + { + "ce_ib": 2.86006236076355, + "ce_orig": 0.7700325846672058, + "epoch": 0.8486591415630168, + "kl_loss": 0.08326022326946259, + "loss_ib": 0.0011186085175722837, + "step": 2951 + }, + { + "ce_ib": 3.8275983333587646, + "ce_orig": 0.724922776222229, + "epoch": 0.8486591415630168, + "kl_loss": 0.04711761325597763, + "loss_ib": 0.00085393589688465, + "step": 2951 + }, + { + "ce_ib": 6.036694526672363, + "ce_orig": 1.5672646760940552, + "epoch": 0.8489467251419944, + "kl_loss": 0.08074253797531128, + "loss_ib": 0.0014110947959125042, + "step": 2952 + }, + { + "ce_ib": 2.674887180328369, + "ce_orig": 0.598597526550293, + "epoch": 0.8489467251419944, + "kl_loss": 0.03049590066075325, + "loss_ib": 0.0005724476650357246, + "step": 2952 + }, + { + "ce_ib": 3.456757068634033, + "ce_orig": 0.4907352030277252, + "epoch": 0.8489467251419944, + "kl_loss": 0.06687906384468079, + "loss_ib": 0.0010144662810489535, + "step": 2952 + }, + { + "ce_ib": 3.2918663024902344, + "ce_orig": 0.7596467137336731, + "epoch": 0.8489467251419944, + "kl_loss": 0.05356539040803909, + "loss_ib": 0.000864840520080179, + "step": 2952 + }, + { + "ce_ib": 3.7473649978637695, + "ce_orig": 0.5305184125900269, + "epoch": 0.849234308720972, + "kl_loss": 0.032913416624069214, + "loss_ib": 0.0007038706098683178, + "step": 2953 + }, + { + "ce_ib": 2.547701597213745, + "ce_orig": 0.5396169424057007, + "epoch": 0.849234308720972, + "kl_loss": 0.11668440699577332, + "loss_ib": 0.0014216142008081079, + "step": 2953 + }, + { + "ce_ib": 2.399402141571045, + "ce_orig": 0.5515289306640625, + "epoch": 0.849234308720972, + "kl_loss": 0.044056154787540436, + "loss_ib": 0.0006805017474107444, + "step": 2953 + }, + { + "ce_ib": 5.880999565124512, + "ce_orig": 1.1155887842178345, + "epoch": 0.849234308720972, + "kl_loss": 0.04106989875435829, + "loss_ib": 0.000998798874206841, + "step": 2953 + }, + { + "ce_ib": 3.459228754043579, + "ce_orig": 0.9035856127738953, + "epoch": 0.8495218922999497, + "kl_loss": 0.05990922078490257, + "loss_ib": 0.000945015053730458, + "step": 2954 + }, + { + "ce_ib": 5.12708854675293, + "ce_orig": 1.0823848247528076, + "epoch": 0.8495218922999497, + "kl_loss": 0.07243385910987854, + "loss_ib": 0.0012370474869385362, + "step": 2954 + }, + { + "ce_ib": 3.2534210681915283, + "ce_orig": 0.6941150426864624, + "epoch": 0.8495218922999497, + "kl_loss": 0.049028344452381134, + "loss_ib": 0.000815625477116555, + "step": 2954 + }, + { + "ce_ib": 2.234178066253662, + "ce_orig": 0.42227107286453247, + "epoch": 0.8495218922999497, + "kl_loss": 0.07674447447061539, + "loss_ib": 0.0009908624924719334, + "step": 2954 + }, + { + "epoch": 0.8498094758789273, + "grad_norm": 0.08862821012735367, + "learning_rate": 4.209216756036178e-05, + "loss": 0.7949, + "step": 2955 + }, + { + "ce_ib": 4.3392157554626465, + "ce_orig": 0.9281389713287354, + "epoch": 0.8498094758789273, + "kl_loss": 0.07683052867650986, + "loss_ib": 0.0012022268492728472, + "step": 2955 + }, + { + "ce_ib": 4.288582801818848, + "ce_orig": 1.2454854249954224, + "epoch": 0.8498094758789273, + "kl_loss": 0.1541144847869873, + "loss_ib": 0.0019700031261891127, + "step": 2955 + }, + { + "ce_ib": 3.4929115772247314, + "ce_orig": 0.9961258769035339, + "epoch": 0.8498094758789273, + "kl_loss": 0.04837345331907272, + "loss_ib": 0.0008330257260240614, + "step": 2955 + }, + { + "ce_ib": 5.174856185913086, + "ce_orig": 0.7758191823959351, + "epoch": 0.8498094758789273, + "kl_loss": 0.0834323838353157, + "loss_ib": 0.0013518094783648849, + "step": 2955 + }, + { + "ce_ib": 6.208956718444824, + "ce_orig": 1.3137578964233398, + "epoch": 0.8500970594579049, + "kl_loss": 0.04407954588532448, + "loss_ib": 0.0010616910876706243, + "step": 2956 + }, + { + "ce_ib": 3.4594852924346924, + "ce_orig": 0.798003077507019, + "epoch": 0.8500970594579049, + "kl_loss": 0.05163630098104477, + "loss_ib": 0.0008623115718364716, + "step": 2956 + }, + { + "ce_ib": 5.554886341094971, + "ce_orig": 1.4781391620635986, + "epoch": 0.8500970594579049, + "kl_loss": 0.08428861200809479, + "loss_ib": 0.0013983746757730842, + "step": 2956 + }, + { + "ce_ib": 1.6653937101364136, + "ce_orig": 0.27930864691734314, + "epoch": 0.8500970594579049, + "kl_loss": 0.14943429827690125, + "loss_ib": 0.0016608823789283633, + "step": 2956 + }, + { + "ce_ib": 3.235880136489868, + "ce_orig": 0.6834813356399536, + "epoch": 0.8503846430368825, + "kl_loss": 0.05377795919775963, + "loss_ib": 0.0008613675599917769, + "step": 2957 + }, + { + "ce_ib": 3.974520444869995, + "ce_orig": 0.7434409856796265, + "epoch": 0.8503846430368825, + "kl_loss": 0.039076946675777435, + "loss_ib": 0.0007882214849814773, + "step": 2957 + }, + { + "ce_ib": 3.2442843914031982, + "ce_orig": 0.6885434985160828, + "epoch": 0.8503846430368825, + "kl_loss": 0.0638309195637703, + "loss_ib": 0.0009627376566641033, + "step": 2957 + }, + { + "ce_ib": 5.845125675201416, + "ce_orig": 1.0894414186477661, + "epoch": 0.8503846430368825, + "kl_loss": 0.09507923573255539, + "loss_ib": 0.001535304938443005, + "step": 2957 + }, + { + "ce_ib": 4.082759380340576, + "ce_orig": 1.0634711980819702, + "epoch": 0.8506722266158603, + "kl_loss": 0.06016490235924721, + "loss_ib": 0.0010099250357598066, + "step": 2958 + }, + { + "ce_ib": 5.669275760650635, + "ce_orig": 0.8056240081787109, + "epoch": 0.8506722266158603, + "kl_loss": 0.05921904742717743, + "loss_ib": 0.0011591180227696896, + "step": 2958 + }, + { + "ce_ib": 4.1027140617370605, + "ce_orig": 0.7992832660675049, + "epoch": 0.8506722266158603, + "kl_loss": 0.08297596871852875, + "loss_ib": 0.0012400310952216387, + "step": 2958 + }, + { + "ce_ib": 3.4613194465637207, + "ce_orig": 1.0713379383087158, + "epoch": 0.8506722266158603, + "kl_loss": 0.05762052536010742, + "loss_ib": 0.0009223371744155884, + "step": 2958 + }, + { + "ce_ib": 4.363404273986816, + "ce_orig": 0.9081714153289795, + "epoch": 0.8509598101948379, + "kl_loss": 0.07948186993598938, + "loss_ib": 0.001231159083545208, + "step": 2959 + }, + { + "ce_ib": 3.0876147747039795, + "ce_orig": 0.8494662642478943, + "epoch": 0.8509598101948379, + "kl_loss": 0.019894149154424667, + "loss_ib": 0.0005077029927633703, + "step": 2959 + }, + { + "ce_ib": 3.0444812774658203, + "ce_orig": 0.8735960721969604, + "epoch": 0.8509598101948379, + "kl_loss": 0.07164876163005829, + "loss_ib": 0.00102093571331352, + "step": 2959 + }, + { + "ce_ib": 3.2394142150878906, + "ce_orig": 0.6865732669830322, + "epoch": 0.8509598101948379, + "kl_loss": 0.05409276485443115, + "loss_ib": 0.0008648690418340266, + "step": 2959 + }, + { + "epoch": 0.8512473937738155, + "grad_norm": 0.11538831144571304, + "learning_rate": 4.206382858046636e-05, + "loss": 0.8445, + "step": 2960 + }, + { + "ce_ib": 2.807856321334839, + "ce_orig": 0.5913465023040771, + "epoch": 0.8512473937738155, + "kl_loss": 0.04581893980503082, + "loss_ib": 0.00073897500988096, + "step": 2960 + }, + { + "ce_ib": 2.8564674854278564, + "ce_orig": 0.591985821723938, + "epoch": 0.8512473937738155, + "kl_loss": 0.044832389801740646, + "loss_ib": 0.0007339706062339246, + "step": 2960 + }, + { + "ce_ib": 6.514830589294434, + "ce_orig": 1.7272155284881592, + "epoch": 0.8512473937738155, + "kl_loss": 0.032740481197834015, + "loss_ib": 0.0009788877796381712, + "step": 2960 + }, + { + "ce_ib": 4.749907970428467, + "ce_orig": 1.0592868328094482, + "epoch": 0.8512473937738155, + "kl_loss": 0.04423731565475464, + "loss_ib": 0.0009173639118671417, + "step": 2960 + }, + { + "ce_ib": 3.4610283374786377, + "ce_orig": 0.8443171977996826, + "epoch": 0.8515349773527932, + "kl_loss": 0.04051249474287033, + "loss_ib": 0.0007512277225032449, + "step": 2961 + }, + { + "ce_ib": 4.751905918121338, + "ce_orig": 1.4247167110443115, + "epoch": 0.8515349773527932, + "kl_loss": 0.031143510714173317, + "loss_ib": 0.0007866256637498736, + "step": 2961 + }, + { + "ce_ib": 3.8570611476898193, + "ce_orig": 0.8542420864105225, + "epoch": 0.8515349773527932, + "kl_loss": 0.08581432700157166, + "loss_ib": 0.0012438494013622403, + "step": 2961 + }, + { + "ce_ib": 2.647407054901123, + "ce_orig": 0.49248361587524414, + "epoch": 0.8515349773527932, + "kl_loss": 0.04661955684423447, + "loss_ib": 0.0007309362408705056, + "step": 2961 + }, + { + "ce_ib": 3.029400110244751, + "ce_orig": 0.928446352481842, + "epoch": 0.8518225609317708, + "kl_loss": 0.056406158953905106, + "loss_ib": 0.0008670015376992524, + "step": 2962 + }, + { + "ce_ib": 6.524372100830078, + "ce_orig": 1.575563907623291, + "epoch": 0.8518225609317708, + "kl_loss": 0.048152435570955276, + "loss_ib": 0.0011339614866301417, + "step": 2962 + }, + { + "ce_ib": 3.8500635623931885, + "ce_orig": 0.7818031311035156, + "epoch": 0.8518225609317708, + "kl_loss": 0.061284761875867844, + "loss_ib": 0.0009978539310395718, + "step": 2962 + }, + { + "ce_ib": 4.236267566680908, + "ce_orig": 0.9592888355255127, + "epoch": 0.8518225609317708, + "kl_loss": 0.03742952272295952, + "loss_ib": 0.0007979220245033503, + "step": 2962 + }, + { + "ce_ib": 4.39677095413208, + "ce_orig": 0.7975402474403381, + "epoch": 0.8521101445107484, + "kl_loss": 0.051756978034973145, + "loss_ib": 0.000957246869802475, + "step": 2963 + }, + { + "ce_ib": 1.8771193027496338, + "ce_orig": 0.5550150275230408, + "epoch": 0.8521101445107484, + "kl_loss": 0.03776685521006584, + "loss_ib": 0.0005653804400935769, + "step": 2963 + }, + { + "ce_ib": 2.6175668239593506, + "ce_orig": 0.2545911967754364, + "epoch": 0.8521101445107484, + "kl_loss": 0.09408847987651825, + "loss_ib": 0.0012026415206491947, + "step": 2963 + }, + { + "ce_ib": 2.0900111198425293, + "ce_orig": 0.6235379576683044, + "epoch": 0.8521101445107484, + "kl_loss": 0.046412393450737, + "loss_ib": 0.0006731250905431807, + "step": 2963 + }, + { + "ce_ib": 4.906187534332275, + "ce_orig": 1.08811354637146, + "epoch": 0.852397728089726, + "kl_loss": 0.06681647896766663, + "loss_ib": 0.0011587835615500808, + "step": 2964 + }, + { + "ce_ib": 5.4685139656066895, + "ce_orig": 1.311179280281067, + "epoch": 0.852397728089726, + "kl_loss": 0.08205035328865051, + "loss_ib": 0.0013673549983650446, + "step": 2964 + }, + { + "ce_ib": 2.5803544521331787, + "ce_orig": 0.6954718828201294, + "epoch": 0.852397728089726, + "kl_loss": 0.038596123456954956, + "loss_ib": 0.0006439966382458806, + "step": 2964 + }, + { + "ce_ib": 2.6242098808288574, + "ce_orig": 0.5764316916465759, + "epoch": 0.852397728089726, + "kl_loss": 0.04842095077037811, + "loss_ib": 0.0007466304814442992, + "step": 2964 + }, + { + "epoch": 0.8526853116687038, + "grad_norm": 0.098207987844944, + "learning_rate": 4.2035448489847284e-05, + "loss": 0.8505, + "step": 2965 + }, + { + "ce_ib": 2.8495442867279053, + "ce_orig": 0.5689408183097839, + "epoch": 0.8526853116687038, + "kl_loss": 0.06855255365371704, + "loss_ib": 0.0009704799740575254, + "step": 2965 + }, + { + "ce_ib": 4.24330472946167, + "ce_orig": 0.7012568116188049, + "epoch": 0.8526853116687038, + "kl_loss": 0.09586138278245926, + "loss_ib": 0.0013829442905262113, + "step": 2965 + }, + { + "ce_ib": 3.9072537422180176, + "ce_orig": 0.4784850776195526, + "epoch": 0.8526853116687038, + "kl_loss": 0.07601208239793777, + "loss_ib": 0.0011508461320772767, + "step": 2965 + }, + { + "ce_ib": 4.705366611480713, + "ce_orig": 1.362687349319458, + "epoch": 0.8526853116687038, + "kl_loss": 0.037728309631347656, + "loss_ib": 0.0008478197269141674, + "step": 2965 + }, + { + "ce_ib": 3.738515853881836, + "ce_orig": 0.7424197793006897, + "epoch": 0.8529728952476814, + "kl_loss": 0.075398288667202, + "loss_ib": 0.0011278344318270683, + "step": 2966 + }, + { + "ce_ib": 2.6155128479003906, + "ce_orig": 0.5991731286048889, + "epoch": 0.8529728952476814, + "kl_loss": 0.048141513019800186, + "loss_ib": 0.0007429663673974574, + "step": 2966 + }, + { + "ce_ib": 2.6103622913360596, + "ce_orig": 0.5266762971878052, + "epoch": 0.8529728952476814, + "kl_loss": 0.033575356006622314, + "loss_ib": 0.0005967897595837712, + "step": 2966 + }, + { + "ce_ib": 4.789224624633789, + "ce_orig": 1.0531913042068481, + "epoch": 0.8529728952476814, + "kl_loss": 0.06938466429710388, + "loss_ib": 0.0011727689998224378, + "step": 2966 + }, + { + "ce_ib": 3.5353176593780518, + "ce_orig": 0.9607828855514526, + "epoch": 0.853260478826659, + "kl_loss": 0.056261539459228516, + "loss_ib": 0.0009161471389234066, + "step": 2967 + }, + { + "ce_ib": 5.887589454650879, + "ce_orig": 1.7351160049438477, + "epoch": 0.853260478826659, + "kl_loss": 0.05536347255110741, + "loss_ib": 0.0011423936812207103, + "step": 2967 + }, + { + "ce_ib": 3.2285923957824707, + "ce_orig": 0.7056406736373901, + "epoch": 0.853260478826659, + "kl_loss": 0.05241445451974869, + "loss_ib": 0.0008470037137158215, + "step": 2967 + }, + { + "ce_ib": 2.0670766830444336, + "ce_orig": 0.5590754747390747, + "epoch": 0.853260478826659, + "kl_loss": 0.033570997416973114, + "loss_ib": 0.0005424175760708749, + "step": 2967 + }, + { + "ce_ib": 3.985860586166382, + "ce_orig": 0.8932667970657349, + "epoch": 0.8535480624056366, + "kl_loss": 0.041446227580308914, + "loss_ib": 0.0008130483329296112, + "step": 2968 + }, + { + "ce_ib": 3.2614409923553467, + "ce_orig": 0.5485915541648865, + "epoch": 0.8535480624056366, + "kl_loss": 0.09826184809207916, + "loss_ib": 0.0013087625848129392, + "step": 2968 + }, + { + "ce_ib": 5.2688798904418945, + "ce_orig": 1.342125654220581, + "epoch": 0.8535480624056366, + "kl_loss": 0.04911983013153076, + "loss_ib": 0.001018086215481162, + "step": 2968 + }, + { + "ce_ib": 4.433938026428223, + "ce_orig": 1.1967135667800903, + "epoch": 0.8535480624056366, + "kl_loss": 0.06138497591018677, + "loss_ib": 0.0010572434403002262, + "step": 2968 + }, + { + "ce_ib": 3.942009687423706, + "ce_orig": 0.574432373046875, + "epoch": 0.8538356459846143, + "kl_loss": 0.08953133225440979, + "loss_ib": 0.0012895142426714301, + "step": 2969 + }, + { + "ce_ib": 6.087357044219971, + "ce_orig": 1.5812889337539673, + "epoch": 0.8538356459846143, + "kl_loss": 0.04247399792075157, + "loss_ib": 0.001033475622534752, + "step": 2969 + }, + { + "ce_ib": 3.6442079544067383, + "ce_orig": 0.669823408126831, + "epoch": 0.8538356459846143, + "kl_loss": 0.07993048429489136, + "loss_ib": 0.0011637256247922778, + "step": 2969 + }, + { + "ce_ib": 4.8772969245910645, + "ce_orig": 0.9184302091598511, + "epoch": 0.8538356459846143, + "kl_loss": 0.04382111877202988, + "loss_ib": 0.0009259408689104021, + "step": 2969 + }, + { + "epoch": 0.8541232295635919, + "grad_norm": 0.09562056511640549, + "learning_rate": 4.200702735687878e-05, + "loss": 0.854, + "step": 2970 + }, + { + "ce_ib": 2.4769906997680664, + "ce_orig": 0.395380437374115, + "epoch": 0.8541232295635919, + "kl_loss": 0.050485312938690186, + "loss_ib": 0.000752552121412009, + "step": 2970 + }, + { + "ce_ib": 3.2593350410461426, + "ce_orig": 0.6721780896186829, + "epoch": 0.8541232295635919, + "kl_loss": 0.04671288654208183, + "loss_ib": 0.0007930623833090067, + "step": 2970 + }, + { + "ce_ib": 3.02301287651062, + "ce_orig": 0.7976857423782349, + "epoch": 0.8541232295635919, + "kl_loss": 0.031508877873420715, + "loss_ib": 0.0006173900328576565, + "step": 2970 + }, + { + "ce_ib": 3.8834869861602783, + "ce_orig": 0.9981798529624939, + "epoch": 0.8541232295635919, + "kl_loss": 0.04733869433403015, + "loss_ib": 0.0008617356070317328, + "step": 2970 + }, + { + "ce_ib": 3.482208728790283, + "ce_orig": 0.8585935831069946, + "epoch": 0.8544108131425696, + "kl_loss": 0.08136071264743805, + "loss_ib": 0.0011618280550464988, + "step": 2971 + }, + { + "ce_ib": 7.402725696563721, + "ce_orig": 1.8620750904083252, + "epoch": 0.8544108131425696, + "kl_loss": 0.07323475182056427, + "loss_ib": 0.0014726200606673956, + "step": 2971 + }, + { + "ce_ib": 3.96440052986145, + "ce_orig": 1.0267194509506226, + "epoch": 0.8544108131425696, + "kl_loss": 0.08282504975795746, + "loss_ib": 0.0012246904661878943, + "step": 2971 + }, + { + "ce_ib": 2.1100666522979736, + "ce_orig": 0.4298453629016876, + "epoch": 0.8544108131425696, + "kl_loss": 0.023413322865962982, + "loss_ib": 0.00044513988541439176, + "step": 2971 + }, + { + "ce_ib": 2.393383502960205, + "ce_orig": 0.5805566310882568, + "epoch": 0.8546983967215472, + "kl_loss": 0.05828618258237839, + "loss_ib": 0.000822200148832053, + "step": 2972 + }, + { + "ce_ib": 3.035754680633545, + "ce_orig": 0.6768638491630554, + "epoch": 0.8546983967215472, + "kl_loss": 0.04707137495279312, + "loss_ib": 0.0007742891903035343, + "step": 2972 + }, + { + "ce_ib": 6.905402660369873, + "ce_orig": 1.7050025463104248, + "epoch": 0.8546983967215472, + "kl_loss": 0.0853126123547554, + "loss_ib": 0.0015436663525179029, + "step": 2972 + }, + { + "ce_ib": 2.4601480960845947, + "ce_orig": 0.2535463273525238, + "epoch": 0.8546983967215472, + "kl_loss": 0.09653669595718384, + "loss_ib": 0.0012113817501813173, + "step": 2972 + }, + { + "ce_ib": 3.8975369930267334, + "ce_orig": 0.6450223326683044, + "epoch": 0.8549859803005249, + "kl_loss": 0.0887761190533638, + "loss_ib": 0.0012775148497894406, + "step": 2973 + }, + { + "ce_ib": 6.236013889312744, + "ce_orig": 1.3768491744995117, + "epoch": 0.8549859803005249, + "kl_loss": 0.11088769882917404, + "loss_ib": 0.0017324783839285374, + "step": 2973 + }, + { + "ce_ib": 2.402275562286377, + "ce_orig": 0.48372501134872437, + "epoch": 0.8549859803005249, + "kl_loss": 0.04599207639694214, + "loss_ib": 0.0007001483463682234, + "step": 2973 + }, + { + "ce_ib": 1.8409382104873657, + "ce_orig": 0.3255995213985443, + "epoch": 0.8549859803005249, + "kl_loss": 0.11528176069259644, + "loss_ib": 0.0013369114603847265, + "step": 2973 + }, + { + "ce_ib": 5.132012844085693, + "ce_orig": 1.3701832294464111, + "epoch": 0.8552735638795025, + "kl_loss": 0.04097094386816025, + "loss_ib": 0.0009229106944985688, + "step": 2974 + }, + { + "ce_ib": 3.086902379989624, + "ce_orig": 0.5642790198326111, + "epoch": 0.8552735638795025, + "kl_loss": 0.07933326810598373, + "loss_ib": 0.0011020228266716003, + "step": 2974 + }, + { + "ce_ib": 3.2838873863220215, + "ce_orig": 0.6821342706680298, + "epoch": 0.8552735638795025, + "kl_loss": 0.09378889948129654, + "loss_ib": 0.001266277744434774, + "step": 2974 + }, + { + "ce_ib": 2.278480291366577, + "ce_orig": 0.6547552347183228, + "epoch": 0.8552735638795025, + "kl_loss": 0.032900337129831314, + "loss_ib": 0.0005568513879552484, + "step": 2974 + }, + { + "epoch": 0.8555611474584801, + "grad_norm": 0.11226686835289001, + "learning_rate": 4.1978565250033964e-05, + "loss": 0.8729, + "step": 2975 + }, + { + "ce_ib": 4.965061187744141, + "ce_orig": 0.9853735566139221, + "epoch": 0.8555611474584801, + "kl_loss": 0.04888869822025299, + "loss_ib": 0.0009853930678218603, + "step": 2975 + }, + { + "ce_ib": 3.957098960876465, + "ce_orig": 1.0093810558319092, + "epoch": 0.8555611474584801, + "kl_loss": 0.05028572306036949, + "loss_ib": 0.0008985671447589993, + "step": 2975 + }, + { + "ce_ib": 3.9446911811828613, + "ce_orig": 0.955518901348114, + "epoch": 0.8555611474584801, + "kl_loss": 0.03882741555571556, + "loss_ib": 0.0007827432127669454, + "step": 2975 + }, + { + "ce_ib": 2.558028221130371, + "ce_orig": 0.5022332668304443, + "epoch": 0.8555611474584801, + "kl_loss": 0.06842446327209473, + "loss_ib": 0.0009400473791174591, + "step": 2975 + }, + { + "ce_ib": 2.81486439704895, + "ce_orig": 0.9415779709815979, + "epoch": 0.8558487310374577, + "kl_loss": 0.04067865386605263, + "loss_ib": 0.0006882729940116405, + "step": 2976 + }, + { + "ce_ib": 7.270084857940674, + "ce_orig": 1.7202659845352173, + "epoch": 0.8558487310374577, + "kl_loss": 0.04663108289241791, + "loss_ib": 0.0011933193309232593, + "step": 2976 + }, + { + "ce_ib": 2.6095457077026367, + "ce_orig": 0.4806115925312042, + "epoch": 0.8558487310374577, + "kl_loss": 0.05756739154458046, + "loss_ib": 0.0008366284891963005, + "step": 2976 + }, + { + "ce_ib": 2.0690882205963135, + "ce_orig": 0.5550634860992432, + "epoch": 0.8558487310374577, + "kl_loss": 0.06188826262950897, + "loss_ib": 0.0008257913868874311, + "step": 2976 + }, + { + "ce_ib": 4.300014019012451, + "ce_orig": 1.1465612649917603, + "epoch": 0.8561363146164354, + "kl_loss": 0.058128535747528076, + "loss_ib": 0.0010112867457792163, + "step": 2977 + }, + { + "ce_ib": 5.185865879058838, + "ce_orig": 1.4953367710113525, + "epoch": 0.8561363146164354, + "kl_loss": 0.060696106404066086, + "loss_ib": 0.0011255475692451, + "step": 2977 + }, + { + "ce_ib": 5.4879255294799805, + "ce_orig": 1.2922239303588867, + "epoch": 0.8561363146164354, + "kl_loss": 0.05923646688461304, + "loss_ib": 0.0011411572340875864, + "step": 2977 + }, + { + "ce_ib": 1.6378813982009888, + "ce_orig": 0.34204819798469543, + "epoch": 0.8561363146164354, + "kl_loss": 0.03672605752944946, + "loss_ib": 0.0005310486885719001, + "step": 2977 + }, + { + "ce_ib": 4.845137119293213, + "ce_orig": 1.0499523878097534, + "epoch": 0.8564238981954131, + "kl_loss": 0.05505267530679703, + "loss_ib": 0.0010350404772907495, + "step": 2978 + }, + { + "ce_ib": 4.907800197601318, + "ce_orig": 1.236713171005249, + "epoch": 0.8564238981954131, + "kl_loss": 0.06720010936260223, + "loss_ib": 0.0011627811472862959, + "step": 2978 + }, + { + "ce_ib": 5.485886573791504, + "ce_orig": 1.65700364112854, + "epoch": 0.8564238981954131, + "kl_loss": 0.06519349664449692, + "loss_ib": 0.0012005235766991973, + "step": 2978 + }, + { + "ce_ib": 2.1911253929138184, + "ce_orig": 0.7102169394493103, + "epoch": 0.8564238981954131, + "kl_loss": 0.03528787940740585, + "loss_ib": 0.0005719913169741631, + "step": 2978 + }, + { + "ce_ib": 3.1291797161102295, + "ce_orig": 0.43634283542633057, + "epoch": 0.8567114817743907, + "kl_loss": 0.04543566703796387, + "loss_ib": 0.0007672746432945132, + "step": 2979 + }, + { + "ce_ib": 4.8860979080200195, + "ce_orig": 1.1160577535629272, + "epoch": 0.8567114817743907, + "kl_loss": 0.08784234523773193, + "loss_ib": 0.001367033226415515, + "step": 2979 + }, + { + "ce_ib": 5.2601423263549805, + "ce_orig": 0.8745582699775696, + "epoch": 0.8567114817743907, + "kl_loss": 0.06172928586602211, + "loss_ib": 0.0011433070758357644, + "step": 2979 + }, + { + "ce_ib": 1.8574233055114746, + "ce_orig": 0.3799116909503937, + "epoch": 0.8567114817743907, + "kl_loss": 0.06622470170259476, + "loss_ib": 0.0008479892858304083, + "step": 2979 + }, + { + "epoch": 0.8569990653533683, + "grad_norm": 0.09703709930181503, + "learning_rate": 4.1950062237884656e-05, + "loss": 0.8639, + "step": 2980 + }, + { + "ce_ib": 4.438849449157715, + "ce_orig": 1.128379464149475, + "epoch": 0.8569990653533683, + "kl_loss": 0.05718132108449936, + "loss_ib": 0.0010156981879845262, + "step": 2980 + }, + { + "ce_ib": 3.0028185844421387, + "ce_orig": 0.47865384817123413, + "epoch": 0.8569990653533683, + "kl_loss": 0.05526410788297653, + "loss_ib": 0.0008529229089617729, + "step": 2980 + }, + { + "ce_ib": 3.5935771465301514, + "ce_orig": 0.8057845234870911, + "epoch": 0.8569990653533683, + "kl_loss": 0.08088572323322296, + "loss_ib": 0.00116821494884789, + "step": 2980 + }, + { + "ce_ib": 2.549107789993286, + "ce_orig": 0.5569929480552673, + "epoch": 0.8569990653533683, + "kl_loss": 0.034598078578710556, + "loss_ib": 0.0006008915370330215, + "step": 2980 + }, + { + "ce_ib": 1.99412202835083, + "ce_orig": 0.47428098320961, + "epoch": 0.857286648932346, + "kl_loss": 0.071128249168396, + "loss_ib": 0.0009106946527026594, + "step": 2981 + }, + { + "ce_ib": 5.483347415924072, + "ce_orig": 1.3714430332183838, + "epoch": 0.857286648932346, + "kl_loss": 0.06755002588033676, + "loss_ib": 0.0012238349299877882, + "step": 2981 + }, + { + "ce_ib": 3.2779295444488525, + "ce_orig": 0.8019994497299194, + "epoch": 0.857286648932346, + "kl_loss": 0.08507947623729706, + "loss_ib": 0.0011785876704379916, + "step": 2981 + }, + { + "ce_ib": 2.6207849979400635, + "ce_orig": 0.775807797908783, + "epoch": 0.857286648932346, + "kl_loss": 0.0490640252828598, + "loss_ib": 0.0007527187117375433, + "step": 2981 + }, + { + "ce_ib": 3.6797778606414795, + "ce_orig": 0.3576512932777405, + "epoch": 0.8575742325113236, + "kl_loss": 0.07191857695579529, + "loss_ib": 0.0010871634585782886, + "step": 2982 + }, + { + "ce_ib": 3.7482614517211914, + "ce_orig": 0.5565910935401917, + "epoch": 0.8575742325113236, + "kl_loss": 0.04605509713292122, + "loss_ib": 0.0008353770826943219, + "step": 2982 + }, + { + "ce_ib": 4.892259120941162, + "ce_orig": 0.8647422194480896, + "epoch": 0.8575742325113236, + "kl_loss": 0.08934535831212997, + "loss_ib": 0.001382679445669055, + "step": 2982 + }, + { + "ce_ib": 3.6049141883850098, + "ce_orig": 0.7069617509841919, + "epoch": 0.8575742325113236, + "kl_loss": 0.02657429501414299, + "loss_ib": 0.0006262343958951533, + "step": 2982 + }, + { + "ce_ib": 3.493936538696289, + "ce_orig": 0.5718881487846375, + "epoch": 0.8578618160903012, + "kl_loss": 0.06980252265930176, + "loss_ib": 0.0010474188020452857, + "step": 2983 + }, + { + "ce_ib": 5.226736545562744, + "ce_orig": 1.2632999420166016, + "epoch": 0.8578618160903012, + "kl_loss": 0.0761936604976654, + "loss_ib": 0.0012846102472394705, + "step": 2983 + }, + { + "ce_ib": 2.5676794052124023, + "ce_orig": 0.8911001682281494, + "epoch": 0.8578618160903012, + "kl_loss": 0.030063923448324203, + "loss_ib": 0.0005574071547016501, + "step": 2983 + }, + { + "ce_ib": 5.141313552856445, + "ce_orig": 1.3513239622116089, + "epoch": 0.8578618160903012, + "kl_loss": 0.05076032131910324, + "loss_ib": 0.0010217345552518964, + "step": 2983 + }, + { + "ce_ib": 5.039977073669434, + "ce_orig": 1.426147222518921, + "epoch": 0.8581493996692788, + "kl_loss": 0.053303759545087814, + "loss_ib": 0.001037035253830254, + "step": 2984 + }, + { + "ce_ib": 6.169479846954346, + "ce_orig": 1.373288869857788, + "epoch": 0.8581493996692788, + "kl_loss": 0.05603425204753876, + "loss_ib": 0.001177290454506874, + "step": 2984 + }, + { + "ce_ib": 1.4648350477218628, + "ce_orig": 0.20466740429401398, + "epoch": 0.8581493996692788, + "kl_loss": 0.12686456739902496, + "loss_ib": 0.0014151291688904166, + "step": 2984 + }, + { + "ce_ib": 2.676520824432373, + "ce_orig": 0.6981580257415771, + "epoch": 0.8581493996692788, + "kl_loss": 0.052867162972688675, + "loss_ib": 0.0007963237003423274, + "step": 2984 + }, + { + "epoch": 0.8584369832482566, + "grad_norm": 0.09302707761526108, + "learning_rate": 4.192151838910122e-05, + "loss": 0.8091, + "step": 2985 + }, + { + "ce_ib": 3.6913912296295166, + "ce_orig": 0.7128337621688843, + "epoch": 0.8584369832482566, + "kl_loss": 0.05105532705783844, + "loss_ib": 0.0008796924375928938, + "step": 2985 + }, + { + "ce_ib": 3.0830373764038086, + "ce_orig": 0.708928108215332, + "epoch": 0.8584369832482566, + "kl_loss": 0.03054964169859886, + "loss_ib": 0.0006138001335784793, + "step": 2985 + }, + { + "ce_ib": 5.285787105560303, + "ce_orig": 1.416795253753662, + "epoch": 0.8584369832482566, + "kl_loss": 0.06117883697152138, + "loss_ib": 0.001140367123298347, + "step": 2985 + }, + { + "ce_ib": 5.5229058265686035, + "ce_orig": 1.5425537824630737, + "epoch": 0.8584369832482566, + "kl_loss": 0.06857320666313171, + "loss_ib": 0.0012380225816741586, + "step": 2985 + }, + { + "ce_ib": 4.359062671661377, + "ce_orig": 0.7774598002433777, + "epoch": 0.8587245668272342, + "kl_loss": 0.09463727474212646, + "loss_ib": 0.0013822788605466485, + "step": 2986 + }, + { + "ce_ib": 3.601762294769287, + "ce_orig": 1.0632603168487549, + "epoch": 0.8587245668272342, + "kl_loss": 0.047297246754169464, + "loss_ib": 0.0008331486606039107, + "step": 2986 + }, + { + "ce_ib": 5.10903787612915, + "ce_orig": 1.283057689666748, + "epoch": 0.8587245668272342, + "kl_loss": 0.07545590400695801, + "loss_ib": 0.0012654627207666636, + "step": 2986 + }, + { + "ce_ib": 3.4763076305389404, + "ce_orig": 0.8702535629272461, + "epoch": 0.8587245668272342, + "kl_loss": 0.048765361309051514, + "loss_ib": 0.0008352843578904867, + "step": 2986 + }, + { + "ce_ib": 3.28291916847229, + "ce_orig": 0.7372118830680847, + "epoch": 0.8590121504062118, + "kl_loss": 0.07401489466428757, + "loss_ib": 0.00106844084803015, + "step": 2987 + }, + { + "ce_ib": 2.6651082038879395, + "ce_orig": 0.581652045249939, + "epoch": 0.8590121504062118, + "kl_loss": 0.041232578456401825, + "loss_ib": 0.000678836542647332, + "step": 2987 + }, + { + "ce_ib": 4.150271415710449, + "ce_orig": 1.0544111728668213, + "epoch": 0.8590121504062118, + "kl_loss": 0.06277519464492798, + "loss_ib": 0.0010427790693938732, + "step": 2987 + }, + { + "ce_ib": 5.579162120819092, + "ce_orig": 0.9612053036689758, + "epoch": 0.8590121504062118, + "kl_loss": 0.05356215685606003, + "loss_ib": 0.0010935377795249224, + "step": 2987 + }, + { + "ce_ib": 2.7591326236724854, + "ce_orig": 0.6356216073036194, + "epoch": 0.8592997339851894, + "kl_loss": 0.050619687885046005, + "loss_ib": 0.0007821100880391896, + "step": 2988 + }, + { + "ce_ib": 5.331314563751221, + "ce_orig": 1.4070934057235718, + "epoch": 0.8592997339851894, + "kl_loss": 0.07331179082393646, + "loss_ib": 0.0012662493390962481, + "step": 2988 + }, + { + "ce_ib": 1.9597386121749878, + "ce_orig": 0.37870797514915466, + "epoch": 0.8592997339851894, + "kl_loss": 0.053206104785203934, + "loss_ib": 0.0007280348800122738, + "step": 2988 + }, + { + "ce_ib": 3.7763805389404297, + "ce_orig": 1.0193147659301758, + "epoch": 0.8592997339851894, + "kl_loss": 0.06128915026783943, + "loss_ib": 0.0009905294282361865, + "step": 2988 + }, + { + "ce_ib": 4.458715915679932, + "ce_orig": 0.9957762360572815, + "epoch": 0.8595873175641671, + "kl_loss": 0.08809801191091537, + "loss_ib": 0.0013268516631796956, + "step": 2989 + }, + { + "ce_ib": 3.531137228012085, + "ce_orig": 0.6256084442138672, + "epoch": 0.8595873175641671, + "kl_loss": 0.07474908232688904, + "loss_ib": 0.0011006045388057828, + "step": 2989 + }, + { + "ce_ib": 1.0307176113128662, + "ce_orig": 0.14375151693820953, + "epoch": 0.8595873175641671, + "kl_loss": 0.12903381884098053, + "loss_ib": 0.0013934099115431309, + "step": 2989 + }, + { + "ce_ib": 3.1087210178375244, + "ce_orig": 0.814203679561615, + "epoch": 0.8595873175641671, + "kl_loss": 0.08693720400333405, + "loss_ib": 0.001180244144052267, + "step": 2989 + }, + { + "epoch": 0.8598749011431447, + "grad_norm": 0.07708434760570526, + "learning_rate": 4.189293377245241e-05, + "loss": 0.822, + "step": 2990 + }, + { + "ce_ib": 2.980087995529175, + "ce_orig": 0.7548376321792603, + "epoch": 0.8598749011431447, + "kl_loss": 0.04460069537162781, + "loss_ib": 0.0007440157351084054, + "step": 2990 + }, + { + "ce_ib": 5.660459041595459, + "ce_orig": 1.3107730150222778, + "epoch": 0.8598749011431447, + "kl_loss": 0.06937726587057114, + "loss_ib": 0.0012598184403032064, + "step": 2990 + }, + { + "ce_ib": 2.6054790019989014, + "ce_orig": 0.6206973791122437, + "epoch": 0.8598749011431447, + "kl_loss": 0.05359848588705063, + "loss_ib": 0.0007965327822603285, + "step": 2990 + }, + { + "ce_ib": 5.992776393890381, + "ce_orig": 1.748688817024231, + "epoch": 0.8598749011431447, + "kl_loss": 0.046618618071079254, + "loss_ib": 0.0010654637590050697, + "step": 2990 + }, + { + "ce_ib": 2.506220817565918, + "ce_orig": 0.5929728150367737, + "epoch": 0.8601624847221223, + "kl_loss": 0.046928517520427704, + "loss_ib": 0.0007199072861112654, + "step": 2991 + }, + { + "ce_ib": 5.436748504638672, + "ce_orig": 1.6004605293273926, + "epoch": 0.8601624847221223, + "kl_loss": 0.056519269943237305, + "loss_ib": 0.001108867465518415, + "step": 2991 + }, + { + "ce_ib": 5.878716468811035, + "ce_orig": 1.3848663568496704, + "epoch": 0.8601624847221223, + "kl_loss": 0.06908971071243286, + "loss_ib": 0.0012787687592208385, + "step": 2991 + }, + { + "ce_ib": 3.9992175102233887, + "ce_orig": 0.9676986932754517, + "epoch": 0.8601624847221223, + "kl_loss": 0.023760013282299042, + "loss_ib": 0.0006375218508765101, + "step": 2991 + }, + { + "ce_ib": 4.099398612976074, + "ce_orig": 0.7293134331703186, + "epoch": 0.8604500683011, + "kl_loss": 0.08427619934082031, + "loss_ib": 0.001252701855264604, + "step": 2992 + }, + { + "ce_ib": 1.4641673564910889, + "ce_orig": 0.4130011200904846, + "epoch": 0.8604500683011, + "kl_loss": 0.023720070719718933, + "loss_ib": 0.0003836174146272242, + "step": 2992 + }, + { + "ce_ib": 4.6550774574279785, + "ce_orig": 1.0837254524230957, + "epoch": 0.8604500683011, + "kl_loss": 0.06247542053461075, + "loss_ib": 0.0010902619687840343, + "step": 2992 + }, + { + "ce_ib": 4.361858367919922, + "ce_orig": 0.7072306275367737, + "epoch": 0.8604500683011, + "kl_loss": 0.0744103267788887, + "loss_ib": 0.0011802890803664923, + "step": 2992 + }, + { + "ce_ib": 3.10329008102417, + "ce_orig": 0.8927087783813477, + "epoch": 0.8607376518800777, + "kl_loss": 0.04329589754343033, + "loss_ib": 0.0007432879065163434, + "step": 2993 + }, + { + "ce_ib": 3.0547382831573486, + "ce_orig": 0.635036289691925, + "epoch": 0.8607376518800777, + "kl_loss": 0.061398498713970184, + "loss_ib": 0.0009194587473757565, + "step": 2993 + }, + { + "ce_ib": 4.936373710632324, + "ce_orig": 1.323392629623413, + "epoch": 0.8607376518800777, + "kl_loss": 0.0483587421476841, + "loss_ib": 0.0009772247867658734, + "step": 2993 + }, + { + "ce_ib": 4.14300012588501, + "ce_orig": 1.0979337692260742, + "epoch": 0.8607376518800777, + "kl_loss": 0.03995385393500328, + "loss_ib": 0.0008138385019265115, + "step": 2993 + }, + { + "ce_ib": 2.4941611289978027, + "ce_orig": 0.7008461356163025, + "epoch": 0.8610252354590553, + "kl_loss": 0.04826176166534424, + "loss_ib": 0.0007320336881093681, + "step": 2994 + }, + { + "ce_ib": 4.502664566040039, + "ce_orig": 0.7435257434844971, + "epoch": 0.8610252354590553, + "kl_loss": 0.09720723330974579, + "loss_ib": 0.0014223387697711587, + "step": 2994 + }, + { + "ce_ib": 3.585704803466797, + "ce_orig": 0.8232493996620178, + "epoch": 0.8610252354590553, + "kl_loss": 0.07005567103624344, + "loss_ib": 0.0010591271566227078, + "step": 2994 + }, + { + "ce_ib": 4.329352855682373, + "ce_orig": 1.0832645893096924, + "epoch": 0.8610252354590553, + "kl_loss": 0.06171745806932449, + "loss_ib": 0.0010501098586246371, + "step": 2994 + }, + { + "epoch": 0.8613128190380329, + "grad_norm": 0.09579747170209885, + "learning_rate": 4.1864308456805214e-05, + "loss": 0.852, + "step": 2995 + }, + { + "ce_ib": 2.5828659534454346, + "ce_orig": 0.571911633014679, + "epoch": 0.8613128190380329, + "kl_loss": 0.06757941842079163, + "loss_ib": 0.0009340807446278632, + "step": 2995 + }, + { + "ce_ib": 2.237853527069092, + "ce_orig": 0.619317352771759, + "epoch": 0.8613128190380329, + "kl_loss": 0.04142823815345764, + "loss_ib": 0.0006380677223205566, + "step": 2995 + }, + { + "ce_ib": 2.1294620037078857, + "ce_orig": 0.5367808938026428, + "epoch": 0.8613128190380329, + "kl_loss": 0.047258391976356506, + "loss_ib": 0.0006855301326140761, + "step": 2995 + }, + { + "ce_ib": 2.908958911895752, + "ce_orig": 0.6495174169540405, + "epoch": 0.8613128190380329, + "kl_loss": 0.05632846802473068, + "loss_ib": 0.0008541806018911302, + "step": 2995 + }, + { + "ce_ib": 3.3586418628692627, + "ce_orig": 0.7133363485336304, + "epoch": 0.8616004026170105, + "kl_loss": 0.03911397606134415, + "loss_ib": 0.0007270039641298354, + "step": 2996 + }, + { + "ce_ib": 2.659391164779663, + "ce_orig": 0.44628068804740906, + "epoch": 0.8616004026170105, + "kl_loss": 0.06125441938638687, + "loss_ib": 0.0008784832898527384, + "step": 2996 + }, + { + "ce_ib": 3.9742279052734375, + "ce_orig": 0.8749635219573975, + "epoch": 0.8616004026170105, + "kl_loss": 0.04995201528072357, + "loss_ib": 0.00089694291818887, + "step": 2996 + }, + { + "ce_ib": 4.942647457122803, + "ce_orig": 1.1423684358596802, + "epoch": 0.8616004026170105, + "kl_loss": 0.06338344514369965, + "loss_ib": 0.0011280991602689028, + "step": 2996 + }, + { + "ce_ib": 3.2576186656951904, + "ce_orig": 0.677207350730896, + "epoch": 0.8618879861959882, + "kl_loss": 0.05676572397351265, + "loss_ib": 0.0008934190846048295, + "step": 2997 + }, + { + "ce_ib": 2.3524718284606934, + "ce_orig": 0.7811833620071411, + "epoch": 0.8618879861959882, + "kl_loss": 0.037988826632499695, + "loss_ib": 0.0006151354173198342, + "step": 2997 + }, + { + "ce_ib": 3.2590885162353516, + "ce_orig": 0.629682183265686, + "epoch": 0.8618879861959882, + "kl_loss": 0.05555712431669235, + "loss_ib": 0.0008814801112748682, + "step": 2997 + }, + { + "ce_ib": 2.1091971397399902, + "ce_orig": 0.4541114568710327, + "epoch": 0.8618879861959882, + "kl_loss": 0.05718323215842247, + "loss_ib": 0.0007827520021237433, + "step": 2997 + }, + { + "ce_ib": 3.3564343452453613, + "ce_orig": 0.27761954069137573, + "epoch": 0.8621755697749659, + "kl_loss": 0.044571034610271454, + "loss_ib": 0.0007813537959009409, + "step": 2998 + }, + { + "ce_ib": 3.6518404483795166, + "ce_orig": 0.9964746832847595, + "epoch": 0.8621755697749659, + "kl_loss": 0.06039860099554062, + "loss_ib": 0.0009691700106486678, + "step": 2998 + }, + { + "ce_ib": 2.5241384506225586, + "ce_orig": 0.5241273641586304, + "epoch": 0.8621755697749659, + "kl_loss": 0.0619761124253273, + "loss_ib": 0.0008721749181859195, + "step": 2998 + }, + { + "ce_ib": 1.8837579488754272, + "ce_orig": 0.5326011180877686, + "epoch": 0.8621755697749659, + "kl_loss": 0.031518515199422836, + "loss_ib": 0.0005035609356127679, + "step": 2998 + }, + { + "ce_ib": 4.048685550689697, + "ce_orig": 0.6758573055267334, + "epoch": 0.8624631533539435, + "kl_loss": 0.06484195590019226, + "loss_ib": 0.0010532881133258343, + "step": 2999 + }, + { + "ce_ib": 4.285137176513672, + "ce_orig": 1.1862608194351196, + "epoch": 0.8624631533539435, + "kl_loss": 0.042554616928100586, + "loss_ib": 0.0008540598792023957, + "step": 2999 + }, + { + "ce_ib": 4.437625408172607, + "ce_orig": 1.0706287622451782, + "epoch": 0.8624631533539435, + "kl_loss": 0.09191133081912994, + "loss_ib": 0.0013628758024424314, + "step": 2999 + }, + { + "ce_ib": 4.216197490692139, + "ce_orig": 0.6308202147483826, + "epoch": 0.8624631533539435, + "kl_loss": 0.07761002331972122, + "loss_ib": 0.0011977199465036392, + "step": 2999 + }, + { + "epoch": 0.8627507369329211, + "grad_norm": 0.09524478763341904, + "learning_rate": 4.1835642511124656e-05, + "loss": 0.8213, + "step": 3000 + }, + { + "ce_ib": 2.0767080783843994, + "ce_orig": 0.5266839265823364, + "epoch": 0.8627507369329211, + "kl_loss": 0.05112726241350174, + "loss_ib": 0.0007189433672465384, + "step": 3000 + }, + { + "ce_ib": 4.433276176452637, + "ce_orig": 1.2689861059188843, + "epoch": 0.8627507369329211, + "kl_loss": 0.05293045938014984, + "loss_ib": 0.0009726321441121399, + "step": 3000 + }, + { + "ce_ib": 2.793017864227295, + "ce_orig": 0.34508171677589417, + "epoch": 0.8627507369329211, + "kl_loss": 0.05222548544406891, + "loss_ib": 0.000801556627266109, + "step": 3000 + }, + { + "ce_ib": 5.073208332061768, + "ce_orig": 0.8933168053627014, + "epoch": 0.8627507369329211, + "kl_loss": 0.07109817862510681, + "loss_ib": 0.0012183026410639286, + "step": 3000 + }, + { + "ce_ib": 1.300544023513794, + "ce_orig": 0.18920734524726868, + "epoch": 0.8630383205118988, + "kl_loss": 0.11990756541490555, + "loss_ib": 0.0013291300274431705, + "step": 3001 + }, + { + "ce_ib": 1.7152016162872314, + "ce_orig": 0.397152304649353, + "epoch": 0.8630383205118988, + "kl_loss": 0.06221187114715576, + "loss_ib": 0.0007936388137750328, + "step": 3001 + }, + { + "ce_ib": 3.919041872024536, + "ce_orig": 0.978486180305481, + "epoch": 0.8630383205118988, + "kl_loss": 0.048168785870075226, + "loss_ib": 0.0008735920418985188, + "step": 3001 + }, + { + "ce_ib": 2.9185397624969482, + "ce_orig": 0.631712019443512, + "epoch": 0.8630383205118988, + "kl_loss": 0.05962430685758591, + "loss_ib": 0.0008880970417521894, + "step": 3001 + }, + { + "ce_ib": 2.531970739364624, + "ce_orig": 0.6828842163085938, + "epoch": 0.8633259040908764, + "kl_loss": 0.03238285332918167, + "loss_ib": 0.000577025581151247, + "step": 3002 + }, + { + "ce_ib": 4.200273513793945, + "ce_orig": 1.1299688816070557, + "epoch": 0.8633259040908764, + "kl_loss": 0.08129346370697021, + "loss_ib": 0.0012329620076343417, + "step": 3002 + }, + { + "ce_ib": 2.711892604827881, + "ce_orig": 0.6768491864204407, + "epoch": 0.8633259040908764, + "kl_loss": 0.0411997027695179, + "loss_ib": 0.0006831862847320735, + "step": 3002 + }, + { + "ce_ib": 4.183706283569336, + "ce_orig": 1.0039259195327759, + "epoch": 0.8633259040908764, + "kl_loss": 0.07485033571720123, + "loss_ib": 0.0011668739607557654, + "step": 3002 + }, + { + "ce_ib": 4.2193169593811035, + "ce_orig": 1.0053248405456543, + "epoch": 0.863613487669854, + "kl_loss": 0.05444502830505371, + "loss_ib": 0.0009663819218985736, + "step": 3003 + }, + { + "ce_ib": 5.22760009765625, + "ce_orig": 0.9763957262039185, + "epoch": 0.863613487669854, + "kl_loss": 0.05692718178033829, + "loss_ib": 0.0010920318309217691, + "step": 3003 + }, + { + "ce_ib": 4.623554229736328, + "ce_orig": 1.0443874597549438, + "epoch": 0.863613487669854, + "kl_loss": 0.11264783143997192, + "loss_ib": 0.0015888337511569262, + "step": 3003 + }, + { + "ce_ib": 4.1526288986206055, + "ce_orig": 1.2694035768508911, + "epoch": 0.863613487669854, + "kl_loss": 0.0623466894030571, + "loss_ib": 0.001038729795254767, + "step": 3003 + }, + { + "ce_ib": 3.7857439517974854, + "ce_orig": 0.8364673256874084, + "epoch": 0.8639010712488316, + "kl_loss": 0.05984703078866005, + "loss_ib": 0.0009770446922630072, + "step": 3004 + }, + { + "ce_ib": 3.629507303237915, + "ce_orig": 0.7321583032608032, + "epoch": 0.8639010712488316, + "kl_loss": 0.046933967620134354, + "loss_ib": 0.0008322903304360807, + "step": 3004 + }, + { + "ce_ib": 2.9219307899475098, + "ce_orig": 0.841671884059906, + "epoch": 0.8639010712488316, + "kl_loss": 0.05555855855345726, + "loss_ib": 0.0008477785740979016, + "step": 3004 + }, + { + "ce_ib": 4.501309871673584, + "ce_orig": 1.2525392770767212, + "epoch": 0.8639010712488316, + "kl_loss": 0.07293684780597687, + "loss_ib": 0.0011794994352385402, + "step": 3004 + }, + { + "epoch": 0.8641886548278094, + "grad_norm": 0.09124861657619476, + "learning_rate": 4.180693600447365e-05, + "loss": 0.8148, + "step": 3005 + }, + { + "ce_ib": 6.846287727355957, + "ce_orig": 1.6371976137161255, + "epoch": 0.8641886548278094, + "kl_loss": 0.09050936996936798, + "loss_ib": 0.001589722465723753, + "step": 3005 + }, + { + "ce_ib": 2.56353759765625, + "ce_orig": 0.5401108860969543, + "epoch": 0.8641886548278094, + "kl_loss": 0.019373595714569092, + "loss_ib": 0.00045008971937932074, + "step": 3005 + }, + { + "ce_ib": 4.864195823669434, + "ce_orig": 1.5076878070831299, + "epoch": 0.8641886548278094, + "kl_loss": 0.039918094873428345, + "loss_ib": 0.0008856005151756108, + "step": 3005 + }, + { + "ce_ib": 2.362905740737915, + "ce_orig": 0.6382970809936523, + "epoch": 0.8641886548278094, + "kl_loss": 0.0313103124499321, + "loss_ib": 0.0005493936478160322, + "step": 3005 + }, + { + "ce_ib": 3.762622117996216, + "ce_orig": 0.7484158873558044, + "epoch": 0.864476238406787, + "kl_loss": 0.07456439733505249, + "loss_ib": 0.0011219062143936753, + "step": 3006 + }, + { + "ce_ib": 3.933856248855591, + "ce_orig": 0.6845016479492188, + "epoch": 0.864476238406787, + "kl_loss": 0.0836901143193245, + "loss_ib": 0.0012302867835387588, + "step": 3006 + }, + { + "ce_ib": 5.442783832550049, + "ce_orig": 1.4569815397262573, + "epoch": 0.864476238406787, + "kl_loss": 0.05222015455365181, + "loss_ib": 0.0010664798319339752, + "step": 3006 + }, + { + "ce_ib": 3.1275672912597656, + "ce_orig": 0.8226110339164734, + "epoch": 0.864476238406787, + "kl_loss": 0.038101185113191605, + "loss_ib": 0.0006937685539014637, + "step": 3006 + }, + { + "ce_ib": 2.960057258605957, + "ce_orig": 0.47181907296180725, + "epoch": 0.8647638219857646, + "kl_loss": 0.05149753391742706, + "loss_ib": 0.0008109810878522694, + "step": 3007 + }, + { + "ce_ib": 3.529979705810547, + "ce_orig": 0.6539387702941895, + "epoch": 0.8647638219857646, + "kl_loss": 0.05651966109871864, + "loss_ib": 0.0009181945351883769, + "step": 3007 + }, + { + "ce_ib": 1.3129936456680298, + "ce_orig": 0.18923631310462952, + "epoch": 0.8647638219857646, + "kl_loss": 0.12947139143943787, + "loss_ib": 0.0014260131865739822, + "step": 3007 + }, + { + "ce_ib": 3.1279819011688232, + "ce_orig": 0.7926934361457825, + "epoch": 0.8647638219857646, + "kl_loss": 0.0542946457862854, + "loss_ib": 0.0008557445835322142, + "step": 3007 + }, + { + "ce_ib": 4.226551055908203, + "ce_orig": 1.0410325527191162, + "epoch": 0.8650514055647422, + "kl_loss": 0.06810391694307327, + "loss_ib": 0.0011036943178623915, + "step": 3008 + }, + { + "ce_ib": 3.2415153980255127, + "ce_orig": 0.7551939487457275, + "epoch": 0.8650514055647422, + "kl_loss": 0.05235688388347626, + "loss_ib": 0.0008477203082293272, + "step": 3008 + }, + { + "ce_ib": 2.166907787322998, + "ce_orig": 0.5629973411560059, + "epoch": 0.8650514055647422, + "kl_loss": 0.04467838257551193, + "loss_ib": 0.0006634745514020324, + "step": 3008 + }, + { + "ce_ib": 2.9113998413085938, + "ce_orig": 0.819574236869812, + "epoch": 0.8650514055647422, + "kl_loss": 0.05576453357934952, + "loss_ib": 0.0008487853338010609, + "step": 3008 + }, + { + "ce_ib": 3.811567544937134, + "ce_orig": 0.9377001523971558, + "epoch": 0.8653389891437199, + "kl_loss": 0.04026580974459648, + "loss_ib": 0.0007838147575967014, + "step": 3009 + }, + { + "ce_ib": 5.938493728637695, + "ce_orig": 1.609991192817688, + "epoch": 0.8653389891437199, + "kl_loss": 0.048244088888168335, + "loss_ib": 0.0010762902675196528, + "step": 3009 + }, + { + "ce_ib": 3.010608196258545, + "ce_orig": 0.46111923456192017, + "epoch": 0.8653389891437199, + "kl_loss": 0.055582765489816666, + "loss_ib": 0.0008568884222768247, + "step": 3009 + }, + { + "ce_ib": 4.388724327087402, + "ce_orig": 1.0443966388702393, + "epoch": 0.8653389891437199, + "kl_loss": 0.07556288689374924, + "loss_ib": 0.0011945012956857681, + "step": 3009 + }, + { + "epoch": 0.8656265727226975, + "grad_norm": 0.09133870154619217, + "learning_rate": 4.177818900601284e-05, + "loss": 0.8381, + "step": 3010 + }, + { + "ce_ib": 2.464834451675415, + "ce_orig": 0.42866766452789307, + "epoch": 0.8656265727226975, + "kl_loss": 0.04291185736656189, + "loss_ib": 0.0006756020011380315, + "step": 3010 + }, + { + "ce_ib": 3.9345414638519287, + "ce_orig": 0.9114260077476501, + "epoch": 0.8656265727226975, + "kl_loss": 0.03298158943653107, + "loss_ib": 0.0007232700008898973, + "step": 3010 + }, + { + "ce_ib": 4.131486415863037, + "ce_orig": 1.0726323127746582, + "epoch": 0.8656265727226975, + "kl_loss": 0.054075904190540314, + "loss_ib": 0.0009539076709188521, + "step": 3010 + }, + { + "ce_ib": 5.708404541015625, + "ce_orig": 1.2838947772979736, + "epoch": 0.8656265727226975, + "kl_loss": 0.05826291814446449, + "loss_ib": 0.0011534695513546467, + "step": 3010 + }, + { + "ce_ib": 3.205702781677246, + "ce_orig": 0.7427955865859985, + "epoch": 0.8659141563016751, + "kl_loss": 0.04791105166077614, + "loss_ib": 0.0007996807689778507, + "step": 3011 + }, + { + "ce_ib": 2.5647635459899902, + "ce_orig": 0.6155127882957458, + "epoch": 0.8659141563016751, + "kl_loss": 0.04840013384819031, + "loss_ib": 0.0007404776406474411, + "step": 3011 + }, + { + "ce_ib": 4.837284564971924, + "ce_orig": 1.1118220090866089, + "epoch": 0.8659141563016751, + "kl_loss": 0.06310392916202545, + "loss_ib": 0.0011147677432745695, + "step": 3011 + }, + { + "ce_ib": 3.466076135635376, + "ce_orig": 0.6203659176826477, + "epoch": 0.8659141563016751, + "kl_loss": 0.04845510050654411, + "loss_ib": 0.0008311585988849401, + "step": 3011 + }, + { + "ce_ib": 6.181683540344238, + "ce_orig": 1.3546677827835083, + "epoch": 0.8662017398806529, + "kl_loss": 0.046446461230516434, + "loss_ib": 0.0010826329234987497, + "step": 3012 + }, + { + "ce_ib": 3.903931140899658, + "ce_orig": 0.48500004410743713, + "epoch": 0.8662017398806529, + "kl_loss": 0.06195070222020149, + "loss_ib": 0.0010099001228809357, + "step": 3012 + }, + { + "ce_ib": 1.9789100885391235, + "ce_orig": 0.2520938217639923, + "epoch": 0.8662017398806529, + "kl_loss": 0.06352929770946503, + "loss_ib": 0.0008331839344464242, + "step": 3012 + }, + { + "ce_ib": 2.613703966140747, + "ce_orig": 0.618155837059021, + "epoch": 0.8662017398806529, + "kl_loss": 0.054343242198228836, + "loss_ib": 0.0008048027520999312, + "step": 3012 + }, + { + "ce_ib": 3.2171900272369385, + "ce_orig": 0.7186639308929443, + "epoch": 0.8664893234596305, + "kl_loss": 0.06412243843078613, + "loss_ib": 0.0009629434207454324, + "step": 3013 + }, + { + "ce_ib": 4.531600475311279, + "ce_orig": 0.7628046274185181, + "epoch": 0.8664893234596305, + "kl_loss": 0.07869578152894974, + "loss_ib": 0.0012401178246363997, + "step": 3013 + }, + { + "ce_ib": 3.731574535369873, + "ce_orig": 0.8009179830551147, + "epoch": 0.8664893234596305, + "kl_loss": 0.054384343326091766, + "loss_ib": 0.0009170008706860244, + "step": 3013 + }, + { + "ce_ib": 3.2279036045074463, + "ce_orig": 0.8835029602050781, + "epoch": 0.8664893234596305, + "kl_loss": 0.04563012346625328, + "loss_ib": 0.000779091555159539, + "step": 3013 + }, + { + "ce_ib": 5.9399614334106445, + "ce_orig": 1.239327311515808, + "epoch": 0.8667769070386081, + "kl_loss": 0.05784577503800392, + "loss_ib": 0.0011724538635462523, + "step": 3014 + }, + { + "ce_ib": 3.1130619049072266, + "ce_orig": 0.4165746867656708, + "epoch": 0.8667769070386081, + "kl_loss": 0.05575266480445862, + "loss_ib": 0.000868832808919251, + "step": 3014 + }, + { + "ce_ib": 6.761065483093262, + "ce_orig": 1.5651758909225464, + "epoch": 0.8667769070386081, + "kl_loss": 0.05400721728801727, + "loss_ib": 0.0012161786435171962, + "step": 3014 + }, + { + "ce_ib": 5.491340637207031, + "ce_orig": 1.4216506481170654, + "epoch": 0.8667769070386081, + "kl_loss": 0.10562673956155777, + "loss_ib": 0.0016054013976827264, + "step": 3014 + }, + { + "epoch": 0.8670644906175857, + "grad_norm": 0.09205108880996704, + "learning_rate": 4.174940158500041e-05, + "loss": 0.8006, + "step": 3015 + }, + { + "ce_ib": 2.241142988204956, + "ce_orig": 0.6272971034049988, + "epoch": 0.8670644906175857, + "kl_loss": 0.034529127180576324, + "loss_ib": 0.0005694055580534041, + "step": 3015 + }, + { + "ce_ib": 4.319194793701172, + "ce_orig": 1.2594599723815918, + "epoch": 0.8670644906175857, + "kl_loss": 0.04557640105485916, + "loss_ib": 0.0008876834763213992, + "step": 3015 + }, + { + "ce_ib": 4.366693496704102, + "ce_orig": 1.0135786533355713, + "epoch": 0.8670644906175857, + "kl_loss": 0.05552667751908302, + "loss_ib": 0.0009919360745698214, + "step": 3015 + }, + { + "ce_ib": 3.3377442359924316, + "ce_orig": 0.5662882328033447, + "epoch": 0.8670644906175857, + "kl_loss": 0.047504156827926636, + "loss_ib": 0.0008088159374892712, + "step": 3015 + }, + { + "ce_ib": 3.0391845703125, + "ce_orig": 0.5089914798736572, + "epoch": 0.8673520741965633, + "kl_loss": 0.073642298579216, + "loss_ib": 0.0010403413325548172, + "step": 3016 + }, + { + "ce_ib": 4.854435920715332, + "ce_orig": 1.2161377668380737, + "epoch": 0.8673520741965633, + "kl_loss": 0.06682945787906647, + "loss_ib": 0.0011537381215021014, + "step": 3016 + }, + { + "ce_ib": 1.8960038423538208, + "ce_orig": 0.5135582089424133, + "epoch": 0.8673520741965633, + "kl_loss": 0.029367880895733833, + "loss_ib": 0.0004832791746594012, + "step": 3016 + }, + { + "ce_ib": 5.052788257598877, + "ce_orig": 0.9242228865623474, + "epoch": 0.8673520741965633, + "kl_loss": 0.06092597544193268, + "loss_ib": 0.001114538637921214, + "step": 3016 + }, + { + "ce_ib": 1.9324699640274048, + "ce_orig": 0.4980759620666504, + "epoch": 0.867639657775541, + "kl_loss": 0.04853895679116249, + "loss_ib": 0.0006786365411244333, + "step": 3017 + }, + { + "ce_ib": 3.9390416145324707, + "ce_orig": 0.8829923272132874, + "epoch": 0.867639657775541, + "kl_loss": 0.0755566954612732, + "loss_ib": 0.001149471034295857, + "step": 3017 + }, + { + "ce_ib": 3.358847141265869, + "ce_orig": 0.7702977061271667, + "epoch": 0.867639657775541, + "kl_loss": 0.05096602439880371, + "loss_ib": 0.0008455449133180082, + "step": 3017 + }, + { + "ce_ib": 3.5631706714630127, + "ce_orig": 0.994205117225647, + "epoch": 0.867639657775541, + "kl_loss": 0.06972454488277435, + "loss_ib": 0.0010535623878240585, + "step": 3017 + }, + { + "ce_ib": 4.507917404174805, + "ce_orig": 1.0478906631469727, + "epoch": 0.8679272413545187, + "kl_loss": 0.06455150246620178, + "loss_ib": 0.001096306717954576, + "step": 3018 + }, + { + "ce_ib": 3.3278005123138428, + "ce_orig": 0.9168696403503418, + "epoch": 0.8679272413545187, + "kl_loss": 0.029697872698307037, + "loss_ib": 0.0006297587533481419, + "step": 3018 + }, + { + "ce_ib": 0.7889219522476196, + "ce_orig": 0.11198069900274277, + "epoch": 0.8679272413545187, + "kl_loss": 0.10582689940929413, + "loss_ib": 0.001137161161750555, + "step": 3018 + }, + { + "ce_ib": 3.1094276905059814, + "ce_orig": 0.5721842646598816, + "epoch": 0.8679272413545187, + "kl_loss": 0.05518069863319397, + "loss_ib": 0.0008627497009001672, + "step": 3018 + }, + { + "ce_ib": 3.225510597229004, + "ce_orig": 0.7074896693229675, + "epoch": 0.8682148249334963, + "kl_loss": 0.039184968918561935, + "loss_ib": 0.0007144006667658687, + "step": 3019 + }, + { + "ce_ib": 4.646677494049072, + "ce_orig": 0.7661557793617249, + "epoch": 0.8682148249334963, + "kl_loss": 0.06322295218706131, + "loss_ib": 0.001096897292882204, + "step": 3019 + }, + { + "ce_ib": 1.7316622734069824, + "ce_orig": 0.4495092034339905, + "epoch": 0.8682148249334963, + "kl_loss": 0.04451613873243332, + "loss_ib": 0.0006183276418596506, + "step": 3019 + }, + { + "ce_ib": 3.19995379447937, + "ce_orig": 0.5330225229263306, + "epoch": 0.8682148249334963, + "kl_loss": 0.06405472010374069, + "loss_ib": 0.0009605425293557346, + "step": 3019 + }, + { + "epoch": 0.868502408512474, + "grad_norm": 0.08504742383956909, + "learning_rate": 4.172057381079195e-05, + "loss": 0.8156, + "step": 3020 + }, + { + "ce_ib": 3.70680570602417, + "ce_orig": 0.824737548828125, + "epoch": 0.868502408512474, + "kl_loss": 0.0690574198961258, + "loss_ib": 0.001061254763044417, + "step": 3020 + }, + { + "ce_ib": 3.520092487335205, + "ce_orig": 0.9879938364028931, + "epoch": 0.868502408512474, + "kl_loss": 0.03766651824116707, + "loss_ib": 0.0007286743493750691, + "step": 3020 + }, + { + "ce_ib": 3.6349709033966064, + "ce_orig": 0.8762478828430176, + "epoch": 0.868502408512474, + "kl_loss": 0.03644275292754173, + "loss_ib": 0.0007279246347025037, + "step": 3020 + }, + { + "ce_ib": 3.319366693496704, + "ce_orig": 0.8207045197486877, + "epoch": 0.868502408512474, + "kl_loss": 0.048541195690631866, + "loss_ib": 0.0008173486567102373, + "step": 3020 + }, + { + "ce_ib": 2.968315839767456, + "ce_orig": 0.480675607919693, + "epoch": 0.8687899920914516, + "kl_loss": 0.0940205454826355, + "loss_ib": 0.0012370370095595717, + "step": 3021 + }, + { + "ce_ib": 2.2498281002044678, + "ce_orig": 0.294718861579895, + "epoch": 0.8687899920914516, + "kl_loss": 0.10827086865901947, + "loss_ib": 0.0013076914474368095, + "step": 3021 + }, + { + "ce_ib": 2.227036237716675, + "ce_orig": 0.7257435321807861, + "epoch": 0.8687899920914516, + "kl_loss": 0.03674925118684769, + "loss_ib": 0.0005901961121708155, + "step": 3021 + }, + { + "ce_ib": 4.41558837890625, + "ce_orig": 0.6980039477348328, + "epoch": 0.8687899920914516, + "kl_loss": 0.08757656812667847, + "loss_ib": 0.001317324466072023, + "step": 3021 + }, + { + "ce_ib": 4.489384651184082, + "ce_orig": 1.0728620290756226, + "epoch": 0.8690775756704292, + "kl_loss": 0.06118280440568924, + "loss_ib": 0.0010607665171846747, + "step": 3022 + }, + { + "ce_ib": 4.126491546630859, + "ce_orig": 1.025114893913269, + "epoch": 0.8690775756704292, + "kl_loss": 0.062185902148485184, + "loss_ib": 0.0010345082264393568, + "step": 3022 + }, + { + "ce_ib": 4.5644097328186035, + "ce_orig": 1.3563249111175537, + "epoch": 0.8690775756704292, + "kl_loss": 0.06608178466558456, + "loss_ib": 0.0011172587983310223, + "step": 3022 + }, + { + "ce_ib": 3.8238751888275146, + "ce_orig": 0.6441200971603394, + "epoch": 0.8690775756704292, + "kl_loss": 0.06675868481397629, + "loss_ib": 0.0010499743511900306, + "step": 3022 + }, + { + "ce_ib": 2.5082015991210938, + "ce_orig": 0.6228718757629395, + "epoch": 0.8693651592494068, + "kl_loss": 0.03640144318342209, + "loss_ib": 0.0006148345419205725, + "step": 3023 + }, + { + "ce_ib": 4.122946262359619, + "ce_orig": 0.8630608320236206, + "epoch": 0.8693651592494068, + "kl_loss": 0.04479263722896576, + "loss_ib": 0.0008602209272794425, + "step": 3023 + }, + { + "ce_ib": 3.8712196350097656, + "ce_orig": 0.880741536617279, + "epoch": 0.8693651592494068, + "kl_loss": 0.06163661926984787, + "loss_ib": 0.00100348808337003, + "step": 3023 + }, + { + "ce_ib": 5.204606056213379, + "ce_orig": 1.3065969944000244, + "epoch": 0.8693651592494068, + "kl_loss": 0.058511462062597275, + "loss_ib": 0.0011055752402171493, + "step": 3023 + }, + { + "ce_ib": 2.570221185684204, + "ce_orig": 0.6194698810577393, + "epoch": 0.8696527428283845, + "kl_loss": 0.054013192653656006, + "loss_ib": 0.0007971539744175971, + "step": 3024 + }, + { + "ce_ib": 6.267463207244873, + "ce_orig": 1.5772826671600342, + "epoch": 0.8696527428283845, + "kl_loss": 0.07236558943986893, + "loss_ib": 0.0013504022499546409, + "step": 3024 + }, + { + "ce_ib": 4.933436393737793, + "ce_orig": 1.150275707244873, + "epoch": 0.8696527428283845, + "kl_loss": 0.052063196897506714, + "loss_ib": 0.001013975590467453, + "step": 3024 + }, + { + "ce_ib": 2.8540492057800293, + "ce_orig": 0.34307345747947693, + "epoch": 0.8696527428283845, + "kl_loss": 0.16667884588241577, + "loss_ib": 0.001952193328179419, + "step": 3024 + }, + { + "epoch": 0.8699403264073622, + "grad_norm": 0.10137511044740677, + "learning_rate": 4.169170575284025e-05, + "loss": 0.863, + "step": 3025 + }, + { + "ce_ib": 3.6456170082092285, + "ce_orig": 0.6499080657958984, + "epoch": 0.8699403264073622, + "kl_loss": 0.05925554037094116, + "loss_ib": 0.0009571170667186379, + "step": 3025 + }, + { + "ce_ib": 4.540950298309326, + "ce_orig": 1.0497665405273438, + "epoch": 0.8699403264073622, + "kl_loss": 0.05602794140577316, + "loss_ib": 0.001014374429360032, + "step": 3025 + }, + { + "ce_ib": 2.0531740188598633, + "ce_orig": 0.3942524492740631, + "epoch": 0.8699403264073622, + "kl_loss": 0.06809478253126144, + "loss_ib": 0.0008862651884555817, + "step": 3025 + }, + { + "ce_ib": 3.6414406299591064, + "ce_orig": 0.36509227752685547, + "epoch": 0.8699403264073622, + "kl_loss": 0.08307443559169769, + "loss_ib": 0.0011948883766308427, + "step": 3025 + }, + { + "ce_ib": 2.769261121749878, + "ce_orig": 0.6932346820831299, + "epoch": 0.8702279099863398, + "kl_loss": 0.047332897782325745, + "loss_ib": 0.0007502550724893808, + "step": 3026 + }, + { + "ce_ib": 2.596672773361206, + "ce_orig": 0.7326536774635315, + "epoch": 0.8702279099863398, + "kl_loss": 0.03649981692433357, + "loss_ib": 0.0006246654083952308, + "step": 3026 + }, + { + "ce_ib": 2.6164605617523193, + "ce_orig": 0.3962057828903198, + "epoch": 0.8702279099863398, + "kl_loss": 0.07905859500169754, + "loss_ib": 0.0010522319935262203, + "step": 3026 + }, + { + "ce_ib": 4.058056831359863, + "ce_orig": 0.8503920435905457, + "epoch": 0.8702279099863398, + "kl_loss": 0.05666734278202057, + "loss_ib": 0.0009724791161715984, + "step": 3026 + }, + { + "ce_ib": 4.6899566650390625, + "ce_orig": 0.35554903745651245, + "epoch": 0.8705154935653174, + "kl_loss": 0.06132027506828308, + "loss_ib": 0.0010821983451023698, + "step": 3027 + }, + { + "ce_ib": 3.8017146587371826, + "ce_orig": 0.4879588186740875, + "epoch": 0.8705154935653174, + "kl_loss": 0.07949118316173553, + "loss_ib": 0.0011750832200050354, + "step": 3027 + }, + { + "ce_ib": 6.186929225921631, + "ce_orig": 1.6020888090133667, + "epoch": 0.8705154935653174, + "kl_loss": 0.05937805771827698, + "loss_ib": 0.0012124734930694103, + "step": 3027 + }, + { + "ce_ib": 3.3665127754211426, + "ce_orig": 0.6010436415672302, + "epoch": 0.8705154935653174, + "kl_loss": 0.07204266637563705, + "loss_ib": 0.0010570778977125883, + "step": 3027 + }, + { + "ce_ib": 2.9036295413970947, + "ce_orig": 0.7568005323410034, + "epoch": 0.8708030771442951, + "kl_loss": 0.04133091866970062, + "loss_ib": 0.0007036721799522638, + "step": 3028 + }, + { + "ce_ib": 6.0669941902160645, + "ce_orig": 1.7026398181915283, + "epoch": 0.8708030771442951, + "kl_loss": 0.05538560822606087, + "loss_ib": 0.0011605554027482867, + "step": 3028 + }, + { + "ce_ib": 4.598349571228027, + "ce_orig": 1.0603824853897095, + "epoch": 0.8708030771442951, + "kl_loss": 0.0549909807741642, + "loss_ib": 0.0010097447084262967, + "step": 3028 + }, + { + "ce_ib": 3.576057195663452, + "ce_orig": 0.6950160264968872, + "epoch": 0.8708030771442951, + "kl_loss": 0.055170562118291855, + "loss_ib": 0.0009093112894333899, + "step": 3028 + }, + { + "ce_ib": 3.7634644508361816, + "ce_orig": 0.6840044260025024, + "epoch": 0.8710906607232727, + "kl_loss": 0.12269966304302216, + "loss_ib": 0.0016033430583775043, + "step": 3029 + }, + { + "ce_ib": 6.221066951751709, + "ce_orig": 1.3887971639633179, + "epoch": 0.8710906607232727, + "kl_loss": 0.05055009573698044, + "loss_ib": 0.0011276076547801495, + "step": 3029 + }, + { + "ce_ib": 3.5525612831115723, + "ce_orig": 0.5312432646751404, + "epoch": 0.8710906607232727, + "kl_loss": 0.06706751883029938, + "loss_ib": 0.0010259313276037574, + "step": 3029 + }, + { + "ce_ib": 4.173059940338135, + "ce_orig": 0.9528973698616028, + "epoch": 0.8710906607232727, + "kl_loss": 0.044356897473335266, + "loss_ib": 0.0008608749485574663, + "step": 3029 + }, + { + "epoch": 0.8713782443022503, + "grad_norm": 0.10437563061714172, + "learning_rate": 4.166279748069516e-05, + "loss": 0.8522, + "step": 3030 + }, + { + "ce_ib": 3.2915821075439453, + "ce_orig": 0.7884457111358643, + "epoch": 0.8713782443022503, + "kl_loss": 0.06535467505455017, + "loss_ib": 0.0009827049216255546, + "step": 3030 + }, + { + "ce_ib": 5.212599754333496, + "ce_orig": 1.47515070438385, + "epoch": 0.8713782443022503, + "kl_loss": 0.04019814729690552, + "loss_ib": 0.0009232414304278791, + "step": 3030 + }, + { + "ce_ib": 2.7730515003204346, + "ce_orig": 0.6428220868110657, + "epoch": 0.8713782443022503, + "kl_loss": 0.06426183879375458, + "loss_ib": 0.0009199235355481505, + "step": 3030 + }, + { + "ce_ib": 3.3891654014587402, + "ce_orig": 0.7126870155334473, + "epoch": 0.8713782443022503, + "kl_loss": 0.05728204548358917, + "loss_ib": 0.0009117369190789759, + "step": 3030 + }, + { + "ce_ib": 3.4432013034820557, + "ce_orig": 0.3043091297149658, + "epoch": 0.8716658278812279, + "kl_loss": 0.09147100150585175, + "loss_ib": 0.0012590300757437944, + "step": 3031 + }, + { + "ce_ib": 4.589301586151123, + "ce_orig": 0.9392631649971008, + "epoch": 0.8716658278812279, + "kl_loss": 0.07665561139583588, + "loss_ib": 0.0012254861649125814, + "step": 3031 + }, + { + "ce_ib": 4.073348045349121, + "ce_orig": 1.033167839050293, + "epoch": 0.8716658278812279, + "kl_loss": 0.04191748797893524, + "loss_ib": 0.0008265096694231033, + "step": 3031 + }, + { + "ce_ib": 2.634512186050415, + "ce_orig": 0.36541733145713806, + "epoch": 0.8716658278812279, + "kl_loss": 0.07045159488916397, + "loss_ib": 0.0009679670911282301, + "step": 3031 + }, + { + "ce_ib": 4.0325026512146, + "ce_orig": 0.9062567949295044, + "epoch": 0.8719534114602057, + "kl_loss": 0.04664456099271774, + "loss_ib": 0.000869695795699954, + "step": 3032 + }, + { + "ce_ib": 5.620802402496338, + "ce_orig": 1.2923916578292847, + "epoch": 0.8719534114602057, + "kl_loss": 0.05630149692296982, + "loss_ib": 0.0011250951793044806, + "step": 3032 + }, + { + "ce_ib": 3.8326632976531982, + "ce_orig": 0.6463671326637268, + "epoch": 0.8719534114602057, + "kl_loss": 0.08000146597623825, + "loss_ib": 0.0011832809541374445, + "step": 3032 + }, + { + "ce_ib": 6.26491641998291, + "ce_orig": 1.7884224653244019, + "epoch": 0.8719534114602057, + "kl_loss": 0.05807767063379288, + "loss_ib": 0.0012072683311998844, + "step": 3032 + }, + { + "ce_ib": 2.9468424320220947, + "ce_orig": 0.7760134339332581, + "epoch": 0.8722409950391833, + "kl_loss": 0.024828145280480385, + "loss_ib": 0.0005429656594060361, + "step": 3033 + }, + { + "ce_ib": 2.3515677452087402, + "ce_orig": 0.40274709463119507, + "epoch": 0.8722409950391833, + "kl_loss": 0.041496194899082184, + "loss_ib": 0.0006501187453977764, + "step": 3033 + }, + { + "ce_ib": 2.0868334770202637, + "ce_orig": 0.637847900390625, + "epoch": 0.8722409950391833, + "kl_loss": 0.03817892447113991, + "loss_ib": 0.0005904725985601544, + "step": 3033 + }, + { + "ce_ib": 3.384354829788208, + "ce_orig": 0.5171413421630859, + "epoch": 0.8722409950391833, + "kl_loss": 0.0709875300526619, + "loss_ib": 0.0010483107762411237, + "step": 3033 + }, + { + "ce_ib": 4.363737106323242, + "ce_orig": 1.3176956176757812, + "epoch": 0.8725285786181609, + "kl_loss": 0.07131441682577133, + "loss_ib": 0.0011495178332552314, + "step": 3034 + }, + { + "ce_ib": 4.68792200088501, + "ce_orig": 1.215984582901001, + "epoch": 0.8725285786181609, + "kl_loss": 0.059081096202135086, + "loss_ib": 0.0010596031788736582, + "step": 3034 + }, + { + "ce_ib": 5.412724018096924, + "ce_orig": 0.8969581127166748, + "epoch": 0.8725285786181609, + "kl_loss": 0.09446071088314056, + "loss_ib": 0.001485879416577518, + "step": 3034 + }, + { + "ce_ib": 2.3819618225097656, + "ce_orig": 0.535946249961853, + "epoch": 0.8725285786181609, + "kl_loss": 0.08705213665962219, + "loss_ib": 0.0011087175225839019, + "step": 3034 + }, + { + "epoch": 0.8728161621971385, + "grad_norm": 0.09226226806640625, + "learning_rate": 4.163384906400342e-05, + "loss": 0.8572, + "step": 3035 + }, + { + "ce_ib": 2.362743616104126, + "ce_orig": 0.7641963362693787, + "epoch": 0.8728161621971385, + "kl_loss": 0.04906555265188217, + "loss_ib": 0.0007269298657774925, + "step": 3035 + }, + { + "ce_ib": 4.175940990447998, + "ce_orig": 0.7240681648254395, + "epoch": 0.8728161621971385, + "kl_loss": 0.046695295721292496, + "loss_ib": 0.0008845470729283988, + "step": 3035 + }, + { + "ce_ib": 3.538627862930298, + "ce_orig": 0.8375080227851868, + "epoch": 0.8728161621971385, + "kl_loss": 0.06234923005104065, + "loss_ib": 0.0009773550555109978, + "step": 3035 + }, + { + "ce_ib": 1.8226248025894165, + "ce_orig": 0.40555340051651, + "epoch": 0.8728161621971385, + "kl_loss": 0.022671189159154892, + "loss_ib": 0.0004089743597432971, + "step": 3035 + }, + { + "ce_ib": 3.6233770847320557, + "ce_orig": 0.6199732422828674, + "epoch": 0.8731037457761162, + "kl_loss": 0.05353717505931854, + "loss_ib": 0.0008977093966677785, + "step": 3036 + }, + { + "ce_ib": 5.371979713439941, + "ce_orig": 1.3361318111419678, + "epoch": 0.8731037457761162, + "kl_loss": 0.051109492778778076, + "loss_ib": 0.0010482928482815623, + "step": 3036 + }, + { + "ce_ib": 3.248904228210449, + "ce_orig": 0.6621671915054321, + "epoch": 0.8731037457761162, + "kl_loss": 0.044665515422821045, + "loss_ib": 0.0007715456304140389, + "step": 3036 + }, + { + "ce_ib": 3.213974952697754, + "ce_orig": 0.8197899460792542, + "epoch": 0.8731037457761162, + "kl_loss": 0.035951584577560425, + "loss_ib": 0.0006809133337810636, + "step": 3036 + }, + { + "ce_ib": 2.724273681640625, + "ce_orig": 0.6194796562194824, + "epoch": 0.8733913293550938, + "kl_loss": 0.05731012672185898, + "loss_ib": 0.0008455286151729524, + "step": 3037 + }, + { + "ce_ib": 2.93475079536438, + "ce_orig": 0.7240334153175354, + "epoch": 0.8733913293550938, + "kl_loss": 0.05820850282907486, + "loss_ib": 0.000875560101121664, + "step": 3037 + }, + { + "ce_ib": 3.7062456607818604, + "ce_orig": 0.9303901791572571, + "epoch": 0.8733913293550938, + "kl_loss": 0.02795352414250374, + "loss_ib": 0.0006501597817987204, + "step": 3037 + }, + { + "ce_ib": 4.0406904220581055, + "ce_orig": 0.6367298364639282, + "epoch": 0.8733913293550938, + "kl_loss": 0.08906164765357971, + "loss_ib": 0.0012946855276823044, + "step": 3037 + }, + { + "ce_ib": 5.69645881652832, + "ce_orig": 1.3269823789596558, + "epoch": 0.8736789129340715, + "kl_loss": 0.07801228761672974, + "loss_ib": 0.0013497687177732587, + "step": 3038 + }, + { + "ce_ib": 4.91315221786499, + "ce_orig": 0.9240849614143372, + "epoch": 0.8736789129340715, + "kl_loss": 0.0502469539642334, + "loss_ib": 0.000993784749880433, + "step": 3038 + }, + { + "ce_ib": 4.71425199508667, + "ce_orig": 1.1215234994888306, + "epoch": 0.8736789129340715, + "kl_loss": 0.07514680922031403, + "loss_ib": 0.0012228932464495301, + "step": 3038 + }, + { + "ce_ib": 2.2314586639404297, + "ce_orig": 0.5114004015922546, + "epoch": 0.8736789129340715, + "kl_loss": 0.06401588022708893, + "loss_ib": 0.0008633046527393162, + "step": 3038 + }, + { + "ce_ib": 3.9771041870117188, + "ce_orig": 0.7070015072822571, + "epoch": 0.8739664965130491, + "kl_loss": 0.08189839124679565, + "loss_ib": 0.0012166943633928895, + "step": 3039 + }, + { + "ce_ib": 3.3790035247802734, + "ce_orig": 0.7458550930023193, + "epoch": 0.8739664965130491, + "kl_loss": 0.05828925222158432, + "loss_ib": 0.0009207928087562323, + "step": 3039 + }, + { + "ce_ib": 2.9742698669433594, + "ce_orig": 0.8789253830909729, + "epoch": 0.8739664965130491, + "kl_loss": 0.03574984893202782, + "loss_ib": 0.0006549254758283496, + "step": 3039 + }, + { + "ce_ib": 4.410493850708008, + "ce_orig": 1.0149390697479248, + "epoch": 0.8739664965130491, + "kl_loss": 0.05947680026292801, + "loss_ib": 0.0010358174331486225, + "step": 3039 + }, + { + "epoch": 0.8742540800920268, + "grad_norm": 0.0857730284333229, + "learning_rate": 4.160486057250849e-05, + "loss": 0.9135, + "step": 3040 + }, + { + "ce_ib": 4.240890979766846, + "ce_orig": 0.7588217854499817, + "epoch": 0.8742540800920268, + "kl_loss": 0.04871433973312378, + "loss_ib": 0.0009112324914894998, + "step": 3040 + }, + { + "ce_ib": 3.1971213817596436, + "ce_orig": 0.8301436901092529, + "epoch": 0.8742540800920268, + "kl_loss": 0.051534056663513184, + "loss_ib": 0.0008350526914000511, + "step": 3040 + }, + { + "ce_ib": 4.459492206573486, + "ce_orig": 1.2080024480819702, + "epoch": 0.8742540800920268, + "kl_loss": 0.060156553983688354, + "loss_ib": 0.001047514728270471, + "step": 3040 + }, + { + "ce_ib": 4.2052435874938965, + "ce_orig": 0.7749762535095215, + "epoch": 0.8742540800920268, + "kl_loss": 0.06630492210388184, + "loss_ib": 0.0010835735592991114, + "step": 3040 + }, + { + "ce_ib": 5.529706001281738, + "ce_orig": 0.9863408207893372, + "epoch": 0.8745416636710044, + "kl_loss": 0.07703940570354462, + "loss_ib": 0.0013233646750450134, + "step": 3041 + }, + { + "ce_ib": 4.365805149078369, + "ce_orig": 0.9170456528663635, + "epoch": 0.8745416636710044, + "kl_loss": 0.11902455985546112, + "loss_ib": 0.0016268260078504682, + "step": 3041 + }, + { + "ce_ib": 3.6262929439544678, + "ce_orig": 0.6525526642799377, + "epoch": 0.8745416636710044, + "kl_loss": 0.05030880495905876, + "loss_ib": 0.0008657173602841794, + "step": 3041 + }, + { + "ce_ib": 4.020951271057129, + "ce_orig": 0.8528340458869934, + "epoch": 0.8745416636710044, + "kl_loss": 0.08279037475585938, + "loss_ib": 0.0012299988884478807, + "step": 3041 + }, + { + "ce_ib": 3.485962390899658, + "ce_orig": 0.8603911995887756, + "epoch": 0.874829247249982, + "kl_loss": 0.06153877079486847, + "loss_ib": 0.0009639839408919215, + "step": 3042 + }, + { + "ce_ib": 4.321264743804932, + "ce_orig": 1.079668641090393, + "epoch": 0.874829247249982, + "kl_loss": 0.06323026120662689, + "loss_ib": 0.001064429059624672, + "step": 3042 + }, + { + "ce_ib": 4.145527362823486, + "ce_orig": 0.4186329245567322, + "epoch": 0.874829247249982, + "kl_loss": 0.15568095445632935, + "loss_ib": 0.0019713621586561203, + "step": 3042 + }, + { + "ce_ib": 3.9588382244110107, + "ce_orig": 1.0882834196090698, + "epoch": 0.874829247249982, + "kl_loss": 0.06215454638004303, + "loss_ib": 0.0010174291674047709, + "step": 3042 + }, + { + "ce_ib": 4.844738960266113, + "ce_orig": 1.2567933797836304, + "epoch": 0.8751168308289596, + "kl_loss": 0.06866495311260223, + "loss_ib": 0.0011711233528330922, + "step": 3043 + }, + { + "ce_ib": 2.718783378601074, + "ce_orig": 0.5846661329269409, + "epoch": 0.8751168308289596, + "kl_loss": 0.06460059434175491, + "loss_ib": 0.0009178842301480472, + "step": 3043 + }, + { + "ce_ib": 4.347616195678711, + "ce_orig": 0.8651480078697205, + "epoch": 0.8751168308289596, + "kl_loss": 0.060267768800258636, + "loss_ib": 0.0010374392149969935, + "step": 3043 + }, + { + "ce_ib": 5.006418228149414, + "ce_orig": 1.5522030591964722, + "epoch": 0.8751168308289596, + "kl_loss": 0.04694080352783203, + "loss_ib": 0.000970049761235714, + "step": 3043 + }, + { + "ce_ib": 5.172334671020508, + "ce_orig": 1.166685700416565, + "epoch": 0.8754044144079373, + "kl_loss": 0.06300091743469238, + "loss_ib": 0.0011472426122054458, + "step": 3044 + }, + { + "ce_ib": 3.5440165996551514, + "ce_orig": 0.9471510648727417, + "epoch": 0.8754044144079373, + "kl_loss": 0.05706283822655678, + "loss_ib": 0.0009250300354324281, + "step": 3044 + }, + { + "ce_ib": 5.606021881103516, + "ce_orig": 1.2630892992019653, + "epoch": 0.8754044144079373, + "kl_loss": 0.046994347125291824, + "loss_ib": 0.0010305456817150116, + "step": 3044 + }, + { + "ce_ib": 1.0784111022949219, + "ce_orig": 0.18286041915416718, + "epoch": 0.8754044144079373, + "kl_loss": 0.0918320044875145, + "loss_ib": 0.0010261611314490438, + "step": 3044 + }, + { + "epoch": 0.875691997986915, + "grad_norm": 0.09032389521598816, + "learning_rate": 4.157583207605037e-05, + "loss": 0.8165, + "step": 3045 + }, + { + "ce_ib": 3.6121065616607666, + "ce_orig": 0.9087159037590027, + "epoch": 0.875691997986915, + "kl_loss": 0.05671244114637375, + "loss_ib": 0.0009283350082114339, + "step": 3045 + }, + { + "ce_ib": 2.5780975818634033, + "ce_orig": 0.5338592529296875, + "epoch": 0.875691997986915, + "kl_loss": 0.03111054189503193, + "loss_ib": 0.0005689151585102081, + "step": 3045 + }, + { + "ce_ib": 2.8569962978363037, + "ce_orig": 0.6468030214309692, + "epoch": 0.875691997986915, + "kl_loss": 0.03632103651762009, + "loss_ib": 0.0006489099469035864, + "step": 3045 + }, + { + "ce_ib": 3.0881457328796387, + "ce_orig": 0.777762234210968, + "epoch": 0.875691997986915, + "kl_loss": 0.06741449236869812, + "loss_ib": 0.00098295952193439, + "step": 3045 + }, + { + "ce_ib": 6.4278788566589355, + "ce_orig": 1.5587477684020996, + "epoch": 0.8759795815658926, + "kl_loss": 0.05247354507446289, + "loss_ib": 0.001167523325420916, + "step": 3046 + }, + { + "ce_ib": 3.7064201831817627, + "ce_orig": 0.7229231595993042, + "epoch": 0.8759795815658926, + "kl_loss": 0.0685151070356369, + "loss_ib": 0.0010557930218055844, + "step": 3046 + }, + { + "ce_ib": 4.363445281982422, + "ce_orig": 1.2037345170974731, + "epoch": 0.8759795815658926, + "kl_loss": 0.03933858126401901, + "loss_ib": 0.0008297302993014455, + "step": 3046 + }, + { + "ce_ib": 2.0543854236602783, + "ce_orig": 0.5460901260375977, + "epoch": 0.8759795815658926, + "kl_loss": 0.05675424635410309, + "loss_ib": 0.000772981031332165, + "step": 3046 + }, + { + "ce_ib": 4.537381172180176, + "ce_orig": 1.175857663154602, + "epoch": 0.8762671651448702, + "kl_loss": 0.0501803494989872, + "loss_ib": 0.0009555415599606931, + "step": 3047 + }, + { + "ce_ib": 2.2568626403808594, + "ce_orig": 0.6080323457717896, + "epoch": 0.8762671651448702, + "kl_loss": 0.047414667904376984, + "loss_ib": 0.0006998329190537333, + "step": 3047 + }, + { + "ce_ib": 3.7346878051757812, + "ce_orig": 0.9498841166496277, + "epoch": 0.8762671651448702, + "kl_loss": 0.04159918427467346, + "loss_ib": 0.0007894606096670032, + "step": 3047 + }, + { + "ce_ib": 3.392367124557495, + "ce_orig": 0.7885263562202454, + "epoch": 0.8762671651448702, + "kl_loss": 0.05118507146835327, + "loss_ib": 0.0008510873885825276, + "step": 3047 + }, + { + "ce_ib": 2.82456374168396, + "ce_orig": 0.655381977558136, + "epoch": 0.8765547487238479, + "kl_loss": 0.04193998873233795, + "loss_ib": 0.0007018562173470855, + "step": 3048 + }, + { + "ce_ib": 1.7260265350341797, + "ce_orig": 0.3795928657054901, + "epoch": 0.8765547487238479, + "kl_loss": 0.06374390423297882, + "loss_ib": 0.0008100417326204479, + "step": 3048 + }, + { + "ce_ib": 3.6598238945007324, + "ce_orig": 0.8279062509536743, + "epoch": 0.8765547487238479, + "kl_loss": 0.05107719451189041, + "loss_ib": 0.0008767543477006257, + "step": 3048 + }, + { + "ce_ib": 7.593876361846924, + "ce_orig": 1.97532057762146, + "epoch": 0.8765547487238479, + "kl_loss": 0.07618924230337143, + "loss_ib": 0.0015212800353765488, + "step": 3048 + }, + { + "ce_ib": 3.2138211727142334, + "ce_orig": 0.8238239288330078, + "epoch": 0.8768423323028255, + "kl_loss": 0.05567244812846184, + "loss_ib": 0.0008781065698713064, + "step": 3049 + }, + { + "ce_ib": 4.208319187164307, + "ce_orig": 1.1852630376815796, + "epoch": 0.8768423323028255, + "kl_loss": 0.038251396268606186, + "loss_ib": 0.0008033458725549281, + "step": 3049 + }, + { + "ce_ib": 3.6587562561035156, + "ce_orig": 0.8490967154502869, + "epoch": 0.8768423323028255, + "kl_loss": 0.04903629049658775, + "loss_ib": 0.0008562385337427258, + "step": 3049 + }, + { + "ce_ib": 5.998356342315674, + "ce_orig": 1.445742130279541, + "epoch": 0.8768423323028255, + "kl_loss": 0.06361766159534454, + "loss_ib": 0.0012360122054815292, + "step": 3049 + }, + { + "epoch": 0.8771299158818031, + "grad_norm": 0.09761546552181244, + "learning_rate": 4.154676364456544e-05, + "loss": 0.9177, + "step": 3050 + }, + { + "ce_ib": 2.8841614723205566, + "ce_orig": 0.7309339642524719, + "epoch": 0.8771299158818031, + "kl_loss": 0.07358191907405853, + "loss_ib": 0.0010242352727800608, + "step": 3050 + }, + { + "ce_ib": 3.5519721508026123, + "ce_orig": 0.4634748697280884, + "epoch": 0.8771299158818031, + "kl_loss": 0.06773003935813904, + "loss_ib": 0.0010324976174160838, + "step": 3050 + }, + { + "ce_ib": 3.2356150150299072, + "ce_orig": 0.6141772866249084, + "epoch": 0.8771299158818031, + "kl_loss": 0.07871788740158081, + "loss_ib": 0.0011107403552159667, + "step": 3050 + }, + { + "ce_ib": 3.8109548091888428, + "ce_orig": 0.6967995166778564, + "epoch": 0.8771299158818031, + "kl_loss": 0.07355694472789764, + "loss_ib": 0.001116664963774383, + "step": 3050 + }, + { + "ce_ib": 2.6374783515930176, + "ce_orig": 0.8139725923538208, + "epoch": 0.8774174994607807, + "kl_loss": 0.03366963565349579, + "loss_ib": 0.0006004441529512405, + "step": 3051 + }, + { + "ce_ib": 3.3819944858551025, + "ce_orig": 0.6251212358474731, + "epoch": 0.8774174994607807, + "kl_loss": 0.0764649361371994, + "loss_ib": 0.0011028487933799624, + "step": 3051 + }, + { + "ce_ib": 4.243565559387207, + "ce_orig": 0.8859913349151611, + "epoch": 0.8774174994607807, + "kl_loss": 0.06768712401390076, + "loss_ib": 0.0011012277100235224, + "step": 3051 + }, + { + "ce_ib": 5.836120128631592, + "ce_orig": 1.3498414754867554, + "epoch": 0.8774174994607807, + "kl_loss": 0.08269160985946655, + "loss_ib": 0.0014105280861258507, + "step": 3051 + }, + { + "ce_ib": 1.817089557647705, + "ce_orig": 0.37567129731178284, + "epoch": 0.8777050830397585, + "kl_loss": 0.0464714840054512, + "loss_ib": 0.0006464237812906504, + "step": 3052 + }, + { + "ce_ib": 2.8028926849365234, + "ce_orig": 0.5191061496734619, + "epoch": 0.8777050830397585, + "kl_loss": 0.07179248332977295, + "loss_ib": 0.0009982141200453043, + "step": 3052 + }, + { + "ce_ib": 4.315255165100098, + "ce_orig": 1.160775899887085, + "epoch": 0.8777050830397585, + "kl_loss": 0.07664286345243454, + "loss_ib": 0.0011979540577158332, + "step": 3052 + }, + { + "ce_ib": 5.708582878112793, + "ce_orig": 1.4109126329421997, + "epoch": 0.8777050830397585, + "kl_loss": 0.045472800731658936, + "loss_ib": 0.0010255862725898623, + "step": 3052 + }, + { + "ce_ib": 3.616373062133789, + "ce_orig": 0.4883316159248352, + "epoch": 0.8779926666187361, + "kl_loss": 0.07671181857585907, + "loss_ib": 0.001128755509853363, + "step": 3053 + }, + { + "ce_ib": 5.84367561340332, + "ce_orig": 1.5641967058181763, + "epoch": 0.8779926666187361, + "kl_loss": 0.07097028940916061, + "loss_ib": 0.0012940705055370927, + "step": 3053 + }, + { + "ce_ib": 2.670637369155884, + "ce_orig": 0.524451732635498, + "epoch": 0.8779926666187361, + "kl_loss": 0.03846345841884613, + "loss_ib": 0.0006516983266919851, + "step": 3053 + }, + { + "ce_ib": 4.310855865478516, + "ce_orig": 0.6918860077857971, + "epoch": 0.8779926666187361, + "kl_loss": 0.054435133934020996, + "loss_ib": 0.0009754368220455945, + "step": 3053 + }, + { + "ce_ib": 2.5992040634155273, + "ce_orig": 0.4153679609298706, + "epoch": 0.8782802501977137, + "kl_loss": 0.05874170362949371, + "loss_ib": 0.0008473373600281775, + "step": 3054 + }, + { + "ce_ib": 2.6378042697906494, + "ce_orig": 0.6904326677322388, + "epoch": 0.8782802501977137, + "kl_loss": 0.060293134301900864, + "loss_ib": 0.0008667117217555642, + "step": 3054 + }, + { + "ce_ib": 3.8863046169281006, + "ce_orig": 0.8231661915779114, + "epoch": 0.8782802501977137, + "kl_loss": 0.09268218278884888, + "loss_ib": 0.0013154522748664021, + "step": 3054 + }, + { + "ce_ib": 3.290699005126953, + "ce_orig": 0.9287217855453491, + "epoch": 0.8782802501977137, + "kl_loss": 0.053410228341817856, + "loss_ib": 0.0008631721721030772, + "step": 3054 + }, + { + "epoch": 0.8785678337766913, + "grad_norm": 0.08724824339151382, + "learning_rate": 4.151765534808631e-05, + "loss": 0.8542, + "step": 3055 + }, + { + "ce_ib": 3.726032257080078, + "ce_orig": 0.6379972100257874, + "epoch": 0.8785678337766913, + "kl_loss": 0.08251959085464478, + "loss_ib": 0.0011977991089224815, + "step": 3055 + }, + { + "ce_ib": 3.6325926780700684, + "ce_orig": 0.6194993853569031, + "epoch": 0.8785678337766913, + "kl_loss": 0.04931867495179176, + "loss_ib": 0.0008564459858462214, + "step": 3055 + }, + { + "ce_ib": 4.281033992767334, + "ce_orig": 0.5591267943382263, + "epoch": 0.8785678337766913, + "kl_loss": 0.06353098154067993, + "loss_ib": 0.0010634131031110883, + "step": 3055 + }, + { + "ce_ib": 4.254394054412842, + "ce_orig": 1.2768322229385376, + "epoch": 0.8785678337766913, + "kl_loss": 0.04774881899356842, + "loss_ib": 0.000902927597053349, + "step": 3055 + }, + { + "ce_ib": 4.276079177856445, + "ce_orig": 1.1044349670410156, + "epoch": 0.878855417355669, + "kl_loss": 0.07234673947095871, + "loss_ib": 0.0011510752374306321, + "step": 3056 + }, + { + "ce_ib": 2.624912738800049, + "ce_orig": 0.5454309582710266, + "epoch": 0.878855417355669, + "kl_loss": 0.07001595199108124, + "loss_ib": 0.0009626507526263595, + "step": 3056 + }, + { + "ce_ib": 4.306847095489502, + "ce_orig": 1.176635980606079, + "epoch": 0.878855417355669, + "kl_loss": 0.04698040336370468, + "loss_ib": 0.0009004886960610747, + "step": 3056 + }, + { + "ce_ib": 2.955036163330078, + "ce_orig": 0.763623833656311, + "epoch": 0.878855417355669, + "kl_loss": 0.03756558895111084, + "loss_ib": 0.0006711594760417938, + "step": 3056 + }, + { + "ce_ib": 3.6143481731414795, + "ce_orig": 1.017423152923584, + "epoch": 0.8791430009346466, + "kl_loss": 0.04413586109876633, + "loss_ib": 0.0008027934236451983, + "step": 3057 + }, + { + "ce_ib": 2.785132646560669, + "ce_orig": 0.7608103156089783, + "epoch": 0.8791430009346466, + "kl_loss": 0.07180427014827728, + "loss_ib": 0.0009965560166165233, + "step": 3057 + }, + { + "ce_ib": 4.367329120635986, + "ce_orig": 1.074489951133728, + "epoch": 0.8791430009346466, + "kl_loss": 0.05137583613395691, + "loss_ib": 0.000950491230469197, + "step": 3057 + }, + { + "ce_ib": 6.7257890701293945, + "ce_orig": 1.7895921468734741, + "epoch": 0.8791430009346466, + "kl_loss": 0.0667400136590004, + "loss_ib": 0.0013399790041148663, + "step": 3057 + }, + { + "ce_ib": 5.994304180145264, + "ce_orig": 1.7054378986358643, + "epoch": 0.8794305845136243, + "kl_loss": 0.13116714358329773, + "loss_ib": 0.0019111017463728786, + "step": 3058 + }, + { + "ce_ib": 3.0609240531921387, + "ce_orig": 0.7215198278427124, + "epoch": 0.8794305845136243, + "kl_loss": 0.033866092562675476, + "loss_ib": 0.0006447533378377557, + "step": 3058 + }, + { + "ce_ib": 2.987257957458496, + "ce_orig": 0.4753538966178894, + "epoch": 0.8794305845136243, + "kl_loss": 0.06719491630792618, + "loss_ib": 0.0009706749697215855, + "step": 3058 + }, + { + "ce_ib": 3.076185464859009, + "ce_orig": 0.6002035140991211, + "epoch": 0.8794305845136243, + "kl_loss": 0.05204608291387558, + "loss_ib": 0.0008280793554149568, + "step": 3058 + }, + { + "ce_ib": 5.354484558105469, + "ce_orig": 0.8511850833892822, + "epoch": 0.879718168092602, + "kl_loss": 0.07161868363618851, + "loss_ib": 0.0012516352580860257, + "step": 3059 + }, + { + "ce_ib": 3.4945948123931885, + "ce_orig": 0.7552092671394348, + "epoch": 0.879718168092602, + "kl_loss": 0.06746328622102737, + "loss_ib": 0.0010240923147648573, + "step": 3059 + }, + { + "ce_ib": 2.278172016143799, + "ce_orig": 0.46571439504623413, + "epoch": 0.879718168092602, + "kl_loss": 0.13329647481441498, + "loss_ib": 0.0015607818495482206, + "step": 3059 + }, + { + "ce_ib": 4.705671787261963, + "ce_orig": 1.5325226783752441, + "epoch": 0.879718168092602, + "kl_loss": 0.07964209467172623, + "loss_ib": 0.00126698799431324, + "step": 3059 + }, + { + "epoch": 0.8800057516715796, + "grad_norm": 0.09498482942581177, + "learning_rate": 4.1488507256741616e-05, + "loss": 0.8308, + "step": 3060 + }, + { + "ce_ib": 2.3468968868255615, + "ce_orig": 0.6076623201370239, + "epoch": 0.8800057516715796, + "kl_loss": 0.02595784328877926, + "loss_ib": 0.0004942680825479329, + "step": 3060 + }, + { + "ce_ib": 2.419285297393799, + "ce_orig": 0.5661544799804688, + "epoch": 0.8800057516715796, + "kl_loss": 0.033144548535346985, + "loss_ib": 0.0005733740399591625, + "step": 3060 + }, + { + "ce_ib": 4.676966667175293, + "ce_orig": 1.1717473268508911, + "epoch": 0.8800057516715796, + "kl_loss": 0.04088406264781952, + "loss_ib": 0.0008765372331254184, + "step": 3060 + }, + { + "ce_ib": 4.168657302856445, + "ce_orig": 0.8232080936431885, + "epoch": 0.8800057516715796, + "kl_loss": 0.05503355711698532, + "loss_ib": 0.0009672012529335916, + "step": 3060 + }, + { + "ce_ib": 4.3852691650390625, + "ce_orig": 1.155313491821289, + "epoch": 0.8802933352505572, + "kl_loss": 0.05852900445461273, + "loss_ib": 0.0010238168761134148, + "step": 3061 + }, + { + "ce_ib": 6.399803161621094, + "ce_orig": 1.7036772966384888, + "epoch": 0.8802933352505572, + "kl_loss": 0.06566270440816879, + "loss_ib": 0.0012966071953997016, + "step": 3061 + }, + { + "ce_ib": 4.838980674743652, + "ce_orig": 1.291905164718628, + "epoch": 0.8802933352505572, + "kl_loss": 0.06370104104280472, + "loss_ib": 0.0011209084186702967, + "step": 3061 + }, + { + "ce_ib": 4.663456916809082, + "ce_orig": 0.7586646676063538, + "epoch": 0.8802933352505572, + "kl_loss": 0.07372988015413284, + "loss_ib": 0.0012036444386467338, + "step": 3061 + }, + { + "ce_ib": 6.924431324005127, + "ce_orig": 1.6397757530212402, + "epoch": 0.8805809188295348, + "kl_loss": 0.21976317465305328, + "loss_ib": 0.002890074858441949, + "step": 3062 + }, + { + "ce_ib": 4.137187957763672, + "ce_orig": 1.0608471632003784, + "epoch": 0.8805809188295348, + "kl_loss": 0.03794332966208458, + "loss_ib": 0.0007931520813144743, + "step": 3062 + }, + { + "ce_ib": 2.4969046115875244, + "ce_orig": 0.5204461216926575, + "epoch": 0.8805809188295348, + "kl_loss": 0.047493476420640945, + "loss_ib": 0.0007246251916512847, + "step": 3062 + }, + { + "ce_ib": 3.9802961349487305, + "ce_orig": 0.9217489957809448, + "epoch": 0.8805809188295348, + "kl_loss": 0.030410822480916977, + "loss_ib": 0.0007021377678029239, + "step": 3062 + }, + { + "ce_ib": 4.435883045196533, + "ce_orig": 0.9873637557029724, + "epoch": 0.8808685024085124, + "kl_loss": 0.06961430609226227, + "loss_ib": 0.00113973137922585, + "step": 3063 + }, + { + "ce_ib": 3.0502893924713135, + "ce_orig": 0.6008862257003784, + "epoch": 0.8808685024085124, + "kl_loss": 0.05188274383544922, + "loss_ib": 0.0008238563314080238, + "step": 3063 + }, + { + "ce_ib": 4.733911037445068, + "ce_orig": 1.156371831893921, + "epoch": 0.8808685024085124, + "kl_loss": 0.05748114734888077, + "loss_ib": 0.0010482025099918246, + "step": 3063 + }, + { + "ce_ib": 4.374758720397949, + "ce_orig": 1.585612416267395, + "epoch": 0.8808685024085124, + "kl_loss": 0.04572092369198799, + "loss_ib": 0.0008946850430220366, + "step": 3063 + }, + { + "ce_ib": 3.1315276622772217, + "ce_orig": 0.43690547347068787, + "epoch": 0.8811560859874901, + "kl_loss": 0.09355521202087402, + "loss_ib": 0.0012487048516049981, + "step": 3064 + }, + { + "ce_ib": 3.22348690032959, + "ce_orig": 0.6828880906105042, + "epoch": 0.8811560859874901, + "kl_loss": 0.04183443635702133, + "loss_ib": 0.0007406930672004819, + "step": 3064 + }, + { + "ce_ib": 3.852661609649658, + "ce_orig": 1.2687416076660156, + "epoch": 0.8811560859874901, + "kl_loss": 0.09049941599369049, + "loss_ib": 0.0012902602320536971, + "step": 3064 + }, + { + "ce_ib": 2.6945083141326904, + "ce_orig": 0.7320135831832886, + "epoch": 0.8811560859874901, + "kl_loss": 0.047835174947977066, + "loss_ib": 0.0007478025509044528, + "step": 3064 + }, + { + "epoch": 0.8814436695664678, + "grad_norm": 0.09316390007734299, + "learning_rate": 4.1459319440755874e-05, + "loss": 0.9298, + "step": 3065 + }, + { + "ce_ib": 4.918934345245361, + "ce_orig": 1.5688203573226929, + "epoch": 0.8814436695664678, + "kl_loss": 0.0499064102768898, + "loss_ib": 0.0009909574873745441, + "step": 3065 + }, + { + "ce_ib": 2.5989797115325928, + "ce_orig": 0.4370891749858856, + "epoch": 0.8814436695664678, + "kl_loss": 0.04244489595293999, + "loss_ib": 0.0006843468872830272, + "step": 3065 + }, + { + "ce_ib": 2.463904619216919, + "ce_orig": 0.6710439324378967, + "epoch": 0.8814436695664678, + "kl_loss": 0.052073895931243896, + "loss_ib": 0.0007671294151805341, + "step": 3065 + }, + { + "ce_ib": 1.7625372409820557, + "ce_orig": 0.1874852329492569, + "epoch": 0.8814436695664678, + "kl_loss": 0.06300076842308044, + "loss_ib": 0.0008062614360824227, + "step": 3065 + }, + { + "ce_ib": 3.204744577407837, + "ce_orig": 0.6976271271705627, + "epoch": 0.8817312531454454, + "kl_loss": 0.05472471937537193, + "loss_ib": 0.0008677216246724129, + "step": 3066 + }, + { + "ce_ib": 2.3256490230560303, + "ce_orig": 0.6216291189193726, + "epoch": 0.8817312531454454, + "kl_loss": 0.04651197791099548, + "loss_ib": 0.0006976846489124, + "step": 3066 + }, + { + "ce_ib": 3.4793403148651123, + "ce_orig": 0.9265792369842529, + "epoch": 0.8817312531454454, + "kl_loss": 0.05933905020356178, + "loss_ib": 0.0009413245134055614, + "step": 3066 + }, + { + "ce_ib": 5.032183647155762, + "ce_orig": 1.5062997341156006, + "epoch": 0.8817312531454454, + "kl_loss": 0.041171230375766754, + "loss_ib": 0.000914930657017976, + "step": 3066 + }, + { + "ce_ib": 4.938384532928467, + "ce_orig": 1.0640138387680054, + "epoch": 0.882018836724423, + "kl_loss": 0.04182198643684387, + "loss_ib": 0.0009120583417825401, + "step": 3067 + }, + { + "ce_ib": 4.427870273590088, + "ce_orig": 0.6096996665000916, + "epoch": 0.882018836724423, + "kl_loss": 0.07714278995990753, + "loss_ib": 0.0012142148334532976, + "step": 3067 + }, + { + "ce_ib": 5.013279438018799, + "ce_orig": 1.3158180713653564, + "epoch": 0.882018836724423, + "kl_loss": 0.0625113695859909, + "loss_ib": 0.0011264416389167309, + "step": 3067 + }, + { + "ce_ib": 4.047378063201904, + "ce_orig": 0.7863109707832336, + "epoch": 0.882018836724423, + "kl_loss": 0.08017318695783615, + "loss_ib": 0.0012064696056768298, + "step": 3067 + }, + { + "ce_ib": 3.393800735473633, + "ce_orig": 0.9163536429405212, + "epoch": 0.8823064203034007, + "kl_loss": 0.034307993948459625, + "loss_ib": 0.0006824599695391953, + "step": 3068 + }, + { + "ce_ib": 3.7618565559387207, + "ce_orig": 0.3377809524536133, + "epoch": 0.8823064203034007, + "kl_loss": 0.07530075311660767, + "loss_ib": 0.0011291931150481105, + "step": 3068 + }, + { + "ce_ib": 5.553350925445557, + "ce_orig": 1.3855549097061157, + "epoch": 0.8823064203034007, + "kl_loss": 0.0637989193201065, + "loss_ib": 0.0011933243367820978, + "step": 3068 + }, + { + "ce_ib": 3.2134852409362793, + "ce_orig": 0.9682958722114563, + "epoch": 0.8823064203034007, + "kl_loss": 0.021197045221924782, + "loss_ib": 0.0005333189619705081, + "step": 3068 + }, + { + "ce_ib": 4.516556262969971, + "ce_orig": 1.0867679119110107, + "epoch": 0.8825940038823783, + "kl_loss": 0.03737124800682068, + "loss_ib": 0.0008253681007772684, + "step": 3069 + }, + { + "ce_ib": 2.4707112312316895, + "ce_orig": 0.7408323287963867, + "epoch": 0.8825940038823783, + "kl_loss": 0.03841211274266243, + "loss_ib": 0.0006311922334134579, + "step": 3069 + }, + { + "ce_ib": 3.478875160217285, + "ce_orig": 0.613751232624054, + "epoch": 0.8825940038823783, + "kl_loss": 0.09046702831983566, + "loss_ib": 0.0012525577330961823, + "step": 3069 + }, + { + "ce_ib": 2.671408176422119, + "ce_orig": 0.6149039268493652, + "epoch": 0.8825940038823783, + "kl_loss": 0.05801280587911606, + "loss_ib": 0.0008472688496112823, + "step": 3069 + }, + { + "epoch": 0.8828815874613559, + "grad_norm": 0.09493105858564377, + "learning_rate": 4.143009197044932e-05, + "loss": 0.8611, + "step": 3070 + }, + { + "ce_ib": 3.307145357131958, + "ce_orig": 0.6948019862174988, + "epoch": 0.8828815874613559, + "kl_loss": 0.03400977700948715, + "loss_ib": 0.000670812267344445, + "step": 3070 + }, + { + "ce_ib": 5.80410623550415, + "ce_orig": 1.0814772844314575, + "epoch": 0.8828815874613559, + "kl_loss": 0.07552941888570786, + "loss_ib": 0.0013357046991586685, + "step": 3070 + }, + { + "ce_ib": 3.989912986755371, + "ce_orig": 0.8752128481864929, + "epoch": 0.8828815874613559, + "kl_loss": 0.06486837565898895, + "loss_ib": 0.0010476750321686268, + "step": 3070 + }, + { + "ce_ib": 3.2335402965545654, + "ce_orig": 0.5372995138168335, + "epoch": 0.8828815874613559, + "kl_loss": 0.03791454806923866, + "loss_ib": 0.0007024994120001793, + "step": 3070 + }, + { + "ce_ib": 3.3100674152374268, + "ce_orig": 0.8304669260978699, + "epoch": 0.8831691710403335, + "kl_loss": 0.06419000029563904, + "loss_ib": 0.0009729067096486688, + "step": 3071 + }, + { + "ce_ib": 6.259768486022949, + "ce_orig": 1.3207340240478516, + "epoch": 0.8831691710403335, + "kl_loss": 0.057236019521951675, + "loss_ib": 0.0011983370641246438, + "step": 3071 + }, + { + "ce_ib": 6.0996294021606445, + "ce_orig": 1.3708442449569702, + "epoch": 0.8831691710403335, + "kl_loss": 0.10770608484745026, + "loss_ib": 0.001687023788690567, + "step": 3071 + }, + { + "ce_ib": 4.367895603179932, + "ce_orig": 1.1873950958251953, + "epoch": 0.8831691710403335, + "kl_loss": 0.05024849250912666, + "loss_ib": 0.0009392743813805282, + "step": 3071 + }, + { + "ce_ib": 2.3195626735687256, + "ce_orig": 0.5574880242347717, + "epoch": 0.8834567546193113, + "kl_loss": 0.02665264531970024, + "loss_ib": 0.0004984827246516943, + "step": 3072 + }, + { + "ce_ib": 3.6274924278259277, + "ce_orig": 0.8294886946678162, + "epoch": 0.8834567546193113, + "kl_loss": 0.07800397276878357, + "loss_ib": 0.0011427889112383127, + "step": 3072 + }, + { + "ce_ib": 3.443638324737549, + "ce_orig": 1.0734453201293945, + "epoch": 0.8834567546193113, + "kl_loss": 0.08599592745304108, + "loss_ib": 0.001204323023557663, + "step": 3072 + }, + { + "ce_ib": 3.354410409927368, + "ce_orig": 0.884476900100708, + "epoch": 0.8834567546193113, + "kl_loss": 0.04340103268623352, + "loss_ib": 0.0007694513769820333, + "step": 3072 + }, + { + "ce_ib": 2.1817398071289062, + "ce_orig": 0.4625023305416107, + "epoch": 0.8837443381982889, + "kl_loss": 0.05343535542488098, + "loss_ib": 0.0007525275577791035, + "step": 3073 + }, + { + "ce_ib": 3.10603928565979, + "ce_orig": 0.5225088000297546, + "epoch": 0.8837443381982889, + "kl_loss": 0.06900450587272644, + "loss_ib": 0.0010006489465013146, + "step": 3073 + }, + { + "ce_ib": 1.8677008152008057, + "ce_orig": 0.2890453636646271, + "epoch": 0.8837443381982889, + "kl_loss": 0.022056162357330322, + "loss_ib": 0.00040733168134465814, + "step": 3073 + }, + { + "ce_ib": 2.5628650188446045, + "ce_orig": 0.6032301783561707, + "epoch": 0.8837443381982889, + "kl_loss": 0.12500128149986267, + "loss_ib": 0.0015062993625178933, + "step": 3073 + }, + { + "ce_ib": 5.85776948928833, + "ce_orig": 1.6045035123825073, + "epoch": 0.8840319217772665, + "kl_loss": 0.0828375369310379, + "loss_ib": 0.0014141523279249668, + "step": 3074 + }, + { + "ce_ib": 4.529620170593262, + "ce_orig": 0.9397972226142883, + "epoch": 0.8840319217772665, + "kl_loss": 0.0580056831240654, + "loss_ib": 0.001033018808811903, + "step": 3074 + }, + { + "ce_ib": 2.4378445148468018, + "ce_orig": 0.7013124823570251, + "epoch": 0.8840319217772665, + "kl_loss": 0.03034396469593048, + "loss_ib": 0.0005472240736708045, + "step": 3074 + }, + { + "ce_ib": 4.185871124267578, + "ce_orig": 1.1647168397903442, + "epoch": 0.8840319217772665, + "kl_loss": 0.04287175461649895, + "loss_ib": 0.0008473045891150832, + "step": 3074 + }, + { + "epoch": 0.8843195053562442, + "grad_norm": 0.10092701762914658, + "learning_rate": 4.140082491623769e-05, + "loss": 0.8244, + "step": 3075 + }, + { + "ce_ib": 4.112522602081299, + "ce_orig": 0.7560692429542542, + "epoch": 0.8843195053562442, + "kl_loss": 0.10430362820625305, + "loss_ib": 0.0014542884891852736, + "step": 3075 + }, + { + "ce_ib": 2.7202441692352295, + "ce_orig": 0.6688130497932434, + "epoch": 0.8843195053562442, + "kl_loss": 0.061113838106393814, + "loss_ib": 0.0008831627201288939, + "step": 3075 + }, + { + "ce_ib": 2.90793776512146, + "ce_orig": 0.9012512564659119, + "epoch": 0.8843195053562442, + "kl_loss": 0.04719175398349762, + "loss_ib": 0.0007627112790942192, + "step": 3075 + }, + { + "ce_ib": 2.87566876411438, + "ce_orig": 0.605985701084137, + "epoch": 0.8843195053562442, + "kl_loss": 0.04865656793117523, + "loss_ib": 0.0007741324952803552, + "step": 3075 + }, + { + "ce_ib": 3.939563512802124, + "ce_orig": 0.9586266279220581, + "epoch": 0.8846070889352218, + "kl_loss": 0.06390552222728729, + "loss_ib": 0.001033011474646628, + "step": 3076 + }, + { + "ce_ib": 4.0945515632629395, + "ce_orig": 0.9432970881462097, + "epoch": 0.8846070889352218, + "kl_loss": 0.06395664811134338, + "loss_ib": 0.001049021608196199, + "step": 3076 + }, + { + "ce_ib": 4.971469879150391, + "ce_orig": 1.0449191331863403, + "epoch": 0.8846070889352218, + "kl_loss": 0.06115651875734329, + "loss_ib": 0.0011087121674790978, + "step": 3076 + }, + { + "ce_ib": 4.476897716522217, + "ce_orig": 1.06281578540802, + "epoch": 0.8846070889352218, + "kl_loss": 0.060758862644433975, + "loss_ib": 0.0010552783496677876, + "step": 3076 + }, + { + "ce_ib": 4.744324684143066, + "ce_orig": 1.1853448152542114, + "epoch": 0.8848946725141994, + "kl_loss": 0.047390639781951904, + "loss_ib": 0.0009483387693762779, + "step": 3077 + }, + { + "ce_ib": 4.23925256729126, + "ce_orig": 0.8076894283294678, + "epoch": 0.8848946725141994, + "kl_loss": 0.0631374716758728, + "loss_ib": 0.0010552998865023255, + "step": 3077 + }, + { + "ce_ib": 5.363480567932129, + "ce_orig": 1.257943868637085, + "epoch": 0.8848946725141994, + "kl_loss": 0.06163496896624565, + "loss_ib": 0.001152697717770934, + "step": 3077 + }, + { + "ce_ib": 4.21160888671875, + "ce_orig": 1.237022042274475, + "epoch": 0.8848946725141994, + "kl_loss": 0.06602821499109268, + "loss_ib": 0.0010814429260790348, + "step": 3077 + }, + { + "ce_ib": 3.700315475463867, + "ce_orig": 0.5124292373657227, + "epoch": 0.8851822560931771, + "kl_loss": 0.08116751164197922, + "loss_ib": 0.001181706553325057, + "step": 3078 + }, + { + "ce_ib": 4.395101070404053, + "ce_orig": 0.9082584977149963, + "epoch": 0.8851822560931771, + "kl_loss": 0.10797211527824402, + "loss_ib": 0.0015192311257123947, + "step": 3078 + }, + { + "ce_ib": 2.993378162384033, + "ce_orig": 0.49869489669799805, + "epoch": 0.8851822560931771, + "kl_loss": 0.06690329313278198, + "loss_ib": 0.0009683707030490041, + "step": 3078 + }, + { + "ce_ib": 2.5452375411987305, + "ce_orig": 0.6342163681983948, + "epoch": 0.8851822560931771, + "kl_loss": 0.027852654457092285, + "loss_ib": 0.0005330502754077315, + "step": 3078 + }, + { + "ce_ib": 4.051046848297119, + "ce_orig": 0.874344527721405, + "epoch": 0.8854698396721548, + "kl_loss": 0.032871440052986145, + "loss_ib": 0.0007338190916925669, + "step": 3079 + }, + { + "ce_ib": 3.5140974521636963, + "ce_orig": 0.8797248601913452, + "epoch": 0.8854698396721548, + "kl_loss": 0.06447182595729828, + "loss_ib": 0.0009961279574781656, + "step": 3079 + }, + { + "ce_ib": 4.840360164642334, + "ce_orig": 1.351117730140686, + "epoch": 0.8854698396721548, + "kl_loss": 0.05569100379943848, + "loss_ib": 0.0010409461101517081, + "step": 3079 + }, + { + "ce_ib": 4.15227746963501, + "ce_orig": 0.7623774409294128, + "epoch": 0.8854698396721548, + "kl_loss": 0.07273386418819427, + "loss_ib": 0.0011425663251429796, + "step": 3079 + }, + { + "epoch": 0.8857574232511324, + "grad_norm": 0.09644320607185364, + "learning_rate": 4.137151834863213e-05, + "loss": 0.9176, + "step": 3080 + }, + { + "ce_ib": 3.838468074798584, + "ce_orig": 0.6734762191772461, + "epoch": 0.8857574232511324, + "kl_loss": 0.0921390950679779, + "loss_ib": 0.0013052376452833414, + "step": 3080 + }, + { + "ce_ib": 3.155815601348877, + "ce_orig": 0.5929871201515198, + "epoch": 0.8857574232511324, + "kl_loss": 0.05876976251602173, + "loss_ib": 0.0009032791713252664, + "step": 3080 + }, + { + "ce_ib": 5.035396099090576, + "ce_orig": 1.448481559753418, + "epoch": 0.8857574232511324, + "kl_loss": 0.03472777456045151, + "loss_ib": 0.0008508173050358891, + "step": 3080 + }, + { + "ce_ib": 2.296297788619995, + "ce_orig": 0.6808394193649292, + "epoch": 0.8857574232511324, + "kl_loss": 0.05631621927022934, + "loss_ib": 0.0007927919505164027, + "step": 3080 + }, + { + "ce_ib": 3.28546404838562, + "ce_orig": 0.8424068689346313, + "epoch": 0.88604500683011, + "kl_loss": 0.04168383777141571, + "loss_ib": 0.0007453847792930901, + "step": 3081 + }, + { + "ce_ib": 4.4037675857543945, + "ce_orig": 1.4730380773544312, + "epoch": 0.88604500683011, + "kl_loss": 0.06296565383672714, + "loss_ib": 0.0010700332932174206, + "step": 3081 + }, + { + "ce_ib": 3.218494176864624, + "ce_orig": 0.6250332593917847, + "epoch": 0.88604500683011, + "kl_loss": 0.06739696860313416, + "loss_ib": 0.0009958191076293588, + "step": 3081 + }, + { + "ce_ib": 4.823512554168701, + "ce_orig": 1.4663877487182617, + "epoch": 0.88604500683011, + "kl_loss": 0.06394913792610168, + "loss_ib": 0.0011218426516279578, + "step": 3081 + }, + { + "ce_ib": 4.54909086227417, + "ce_orig": 1.0545753240585327, + "epoch": 0.8863325904090876, + "kl_loss": 0.04879367724061012, + "loss_ib": 0.0009428458288311958, + "step": 3082 + }, + { + "ce_ib": 1.9665279388427734, + "ce_orig": 0.2548412084579468, + "epoch": 0.8863325904090876, + "kl_loss": 0.07801040261983871, + "loss_ib": 0.0009767567971721292, + "step": 3082 + }, + { + "ce_ib": 5.186450958251953, + "ce_orig": 0.906235933303833, + "epoch": 0.8863325904090876, + "kl_loss": 0.05571763217449188, + "loss_ib": 0.001075821346603334, + "step": 3082 + }, + { + "ce_ib": 4.499849796295166, + "ce_orig": 0.9672443866729736, + "epoch": 0.8863325904090876, + "kl_loss": 0.04439030587673187, + "loss_ib": 0.0008938880055211484, + "step": 3082 + }, + { + "ce_ib": 3.2151832580566406, + "ce_orig": 0.7592018246650696, + "epoch": 0.8866201739880653, + "kl_loss": 0.037674643099308014, + "loss_ib": 0.0006982647464610636, + "step": 3083 + }, + { + "ce_ib": 3.3683090209960938, + "ce_orig": 0.7551714181900024, + "epoch": 0.8866201739880653, + "kl_loss": 0.04517515003681183, + "loss_ib": 0.0007885823724791408, + "step": 3083 + }, + { + "ce_ib": 3.204347610473633, + "ce_orig": 0.7649196982383728, + "epoch": 0.8866201739880653, + "kl_loss": 0.034711211919784546, + "loss_ib": 0.0006675468175671995, + "step": 3083 + }, + { + "ce_ib": 2.8987834453582764, + "ce_orig": 0.7898060083389282, + "epoch": 0.8866201739880653, + "kl_loss": 0.039442647248506546, + "loss_ib": 0.0006843048031441867, + "step": 3083 + }, + { + "ce_ib": 3.3794026374816895, + "ce_orig": 0.8094468116760254, + "epoch": 0.8869077575670429, + "kl_loss": 0.07786611467599869, + "loss_ib": 0.0011166014010086656, + "step": 3084 + }, + { + "ce_ib": 2.1859962940216064, + "ce_orig": 0.48695603013038635, + "epoch": 0.8869077575670429, + "kl_loss": 0.021699387580156326, + "loss_ib": 0.0004355934797786176, + "step": 3084 + }, + { + "ce_ib": 2.6382596492767334, + "ce_orig": 0.5083096027374268, + "epoch": 0.8869077575670429, + "kl_loss": 0.07398292422294617, + "loss_ib": 0.0010036551393568516, + "step": 3084 + }, + { + "ce_ib": 3.7201955318450928, + "ce_orig": 0.8720616102218628, + "epoch": 0.8869077575670429, + "kl_loss": 0.0740409716963768, + "loss_ib": 0.0011124293087050319, + "step": 3084 + }, + { + "epoch": 0.8871953411460206, + "grad_norm": 0.10996893793344498, + "learning_rate": 4.134217233823896e-05, + "loss": 0.8756, + "step": 3085 + }, + { + "ce_ib": 4.171865463256836, + "ce_orig": 0.9787085652351379, + "epoch": 0.8871953411460206, + "kl_loss": 0.06811176985502243, + "loss_ib": 0.0010983041720464826, + "step": 3085 + }, + { + "ce_ib": 4.511376857757568, + "ce_orig": 1.0024528503417969, + "epoch": 0.8871953411460206, + "kl_loss": 0.05276479944586754, + "loss_ib": 0.000978785683400929, + "step": 3085 + }, + { + "ce_ib": 4.467855930328369, + "ce_orig": 0.8579865097999573, + "epoch": 0.8871953411460206, + "kl_loss": 0.06303410232067108, + "loss_ib": 0.0010771265951916575, + "step": 3085 + }, + { + "ce_ib": 4.1526923179626465, + "ce_orig": 1.0409826040267944, + "epoch": 0.8871953411460206, + "kl_loss": 0.06662946194410324, + "loss_ib": 0.0010815637651830912, + "step": 3085 + }, + { + "ce_ib": 2.738980770111084, + "ce_orig": 0.7064430713653564, + "epoch": 0.8874829247249982, + "kl_loss": 0.04013978689908981, + "loss_ib": 0.0006752959452569485, + "step": 3086 + }, + { + "ce_ib": 3.3785314559936523, + "ce_orig": 0.6960227489471436, + "epoch": 0.8874829247249982, + "kl_loss": 0.05627036467194557, + "loss_ib": 0.0009005567408166826, + "step": 3086 + }, + { + "ce_ib": 3.0852015018463135, + "ce_orig": 0.7371809482574463, + "epoch": 0.8874829247249982, + "kl_loss": 0.03938751295208931, + "loss_ib": 0.0007023952784948051, + "step": 3086 + }, + { + "ce_ib": 4.4303364753723145, + "ce_orig": 1.2295522689819336, + "epoch": 0.8874829247249982, + "kl_loss": 0.033368587493896484, + "loss_ib": 0.0007767195347696543, + "step": 3086 + }, + { + "ce_ib": 4.029292583465576, + "ce_orig": 0.7791849970817566, + "epoch": 0.8877705083039759, + "kl_loss": 0.05303753912448883, + "loss_ib": 0.000933304603677243, + "step": 3087 + }, + { + "ce_ib": 5.332289218902588, + "ce_orig": 0.9346296191215515, + "epoch": 0.8877705083039759, + "kl_loss": 0.056677088141441345, + "loss_ib": 0.0010999998776242137, + "step": 3087 + }, + { + "ce_ib": 3.115664005279541, + "ce_orig": 0.7592244148254395, + "epoch": 0.8877705083039759, + "kl_loss": 0.030415872111916542, + "loss_ib": 0.0006157251191325486, + "step": 3087 + }, + { + "ce_ib": 2.50825572013855, + "ce_orig": 0.5680367350578308, + "epoch": 0.8877705083039759, + "kl_loss": 0.09005758166313171, + "loss_ib": 0.0011514013167470694, + "step": 3087 + }, + { + "ce_ib": 3.6878161430358887, + "ce_orig": 0.8596364855766296, + "epoch": 0.8880580918829535, + "kl_loss": 0.04206755757331848, + "loss_ib": 0.0007894571754150093, + "step": 3088 + }, + { + "ce_ib": 4.021722793579102, + "ce_orig": 1.1123864650726318, + "epoch": 0.8880580918829535, + "kl_loss": 0.050140198320150375, + "loss_ib": 0.0009035742259584367, + "step": 3088 + }, + { + "ce_ib": 3.9961085319519043, + "ce_orig": 0.9554210901260376, + "epoch": 0.8880580918829535, + "kl_loss": 0.041007064282894135, + "loss_ib": 0.0008096814854070544, + "step": 3088 + }, + { + "ce_ib": 4.5218305587768555, + "ce_orig": 1.0859012603759766, + "epoch": 0.8880580918829535, + "kl_loss": 0.05683185160160065, + "loss_ib": 0.0010205014841631055, + "step": 3088 + }, + { + "ce_ib": 3.05063533782959, + "ce_orig": 0.808527410030365, + "epoch": 0.8883456754619311, + "kl_loss": 0.04079092666506767, + "loss_ib": 0.0007129727746360004, + "step": 3089 + }, + { + "ce_ib": 2.870461940765381, + "ce_orig": 0.548692524433136, + "epoch": 0.8883456754619311, + "kl_loss": 0.025281274691224098, + "loss_ib": 0.0005398589419201016, + "step": 3089 + }, + { + "ce_ib": 2.896991729736328, + "ce_orig": 0.6076558828353882, + "epoch": 0.8883456754619311, + "kl_loss": 0.11955590546131134, + "loss_ib": 0.0014852581080049276, + "step": 3089 + }, + { + "ce_ib": 2.4320273399353027, + "ce_orig": 0.5189306139945984, + "epoch": 0.8883456754619311, + "kl_loss": 0.06650368869304657, + "loss_ib": 0.000908239628188312, + "step": 3089 + }, + { + "epoch": 0.8886332590409087, + "grad_norm": 0.09526026993989944, + "learning_rate": 4.1312786955759516e-05, + "loss": 0.8381, + "step": 3090 + }, + { + "ce_ib": 2.9765217304229736, + "ce_orig": 0.7884946465492249, + "epoch": 0.8886332590409087, + "kl_loss": 0.062367431819438934, + "loss_ib": 0.0009213264565914869, + "step": 3090 + }, + { + "ce_ib": 4.615750312805176, + "ce_orig": 1.0289316177368164, + "epoch": 0.8886332590409087, + "kl_loss": 0.058144766837358475, + "loss_ib": 0.001043022726662457, + "step": 3090 + }, + { + "ce_ib": 3.9129788875579834, + "ce_orig": 0.6873042583465576, + "epoch": 0.8886332590409087, + "kl_loss": 0.12833991646766663, + "loss_ib": 0.0016746970359236002, + "step": 3090 + }, + { + "ce_ib": 3.9392507076263428, + "ce_orig": 0.7486117482185364, + "epoch": 0.8886332590409087, + "kl_loss": 0.06933480501174927, + "loss_ib": 0.0010872730053961277, + "step": 3090 + }, + { + "ce_ib": 4.447238445281982, + "ce_orig": 1.2318986654281616, + "epoch": 0.8889208426198864, + "kl_loss": 0.0455869622528553, + "loss_ib": 0.000900593469850719, + "step": 3091 + }, + { + "ce_ib": 2.4980409145355225, + "ce_orig": 0.7695750594139099, + "epoch": 0.8889208426198864, + "kl_loss": 0.02809806540608406, + "loss_ib": 0.0005307847168296576, + "step": 3091 + }, + { + "ce_ib": 3.6761527061462402, + "ce_orig": 0.9181052446365356, + "epoch": 0.8889208426198864, + "kl_loss": 0.038838353008031845, + "loss_ib": 0.0007559987716376781, + "step": 3091 + }, + { + "ce_ib": 3.272711753845215, + "ce_orig": 0.9635962247848511, + "epoch": 0.8889208426198864, + "kl_loss": 0.022865936160087585, + "loss_ib": 0.0005559305427595973, + "step": 3091 + }, + { + "ce_ib": 4.828166961669922, + "ce_orig": 1.1775087118148804, + "epoch": 0.8892084261988641, + "kl_loss": 0.04071018844842911, + "loss_ib": 0.0008899185340851545, + "step": 3092 + }, + { + "ce_ib": 3.026651382446289, + "ce_orig": 0.5279139876365662, + "epoch": 0.8892084261988641, + "kl_loss": 0.0436287522315979, + "loss_ib": 0.0007389526581391692, + "step": 3092 + }, + { + "ce_ib": 3.1366994380950928, + "ce_orig": 0.6259693503379822, + "epoch": 0.8892084261988641, + "kl_loss": 0.04049169272184372, + "loss_ib": 0.0007185869035311043, + "step": 3092 + }, + { + "ce_ib": 4.5818071365356445, + "ce_orig": 1.1118208169937134, + "epoch": 0.8892084261988641, + "kl_loss": 0.043774936348199844, + "loss_ib": 0.0008959300466813147, + "step": 3092 + }, + { + "ce_ib": 4.571009159088135, + "ce_orig": 1.3118534088134766, + "epoch": 0.8894960097778417, + "kl_loss": 0.0557655394077301, + "loss_ib": 0.0010147562716156244, + "step": 3093 + }, + { + "ce_ib": 2.9245412349700928, + "ce_orig": 0.7522282004356384, + "epoch": 0.8894960097778417, + "kl_loss": 0.0396159291267395, + "loss_ib": 0.0006886133924126625, + "step": 3093 + }, + { + "ce_ib": 2.4609169960021973, + "ce_orig": 0.6293176412582397, + "epoch": 0.8894960097778417, + "kl_loss": 0.04170781001448631, + "loss_ib": 0.0006631697760894895, + "step": 3093 + }, + { + "ce_ib": 2.809842109680176, + "ce_orig": 0.5660992860794067, + "epoch": 0.8894960097778417, + "kl_loss": 0.04998143017292023, + "loss_ib": 0.0007807984948158264, + "step": 3093 + }, + { + "ce_ib": 3.2628836631774902, + "ce_orig": 0.6744593977928162, + "epoch": 0.8897835933568193, + "kl_loss": 0.0351422056555748, + "loss_ib": 0.0006777103990316391, + "step": 3094 + }, + { + "ce_ib": 1.9900124073028564, + "ce_orig": 0.45915865898132324, + "epoch": 0.8897835933568193, + "kl_loss": 0.030350305140018463, + "loss_ib": 0.0005025042919442058, + "step": 3094 + }, + { + "ce_ib": 2.944579601287842, + "ce_orig": 0.6549683809280396, + "epoch": 0.8897835933568193, + "kl_loss": 0.05494995415210724, + "loss_ib": 0.0008439574739895761, + "step": 3094 + }, + { + "ce_ib": 3.240743398666382, + "ce_orig": 0.9003892540931702, + "epoch": 0.8897835933568193, + "kl_loss": 0.0375511534512043, + "loss_ib": 0.0006995858275331557, + "step": 3094 + }, + { + "epoch": 0.890071176935797, + "grad_norm": 0.09506064653396606, + "learning_rate": 4.128336227199003e-05, + "loss": 0.8665, + "step": 3095 + }, + { + "ce_ib": 3.2244741916656494, + "ce_orig": 0.7024855017662048, + "epoch": 0.890071176935797, + "kl_loss": 0.0627397894859314, + "loss_ib": 0.0009498453000560403, + "step": 3095 + }, + { + "ce_ib": 4.497989177703857, + "ce_orig": 1.0775635242462158, + "epoch": 0.890071176935797, + "kl_loss": 0.08104885369539261, + "loss_ib": 0.0012602874776348472, + "step": 3095 + }, + { + "ce_ib": 2.699937343597412, + "ce_orig": 0.5694195032119751, + "epoch": 0.890071176935797, + "kl_loss": 0.05660218000411987, + "loss_ib": 0.0008360154461115599, + "step": 3095 + }, + { + "ce_ib": 6.69846248626709, + "ce_orig": 1.411732792854309, + "epoch": 0.890071176935797, + "kl_loss": 0.06800240278244019, + "loss_ib": 0.0013498702319338918, + "step": 3095 + }, + { + "ce_ib": 2.0791003704071045, + "ce_orig": 0.5035520195960999, + "epoch": 0.8903587605147746, + "kl_loss": 0.05521729961037636, + "loss_ib": 0.0007600830285809934, + "step": 3096 + }, + { + "ce_ib": 2.77816104888916, + "ce_orig": 0.6380156874656677, + "epoch": 0.8903587605147746, + "kl_loss": 0.03773000091314316, + "loss_ib": 0.0006551161059178412, + "step": 3096 + }, + { + "ce_ib": 3.2374634742736816, + "ce_orig": 0.6389076709747314, + "epoch": 0.8903587605147746, + "kl_loss": 0.05140858143568039, + "loss_ib": 0.0008378321654163301, + "step": 3096 + }, + { + "ce_ib": 2.4779367446899414, + "ce_orig": 0.3390951156616211, + "epoch": 0.8903587605147746, + "kl_loss": 0.045208096504211426, + "loss_ib": 0.0006998745957389474, + "step": 3096 + }, + { + "ce_ib": 5.040646076202393, + "ce_orig": 1.254789113998413, + "epoch": 0.8906463440937522, + "kl_loss": 0.045143138617277145, + "loss_ib": 0.0009554959833621979, + "step": 3097 + }, + { + "ce_ib": 2.4467737674713135, + "ce_orig": 0.5723627209663391, + "epoch": 0.8906463440937522, + "kl_loss": 0.03506626933813095, + "loss_ib": 0.0005953400395810604, + "step": 3097 + }, + { + "ce_ib": 1.6393611431121826, + "ce_orig": 0.2850027084350586, + "epoch": 0.8906463440937522, + "kl_loss": 0.10799381136894226, + "loss_ib": 0.0012438741978257895, + "step": 3097 + }, + { + "ce_ib": 1.8120309114456177, + "ce_orig": 0.44177335500717163, + "epoch": 0.8906463440937522, + "kl_loss": 0.028406154364347458, + "loss_ib": 0.000465264602098614, + "step": 3097 + }, + { + "ce_ib": 2.85054874420166, + "ce_orig": 0.7220306396484375, + "epoch": 0.8909339276727299, + "kl_loss": 0.1235492080450058, + "loss_ib": 0.0015205469680950046, + "step": 3098 + }, + { + "ce_ib": 2.0356953144073486, + "ce_orig": 0.6115330457687378, + "epoch": 0.8909339276727299, + "kl_loss": 0.03164203092455864, + "loss_ib": 0.0005199898150749505, + "step": 3098 + }, + { + "ce_ib": 2.879354476928711, + "ce_orig": 0.38345494866371155, + "epoch": 0.8909339276727299, + "kl_loss": 0.06359703838825226, + "loss_ib": 0.0009239058126695454, + "step": 3098 + }, + { + "ce_ib": 4.437338352203369, + "ce_orig": 1.0986099243164062, + "epoch": 0.8909339276727299, + "kl_loss": 0.058597784489393234, + "loss_ib": 0.0010297116823494434, + "step": 3098 + }, + { + "ce_ib": 6.036228179931641, + "ce_orig": 1.8771066665649414, + "epoch": 0.8912215112517076, + "kl_loss": 0.07470668852329254, + "loss_ib": 0.0013506896793842316, + "step": 3099 + }, + { + "ce_ib": 5.32127571105957, + "ce_orig": 1.1738642454147339, + "epoch": 0.8912215112517076, + "kl_loss": 0.06903070211410522, + "loss_ib": 0.001222434570081532, + "step": 3099 + }, + { + "ce_ib": 2.6072325706481934, + "ce_orig": 0.31958648562431335, + "epoch": 0.8912215112517076, + "kl_loss": 0.056058213114738464, + "loss_ib": 0.0008213053224608302, + "step": 3099 + }, + { + "ce_ib": 2.0334346294403076, + "ce_orig": 0.482647180557251, + "epoch": 0.8912215112517076, + "kl_loss": 0.05324605107307434, + "loss_ib": 0.0007358039729297161, + "step": 3099 + }, + { + "epoch": 0.8915090948306852, + "grad_norm": 0.0905550941824913, + "learning_rate": 4.125389835782138e-05, + "loss": 0.7808, + "step": 3100 + }, + { + "ce_ib": 2.732715129852295, + "ce_orig": 0.4350951611995697, + "epoch": 0.8915090948306852, + "kl_loss": 0.056056201457977295, + "loss_ib": 0.0008338334737345576, + "step": 3100 + }, + { + "ce_ib": 2.864436626434326, + "ce_orig": 0.8848311305046082, + "epoch": 0.8915090948306852, + "kl_loss": 0.026425577700138092, + "loss_ib": 0.000550699420273304, + "step": 3100 + }, + { + "ce_ib": 3.3592209815979004, + "ce_orig": 0.4953135848045349, + "epoch": 0.8915090948306852, + "kl_loss": 0.031225528568029404, + "loss_ib": 0.0006481774034909904, + "step": 3100 + }, + { + "ce_ib": 2.508592367172241, + "ce_orig": 0.7355779409408569, + "epoch": 0.8915090948306852, + "kl_loss": 0.047460876405239105, + "loss_ib": 0.0007254679221659899, + "step": 3100 + }, + { + "ce_ib": 3.0788402557373047, + "ce_orig": 0.71449875831604, + "epoch": 0.8917966784096628, + "kl_loss": 0.03355327248573303, + "loss_ib": 0.0006434167153201997, + "step": 3101 + }, + { + "ce_ib": 2.728816509246826, + "ce_orig": 0.7010646462440491, + "epoch": 0.8917966784096628, + "kl_loss": 0.02871963009238243, + "loss_ib": 0.0005600779550150037, + "step": 3101 + }, + { + "ce_ib": 3.3895933628082275, + "ce_orig": 0.8214389085769653, + "epoch": 0.8917966784096628, + "kl_loss": 0.05915296822786331, + "loss_ib": 0.0009304889827035367, + "step": 3101 + }, + { + "ce_ib": 3.497623920440674, + "ce_orig": 0.8059384226799011, + "epoch": 0.8917966784096628, + "kl_loss": 0.03538990765810013, + "loss_ib": 0.0007036614115349948, + "step": 3101 + }, + { + "ce_ib": 3.624910593032837, + "ce_orig": 0.7523483037948608, + "epoch": 0.8920842619886404, + "kl_loss": 0.06838734447956085, + "loss_ib": 0.0010463644284754992, + "step": 3102 + }, + { + "ce_ib": 4.8237175941467285, + "ce_orig": 1.2774999141693115, + "epoch": 0.8920842619886404, + "kl_loss": 0.05397239699959755, + "loss_ib": 0.0010220956755802035, + "step": 3102 + }, + { + "ce_ib": 3.6559576988220215, + "ce_orig": 0.7604995965957642, + "epoch": 0.8920842619886404, + "kl_loss": 0.053527235984802246, + "loss_ib": 0.0009008681518025696, + "step": 3102 + }, + { + "ce_ib": 3.8382484912872314, + "ce_orig": 0.7892813682556152, + "epoch": 0.8920842619886404, + "kl_loss": 0.07707233726978302, + "loss_ib": 0.001154548255726695, + "step": 3102 + }, + { + "ce_ib": 2.4096641540527344, + "ce_orig": 0.49795278906822205, + "epoch": 0.8923718455676181, + "kl_loss": 0.08747562766075134, + "loss_ib": 0.001115722581744194, + "step": 3103 + }, + { + "ce_ib": 3.0776000022888184, + "ce_orig": 0.7479544281959534, + "epoch": 0.8923718455676181, + "kl_loss": 0.065291628241539, + "loss_ib": 0.0009606762905605137, + "step": 3103 + }, + { + "ce_ib": 2.8900699615478516, + "ce_orig": 0.41282033920288086, + "epoch": 0.8923718455676181, + "kl_loss": 0.021157141774892807, + "loss_ib": 0.0005005784332752228, + "step": 3103 + }, + { + "ce_ib": 5.326651096343994, + "ce_orig": 1.3049046993255615, + "epoch": 0.8923718455676181, + "kl_loss": 0.06720661371946335, + "loss_ib": 0.0012047311756759882, + "step": 3103 + }, + { + "ce_ib": 2.3099477291107178, + "ce_orig": 0.5285616517066956, + "epoch": 0.8926594291465957, + "kl_loss": 0.038396403193473816, + "loss_ib": 0.0006149587570689619, + "step": 3104 + }, + { + "ce_ib": 3.97275972366333, + "ce_orig": 0.8855296969413757, + "epoch": 0.8926594291465957, + "kl_loss": 0.054695311933755875, + "loss_ib": 0.0009442290174774826, + "step": 3104 + }, + { + "ce_ib": 3.6999173164367676, + "ce_orig": 0.9429256319999695, + "epoch": 0.8926594291465957, + "kl_loss": 0.06798704713582993, + "loss_ib": 0.0010498621268197894, + "step": 3104 + }, + { + "ce_ib": 5.084854602813721, + "ce_orig": 1.232107162475586, + "epoch": 0.8926594291465957, + "kl_loss": 0.06510090827941895, + "loss_ib": 0.0011594945099204779, + "step": 3104 + }, + { + "epoch": 0.8929470127255734, + "grad_norm": 0.10271920263767242, + "learning_rate": 4.122439528423897e-05, + "loss": 0.7812, + "step": 3105 + }, + { + "ce_ib": 2.6838226318359375, + "ce_orig": 0.6486763954162598, + "epoch": 0.8929470127255734, + "kl_loss": 0.040136951953172684, + "loss_ib": 0.0006697517237626016, + "step": 3105 + }, + { + "ce_ib": 3.2154150009155273, + "ce_orig": 0.9201083779335022, + "epoch": 0.8929470127255734, + "kl_loss": 0.03305024653673172, + "loss_ib": 0.0006520439637824893, + "step": 3105 + }, + { + "ce_ib": 2.8613405227661133, + "ce_orig": 0.7317679524421692, + "epoch": 0.8929470127255734, + "kl_loss": 0.04521772637963295, + "loss_ib": 0.0007383113261312246, + "step": 3105 + }, + { + "ce_ib": 3.2416703701019287, + "ce_orig": 0.7347345948219299, + "epoch": 0.8929470127255734, + "kl_loss": 0.1504751741886139, + "loss_ib": 0.0018289186991751194, + "step": 3105 + }, + { + "ce_ib": 1.9130688905715942, + "ce_orig": 0.49264904856681824, + "epoch": 0.893234596304551, + "kl_loss": 0.07353953272104263, + "loss_ib": 0.0009267021669074893, + "step": 3106 + }, + { + "ce_ib": 6.6315016746521, + "ce_orig": 0.948112964630127, + "epoch": 0.893234596304551, + "kl_loss": 0.04942401498556137, + "loss_ib": 0.001157390302978456, + "step": 3106 + }, + { + "ce_ib": 6.400832653045654, + "ce_orig": 1.7948431968688965, + "epoch": 0.893234596304551, + "kl_loss": 0.079766646027565, + "loss_ib": 0.0014377497136592865, + "step": 3106 + }, + { + "ce_ib": 2.52066969871521, + "ce_orig": 0.4376794695854187, + "epoch": 0.893234596304551, + "kl_loss": 0.06478427350521088, + "loss_ib": 0.0008999097044579685, + "step": 3106 + }, + { + "ce_ib": 3.5574519634246826, + "ce_orig": 0.7955411076545715, + "epoch": 0.8935221798835287, + "kl_loss": 0.07685063779354095, + "loss_ib": 0.0011242515174672008, + "step": 3107 + }, + { + "ce_ib": 3.4425625801086426, + "ce_orig": 0.5814232230186462, + "epoch": 0.8935221798835287, + "kl_loss": 0.09540437161922455, + "loss_ib": 0.001298299990594387, + "step": 3107 + }, + { + "ce_ib": 3.0197184085845947, + "ce_orig": 0.72687166929245, + "epoch": 0.8935221798835287, + "kl_loss": 0.07657364010810852, + "loss_ib": 0.0010677081299945712, + "step": 3107 + }, + { + "ce_ib": 4.293099403381348, + "ce_orig": 1.3292359113693237, + "epoch": 0.8935221798835287, + "kl_loss": 0.050555795431137085, + "loss_ib": 0.0009348678286187351, + "step": 3107 + }, + { + "ce_ib": 3.0975265502929688, + "ce_orig": 0.4537966549396515, + "epoch": 0.8938097634625063, + "kl_loss": 0.06627313792705536, + "loss_ib": 0.0009724840056151152, + "step": 3108 + }, + { + "ce_ib": 1.7968330383300781, + "ce_orig": 0.17848511040210724, + "epoch": 0.8938097634625063, + "kl_loss": 0.06122509390115738, + "loss_ib": 0.0007919342606328428, + "step": 3108 + }, + { + "ce_ib": 3.868345260620117, + "ce_orig": 0.7571849822998047, + "epoch": 0.8938097634625063, + "kl_loss": 0.06327809393405914, + "loss_ib": 0.0010196154471486807, + "step": 3108 + }, + { + "ce_ib": 3.940718173980713, + "ce_orig": 0.9369444847106934, + "epoch": 0.8938097634625063, + "kl_loss": 0.06173327937722206, + "loss_ib": 0.0010114045580849051, + "step": 3108 + }, + { + "ce_ib": 3.990269899368286, + "ce_orig": 0.7632893323898315, + "epoch": 0.8940973470414839, + "kl_loss": 0.07573391497135162, + "loss_ib": 0.0011563661973923445, + "step": 3109 + }, + { + "ce_ib": 6.147134304046631, + "ce_orig": 1.4514856338500977, + "epoch": 0.8940973470414839, + "kl_loss": 0.06575213372707367, + "loss_ib": 0.0012722347164526582, + "step": 3109 + }, + { + "ce_ib": 2.229612112045288, + "ce_orig": 0.6870375871658325, + "epoch": 0.8940973470414839, + "kl_loss": 0.028626011684536934, + "loss_ib": 0.0005092213395982981, + "step": 3109 + }, + { + "ce_ib": 6.608057498931885, + "ce_orig": 1.8339816331863403, + "epoch": 0.8940973470414839, + "kl_loss": 0.1215234249830246, + "loss_ib": 0.0018760398961603642, + "step": 3109 + }, + { + "epoch": 0.8943849306204615, + "grad_norm": 0.10160653293132782, + "learning_rate": 4.119485312232256e-05, + "loss": 0.7979, + "step": 3110 + }, + { + "ce_ib": 3.26474666595459, + "ce_orig": 0.7323849201202393, + "epoch": 0.8943849306204615, + "kl_loss": 0.05519198626279831, + "loss_ib": 0.0008783945231698453, + "step": 3110 + }, + { + "ce_ib": 5.162665843963623, + "ce_orig": 1.1869269609451294, + "epoch": 0.8943849306204615, + "kl_loss": 0.0910247340798378, + "loss_ib": 0.0014265138888731599, + "step": 3110 + }, + { + "ce_ib": 2.3041439056396484, + "ce_orig": 0.5638024210929871, + "epoch": 0.8943849306204615, + "kl_loss": 0.10469725728034973, + "loss_ib": 0.0012773869093507528, + "step": 3110 + }, + { + "ce_ib": 5.375331401824951, + "ce_orig": 1.2067235708236694, + "epoch": 0.8943849306204615, + "kl_loss": 0.11750131845474243, + "loss_ib": 0.0017125463346019387, + "step": 3110 + }, + { + "ce_ib": 2.8528006076812744, + "ce_orig": 0.556660532951355, + "epoch": 0.8946725141994392, + "kl_loss": 0.04961015284061432, + "loss_ib": 0.0007813815609551966, + "step": 3111 + }, + { + "ce_ib": 2.7597596645355225, + "ce_orig": 0.6570913195610046, + "epoch": 0.8946725141994392, + "kl_loss": 0.04707592353224754, + "loss_ib": 0.0007467351970262825, + "step": 3111 + }, + { + "ce_ib": 2.8910207748413086, + "ce_orig": 0.686583399772644, + "epoch": 0.8946725141994392, + "kl_loss": 0.04933389276266098, + "loss_ib": 0.0007824409403838217, + "step": 3111 + }, + { + "ce_ib": 4.603890419006348, + "ce_orig": 1.0978405475616455, + "epoch": 0.8946725141994392, + "kl_loss": 0.07494065910577774, + "loss_ib": 0.0012097955914214253, + "step": 3111 + }, + { + "ce_ib": 4.579075813293457, + "ce_orig": 1.1068822145462036, + "epoch": 0.8949600977784169, + "kl_loss": 0.05916838347911835, + "loss_ib": 0.0010495914611965418, + "step": 3112 + }, + { + "ce_ib": 4.73726224899292, + "ce_orig": 1.2823963165283203, + "epoch": 0.8949600977784169, + "kl_loss": 0.06120278686285019, + "loss_ib": 0.0010857540182769299, + "step": 3112 + }, + { + "ce_ib": 3.2481794357299805, + "ce_orig": 1.1849340200424194, + "epoch": 0.8949600977784169, + "kl_loss": 0.03842785581946373, + "loss_ib": 0.000709096493665129, + "step": 3112 + }, + { + "ce_ib": 4.061931133270264, + "ce_orig": 0.9318153262138367, + "epoch": 0.8949600977784169, + "kl_loss": 0.08287139236927032, + "loss_ib": 0.0012349070748314261, + "step": 3112 + }, + { + "ce_ib": 5.574834823608398, + "ce_orig": 1.1008172035217285, + "epoch": 0.8952476813573945, + "kl_loss": 0.07882770895957947, + "loss_ib": 0.0013457605382427573, + "step": 3113 + }, + { + "ce_ib": 3.4786760807037354, + "ce_orig": 0.4427070617675781, + "epoch": 0.8952476813573945, + "kl_loss": 0.055316776037216187, + "loss_ib": 0.0009010353242047131, + "step": 3113 + }, + { + "ce_ib": 4.17708158493042, + "ce_orig": 0.5921626091003418, + "epoch": 0.8952476813573945, + "kl_loss": 0.12168805301189423, + "loss_ib": 0.00163458869792521, + "step": 3113 + }, + { + "ce_ib": 2.963989019393921, + "ce_orig": 0.647000253200531, + "epoch": 0.8952476813573945, + "kl_loss": 0.049437105655670166, + "loss_ib": 0.0007907699327915907, + "step": 3113 + }, + { + "ce_ib": 3.8129727840423584, + "ce_orig": 0.9956676363945007, + "epoch": 0.8955352649363721, + "kl_loss": 0.08296485245227814, + "loss_ib": 0.0012109457748010755, + "step": 3114 + }, + { + "ce_ib": 3.937181234359741, + "ce_orig": 1.0302529335021973, + "epoch": 0.8955352649363721, + "kl_loss": 0.05757645517587662, + "loss_ib": 0.000969482643995434, + "step": 3114 + }, + { + "ce_ib": 4.438992023468018, + "ce_orig": 1.1253352165222168, + "epoch": 0.8955352649363721, + "kl_loss": 0.06515221297740936, + "loss_ib": 0.0010954212630167603, + "step": 3114 + }, + { + "ce_ib": 3.863422393798828, + "ce_orig": 0.6314740180969238, + "epoch": 0.8955352649363721, + "kl_loss": 0.07815004885196686, + "loss_ib": 0.0011678427690640092, + "step": 3114 + }, + { + "epoch": 0.8958228485153498, + "grad_norm": 0.08517540246248245, + "learning_rate": 4.1165271943246076e-05, + "loss": 0.8348, + "step": 3115 + }, + { + "ce_ib": 2.400136709213257, + "ce_orig": 0.6495793461799622, + "epoch": 0.8958228485153498, + "kl_loss": 0.037037987262010574, + "loss_ib": 0.0006103935302235186, + "step": 3115 + }, + { + "ce_ib": 2.797213315963745, + "ce_orig": 0.6761162877082825, + "epoch": 0.8958228485153498, + "kl_loss": 0.036459021270275116, + "loss_ib": 0.0006443115416914225, + "step": 3115 + }, + { + "ce_ib": 4.637478351593018, + "ce_orig": 1.1898638010025024, + "epoch": 0.8958228485153498, + "kl_loss": 0.07660864293575287, + "loss_ib": 0.0012298342771828175, + "step": 3115 + }, + { + "ce_ib": 2.548518657684326, + "ce_orig": 0.6753807067871094, + "epoch": 0.8958228485153498, + "kl_loss": 0.04044211655855179, + "loss_ib": 0.0006592730060219765, + "step": 3115 + }, + { + "ce_ib": 3.1138925552368164, + "ce_orig": 0.7103480100631714, + "epoch": 0.8961104320943274, + "kl_loss": 0.03639856353402138, + "loss_ib": 0.0006753748748451471, + "step": 3116 + }, + { + "ce_ib": 4.091615676879883, + "ce_orig": 0.9810498356819153, + "epoch": 0.8961104320943274, + "kl_loss": 0.07343132048845291, + "loss_ib": 0.0011434747138991952, + "step": 3116 + }, + { + "ce_ib": 4.1762375831604, + "ce_orig": 1.1480547189712524, + "epoch": 0.8961104320943274, + "kl_loss": 0.07327037304639816, + "loss_ib": 0.0011503275018185377, + "step": 3116 + }, + { + "ce_ib": 4.912754058837891, + "ce_orig": 1.2349454164505005, + "epoch": 0.8961104320943274, + "kl_loss": 0.04494549706578255, + "loss_ib": 0.0009407303296029568, + "step": 3116 + }, + { + "ce_ib": 2.5936264991760254, + "ce_orig": 0.6954960227012634, + "epoch": 0.896398015673305, + "kl_loss": 0.048798784613609314, + "loss_ib": 0.0007473504520021379, + "step": 3117 + }, + { + "ce_ib": 3.4145469665527344, + "ce_orig": 0.7071998715400696, + "epoch": 0.896398015673305, + "kl_loss": 0.06427041441202164, + "loss_ib": 0.0009841588325798512, + "step": 3117 + }, + { + "ce_ib": 1.9131349325180054, + "ce_orig": 0.35168835520744324, + "epoch": 0.896398015673305, + "kl_loss": 0.06442157924175262, + "loss_ib": 0.0008355292375199497, + "step": 3117 + }, + { + "ce_ib": 5.314935684204102, + "ce_orig": 1.4675812721252441, + "epoch": 0.896398015673305, + "kl_loss": 0.06203201785683632, + "loss_ib": 0.0011518136598169804, + "step": 3117 + }, + { + "ce_ib": 3.4825401306152344, + "ce_orig": 0.7528243064880371, + "epoch": 0.8966855992522828, + "kl_loss": 0.08154574036598206, + "loss_ib": 0.001163711422123015, + "step": 3118 + }, + { + "ce_ib": 3.023033618927002, + "ce_orig": 0.8490056991577148, + "epoch": 0.8966855992522828, + "kl_loss": 0.03581725060939789, + "loss_ib": 0.0006604758673347533, + "step": 3118 + }, + { + "ce_ib": 3.3113627433776855, + "ce_orig": 0.8321856260299683, + "epoch": 0.8966855992522828, + "kl_loss": 0.06194967031478882, + "loss_ib": 0.0009506329661235213, + "step": 3118 + }, + { + "ce_ib": 4.352823257446289, + "ce_orig": 1.1832727193832397, + "epoch": 0.8966855992522828, + "kl_loss": 0.05196738988161087, + "loss_ib": 0.0009549562237225473, + "step": 3118 + }, + { + "ce_ib": 6.2087860107421875, + "ce_orig": 1.7521345615386963, + "epoch": 0.8969731828312604, + "kl_loss": 0.07364752143621445, + "loss_ib": 0.0013573537580668926, + "step": 3119 + }, + { + "ce_ib": 4.650389671325684, + "ce_orig": 0.8246464133262634, + "epoch": 0.8969731828312604, + "kl_loss": 0.0528959259390831, + "loss_ib": 0.0009939981391653419, + "step": 3119 + }, + { + "ce_ib": 3.0969972610473633, + "ce_orig": 0.40172693133354187, + "epoch": 0.8969731828312604, + "kl_loss": 0.07890401780605316, + "loss_ib": 0.001098739798180759, + "step": 3119 + }, + { + "ce_ib": 3.488189458847046, + "ce_orig": 1.174847960472107, + "epoch": 0.8969731828312604, + "kl_loss": 0.03443470597267151, + "loss_ib": 0.0006931659299880266, + "step": 3119 + }, + { + "epoch": 0.897260766410238, + "grad_norm": 0.09078297764062881, + "learning_rate": 4.1135651818277445e-05, + "loss": 0.9183, + "step": 3120 + }, + { + "ce_ib": 4.196993827819824, + "ce_orig": 1.0099936723709106, + "epoch": 0.897260766410238, + "kl_loss": 0.0337667316198349, + "loss_ib": 0.0007573667098768055, + "step": 3120 + }, + { + "ce_ib": 3.6519699096679688, + "ce_orig": 0.6854029893875122, + "epoch": 0.897260766410238, + "kl_loss": 0.05545440688729286, + "loss_ib": 0.0009197410545311868, + "step": 3120 + }, + { + "ce_ib": 3.878021717071533, + "ce_orig": 1.0426876544952393, + "epoch": 0.897260766410238, + "kl_loss": 0.06536967307329178, + "loss_ib": 0.0010414988500997424, + "step": 3120 + }, + { + "ce_ib": 4.643683910369873, + "ce_orig": 1.3267927169799805, + "epoch": 0.897260766410238, + "kl_loss": 0.057875555008649826, + "loss_ib": 0.0010431238915771246, + "step": 3120 + }, + { + "ce_ib": 3.261653184890747, + "ce_orig": 0.8420042395591736, + "epoch": 0.8975483499892156, + "kl_loss": 0.07539370656013489, + "loss_ib": 0.0010801022872328758, + "step": 3121 + }, + { + "ce_ib": 3.7258620262145996, + "ce_orig": 0.7517514228820801, + "epoch": 0.8975483499892156, + "kl_loss": 0.03757958114147186, + "loss_ib": 0.0007483819499611855, + "step": 3121 + }, + { + "ce_ib": 2.457929849624634, + "ce_orig": 0.5275048017501831, + "epoch": 0.8975483499892156, + "kl_loss": 0.044660329818725586, + "loss_ib": 0.0006923963082954288, + "step": 3121 + }, + { + "ce_ib": 3.127345561981201, + "ce_orig": 0.8162370324134827, + "epoch": 0.8975483499892156, + "kl_loss": 0.04304035007953644, + "loss_ib": 0.0007431380217894912, + "step": 3121 + }, + { + "ce_ib": 3.249201774597168, + "ce_orig": 0.7776914834976196, + "epoch": 0.8978359335681932, + "kl_loss": 0.05762871354818344, + "loss_ib": 0.0009012073278427124, + "step": 3122 + }, + { + "ce_ib": 4.927418231964111, + "ce_orig": 0.9781513810157776, + "epoch": 0.8978359335681932, + "kl_loss": 0.06844587624073029, + "loss_ib": 0.0011772004654631019, + "step": 3122 + }, + { + "ce_ib": 2.756857395172119, + "ce_orig": 0.6173890829086304, + "epoch": 0.8978359335681932, + "kl_loss": 0.0438326857984066, + "loss_ib": 0.0007140125962905586, + "step": 3122 + }, + { + "ce_ib": 2.464866876602173, + "ce_orig": 0.440833181142807, + "epoch": 0.8978359335681932, + "kl_loss": 0.044997282326221466, + "loss_ib": 0.0006964594940654933, + "step": 3122 + }, + { + "ce_ib": 5.7023210525512695, + "ce_orig": 1.6001631021499634, + "epoch": 0.8981235171471709, + "kl_loss": 0.0690690129995346, + "loss_ib": 0.0012609221739694476, + "step": 3123 + }, + { + "ce_ib": 3.509089708328247, + "ce_orig": 0.8328301906585693, + "epoch": 0.8981235171471709, + "kl_loss": 0.050707027316093445, + "loss_ib": 0.0008579791756346822, + "step": 3123 + }, + { + "ce_ib": 4.102006912231445, + "ce_orig": 0.7709041833877563, + "epoch": 0.8981235171471709, + "kl_loss": 0.14866110682487488, + "loss_ib": 0.0018968116492033005, + "step": 3123 + }, + { + "ce_ib": 3.831292152404785, + "ce_orig": 0.5383590459823608, + "epoch": 0.8981235171471709, + "kl_loss": 0.05419307202100754, + "loss_ib": 0.0009250598959624767, + "step": 3123 + }, + { + "ce_ib": 4.917757987976074, + "ce_orig": 1.3293629884719849, + "epoch": 0.8984111007261485, + "kl_loss": 0.06158396974205971, + "loss_ib": 0.0011076155351474881, + "step": 3124 + }, + { + "ce_ib": 6.022705554962158, + "ce_orig": 1.786833643913269, + "epoch": 0.8984111007261485, + "kl_loss": 0.08328546583652496, + "loss_ib": 0.0014351251302286983, + "step": 3124 + }, + { + "ce_ib": 1.0041953325271606, + "ce_orig": 0.07858507335186005, + "epoch": 0.8984111007261485, + "kl_loss": 0.11455818265676498, + "loss_ib": 0.0012460013385862112, + "step": 3124 + }, + { + "ce_ib": 2.516620635986328, + "ce_orig": 0.5171682834625244, + "epoch": 0.8984111007261485, + "kl_loss": 0.04379782825708389, + "loss_ib": 0.0006896403501741588, + "step": 3124 + }, + { + "epoch": 0.8986986843051262, + "grad_norm": 0.09488661587238312, + "learning_rate": 4.110599281877841e-05, + "loss": 0.8258, + "step": 3125 + }, + { + "ce_ib": 5.124475955963135, + "ce_orig": 1.4501162767410278, + "epoch": 0.8986986843051262, + "kl_loss": 0.0446486696600914, + "loss_ib": 0.0009589342516846955, + "step": 3125 + }, + { + "ce_ib": 5.577151298522949, + "ce_orig": 1.4385948181152344, + "epoch": 0.8986986843051262, + "kl_loss": 0.039556726813316345, + "loss_ib": 0.0009532824042253196, + "step": 3125 + }, + { + "ce_ib": 6.172574996948242, + "ce_orig": 1.6980639696121216, + "epoch": 0.8986986843051262, + "kl_loss": 0.06983640044927597, + "loss_ib": 0.0013156214263290167, + "step": 3125 + }, + { + "ce_ib": 2.6638741493225098, + "ce_orig": 0.7529233694076538, + "epoch": 0.8986986843051262, + "kl_loss": 0.023722507059574127, + "loss_ib": 0.0005036124493926764, + "step": 3125 + }, + { + "ce_ib": 5.193201541900635, + "ce_orig": 1.30254328250885, + "epoch": 0.8989862678841039, + "kl_loss": 0.048640310764312744, + "loss_ib": 0.001005723257549107, + "step": 3126 + }, + { + "ce_ib": 3.1162965297698975, + "ce_orig": 0.5249683856964111, + "epoch": 0.8989862678841039, + "kl_loss": 0.037169843912124634, + "loss_ib": 0.0006833280203863978, + "step": 3126 + }, + { + "ce_ib": 4.461698532104492, + "ce_orig": 1.2288213968276978, + "epoch": 0.8989862678841039, + "kl_loss": 0.037516914308071136, + "loss_ib": 0.0008213390246964991, + "step": 3126 + }, + { + "ce_ib": 3.711704969406128, + "ce_orig": 0.8789143562316895, + "epoch": 0.8989862678841039, + "kl_loss": 0.05475790798664093, + "loss_ib": 0.0009187495452351868, + "step": 3126 + }, + { + "ce_ib": 3.5414772033691406, + "ce_orig": 0.6521844267845154, + "epoch": 0.8992738514630815, + "kl_loss": 0.0505790151655674, + "loss_ib": 0.0008599378052167594, + "step": 3127 + }, + { + "ce_ib": 3.5716402530670166, + "ce_orig": 0.5894458293914795, + "epoch": 0.8992738514630815, + "kl_loss": 0.06992414593696594, + "loss_ib": 0.001056405482813716, + "step": 3127 + }, + { + "ce_ib": 2.3814351558685303, + "ce_orig": 0.703652560710907, + "epoch": 0.8992738514630815, + "kl_loss": 0.02469712123274803, + "loss_ib": 0.00048511469503864646, + "step": 3127 + }, + { + "ce_ib": 3.9384326934814453, + "ce_orig": 0.8151541948318481, + "epoch": 0.8992738514630815, + "kl_loss": 0.042316265404224396, + "loss_ib": 0.0008170059300027788, + "step": 3127 + }, + { + "ce_ib": 3.705345630645752, + "ce_orig": 1.1251431703567505, + "epoch": 0.8995614350420591, + "kl_loss": 0.05387631058692932, + "loss_ib": 0.0009092976106330752, + "step": 3128 + }, + { + "ce_ib": 2.3231801986694336, + "ce_orig": 0.6185516119003296, + "epoch": 0.8995614350420591, + "kl_loss": 0.03915878012776375, + "loss_ib": 0.00062390579842031, + "step": 3128 + }, + { + "ce_ib": 3.030238389968872, + "ce_orig": 0.4902428686618805, + "epoch": 0.8995614350420591, + "kl_loss": 0.07716049253940582, + "loss_ib": 0.0010746287880465388, + "step": 3128 + }, + { + "ce_ib": 2.692047595977783, + "ce_orig": 0.6263496279716492, + "epoch": 0.8995614350420591, + "kl_loss": 0.03504202514886856, + "loss_ib": 0.00061962497420609, + "step": 3128 + }, + { + "ce_ib": 2.160520553588867, + "ce_orig": 0.5981430411338806, + "epoch": 0.8998490186210367, + "kl_loss": 0.05574973300099373, + "loss_ib": 0.0007735493709333241, + "step": 3129 + }, + { + "ce_ib": 1.6602137088775635, + "ce_orig": 0.24990743398666382, + "epoch": 0.8998490186210367, + "kl_loss": 0.10603749006986618, + "loss_ib": 0.0012263961834833026, + "step": 3129 + }, + { + "ce_ib": 2.87416672706604, + "ce_orig": 0.7776364088058472, + "epoch": 0.8998490186210367, + "kl_loss": 0.03518648445606232, + "loss_ib": 0.0006392814684659243, + "step": 3129 + }, + { + "ce_ib": 4.218297481536865, + "ce_orig": 0.8735354542732239, + "epoch": 0.8998490186210367, + "kl_loss": 0.04575991630554199, + "loss_ib": 0.0008794288733042777, + "step": 3129 + }, + { + "epoch": 0.9001366022000143, + "grad_norm": 0.10008350759744644, + "learning_rate": 4.1076295016204396e-05, + "loss": 0.8925, + "step": 3130 + }, + { + "ce_ib": 5.691451549530029, + "ce_orig": 1.4438085556030273, + "epoch": 0.9001366022000143, + "kl_loss": 0.05103464424610138, + "loss_ib": 0.0010794915724545717, + "step": 3130 + }, + { + "ce_ib": 3.171886682510376, + "ce_orig": 0.6278020739555359, + "epoch": 0.9001366022000143, + "kl_loss": 0.05124405771493912, + "loss_ib": 0.0008296291925944388, + "step": 3130 + }, + { + "ce_ib": 3.927804470062256, + "ce_orig": 0.9727286696434021, + "epoch": 0.9001366022000143, + "kl_loss": 0.03724087402224541, + "loss_ib": 0.0007651892374269664, + "step": 3130 + }, + { + "ce_ib": 3.629676342010498, + "ce_orig": 0.7205469608306885, + "epoch": 0.9001366022000143, + "kl_loss": 0.07150708138942719, + "loss_ib": 0.001078038359992206, + "step": 3130 + }, + { + "ce_ib": 3.5962471961975098, + "ce_orig": 0.8159353733062744, + "epoch": 0.900424185778992, + "kl_loss": 0.04395217075943947, + "loss_ib": 0.000799146422650665, + "step": 3131 + }, + { + "ce_ib": 2.8570034503936768, + "ce_orig": 0.7404507994651794, + "epoch": 0.900424185778992, + "kl_loss": 0.027756404131650925, + "loss_ib": 0.0005632643587887287, + "step": 3131 + }, + { + "ce_ib": 2.5965657234191895, + "ce_orig": 0.8611348271369934, + "epoch": 0.900424185778992, + "kl_loss": 0.04347732663154602, + "loss_ib": 0.0006944298511371017, + "step": 3131 + }, + { + "ce_ib": 2.9246020317077637, + "ce_orig": 0.50452721118927, + "epoch": 0.900424185778992, + "kl_loss": 0.08632007241249084, + "loss_ib": 0.001155660836957395, + "step": 3131 + }, + { + "ce_ib": 3.030289888381958, + "ce_orig": 0.6493998765945435, + "epoch": 0.9007117693579697, + "kl_loss": 0.04051048308610916, + "loss_ib": 0.0007081337971612811, + "step": 3132 + }, + { + "ce_ib": 5.439365863800049, + "ce_orig": 1.3616421222686768, + "epoch": 0.9007117693579697, + "kl_loss": 0.06297541409730911, + "loss_ib": 0.0011736906599253416, + "step": 3132 + }, + { + "ce_ib": 3.414963483810425, + "ce_orig": 0.3652159571647644, + "epoch": 0.9007117693579697, + "kl_loss": 0.03952603414654732, + "loss_ib": 0.0007367566577158868, + "step": 3132 + }, + { + "ce_ib": 3.086960792541504, + "ce_orig": 0.5596114993095398, + "epoch": 0.9007117693579697, + "kl_loss": 0.04417375475168228, + "loss_ib": 0.0007504336535930634, + "step": 3132 + }, + { + "ce_ib": 3.2730250358581543, + "ce_orig": 0.7851879596710205, + "epoch": 0.9009993529369473, + "kl_loss": 0.055134519934654236, + "loss_ib": 0.000878647668287158, + "step": 3133 + }, + { + "ce_ib": 5.655826568603516, + "ce_orig": 1.581417202949524, + "epoch": 0.9009993529369473, + "kl_loss": 0.042427003383636475, + "loss_ib": 0.0009898527059704065, + "step": 3133 + }, + { + "ce_ib": 4.459659099578857, + "ce_orig": 0.896200954914093, + "epoch": 0.9009993529369473, + "kl_loss": 0.03863952308893204, + "loss_ib": 0.0008323611109517515, + "step": 3133 + }, + { + "ce_ib": 4.496482849121094, + "ce_orig": 1.2784768342971802, + "epoch": 0.9009993529369473, + "kl_loss": 0.04932437092065811, + "loss_ib": 0.0009428919875063002, + "step": 3133 + }, + { + "ce_ib": 6.449125289916992, + "ce_orig": 1.4938771724700928, + "epoch": 0.901286936515925, + "kl_loss": 0.06922325491905212, + "loss_ib": 0.0013371449895203114, + "step": 3134 + }, + { + "ce_ib": 3.9342234134674072, + "ce_orig": 1.1247541904449463, + "epoch": 0.901286936515925, + "kl_loss": 0.04798426479101181, + "loss_ib": 0.000873264973051846, + "step": 3134 + }, + { + "ce_ib": 3.044097423553467, + "ce_orig": 0.5271012783050537, + "epoch": 0.901286936515925, + "kl_loss": 0.034936513751745224, + "loss_ib": 0.0006537748849950731, + "step": 3134 + }, + { + "ce_ib": 3.0270814895629883, + "ce_orig": 0.5004477500915527, + "epoch": 0.901286936515925, + "kl_loss": 0.04199955612421036, + "loss_ib": 0.0007227036985568702, + "step": 3134 + }, + { + "epoch": 0.9015745200949026, + "grad_norm": 0.10734646022319794, + "learning_rate": 4.1046558482104305e-05, + "loss": 0.7937, + "step": 3135 + }, + { + "ce_ib": 3.656642436981201, + "ce_orig": 0.7956322431564331, + "epoch": 0.9015745200949026, + "kl_loss": 0.051557738333940506, + "loss_ib": 0.0008812416344881058, + "step": 3135 + }, + { + "ce_ib": 4.217484474182129, + "ce_orig": 0.6959279179573059, + "epoch": 0.9015745200949026, + "kl_loss": 0.08793353289365768, + "loss_ib": 0.0013010837137699127, + "step": 3135 + }, + { + "ce_ib": 2.6935946941375732, + "ce_orig": 0.7450282573699951, + "epoch": 0.9015745200949026, + "kl_loss": 0.03737877309322357, + "loss_ib": 0.0006431472138501704, + "step": 3135 + }, + { + "ce_ib": 2.8226613998413086, + "ce_orig": 0.7161137461662292, + "epoch": 0.9015745200949026, + "kl_loss": 0.029384106397628784, + "loss_ib": 0.0005761071806773543, + "step": 3135 + }, + { + "ce_ib": 3.557140350341797, + "ce_orig": 0.7173062562942505, + "epoch": 0.9018621036738802, + "kl_loss": 0.043720997869968414, + "loss_ib": 0.0007929240236990154, + "step": 3136 + }, + { + "ce_ib": 2.7684988975524902, + "ce_orig": 0.736605703830719, + "epoch": 0.9018621036738802, + "kl_loss": 0.06432580947875977, + "loss_ib": 0.0009201079374179244, + "step": 3136 + }, + { + "ce_ib": 5.250295162200928, + "ce_orig": 0.8949050903320312, + "epoch": 0.9018621036738802, + "kl_loss": 0.07402694225311279, + "loss_ib": 0.0012652988079935312, + "step": 3136 + }, + { + "ce_ib": 4.676276206970215, + "ce_orig": 1.2058532238006592, + "epoch": 0.9018621036738802, + "kl_loss": 0.060090988874435425, + "loss_ib": 0.0010685374727472663, + "step": 3136 + }, + { + "ce_ib": 3.2671003341674805, + "ce_orig": 0.631249725818634, + "epoch": 0.9021496872528578, + "kl_loss": 0.02656942792236805, + "loss_ib": 0.0005924042779952288, + "step": 3137 + }, + { + "ce_ib": 2.9177937507629395, + "ce_orig": 0.7041955590248108, + "epoch": 0.9021496872528578, + "kl_loss": 0.06784646958112717, + "loss_ib": 0.000970244116615504, + "step": 3137 + }, + { + "ce_ib": 2.628031015396118, + "ce_orig": 0.6345438957214355, + "epoch": 0.9021496872528578, + "kl_loss": 0.04292180761694908, + "loss_ib": 0.0006920211599208415, + "step": 3137 + }, + { + "ce_ib": 3.286144256591797, + "ce_orig": 0.6920792460441589, + "epoch": 0.9021496872528578, + "kl_loss": 0.04643823206424713, + "loss_ib": 0.0007929967250674963, + "step": 3137 + }, + { + "ce_ib": 3.2534854412078857, + "ce_orig": 0.8678990006446838, + "epoch": 0.9024372708318354, + "kl_loss": 0.027239050716161728, + "loss_ib": 0.0005977390101179481, + "step": 3138 + }, + { + "ce_ib": 5.277351379394531, + "ce_orig": 1.721376657485962, + "epoch": 0.9024372708318354, + "kl_loss": 0.049055010080337524, + "loss_ib": 0.0010182851692661643, + "step": 3138 + }, + { + "ce_ib": 2.5208966732025146, + "ce_orig": 0.7662544250488281, + "epoch": 0.9024372708318354, + "kl_loss": 0.040836408734321594, + "loss_ib": 0.0006604537484236062, + "step": 3138 + }, + { + "ce_ib": 4.008233070373535, + "ce_orig": 1.038093090057373, + "epoch": 0.9024372708318354, + "kl_loss": 0.061792705208063126, + "loss_ib": 0.0010187503648921847, + "step": 3138 + }, + { + "ce_ib": 5.542392730712891, + "ce_orig": 1.1451677083969116, + "epoch": 0.9027248544108132, + "kl_loss": 0.06310157477855682, + "loss_ib": 0.0011852550087496638, + "step": 3139 + }, + { + "ce_ib": 4.4598188400268555, + "ce_orig": 0.8230383992195129, + "epoch": 0.9027248544108132, + "kl_loss": 0.06636128574609756, + "loss_ib": 0.0011095947120338678, + "step": 3139 + }, + { + "ce_ib": 2.1911590099334717, + "ce_orig": 0.5390222668647766, + "epoch": 0.9027248544108132, + "kl_loss": 0.0399959497153759, + "loss_ib": 0.000619075377471745, + "step": 3139 + }, + { + "ce_ib": 2.545074701309204, + "ce_orig": 0.7177272439002991, + "epoch": 0.9027248544108132, + "kl_loss": 0.030372919514775276, + "loss_ib": 0.000558236613869667, + "step": 3139 + }, + { + "epoch": 0.9030124379897908, + "grad_norm": 0.09992845356464386, + "learning_rate": 4.101678328812034e-05, + "loss": 0.7919, + "step": 3140 + }, + { + "ce_ib": 2.2484471797943115, + "ce_orig": 0.6150729656219482, + "epoch": 0.9030124379897908, + "kl_loss": 0.027530021965503693, + "loss_ib": 0.0005001449608244002, + "step": 3140 + }, + { + "ce_ib": 3.164381504058838, + "ce_orig": 0.6175785660743713, + "epoch": 0.9030124379897908, + "kl_loss": 0.030350664630532265, + "loss_ib": 0.0006199447670951486, + "step": 3140 + }, + { + "ce_ib": 3.4353654384613037, + "ce_orig": 0.573479175567627, + "epoch": 0.9030124379897908, + "kl_loss": 0.03232278302311897, + "loss_ib": 0.0006667643901892006, + "step": 3140 + }, + { + "ce_ib": 3.8260035514831543, + "ce_orig": 0.8369064331054688, + "epoch": 0.9030124379897908, + "kl_loss": 0.04912973940372467, + "loss_ib": 0.0008738977485336363, + "step": 3140 + }, + { + "ce_ib": 3.58121395111084, + "ce_orig": 0.9888066053390503, + "epoch": 0.9033000215687684, + "kl_loss": 0.08063403517007828, + "loss_ib": 0.0011644617188721895, + "step": 3141 + }, + { + "ce_ib": 4.676492691040039, + "ce_orig": 0.8983652591705322, + "epoch": 0.9033000215687684, + "kl_loss": 0.057104118168354034, + "loss_ib": 0.0010386904468759894, + "step": 3141 + }, + { + "ce_ib": 4.588086128234863, + "ce_orig": 0.9422292113304138, + "epoch": 0.9033000215687684, + "kl_loss": 0.0418565571308136, + "loss_ib": 0.0008773741428740323, + "step": 3141 + }, + { + "ce_ib": 2.7038209438323975, + "ce_orig": 0.6383805871009827, + "epoch": 0.9033000215687684, + "kl_loss": 0.05717053264379501, + "loss_ib": 0.0008420873782597482, + "step": 3141 + }, + { + "ce_ib": 3.3281233310699463, + "ce_orig": 1.0498886108398438, + "epoch": 0.903587605147746, + "kl_loss": 0.04215066134929657, + "loss_ib": 0.0007543189567513764, + "step": 3142 + }, + { + "ce_ib": 4.152535438537598, + "ce_orig": 1.0837552547454834, + "epoch": 0.903587605147746, + "kl_loss": 0.042785175144672394, + "loss_ib": 0.0008431053138338029, + "step": 3142 + }, + { + "ce_ib": 3.7365529537200928, + "ce_orig": 0.7240763306617737, + "epoch": 0.903587605147746, + "kl_loss": 0.10892749577760696, + "loss_ib": 0.0014629302313551307, + "step": 3142 + }, + { + "ce_ib": 3.499246120452881, + "ce_orig": 0.5588169097900391, + "epoch": 0.903587605147746, + "kl_loss": 0.05855812877416611, + "loss_ib": 0.0009355059009976685, + "step": 3142 + }, + { + "ce_ib": 3.274170398712158, + "ce_orig": 0.793782651424408, + "epoch": 0.9038751887267237, + "kl_loss": 0.06249527633190155, + "loss_ib": 0.0009523697663098574, + "step": 3143 + }, + { + "ce_ib": 4.789114952087402, + "ce_orig": 1.228871464729309, + "epoch": 0.9038751887267237, + "kl_loss": 0.06688295304775238, + "loss_ib": 0.0011477409861981869, + "step": 3143 + }, + { + "ce_ib": 4.637375354766846, + "ce_orig": 1.1366498470306396, + "epoch": 0.9038751887267237, + "kl_loss": 0.04283798485994339, + "loss_ib": 0.0008921173284761608, + "step": 3143 + }, + { + "ce_ib": 2.400334358215332, + "ce_orig": 0.6517524123191833, + "epoch": 0.9038751887267237, + "kl_loss": 0.032439686357975006, + "loss_ib": 0.0005644302582368255, + "step": 3143 + }, + { + "ce_ib": 2.5457265377044678, + "ce_orig": 0.5658777356147766, + "epoch": 0.9041627723057013, + "kl_loss": 0.030798282474279404, + "loss_ib": 0.0005625555058941245, + "step": 3144 + }, + { + "ce_ib": 2.830418109893799, + "ce_orig": 0.736941933631897, + "epoch": 0.9041627723057013, + "kl_loss": 0.06579810380935669, + "loss_ib": 0.0009410228230990469, + "step": 3144 + }, + { + "ce_ib": 5.33506441116333, + "ce_orig": 1.4006227254867554, + "epoch": 0.9041627723057013, + "kl_loss": 0.07679986953735352, + "loss_ib": 0.0013015051372349262, + "step": 3144 + }, + { + "ce_ib": 2.579551935195923, + "ce_orig": 0.4189073145389557, + "epoch": 0.9041627723057013, + "kl_loss": 0.05783543735742569, + "loss_ib": 0.0008363095694221556, + "step": 3144 + }, + { + "epoch": 0.904450355884679, + "grad_norm": 0.11603457480669022, + "learning_rate": 4.098696950598786e-05, + "loss": 0.8884, + "step": 3145 + }, + { + "ce_ib": 5.053701877593994, + "ce_orig": 0.8232886791229248, + "epoch": 0.904450355884679, + "kl_loss": 0.058899518102407455, + "loss_ib": 0.00109436537604779, + "step": 3145 + }, + { + "ce_ib": 3.6184818744659424, + "ce_orig": 0.7059771418571472, + "epoch": 0.904450355884679, + "kl_loss": 0.04457046091556549, + "loss_ib": 0.000807552773039788, + "step": 3145 + }, + { + "ce_ib": 2.3971195220947266, + "ce_orig": 0.5604226589202881, + "epoch": 0.904450355884679, + "kl_loss": 0.027318641543388367, + "loss_ib": 0.0005128983175382018, + "step": 3145 + }, + { + "ce_ib": 4.35545015335083, + "ce_orig": 0.9202273488044739, + "epoch": 0.904450355884679, + "kl_loss": 0.08417771011590958, + "loss_ib": 0.001277322182431817, + "step": 3145 + }, + { + "ce_ib": 4.531501293182373, + "ce_orig": 1.1599583625793457, + "epoch": 0.9047379394636567, + "kl_loss": 0.04547704756259918, + "loss_ib": 0.0009079205919988453, + "step": 3146 + }, + { + "ce_ib": 3.53442645072937, + "ce_orig": 0.6688250303268433, + "epoch": 0.9047379394636567, + "kl_loss": 0.040086694061756134, + "loss_ib": 0.0007543095853179693, + "step": 3146 + }, + { + "ce_ib": 2.5549404621124268, + "ce_orig": 0.5614192485809326, + "epoch": 0.9047379394636567, + "kl_loss": 0.05285279080271721, + "loss_ib": 0.0007840219186618924, + "step": 3146 + }, + { + "ce_ib": 3.8104231357574463, + "ce_orig": 0.7322917580604553, + "epoch": 0.9047379394636567, + "kl_loss": 0.045213885605335236, + "loss_ib": 0.0008331811404787004, + "step": 3146 + }, + { + "ce_ib": 3.7219061851501465, + "ce_orig": 0.42451560497283936, + "epoch": 0.9050255230426343, + "kl_loss": 0.06389184296131134, + "loss_ib": 0.0010111089795827866, + "step": 3147 + }, + { + "ce_ib": 4.509589195251465, + "ce_orig": 0.9654191136360168, + "epoch": 0.9050255230426343, + "kl_loss": 0.08876772969961166, + "loss_ib": 0.0013386362697929144, + "step": 3147 + }, + { + "ce_ib": 4.1225504875183105, + "ce_orig": 0.8971890807151794, + "epoch": 0.9050255230426343, + "kl_loss": 0.04802847281098366, + "loss_ib": 0.0008925397414714098, + "step": 3147 + }, + { + "ce_ib": 4.1899614334106445, + "ce_orig": 1.09754478931427, + "epoch": 0.9050255230426343, + "kl_loss": 0.047518450766801834, + "loss_ib": 0.0008941806736402214, + "step": 3147 + }, + { + "ce_ib": 6.377622604370117, + "ce_orig": 1.585392951965332, + "epoch": 0.9053131066216119, + "kl_loss": 0.06624720245599747, + "loss_ib": 0.0013002341147512197, + "step": 3148 + }, + { + "ce_ib": 3.547067642211914, + "ce_orig": 0.799464225769043, + "epoch": 0.9053131066216119, + "kl_loss": 0.05337826907634735, + "loss_ib": 0.000888489477802068, + "step": 3148 + }, + { + "ce_ib": 4.087887287139893, + "ce_orig": 1.112302541732788, + "epoch": 0.9053131066216119, + "kl_loss": 0.05982593446969986, + "loss_ib": 0.0010070480639114976, + "step": 3148 + }, + { + "ce_ib": 5.043735027313232, + "ce_orig": 1.387290596961975, + "epoch": 0.9053131066216119, + "kl_loss": 0.04558909684419632, + "loss_ib": 0.000960264471359551, + "step": 3148 + }, + { + "ce_ib": 3.0967788696289062, + "ce_orig": 0.6422078609466553, + "epoch": 0.9056006902005895, + "kl_loss": 0.05783384293317795, + "loss_ib": 0.0008880162495188415, + "step": 3149 + }, + { + "ce_ib": 4.129266262054443, + "ce_orig": 1.0291314125061035, + "epoch": 0.9056006902005895, + "kl_loss": 0.06444425880908966, + "loss_ib": 0.0010573691688477993, + "step": 3149 + }, + { + "ce_ib": 4.934642314910889, + "ce_orig": 0.8951589465141296, + "epoch": 0.9056006902005895, + "kl_loss": 0.03628742694854736, + "loss_ib": 0.0008563384180888534, + "step": 3149 + }, + { + "ce_ib": 3.810358762741089, + "ce_orig": 0.7624847292900085, + "epoch": 0.9056006902005895, + "kl_loss": 0.06895412504673004, + "loss_ib": 0.0010705770691856742, + "step": 3149 + }, + { + "epoch": 0.9058882737795672, + "grad_norm": 0.1026853621006012, + "learning_rate": 4.095711720753519e-05, + "loss": 0.8769, + "step": 3150 + }, + { + "ce_ib": 2.2921142578125, + "ce_orig": 0.4956589937210083, + "epoch": 0.9058882737795672, + "kl_loss": 0.06659749150276184, + "loss_ib": 0.0008951863273978233, + "step": 3150 + }, + { + "ce_ib": 1.608175277709961, + "ce_orig": 0.2503775358200073, + "epoch": 0.9058882737795672, + "kl_loss": 0.10108375549316406, + "loss_ib": 0.0011716550216078758, + "step": 3150 + }, + { + "ce_ib": 5.30651330947876, + "ce_orig": 1.2953870296478271, + "epoch": 0.9058882737795672, + "kl_loss": 0.07337844371795654, + "loss_ib": 0.0012644358212128282, + "step": 3150 + }, + { + "ce_ib": 3.027233123779297, + "ce_orig": 0.9556626677513123, + "epoch": 0.9058882737795672, + "kl_loss": 0.035618528723716736, + "loss_ib": 0.0006589086260646582, + "step": 3150 + }, + { + "ce_ib": 2.8397579193115234, + "ce_orig": 0.6345589756965637, + "epoch": 0.9061758573585448, + "kl_loss": 0.053592484444379807, + "loss_ib": 0.0008199005969800055, + "step": 3151 + }, + { + "ce_ib": 4.099710464477539, + "ce_orig": 0.9980069994926453, + "epoch": 0.9061758573585448, + "kl_loss": 0.07313114404678345, + "loss_ib": 0.0011412824969738722, + "step": 3151 + }, + { + "ce_ib": 6.4003496170043945, + "ce_orig": 1.7430453300476074, + "epoch": 0.9061758573585448, + "kl_loss": 0.08742370456457138, + "loss_ib": 0.001514272065833211, + "step": 3151 + }, + { + "ce_ib": 2.411074638366699, + "ce_orig": 0.4763501286506653, + "epoch": 0.9061758573585448, + "kl_loss": 0.05348663404583931, + "loss_ib": 0.0007759737782180309, + "step": 3151 + }, + { + "ce_ib": 2.714411973953247, + "ce_orig": 0.5459250807762146, + "epoch": 0.9064634409375225, + "kl_loss": 0.04802354425191879, + "loss_ib": 0.0007516766199842095, + "step": 3152 + }, + { + "ce_ib": 2.5454649925231934, + "ce_orig": 0.5876545906066895, + "epoch": 0.9064634409375225, + "kl_loss": 0.06125754863023758, + "loss_ib": 0.0008671219693496823, + "step": 3152 + }, + { + "ce_ib": 2.7404773235321045, + "ce_orig": 0.6382632851600647, + "epoch": 0.9064634409375225, + "kl_loss": 0.048600099980831146, + "loss_ib": 0.0007600486860610545, + "step": 3152 + }, + { + "ce_ib": 2.9879860877990723, + "ce_orig": 0.7227291464805603, + "epoch": 0.9064634409375225, + "kl_loss": 0.03543735295534134, + "loss_ib": 0.0006531721446663141, + "step": 3152 + }, + { + "ce_ib": 4.180792331695557, + "ce_orig": 0.5921109318733215, + "epoch": 0.9067510245165001, + "kl_loss": 0.03895791992545128, + "loss_ib": 0.000807658419944346, + "step": 3153 + }, + { + "ce_ib": 2.4708826541900635, + "ce_orig": 0.6333239674568176, + "epoch": 0.9067510245165001, + "kl_loss": 0.03753764182329178, + "loss_ib": 0.0006224646931514144, + "step": 3153 + }, + { + "ce_ib": 3.5460598468780518, + "ce_orig": 1.0335943698883057, + "epoch": 0.9067510245165001, + "kl_loss": 0.11577080190181732, + "loss_ib": 0.0015123140765354037, + "step": 3153 + }, + { + "ce_ib": 3.4595508575439453, + "ce_orig": 0.8157153725624084, + "epoch": 0.9067510245165001, + "kl_loss": 0.062388114631175995, + "loss_ib": 0.0009698362555354834, + "step": 3153 + }, + { + "ce_ib": 4.444334983825684, + "ce_orig": 1.1453486680984497, + "epoch": 0.9070386080954778, + "kl_loss": 0.062231071293354034, + "loss_ib": 0.0010667442111298442, + "step": 3154 + }, + { + "ce_ib": 2.040283203125, + "ce_orig": 0.537696123123169, + "epoch": 0.9070386080954778, + "kl_loss": 0.037951596081256866, + "loss_ib": 0.0005835442570969462, + "step": 3154 + }, + { + "ce_ib": 2.9713969230651855, + "ce_orig": 0.5176243185997009, + "epoch": 0.9070386080954778, + "kl_loss": 0.061154626309871674, + "loss_ib": 0.0009086859063245356, + "step": 3154 + }, + { + "ce_ib": 1.9186675548553467, + "ce_orig": 0.5038925409317017, + "epoch": 0.9070386080954778, + "kl_loss": 0.04333876073360443, + "loss_ib": 0.0006252543535083532, + "step": 3154 + }, + { + "epoch": 0.9073261916744554, + "grad_norm": 0.08800416439771652, + "learning_rate": 4.0927226464683435e-05, + "loss": 0.8032, + "step": 3155 + }, + { + "ce_ib": 4.8963422775268555, + "ce_orig": 1.1116701364517212, + "epoch": 0.9073261916744554, + "kl_loss": 0.04667635262012482, + "loss_ib": 0.0009563976782374084, + "step": 3155 + }, + { + "ce_ib": 5.641246795654297, + "ce_orig": 1.4788697957992554, + "epoch": 0.9073261916744554, + "kl_loss": 0.0802331268787384, + "loss_ib": 0.0013664558064192533, + "step": 3155 + }, + { + "ce_ib": 3.8911144733428955, + "ce_orig": 0.5718050599098206, + "epoch": 0.9073261916744554, + "kl_loss": 0.1023905873298645, + "loss_ib": 0.0014130172785371542, + "step": 3155 + }, + { + "ce_ib": 3.1391818523406982, + "ce_orig": 0.9533452391624451, + "epoch": 0.9073261916744554, + "kl_loss": 0.028824463486671448, + "loss_ib": 0.0006021627923473716, + "step": 3155 + }, + { + "ce_ib": 2.0950379371643066, + "ce_orig": 0.4570387899875641, + "epoch": 0.907613775253433, + "kl_loss": 0.01930708810687065, + "loss_ib": 0.00040257468936033547, + "step": 3156 + }, + { + "ce_ib": 3.8657944202423096, + "ce_orig": 0.7045357823371887, + "epoch": 0.907613775253433, + "kl_loss": 0.06694671511650085, + "loss_ib": 0.0010560465743765235, + "step": 3156 + }, + { + "ce_ib": 3.253561019897461, + "ce_orig": 0.7712666392326355, + "epoch": 0.907613775253433, + "kl_loss": 0.06475502252578735, + "loss_ib": 0.0009729063021950424, + "step": 3156 + }, + { + "ce_ib": 1.8848185539245605, + "ce_orig": 0.5103334188461304, + "epoch": 0.907613775253433, + "kl_loss": 0.06333491951227188, + "loss_ib": 0.0008218310540542006, + "step": 3156 + }, + { + "ce_ib": 4.501340389251709, + "ce_orig": 0.5238619446754456, + "epoch": 0.9079013588324106, + "kl_loss": 0.0613267719745636, + "loss_ib": 0.0010634016944095492, + "step": 3157 + }, + { + "ce_ib": 3.4964559078216553, + "ce_orig": 0.5789336562156677, + "epoch": 0.9079013588324106, + "kl_loss": 0.0738741084933281, + "loss_ib": 0.0010883866343647242, + "step": 3157 + }, + { + "ce_ib": 3.0201361179351807, + "ce_orig": 0.7131902575492859, + "epoch": 0.9079013588324106, + "kl_loss": 0.07226026058197021, + "loss_ib": 0.0010246161837130785, + "step": 3157 + }, + { + "ce_ib": 2.7828500270843506, + "ce_orig": 0.8242505192756653, + "epoch": 0.9079013588324106, + "kl_loss": 0.027684489265084267, + "loss_ib": 0.0005551298381760716, + "step": 3157 + }, + { + "ce_ib": 2.599431037902832, + "ce_orig": 0.6275473237037659, + "epoch": 0.9081889424113883, + "kl_loss": 0.03952493518590927, + "loss_ib": 0.0006551924161612988, + "step": 3158 + }, + { + "ce_ib": 4.455791473388672, + "ce_orig": 0.9370334148406982, + "epoch": 0.9081889424113883, + "kl_loss": 0.053418196737766266, + "loss_ib": 0.0009797611273825169, + "step": 3158 + }, + { + "ce_ib": 4.0729546546936035, + "ce_orig": 0.8900496959686279, + "epoch": 0.9081889424113883, + "kl_loss": 0.05221444368362427, + "loss_ib": 0.0009294398478232324, + "step": 3158 + }, + { + "ce_ib": 2.6649739742279053, + "ce_orig": 0.7785559296607971, + "epoch": 0.9081889424113883, + "kl_loss": 0.040097810328006744, + "loss_ib": 0.0006674754549749196, + "step": 3158 + }, + { + "ce_ib": 2.9906888008117676, + "ce_orig": 0.8574509620666504, + "epoch": 0.908476525990366, + "kl_loss": 0.037598807364702225, + "loss_ib": 0.0006750569446012378, + "step": 3159 + }, + { + "ce_ib": 4.080773830413818, + "ce_orig": 0.9945180416107178, + "epoch": 0.908476525990366, + "kl_loss": 0.06580433994531631, + "loss_ib": 0.001066120807081461, + "step": 3159 + }, + { + "ce_ib": 4.112853527069092, + "ce_orig": 0.9542604684829712, + "epoch": 0.908476525990366, + "kl_loss": 0.0683295950293541, + "loss_ib": 0.001094581326469779, + "step": 3159 + }, + { + "ce_ib": 2.4025213718414307, + "ce_orig": 0.6176670789718628, + "epoch": 0.908476525990366, + "kl_loss": 0.03496548533439636, + "loss_ib": 0.0005899069947190583, + "step": 3159 + }, + { + "epoch": 0.9087641095693436, + "grad_norm": 0.08723282068967819, + "learning_rate": 4.089729734944634e-05, + "loss": 0.8786, + "step": 3160 + }, + { + "ce_ib": 5.415412902832031, + "ce_orig": 1.3668545484542847, + "epoch": 0.9087641095693436, + "kl_loss": 0.045561522245407104, + "loss_ib": 0.0009971564868465066, + "step": 3160 + }, + { + "ce_ib": 4.749611854553223, + "ce_orig": 1.1728780269622803, + "epoch": 0.9087641095693436, + "kl_loss": 0.04969606548547745, + "loss_ib": 0.0009719218360260129, + "step": 3160 + }, + { + "ce_ib": 4.570993423461914, + "ce_orig": 1.2396568059921265, + "epoch": 0.9087641095693436, + "kl_loss": 0.04069716855883598, + "loss_ib": 0.0008640710148029029, + "step": 3160 + }, + { + "ce_ib": 2.9093387126922607, + "ce_orig": 0.7770566940307617, + "epoch": 0.9087641095693436, + "kl_loss": 0.05811512842774391, + "loss_ib": 0.000872085161972791, + "step": 3160 + }, + { + "ce_ib": 2.1648383140563965, + "ce_orig": 0.47273069620132446, + "epoch": 0.9090516931483212, + "kl_loss": 0.04455002769827843, + "loss_ib": 0.0006619840860366821, + "step": 3161 + }, + { + "ce_ib": 4.106698989868164, + "ce_orig": 1.0626882314682007, + "epoch": 0.9090516931483212, + "kl_loss": 0.0570068284869194, + "loss_ib": 0.0009807382011786103, + "step": 3161 + }, + { + "ce_ib": 3.476902484893799, + "ce_orig": 0.582852303981781, + "epoch": 0.9090516931483212, + "kl_loss": 0.04991478472948074, + "loss_ib": 0.0008468380547128618, + "step": 3161 + }, + { + "ce_ib": 2.324017286300659, + "ce_orig": 0.6442859768867493, + "epoch": 0.9090516931483212, + "kl_loss": 0.04940430447459221, + "loss_ib": 0.0007264447631314397, + "step": 3161 + }, + { + "ce_ib": 5.534293174743652, + "ce_orig": 1.5612061023712158, + "epoch": 0.9093392767272989, + "kl_loss": 0.03989531099796295, + "loss_ib": 0.0009523823391646147, + "step": 3162 + }, + { + "ce_ib": 5.0921630859375, + "ce_orig": 0.8155058026313782, + "epoch": 0.9093392767272989, + "kl_loss": 0.13897936046123505, + "loss_ib": 0.0018990099197253585, + "step": 3162 + }, + { + "ce_ib": 2.761345148086548, + "ce_orig": 0.8818910717964172, + "epoch": 0.9093392767272989, + "kl_loss": 0.022269662469625473, + "loss_ib": 0.0004988310975022614, + "step": 3162 + }, + { + "ce_ib": 4.035433292388916, + "ce_orig": 0.9083020091056824, + "epoch": 0.9093392767272989, + "kl_loss": 0.06589611619710922, + "loss_ib": 0.001062504481524229, + "step": 3162 + }, + { + "ce_ib": 4.267354965209961, + "ce_orig": 1.083043098449707, + "epoch": 0.9096268603062765, + "kl_loss": 0.04926174879074097, + "loss_ib": 0.0009193529840558767, + "step": 3163 + }, + { + "ce_ib": 3.179414987564087, + "ce_orig": 0.6998196244239807, + "epoch": 0.9096268603062765, + "kl_loss": 0.04881719499826431, + "loss_ib": 0.0008061134722083807, + "step": 3163 + }, + { + "ce_ib": 3.333092212677002, + "ce_orig": 0.8073245286941528, + "epoch": 0.9096268603062765, + "kl_loss": 0.11262553930282593, + "loss_ib": 0.0014595645479857922, + "step": 3163 + }, + { + "ce_ib": 4.782686233520508, + "ce_orig": 1.3961894512176514, + "epoch": 0.9096268603062765, + "kl_loss": 0.05796296149492264, + "loss_ib": 0.0010578982764855027, + "step": 3163 + }, + { + "ce_ib": 3.8897080421447754, + "ce_orig": 0.9594340920448303, + "epoch": 0.9099144438852541, + "kl_loss": 0.054433900862932205, + "loss_ib": 0.0009333097841590643, + "step": 3164 + }, + { + "ce_ib": 2.416351795196533, + "ce_orig": 0.5093982219696045, + "epoch": 0.9099144438852541, + "kl_loss": 0.05345557630062103, + "loss_ib": 0.0007761908927932382, + "step": 3164 + }, + { + "ce_ib": 4.459129810333252, + "ce_orig": 1.0348386764526367, + "epoch": 0.9099144438852541, + "kl_loss": 0.06499442458152771, + "loss_ib": 0.0010958571219816804, + "step": 3164 + }, + { + "ce_ib": 2.4464800357818604, + "ce_orig": 0.6161850094795227, + "epoch": 0.9099144438852541, + "kl_loss": 0.04545998573303223, + "loss_ib": 0.0006992478738538921, + "step": 3164 + }, + { + "epoch": 0.9102020274642318, + "grad_norm": 0.0939023345708847, + "learning_rate": 4.0867329933930085e-05, + "loss": 0.8416, + "step": 3165 + }, + { + "ce_ib": 3.9203765392303467, + "ce_orig": 0.9958056211471558, + "epoch": 0.9102020274642318, + "kl_loss": 0.08521296083927155, + "loss_ib": 0.0012441672151908278, + "step": 3165 + }, + { + "ce_ib": 2.741227626800537, + "ce_orig": 0.44543880224227905, + "epoch": 0.9102020274642318, + "kl_loss": 0.054834455251693726, + "loss_ib": 0.0008224672637879848, + "step": 3165 + }, + { + "ce_ib": 5.284068584442139, + "ce_orig": 1.1296659708023071, + "epoch": 0.9102020274642318, + "kl_loss": 0.04957398399710655, + "loss_ib": 0.0010241466807201505, + "step": 3165 + }, + { + "ce_ib": 3.009582757949829, + "ce_orig": 0.4799595773220062, + "epoch": 0.9102020274642318, + "kl_loss": 0.07159125804901123, + "loss_ib": 0.0010168708395212889, + "step": 3165 + }, + { + "ce_ib": 3.8365890979766846, + "ce_orig": 0.7389499545097351, + "epoch": 0.9104896110432095, + "kl_loss": 0.06372620165348053, + "loss_ib": 0.001020920812152326, + "step": 3166 + }, + { + "ce_ib": 4.527741432189941, + "ce_orig": 1.0655750036239624, + "epoch": 0.9104896110432095, + "kl_loss": 0.06316692382097244, + "loss_ib": 0.0010844432981684804, + "step": 3166 + }, + { + "ce_ib": 2.7912206649780273, + "ce_orig": 0.6853869557380676, + "epoch": 0.9104896110432095, + "kl_loss": 0.032502710819244385, + "loss_ib": 0.0006041491869837046, + "step": 3166 + }, + { + "ce_ib": 2.6967291831970215, + "ce_orig": 0.5976574420928955, + "epoch": 0.9104896110432095, + "kl_loss": 0.0651044175028801, + "loss_ib": 0.0009207170805893838, + "step": 3166 + }, + { + "ce_ib": 5.364028453826904, + "ce_orig": 1.2627798318862915, + "epoch": 0.9107771946221871, + "kl_loss": 0.08646850287914276, + "loss_ib": 0.0014010878512635827, + "step": 3167 + }, + { + "ce_ib": 2.318260908126831, + "ce_orig": 0.32400253415107727, + "epoch": 0.9107771946221871, + "kl_loss": 0.02478390745818615, + "loss_ib": 0.0004796651774086058, + "step": 3167 + }, + { + "ce_ib": 1.59919011592865, + "ce_orig": 0.23859328031539917, + "epoch": 0.9107771946221871, + "kl_loss": 0.12918564677238464, + "loss_ib": 0.0014517754316329956, + "step": 3167 + }, + { + "ce_ib": 3.6274046897888184, + "ce_orig": 0.3181067109107971, + "epoch": 0.9107771946221871, + "kl_loss": 0.08040410280227661, + "loss_ib": 0.001166781410574913, + "step": 3167 + }, + { + "ce_ib": 2.323798894882202, + "ce_orig": 0.4198931157588959, + "epoch": 0.9110647782011647, + "kl_loss": 0.03054458647966385, + "loss_ib": 0.0005378257483243942, + "step": 3168 + }, + { + "ce_ib": 2.6449389457702637, + "ce_orig": 0.7084780335426331, + "epoch": 0.9110647782011647, + "kl_loss": 0.07263755798339844, + "loss_ib": 0.000990869477391243, + "step": 3168 + }, + { + "ce_ib": 3.8316755294799805, + "ce_orig": 0.7159465551376343, + "epoch": 0.9110647782011647, + "kl_loss": 0.07422642409801483, + "loss_ib": 0.0011254317359998822, + "step": 3168 + }, + { + "ce_ib": 3.9646575450897217, + "ce_orig": 0.9314635396003723, + "epoch": 0.9110647782011647, + "kl_loss": 0.06810861825942993, + "loss_ib": 0.0010775518603622913, + "step": 3168 + }, + { + "ce_ib": 3.182936191558838, + "ce_orig": 0.8021252751350403, + "epoch": 0.9113523617801423, + "kl_loss": 0.04320649802684784, + "loss_ib": 0.0007503585075028241, + "step": 3169 + }, + { + "ce_ib": 5.848911762237549, + "ce_orig": 1.6468174457550049, + "epoch": 0.9113523617801423, + "kl_loss": 0.05887865647673607, + "loss_ib": 0.0011736777378246188, + "step": 3169 + }, + { + "ce_ib": 2.9514994621276855, + "ce_orig": 0.3406210243701935, + "epoch": 0.9113523617801423, + "kl_loss": 0.06704488396644592, + "loss_ib": 0.0009655987960286438, + "step": 3169 + }, + { + "ce_ib": 3.719589948654175, + "ce_orig": 0.8549460768699646, + "epoch": 0.9113523617801423, + "kl_loss": 0.05161901190876961, + "loss_ib": 0.000888149137608707, + "step": 3169 + }, + { + "epoch": 0.91163994535912, + "grad_norm": 0.0977342426776886, + "learning_rate": 4.083732429033315e-05, + "loss": 0.8312, + "step": 3170 + }, + { + "ce_ib": 5.3511199951171875, + "ce_orig": 0.6467573642730713, + "epoch": 0.91163994535912, + "kl_loss": 0.10304812341928482, + "loss_ib": 0.001565593178384006, + "step": 3170 + }, + { + "ce_ib": 4.459630489349365, + "ce_orig": 1.3701367378234863, + "epoch": 0.91163994535912, + "kl_loss": 0.055446550250053406, + "loss_ib": 0.0010004284558817744, + "step": 3170 + }, + { + "ce_ib": 4.722958087921143, + "ce_orig": 0.6562477350234985, + "epoch": 0.91163994535912, + "kl_loss": 0.09976979345083237, + "loss_ib": 0.00146999373100698, + "step": 3170 + }, + { + "ce_ib": 1.8688380718231201, + "ce_orig": 0.5650045275688171, + "epoch": 0.91163994535912, + "kl_loss": 0.0327344685792923, + "loss_ib": 0.0005142284790053964, + "step": 3170 + }, + { + "ce_ib": 1.2230650186538696, + "ce_orig": 0.26929789781570435, + "epoch": 0.9119275289380976, + "kl_loss": 0.08776982873678207, + "loss_ib": 0.0010000047041103244, + "step": 3171 + }, + { + "ce_ib": 4.76809549331665, + "ce_orig": 1.442736268043518, + "epoch": 0.9119275289380976, + "kl_loss": 0.04948870837688446, + "loss_ib": 0.0009716966305859387, + "step": 3171 + }, + { + "ce_ib": 5.377802848815918, + "ce_orig": 1.1876178979873657, + "epoch": 0.9119275289380976, + "kl_loss": 0.1023683249950409, + "loss_ib": 0.0015614634612575173, + "step": 3171 + }, + { + "ce_ib": 3.9083352088928223, + "ce_orig": 0.9968768954277039, + "epoch": 0.9119275289380976, + "kl_loss": 0.04958637058734894, + "loss_ib": 0.0008866971475072205, + "step": 3171 + }, + { + "ce_ib": 5.264407634735107, + "ce_orig": 1.291345477104187, + "epoch": 0.9122151125170753, + "kl_loss": 0.04102856665849686, + "loss_ib": 0.0009367263992317021, + "step": 3172 + }, + { + "ce_ib": 3.1326162815093994, + "ce_orig": 0.7288129329681396, + "epoch": 0.9122151125170753, + "kl_loss": 0.07025466859340668, + "loss_ib": 0.0010158083168789744, + "step": 3172 + }, + { + "ce_ib": 3.359748601913452, + "ce_orig": 0.7309743165969849, + "epoch": 0.9122151125170753, + "kl_loss": 0.046865809708833694, + "loss_ib": 0.0008046329021453857, + "step": 3172 + }, + { + "ce_ib": 2.450080633163452, + "ce_orig": 0.6548645496368408, + "epoch": 0.9122151125170753, + "kl_loss": 0.15250636637210846, + "loss_ib": 0.0017700716853141785, + "step": 3172 + }, + { + "ce_ib": 3.0459132194519043, + "ce_orig": 0.4079059660434723, + "epoch": 0.912502696096053, + "kl_loss": 0.10099020600318909, + "loss_ib": 0.0013144933618605137, + "step": 3173 + }, + { + "ce_ib": 3.506834030151367, + "ce_orig": 1.0061074495315552, + "epoch": 0.912502696096053, + "kl_loss": 0.05272670090198517, + "loss_ib": 0.0008779503987170756, + "step": 3173 + }, + { + "ce_ib": 2.977964401245117, + "ce_orig": 0.8127270936965942, + "epoch": 0.912502696096053, + "kl_loss": 0.044994644820690155, + "loss_ib": 0.0007477428880520165, + "step": 3173 + }, + { + "ce_ib": 3.2782328128814697, + "ce_orig": 0.6604313850402832, + "epoch": 0.912502696096053, + "kl_loss": 0.051744185388088226, + "loss_ib": 0.0008452650508843362, + "step": 3173 + }, + { + "ce_ib": 4.6020331382751465, + "ce_orig": 0.7165954113006592, + "epoch": 0.9127902796750306, + "kl_loss": 0.10648408532142639, + "loss_ib": 0.001525044091977179, + "step": 3174 + }, + { + "ce_ib": 2.5944085121154785, + "ce_orig": 0.5319154262542725, + "epoch": 0.9127902796750306, + "kl_loss": 0.05503962188959122, + "loss_ib": 0.0008098370162770152, + "step": 3174 + }, + { + "ce_ib": 2.2545547485351562, + "ce_orig": 0.20395638048648834, + "epoch": 0.9127902796750306, + "kl_loss": 0.05077337846159935, + "loss_ib": 0.0007331892848014832, + "step": 3174 + }, + { + "ce_ib": 2.5884907245635986, + "ce_orig": 0.6704795360565186, + "epoch": 0.9127902796750306, + "kl_loss": 0.04743240028619766, + "loss_ib": 0.0007331730448640883, + "step": 3174 + }, + { + "epoch": 0.9130778632540082, + "grad_norm": 0.09019036591053009, + "learning_rate": 4.080728049094606e-05, + "loss": 0.8583, + "step": 3175 + }, + { + "ce_ib": 1.9601051807403564, + "ce_orig": 0.47929808497428894, + "epoch": 0.9130778632540082, + "kl_loss": 0.029512176290154457, + "loss_ib": 0.0004911322612315416, + "step": 3175 + }, + { + "ce_ib": 2.6888723373413086, + "ce_orig": 0.46886900067329407, + "epoch": 0.9130778632540082, + "kl_loss": 0.04713757708668709, + "loss_ib": 0.0007402629707939923, + "step": 3175 + }, + { + "ce_ib": 3.338979959487915, + "ce_orig": 0.5278971195220947, + "epoch": 0.9130778632540082, + "kl_loss": 0.10123096406459808, + "loss_ib": 0.0013462075730785728, + "step": 3175 + }, + { + "ce_ib": 3.2871997356414795, + "ce_orig": 0.5088003873825073, + "epoch": 0.9130778632540082, + "kl_loss": 0.07355530560016632, + "loss_ib": 0.001064272946678102, + "step": 3175 + }, + { + "ce_ib": 3.700340986251831, + "ce_orig": 0.9800871014595032, + "epoch": 0.9133654468329858, + "kl_loss": 0.068606898188591, + "loss_ib": 0.0010561030358076096, + "step": 3176 + }, + { + "ce_ib": 3.6877622604370117, + "ce_orig": 0.8150835037231445, + "epoch": 0.9133654468329858, + "kl_loss": 0.07103079557418823, + "loss_ib": 0.0010790841188281775, + "step": 3176 + }, + { + "ce_ib": 4.9309186935424805, + "ce_orig": 1.0601760149002075, + "epoch": 0.9133654468329858, + "kl_loss": 0.06910328567028046, + "loss_ib": 0.0011841247323900461, + "step": 3176 + }, + { + "ce_ib": 4.146428108215332, + "ce_orig": 0.9670461416244507, + "epoch": 0.9133654468329858, + "kl_loss": 0.05971643328666687, + "loss_ib": 0.0010118071222677827, + "step": 3176 + }, + { + "ce_ib": 4.071465492248535, + "ce_orig": 0.8679485321044922, + "epoch": 0.9136530304119634, + "kl_loss": 0.043402791023254395, + "loss_ib": 0.0008411743910983205, + "step": 3177 + }, + { + "ce_ib": 2.759841203689575, + "ce_orig": 0.681425154209137, + "epoch": 0.9136530304119634, + "kl_loss": 0.0380876325070858, + "loss_ib": 0.000656860473100096, + "step": 3177 + }, + { + "ce_ib": 2.4663920402526855, + "ce_orig": 0.6051286458969116, + "epoch": 0.9136530304119634, + "kl_loss": 0.04413055628538132, + "loss_ib": 0.0006879447028040886, + "step": 3177 + }, + { + "ce_ib": 2.956993341445923, + "ce_orig": 0.5728127360343933, + "epoch": 0.9136530304119634, + "kl_loss": 0.03637642040848732, + "loss_ib": 0.0006594635196961462, + "step": 3177 + }, + { + "ce_ib": 4.672955513000488, + "ce_orig": 1.1483360528945923, + "epoch": 0.9139406139909411, + "kl_loss": 0.04516831040382385, + "loss_ib": 0.0009189785923808813, + "step": 3178 + }, + { + "ce_ib": 1.6486446857452393, + "ce_orig": 0.36352741718292236, + "epoch": 0.9139406139909411, + "kl_loss": 0.06760765612125397, + "loss_ib": 0.0008409410365857184, + "step": 3178 + }, + { + "ce_ib": 3.1348612308502197, + "ce_orig": 0.41073453426361084, + "epoch": 0.9139406139909411, + "kl_loss": 0.054151348769664764, + "loss_ib": 0.0008549995254725218, + "step": 3178 + }, + { + "ce_ib": 2.712325096130371, + "ce_orig": 0.5329107046127319, + "epoch": 0.9139406139909411, + "kl_loss": 0.02798927202820778, + "loss_ib": 0.000551125209312886, + "step": 3178 + }, + { + "ce_ib": 4.262974262237549, + "ce_orig": 0.9197275042533875, + "epoch": 0.9142281975699188, + "kl_loss": 0.06888129562139511, + "loss_ib": 0.0011151103535667062, + "step": 3179 + }, + { + "ce_ib": 4.161855697631836, + "ce_orig": 0.7765277624130249, + "epoch": 0.9142281975699188, + "kl_loss": 0.06673496961593628, + "loss_ib": 0.0010835352586582303, + "step": 3179 + }, + { + "ce_ib": 3.3494105339050293, + "ce_orig": 0.477772057056427, + "epoch": 0.9142281975699188, + "kl_loss": 0.07534363865852356, + "loss_ib": 0.001088377321138978, + "step": 3179 + }, + { + "ce_ib": 4.012151718139648, + "ce_orig": 0.7827338576316833, + "epoch": 0.9142281975699188, + "kl_loss": 0.04297991096973419, + "loss_ib": 0.0008310142438858747, + "step": 3179 + }, + { + "epoch": 0.9145157811488964, + "grad_norm": 0.09450624138116837, + "learning_rate": 4.077719860815132e-05, + "loss": 0.7873, + "step": 3180 + }, + { + "ce_ib": 2.744706630706787, + "ce_orig": 0.6751590967178345, + "epoch": 0.9145157811488964, + "kl_loss": 0.03711235150694847, + "loss_ib": 0.0006455941474996507, + "step": 3180 + }, + { + "ce_ib": 3.4469122886657715, + "ce_orig": 0.8222633600234985, + "epoch": 0.9145157811488964, + "kl_loss": 0.054762449115514755, + "loss_ib": 0.0008923157001845539, + "step": 3180 + }, + { + "ce_ib": 3.325777292251587, + "ce_orig": 0.8911304473876953, + "epoch": 0.9145157811488964, + "kl_loss": 0.04325383901596069, + "loss_ib": 0.0007651160703971982, + "step": 3180 + }, + { + "ce_ib": 4.53206729888916, + "ce_orig": 1.26225745677948, + "epoch": 0.9145157811488964, + "kl_loss": 0.04498463124036789, + "loss_ib": 0.0009030529763549566, + "step": 3180 + }, + { + "ce_ib": 4.41810941696167, + "ce_orig": 1.1914161443710327, + "epoch": 0.914803364727874, + "kl_loss": 0.05854479968547821, + "loss_ib": 0.0010272589279338717, + "step": 3181 + }, + { + "ce_ib": 2.8844523429870605, + "ce_orig": 0.7358414530754089, + "epoch": 0.914803364727874, + "kl_loss": 0.032018207013607025, + "loss_ib": 0.0006086272769607604, + "step": 3181 + }, + { + "ce_ib": 4.159276008605957, + "ce_orig": 1.0298231840133667, + "epoch": 0.914803364727874, + "kl_loss": 0.055959559977054596, + "loss_ib": 0.0009755231440067291, + "step": 3181 + }, + { + "ce_ib": 5.414156913757324, + "ce_orig": 1.4007587432861328, + "epoch": 0.914803364727874, + "kl_loss": 0.04218119755387306, + "loss_ib": 0.0009632275905460119, + "step": 3181 + }, + { + "ce_ib": 3.688832998275757, + "ce_orig": 0.4152159094810486, + "epoch": 0.9150909483068517, + "kl_loss": 0.055178649723529816, + "loss_ib": 0.0009206696995534003, + "step": 3182 + }, + { + "ce_ib": 2.0761871337890625, + "ce_orig": 0.4196523427963257, + "epoch": 0.9150909483068517, + "kl_loss": 0.19646984338760376, + "loss_ib": 0.002172317123040557, + "step": 3182 + }, + { + "ce_ib": 3.8203697204589844, + "ce_orig": 0.7145872712135315, + "epoch": 0.9150909483068517, + "kl_loss": 0.0639747753739357, + "loss_ib": 0.001021784613840282, + "step": 3182 + }, + { + "ce_ib": 3.809478998184204, + "ce_orig": 0.986812174320221, + "epoch": 0.9150909483068517, + "kl_loss": 0.06346821784973145, + "loss_ib": 0.0010156300850212574, + "step": 3182 + }, + { + "ce_ib": 2.9625067710876465, + "ce_orig": 0.49960142374038696, + "epoch": 0.9153785318858293, + "kl_loss": 0.038908183574676514, + "loss_ib": 0.000685332459397614, + "step": 3183 + }, + { + "ce_ib": 2.6079494953155518, + "ce_orig": 0.5612336993217468, + "epoch": 0.9153785318858293, + "kl_loss": 0.060059089213609695, + "loss_ib": 0.0008613858371973038, + "step": 3183 + }, + { + "ce_ib": 3.2692666053771973, + "ce_orig": 0.7059376835823059, + "epoch": 0.9153785318858293, + "kl_loss": 0.0556926503777504, + "loss_ib": 0.0008838531211949885, + "step": 3183 + }, + { + "ce_ib": 4.060166835784912, + "ce_orig": 0.9575976133346558, + "epoch": 0.9153785318858293, + "kl_loss": 0.05849279463291168, + "loss_ib": 0.0009909446816891432, + "step": 3183 + }, + { + "ce_ib": 3.4545934200286865, + "ce_orig": 0.5478398203849792, + "epoch": 0.9156661154648069, + "kl_loss": 0.07179083675146103, + "loss_ib": 0.0010633677011355758, + "step": 3184 + }, + { + "ce_ib": 4.1562700271606445, + "ce_orig": 1.017277717590332, + "epoch": 0.9156661154648069, + "kl_loss": 0.028699781745672226, + "loss_ib": 0.000702624733094126, + "step": 3184 + }, + { + "ce_ib": 3.1984450817108154, + "ce_orig": 0.8842546939849854, + "epoch": 0.9156661154648069, + "kl_loss": 0.04963940009474754, + "loss_ib": 0.0008162385202012956, + "step": 3184 + }, + { + "ce_ib": 4.074039459228516, + "ce_orig": 0.8329095840454102, + "epoch": 0.9156661154648069, + "kl_loss": 0.044150494039058685, + "loss_ib": 0.0008489088504575193, + "step": 3184 + }, + { + "epoch": 0.9159536990437847, + "grad_norm": 0.111606664955616, + "learning_rate": 4.074707871442318e-05, + "loss": 0.8346, + "step": 3185 + }, + { + "ce_ib": 3.066641330718994, + "ce_orig": 0.5846387147903442, + "epoch": 0.9159536990437847, + "kl_loss": 0.09630551189184189, + "loss_ib": 0.0012697193305939436, + "step": 3185 + }, + { + "ce_ib": 2.565711736679077, + "ce_orig": 0.6466444134712219, + "epoch": 0.9159536990437847, + "kl_loss": 0.04540964588522911, + "loss_ib": 0.0007106676348485053, + "step": 3185 + }, + { + "ce_ib": 4.229547500610352, + "ce_orig": 0.9230093359947205, + "epoch": 0.9159536990437847, + "kl_loss": 0.06771610677242279, + "loss_ib": 0.0011001157108694315, + "step": 3185 + }, + { + "ce_ib": 3.2083494663238525, + "ce_orig": 0.80756014585495, + "epoch": 0.9159536990437847, + "kl_loss": 0.03788698464632034, + "loss_ib": 0.0006997047457844019, + "step": 3185 + }, + { + "ce_ib": 3.652029514312744, + "ce_orig": 0.8912012577056885, + "epoch": 0.9162412826227623, + "kl_loss": 0.041818298399448395, + "loss_ib": 0.0007833859417587519, + "step": 3186 + }, + { + "ce_ib": 4.405072212219238, + "ce_orig": 1.2981663942337036, + "epoch": 0.9162412826227623, + "kl_loss": 0.050795961171388626, + "loss_ib": 0.0009484667680226266, + "step": 3186 + }, + { + "ce_ib": 3.567411184310913, + "ce_orig": 0.8439738154411316, + "epoch": 0.9162412826227623, + "kl_loss": 0.04497046023607254, + "loss_ib": 0.0008064456633292139, + "step": 3186 + }, + { + "ce_ib": 3.7090370655059814, + "ce_orig": 0.8979739546775818, + "epoch": 0.9162412826227623, + "kl_loss": 0.05001690983772278, + "loss_ib": 0.000871072756126523, + "step": 3186 + }, + { + "ce_ib": 4.554932594299316, + "ce_orig": 1.093866229057312, + "epoch": 0.9165288662017399, + "kl_loss": 0.07862812280654907, + "loss_ib": 0.001241774414665997, + "step": 3187 + }, + { + "ce_ib": 4.013314723968506, + "ce_orig": 0.9294552206993103, + "epoch": 0.9165288662017399, + "kl_loss": 0.06314477324485779, + "loss_ib": 0.0010327792260795832, + "step": 3187 + }, + { + "ce_ib": 3.3071587085723877, + "ce_orig": 0.8728212118148804, + "epoch": 0.9165288662017399, + "kl_loss": 0.04995517432689667, + "loss_ib": 0.0008302675560116768, + "step": 3187 + }, + { + "ce_ib": 5.732297420501709, + "ce_orig": 1.4476723670959473, + "epoch": 0.9165288662017399, + "kl_loss": 0.08298581838607788, + "loss_ib": 0.0014030879829078913, + "step": 3187 + }, + { + "ce_ib": 5.0192718505859375, + "ce_orig": 1.3445837497711182, + "epoch": 0.9168164497807175, + "kl_loss": 0.06920824944972992, + "loss_ib": 0.0011940095573663712, + "step": 3188 + }, + { + "ce_ib": 4.004128932952881, + "ce_orig": 0.9683447480201721, + "epoch": 0.9168164497807175, + "kl_loss": 0.07069846987724304, + "loss_ib": 0.001107397605665028, + "step": 3188 + }, + { + "ce_ib": 3.325744152069092, + "ce_orig": 0.6996028423309326, + "epoch": 0.9168164497807175, + "kl_loss": 0.05229607969522476, + "loss_ib": 0.0008555351523682475, + "step": 3188 + }, + { + "ce_ib": 2.231485366821289, + "ce_orig": 0.5520500540733337, + "epoch": 0.9168164497807175, + "kl_loss": 0.03566392511129379, + "loss_ib": 0.0005797877674922347, + "step": 3188 + }, + { + "ce_ib": 2.888406991958618, + "ce_orig": 0.506007730960846, + "epoch": 0.9171040333596951, + "kl_loss": 0.03929300978779793, + "loss_ib": 0.0006817707908339798, + "step": 3189 + }, + { + "ce_ib": 6.988772392272949, + "ce_orig": 2.102501392364502, + "epoch": 0.9171040333596951, + "kl_loss": 0.06999631226062775, + "loss_ib": 0.00139884022064507, + "step": 3189 + }, + { + "ce_ib": 5.397253513336182, + "ce_orig": 1.550826072692871, + "epoch": 0.9171040333596951, + "kl_loss": 0.08576331287622452, + "loss_ib": 0.0013973584864288568, + "step": 3189 + }, + { + "ce_ib": 0.9908317923545837, + "ce_orig": 0.17227017879486084, + "epoch": 0.9171040333596951, + "kl_loss": 0.07521001994609833, + "loss_ib": 0.0008511833730153739, + "step": 3189 + }, + { + "epoch": 0.9173916169386728, + "grad_norm": 0.0999460220336914, + "learning_rate": 4.071692088232743e-05, + "loss": 0.8451, + "step": 3190 + }, + { + "ce_ib": 3.551182508468628, + "ce_orig": 0.5878246426582336, + "epoch": 0.9173916169386728, + "kl_loss": 0.06394501775503159, + "loss_ib": 0.000994568457826972, + "step": 3190 + }, + { + "ce_ib": 1.803792953491211, + "ce_orig": 0.5341501235961914, + "epoch": 0.9173916169386728, + "kl_loss": 0.04270561784505844, + "loss_ib": 0.0006074354751035571, + "step": 3190 + }, + { + "ce_ib": 2.511011838912964, + "ce_orig": 0.4997142255306244, + "epoch": 0.9173916169386728, + "kl_loss": 0.05078794062137604, + "loss_ib": 0.0007589805754832923, + "step": 3190 + }, + { + "ce_ib": 4.315921783447266, + "ce_orig": 0.8346672654151917, + "epoch": 0.9173916169386728, + "kl_loss": 0.07177073508501053, + "loss_ib": 0.001149299438111484, + "step": 3190 + }, + { + "ce_ib": 4.655359745025635, + "ce_orig": 1.1562644243240356, + "epoch": 0.9176792005176504, + "kl_loss": 0.08200542628765106, + "loss_ib": 0.0012855902314186096, + "step": 3191 + }, + { + "ce_ib": 3.265634775161743, + "ce_orig": 0.7918500900268555, + "epoch": 0.9176792005176504, + "kl_loss": 0.04104746878147125, + "loss_ib": 0.0007370380917564034, + "step": 3191 + }, + { + "ce_ib": 4.331277370452881, + "ce_orig": 1.0445988178253174, + "epoch": 0.9176792005176504, + "kl_loss": 0.04599080979824066, + "loss_ib": 0.0008930357871577144, + "step": 3191 + }, + { + "ce_ib": 5.118367671966553, + "ce_orig": 1.1276978254318237, + "epoch": 0.9176792005176504, + "kl_loss": 0.04730473458766937, + "loss_ib": 0.0009848839836195111, + "step": 3191 + }, + { + "ce_ib": 3.4301340579986572, + "ce_orig": 0.6683462262153625, + "epoch": 0.9179667840966281, + "kl_loss": 0.058359138667583466, + "loss_ib": 0.0009266047854907811, + "step": 3192 + }, + { + "ce_ib": 3.181018829345703, + "ce_orig": 0.7371907234191895, + "epoch": 0.9179667840966281, + "kl_loss": 0.07229121774435043, + "loss_ib": 0.0010410139802843332, + "step": 3192 + }, + { + "ce_ib": 4.543924808502197, + "ce_orig": 1.4522510766983032, + "epoch": 0.9179667840966281, + "kl_loss": 0.03998488560318947, + "loss_ib": 0.000854241312481463, + "step": 3192 + }, + { + "ce_ib": 3.2249135971069336, + "ce_orig": 0.6695526838302612, + "epoch": 0.9179667840966281, + "kl_loss": 0.08390084654092789, + "loss_ib": 0.0011614997638389468, + "step": 3192 + }, + { + "ce_ib": 4.292159557342529, + "ce_orig": 0.9767239689826965, + "epoch": 0.9182543676756058, + "kl_loss": 0.05491176247596741, + "loss_ib": 0.0009783335262909532, + "step": 3193 + }, + { + "ce_ib": 2.7639811038970947, + "ce_orig": 0.707823634147644, + "epoch": 0.9182543676756058, + "kl_loss": 0.029215268790721893, + "loss_ib": 0.0005685507785528898, + "step": 3193 + }, + { + "ce_ib": 1.91177499294281, + "ce_orig": 0.43107467889785767, + "epoch": 0.9182543676756058, + "kl_loss": 0.047254808247089386, + "loss_ib": 0.0006637255428358912, + "step": 3193 + }, + { + "ce_ib": 3.860837697982788, + "ce_orig": 0.8027139902114868, + "epoch": 0.9182543676756058, + "kl_loss": 0.06129908189177513, + "loss_ib": 0.0009990745456889272, + "step": 3193 + }, + { + "ce_ib": 2.9692749977111816, + "ce_orig": 0.690369725227356, + "epoch": 0.9185419512545834, + "kl_loss": 0.03899727016687393, + "loss_ib": 0.0006869001663289964, + "step": 3194 + }, + { + "ce_ib": 4.56494140625, + "ce_orig": 0.9030995965003967, + "epoch": 0.9185419512545834, + "kl_loss": 0.05553675442934036, + "loss_ib": 0.0010118617210537195, + "step": 3194 + }, + { + "ce_ib": 6.042218208312988, + "ce_orig": 0.625754714012146, + "epoch": 0.9185419512545834, + "kl_loss": 0.047599319368600845, + "loss_ib": 0.0010802149772644043, + "step": 3194 + }, + { + "ce_ib": 4.809246063232422, + "ce_orig": 0.574388861656189, + "epoch": 0.9185419512545834, + "kl_loss": 0.1192028746008873, + "loss_ib": 0.0016729533672332764, + "step": 3194 + }, + { + "epoch": 0.918829534833561, + "grad_norm": 0.08261846750974655, + "learning_rate": 4.06867251845213e-05, + "loss": 0.7998, + "step": 3195 + }, + { + "ce_ib": 2.6409659385681152, + "ce_orig": 0.6071385741233826, + "epoch": 0.918829534833561, + "kl_loss": 0.07333294302225113, + "loss_ib": 0.000997425988316536, + "step": 3195 + }, + { + "ce_ib": 3.8314425945281982, + "ce_orig": 0.748336672782898, + "epoch": 0.918829534833561, + "kl_loss": 0.07514072954654694, + "loss_ib": 0.0011345514794811606, + "step": 3195 + }, + { + "ce_ib": 3.3991379737854004, + "ce_orig": 0.8238034248352051, + "epoch": 0.918829534833561, + "kl_loss": 0.03926848620176315, + "loss_ib": 0.0007325985934585333, + "step": 3195 + }, + { + "ce_ib": 4.501326560974121, + "ce_orig": 1.1586707830429077, + "epoch": 0.918829534833561, + "kl_loss": 0.045221347361803055, + "loss_ib": 0.0009023460443131626, + "step": 3195 + }, + { + "ce_ib": 3.8081247806549072, + "ce_orig": 0.8294766545295715, + "epoch": 0.9191171184125386, + "kl_loss": 0.07239902019500732, + "loss_ib": 0.0011048025917261839, + "step": 3196 + }, + { + "ce_ib": 3.298801898956299, + "ce_orig": 0.5892886519432068, + "epoch": 0.9191171184125386, + "kl_loss": 0.061170730739831924, + "loss_ib": 0.0009415874956175685, + "step": 3196 + }, + { + "ce_ib": 3.8359615802764893, + "ce_orig": 0.7160845994949341, + "epoch": 0.9191171184125386, + "kl_loss": 0.07150237262248993, + "loss_ib": 0.0010986197739839554, + "step": 3196 + }, + { + "ce_ib": 1.9970406293869019, + "ce_orig": 0.3997369408607483, + "epoch": 0.9191171184125386, + "kl_loss": 0.05791294202208519, + "loss_ib": 0.0007788334623910487, + "step": 3196 + }, + { + "ce_ib": 4.294368743896484, + "ce_orig": 1.0961525440216064, + "epoch": 0.9194047019915162, + "kl_loss": 0.04618740826845169, + "loss_ib": 0.0008913109195418656, + "step": 3197 + }, + { + "ce_ib": 0.709657609462738, + "ce_orig": 0.12724660336971283, + "epoch": 0.9194047019915162, + "kl_loss": 0.08097407221794128, + "loss_ib": 0.0008807064150460064, + "step": 3197 + }, + { + "ce_ib": 2.3381171226501465, + "ce_orig": 0.7475312948226929, + "epoch": 0.9194047019915162, + "kl_loss": 0.03507109731435776, + "loss_ib": 0.0005845226696692407, + "step": 3197 + }, + { + "ce_ib": 7.39910888671875, + "ce_orig": 1.8042207956314087, + "epoch": 0.9194047019915162, + "kl_loss": 0.0890331044793129, + "loss_ib": 0.001630241866223514, + "step": 3197 + }, + { + "ce_ib": 4.145359039306641, + "ce_orig": 0.8944915533065796, + "epoch": 0.9196922855704939, + "kl_loss": 0.07143262028694153, + "loss_ib": 0.0011288620298728347, + "step": 3198 + }, + { + "ce_ib": 3.2337803840637207, + "ce_orig": 0.696902871131897, + "epoch": 0.9196922855704939, + "kl_loss": 0.03886301815509796, + "loss_ib": 0.0007120081572793424, + "step": 3198 + }, + { + "ce_ib": 2.5978376865386963, + "ce_orig": 0.8300560116767883, + "epoch": 0.9196922855704939, + "kl_loss": 0.017183903604745865, + "loss_ib": 0.00043162278598174453, + "step": 3198 + }, + { + "ce_ib": 2.866645336151123, + "ce_orig": 0.8763684630393982, + "epoch": 0.9196922855704939, + "kl_loss": 0.04191884398460388, + "loss_ib": 0.0007058529299683869, + "step": 3198 + }, + { + "ce_ib": 2.2338366508483887, + "ce_orig": 0.5669873952865601, + "epoch": 0.9199798691494716, + "kl_loss": 0.021897176280617714, + "loss_ib": 0.0004423554055392742, + "step": 3199 + }, + { + "ce_ib": 3.059532880783081, + "ce_orig": 0.727215588092804, + "epoch": 0.9199798691494716, + "kl_loss": 0.056413546204566956, + "loss_ib": 0.0008700887556187809, + "step": 3199 + }, + { + "ce_ib": 3.512538433074951, + "ce_orig": 0.8917385339736938, + "epoch": 0.9199798691494716, + "kl_loss": 0.05458346754312515, + "loss_ib": 0.0008970884955488145, + "step": 3199 + }, + { + "ce_ib": 3.9868593215942383, + "ce_orig": 0.9126573801040649, + "epoch": 0.9199798691494716, + "kl_loss": 0.05063524842262268, + "loss_ib": 0.0009050383814610541, + "step": 3199 + }, + { + "epoch": 0.9202674527284492, + "grad_norm": 0.09403792768716812, + "learning_rate": 4.065649169375324e-05, + "loss": 0.7788, + "step": 3200 + }, + { + "ce_ib": 4.86122989654541, + "ce_orig": 1.003016471862793, + "epoch": 0.9202674527284492, + "kl_loss": 0.07420980930328369, + "loss_ib": 0.0012282209936529398, + "step": 3200 + }, + { + "ce_ib": 4.812366008758545, + "ce_orig": 1.1118460893630981, + "epoch": 0.9202674527284492, + "kl_loss": 0.05838356912136078, + "loss_ib": 0.0010650722542777658, + "step": 3200 + }, + { + "ce_ib": 6.865611553192139, + "ce_orig": 1.816336989402771, + "epoch": 0.9202674527284492, + "kl_loss": 0.07359464466571808, + "loss_ib": 0.0014225075719878078, + "step": 3200 + }, + { + "ce_ib": 4.289966583251953, + "ce_orig": 0.944628119468689, + "epoch": 0.9202674527284492, + "kl_loss": 0.06688971072435379, + "loss_ib": 0.0010978936916217208, + "step": 3200 + }, + { + "ce_ib": 2.390066146850586, + "ce_orig": 0.5038105845451355, + "epoch": 0.9205550363074269, + "kl_loss": 0.07549577206373215, + "loss_ib": 0.000993964378722012, + "step": 3201 + }, + { + "ce_ib": 5.074769020080566, + "ce_orig": 1.1328943967819214, + "epoch": 0.9205550363074269, + "kl_loss": 0.04498150944709778, + "loss_ib": 0.0009572919807396829, + "step": 3201 + }, + { + "ce_ib": 2.6037545204162598, + "ce_orig": 0.6269896030426025, + "epoch": 0.9205550363074269, + "kl_loss": 0.05642209202051163, + "loss_ib": 0.0008245963254012167, + "step": 3201 + }, + { + "ce_ib": 5.624873161315918, + "ce_orig": 1.4130483865737915, + "epoch": 0.9205550363074269, + "kl_loss": 0.06065916642546654, + "loss_ib": 0.0011690788669511676, + "step": 3201 + }, + { + "ce_ib": 4.8625030517578125, + "ce_orig": 1.0066754817962646, + "epoch": 0.9208426198864045, + "kl_loss": 0.0717778429389, + "loss_ib": 0.0012040287256240845, + "step": 3202 + }, + { + "ce_ib": 2.834974765777588, + "ce_orig": 0.5869733691215515, + "epoch": 0.9208426198864045, + "kl_loss": 0.04081311076879501, + "loss_ib": 0.000691628607455641, + "step": 3202 + }, + { + "ce_ib": 4.312038421630859, + "ce_orig": 0.9448176026344299, + "epoch": 0.9208426198864045, + "kl_loss": 0.06042904406785965, + "loss_ib": 0.001035494264215231, + "step": 3202 + }, + { + "ce_ib": 2.829367160797119, + "ce_orig": 0.5569058656692505, + "epoch": 0.9208426198864045, + "kl_loss": 0.06430092453956604, + "loss_ib": 0.0009259459329769015, + "step": 3202 + }, + { + "ce_ib": 4.647596836090088, + "ce_orig": 0.8815832734107971, + "epoch": 0.9211302034653821, + "kl_loss": 0.05483182147145271, + "loss_ib": 0.0010130777955055237, + "step": 3203 + }, + { + "ce_ib": 2.3806395530700684, + "ce_orig": 0.4503689706325531, + "epoch": 0.9211302034653821, + "kl_loss": 0.04614130035042763, + "loss_ib": 0.0006994769792072475, + "step": 3203 + }, + { + "ce_ib": 6.1203742027282715, + "ce_orig": 1.6066436767578125, + "epoch": 0.9211302034653821, + "kl_loss": 0.043240275233983994, + "loss_ib": 0.0010444400832057, + "step": 3203 + }, + { + "ce_ib": 3.066277503967285, + "ce_orig": 0.8724227547645569, + "epoch": 0.9211302034653821, + "kl_loss": 0.04433097690343857, + "loss_ib": 0.000749937491491437, + "step": 3203 + }, + { + "ce_ib": 3.303079128265381, + "ce_orig": 0.9027718901634216, + "epoch": 0.9214177870443597, + "kl_loss": 0.08832599222660065, + "loss_ib": 0.0012135677970945835, + "step": 3204 + }, + { + "ce_ib": 3.6673951148986816, + "ce_orig": 0.8844602704048157, + "epoch": 0.9214177870443597, + "kl_loss": 0.20112088322639465, + "loss_ib": 0.002377948258072138, + "step": 3204 + }, + { + "ce_ib": 3.4316177368164062, + "ce_orig": 0.5671533942222595, + "epoch": 0.9214177870443597, + "kl_loss": 0.04684218391776085, + "loss_ib": 0.0008115835953503847, + "step": 3204 + }, + { + "ce_ib": 2.9339089393615723, + "ce_orig": 0.7059691548347473, + "epoch": 0.9214177870443597, + "kl_loss": 0.039525024592876434, + "loss_ib": 0.0006886411574669182, + "step": 3204 + }, + { + "epoch": 0.9217053706233375, + "grad_norm": 0.09923651069402695, + "learning_rate": 4.0626220482862735e-05, + "loss": 0.844, + "step": 3205 + }, + { + "ce_ib": 4.476441860198975, + "ce_orig": 1.1261178255081177, + "epoch": 0.9217053706233375, + "kl_loss": 0.12271246314048767, + "loss_ib": 0.0016747687477618456, + "step": 3205 + }, + { + "ce_ib": 2.9585678577423096, + "ce_orig": 0.692084789276123, + "epoch": 0.9217053706233375, + "kl_loss": 0.056936684995889664, + "loss_ib": 0.0008652235846966505, + "step": 3205 + }, + { + "ce_ib": 6.06861686706543, + "ce_orig": 1.7206658124923706, + "epoch": 0.9217053706233375, + "kl_loss": 0.053771186619997025, + "loss_ib": 0.0011445735581219196, + "step": 3205 + }, + { + "ce_ib": 5.20057487487793, + "ce_orig": 1.2731976509094238, + "epoch": 0.9217053706233375, + "kl_loss": 0.044571634382009506, + "loss_ib": 0.0009657738264650106, + "step": 3205 + }, + { + "ce_ib": 5.692834377288818, + "ce_orig": 1.6756495237350464, + "epoch": 0.9219929542023151, + "kl_loss": 0.1051848828792572, + "loss_ib": 0.0016211321344599128, + "step": 3206 + }, + { + "ce_ib": 3.712090015411377, + "ce_orig": 0.6713534593582153, + "epoch": 0.9219929542023151, + "kl_loss": 0.09290719032287598, + "loss_ib": 0.0013002809137105942, + "step": 3206 + }, + { + "ce_ib": 4.360491752624512, + "ce_orig": 0.9382306933403015, + "epoch": 0.9219929542023151, + "kl_loss": 0.05315448343753815, + "loss_ib": 0.0009675939800217748, + "step": 3206 + }, + { + "ce_ib": 2.586575984954834, + "ce_orig": 0.769508957862854, + "epoch": 0.9219929542023151, + "kl_loss": 0.02789117768406868, + "loss_ib": 0.0005375693435780704, + "step": 3206 + }, + { + "ce_ib": 4.5779829025268555, + "ce_orig": 0.9769300222396851, + "epoch": 0.9222805377812927, + "kl_loss": 0.07158896327018738, + "loss_ib": 0.0011736878659576178, + "step": 3207 + }, + { + "ce_ib": 2.475050926208496, + "ce_orig": 0.07812873274087906, + "epoch": 0.9222805377812927, + "kl_loss": 0.15334948897361755, + "loss_ib": 0.0017809998244047165, + "step": 3207 + }, + { + "ce_ib": 4.085664749145508, + "ce_orig": 1.1482868194580078, + "epoch": 0.9222805377812927, + "kl_loss": 0.06854888796806335, + "loss_ib": 0.001094055245630443, + "step": 3207 + }, + { + "ce_ib": 7.176668167114258, + "ce_orig": 1.8883119821548462, + "epoch": 0.9222805377812927, + "kl_loss": 0.04894078150391579, + "loss_ib": 0.0012070746161043644, + "step": 3207 + }, + { + "ce_ib": 4.535983085632324, + "ce_orig": 0.7226999998092651, + "epoch": 0.9225681213602703, + "kl_loss": 0.06856577098369598, + "loss_ib": 0.0011392560554668307, + "step": 3208 + }, + { + "ce_ib": 3.6759965419769287, + "ce_orig": 0.9526473879814148, + "epoch": 0.9225681213602703, + "kl_loss": 0.032770320773124695, + "loss_ib": 0.0006953028496354818, + "step": 3208 + }, + { + "ce_ib": 3.448948383331299, + "ce_orig": 1.1707570552825928, + "epoch": 0.9225681213602703, + "kl_loss": 0.05210162699222565, + "loss_ib": 0.0008659110753796995, + "step": 3208 + }, + { + "ce_ib": 3.994434118270874, + "ce_orig": 0.8035833835601807, + "epoch": 0.9225681213602703, + "kl_loss": 0.0750410258769989, + "loss_ib": 0.0011498535750433803, + "step": 3208 + }, + { + "ce_ib": 2.4354453086853027, + "ce_orig": 0.46285325288772583, + "epoch": 0.922855704939248, + "kl_loss": 0.0532878153026104, + "loss_ib": 0.0007764226756989956, + "step": 3209 + }, + { + "ce_ib": 4.67061710357666, + "ce_orig": 0.7540927529335022, + "epoch": 0.922855704939248, + "kl_loss": 0.16403847932815552, + "loss_ib": 0.0021074465475976467, + "step": 3209 + }, + { + "ce_ib": 3.3047239780426025, + "ce_orig": 0.7836443781852722, + "epoch": 0.922855704939248, + "kl_loss": 0.05098933354020119, + "loss_ib": 0.0008403657120652497, + "step": 3209 + }, + { + "ce_ib": 3.6410443782806396, + "ce_orig": 0.8377199172973633, + "epoch": 0.922855704939248, + "kl_loss": 0.038137637078762054, + "loss_ib": 0.0007454807637259364, + "step": 3209 + }, + { + "epoch": 0.9231432885182256, + "grad_norm": 0.09334629029035568, + "learning_rate": 4.059591162478017e-05, + "loss": 0.9067, + "step": 3210 + }, + { + "ce_ib": 6.363516807556152, + "ce_orig": 1.3339869976043701, + "epoch": 0.9231432885182256, + "kl_loss": 0.07593558728694916, + "loss_ib": 0.0013957073679193854, + "step": 3210 + }, + { + "ce_ib": 1.2215497493743896, + "ce_orig": 0.2588921785354614, + "epoch": 0.9231432885182256, + "kl_loss": 0.12800714373588562, + "loss_ib": 0.0014022263931110501, + "step": 3210 + }, + { + "ce_ib": 2.6003122329711914, + "ce_orig": 0.47283709049224854, + "epoch": 0.9231432885182256, + "kl_loss": 0.054622355848550797, + "loss_ib": 0.0008062547422014177, + "step": 3210 + }, + { + "ce_ib": 2.8416149616241455, + "ce_orig": 0.5747243165969849, + "epoch": 0.9231432885182256, + "kl_loss": 0.06251467764377594, + "loss_ib": 0.0009093082626350224, + "step": 3210 + }, + { + "ce_ib": 3.9595704078674316, + "ce_orig": 0.8895650506019592, + "epoch": 0.9234308720972032, + "kl_loss": 0.033054500818252563, + "loss_ib": 0.0007265020976774395, + "step": 3211 + }, + { + "ce_ib": 4.630435943603516, + "ce_orig": 0.9047726392745972, + "epoch": 0.9234308720972032, + "kl_loss": 0.06067252159118652, + "loss_ib": 0.0010697687976062298, + "step": 3211 + }, + { + "ce_ib": 3.2354471683502197, + "ce_orig": 0.6424351334571838, + "epoch": 0.9234308720972032, + "kl_loss": 0.10116438567638397, + "loss_ib": 0.0013351885136216879, + "step": 3211 + }, + { + "ce_ib": 3.454850673675537, + "ce_orig": 0.40440112352371216, + "epoch": 0.9234308720972032, + "kl_loss": 0.0668070837855339, + "loss_ib": 0.001013555913232267, + "step": 3211 + }, + { + "ce_ib": 4.327646255493164, + "ce_orig": 0.7481862306594849, + "epoch": 0.9237184556761809, + "kl_loss": 0.06409113109111786, + "loss_ib": 0.0010736759286373854, + "step": 3212 + }, + { + "ce_ib": 6.028416633605957, + "ce_orig": 1.777456283569336, + "epoch": 0.9237184556761809, + "kl_loss": 0.028706399723887444, + "loss_ib": 0.0008899056119844317, + "step": 3212 + }, + { + "ce_ib": 2.6157302856445312, + "ce_orig": 0.7007063627243042, + "epoch": 0.9237184556761809, + "kl_loss": 0.03223448991775513, + "loss_ib": 0.0005839179502800107, + "step": 3212 + }, + { + "ce_ib": 3.3051724433898926, + "ce_orig": 0.7307367324829102, + "epoch": 0.9237184556761809, + "kl_loss": 0.05645521730184555, + "loss_ib": 0.0008950693882070482, + "step": 3212 + }, + { + "ce_ib": 3.638867139816284, + "ce_orig": 0.5102182626724243, + "epoch": 0.9240060392551586, + "kl_loss": 0.06313341856002808, + "loss_ib": 0.0009952208492904902, + "step": 3213 + }, + { + "ce_ib": 3.0202677249908447, + "ce_orig": 0.6413112878799438, + "epoch": 0.9240060392551586, + "kl_loss": 0.04676371067762375, + "loss_ib": 0.0007696638931520283, + "step": 3213 + }, + { + "ce_ib": 4.125507831573486, + "ce_orig": 0.9508820176124573, + "epoch": 0.9240060392551586, + "kl_loss": 0.042675964534282684, + "loss_ib": 0.0008393104071728885, + "step": 3213 + }, + { + "ce_ib": 4.040095329284668, + "ce_orig": 0.9514000415802002, + "epoch": 0.9240060392551586, + "kl_loss": 0.0848371610045433, + "loss_ib": 0.0012523811310529709, + "step": 3213 + }, + { + "ce_ib": 3.888169527053833, + "ce_orig": 1.0332157611846924, + "epoch": 0.9242936228341362, + "kl_loss": 0.14173156023025513, + "loss_ib": 0.0018061324954032898, + "step": 3214 + }, + { + "ce_ib": 4.493694305419922, + "ce_orig": 0.8820397853851318, + "epoch": 0.9242936228341362, + "kl_loss": 0.049749698489904404, + "loss_ib": 0.0009468664065934718, + "step": 3214 + }, + { + "ce_ib": 4.661299228668213, + "ce_orig": 1.0662554502487183, + "epoch": 0.9242936228341362, + "kl_loss": 0.06703582406044006, + "loss_ib": 0.0011364881647750735, + "step": 3214 + }, + { + "ce_ib": 2.1584527492523193, + "ce_orig": 0.46736693382263184, + "epoch": 0.9242936228341362, + "kl_loss": 0.08227990567684174, + "loss_ib": 0.001038644346408546, + "step": 3214 + }, + { + "epoch": 0.9245812064131138, + "grad_norm": 0.09115787595510483, + "learning_rate": 4.0565565192526605e-05, + "loss": 0.8353, + "step": 3215 + }, + { + "ce_ib": 4.475177764892578, + "ce_orig": 1.2920632362365723, + "epoch": 0.9245812064131138, + "kl_loss": 0.0391685925424099, + "loss_ib": 0.0008392037125304341, + "step": 3215 + }, + { + "ce_ib": 2.579705238342285, + "ce_orig": 0.6707682609558105, + "epoch": 0.9245812064131138, + "kl_loss": 0.048480693250894547, + "loss_ib": 0.0007427774253301322, + "step": 3215 + }, + { + "ce_ib": 4.089361190795898, + "ce_orig": 0.8546391725540161, + "epoch": 0.9245812064131138, + "kl_loss": 0.03169870376586914, + "loss_ib": 0.0007259231642819941, + "step": 3215 + }, + { + "ce_ib": 4.792830944061279, + "ce_orig": 1.174859881401062, + "epoch": 0.9245812064131138, + "kl_loss": 0.08515684306621552, + "loss_ib": 0.0013308514608070254, + "step": 3215 + }, + { + "ce_ib": 4.753259181976318, + "ce_orig": 1.1046979427337646, + "epoch": 0.9248687899920914, + "kl_loss": 0.0735260471701622, + "loss_ib": 0.0012105864007025957, + "step": 3216 + }, + { + "ce_ib": 4.208837032318115, + "ce_orig": 0.894100546836853, + "epoch": 0.9248687899920914, + "kl_loss": 0.11378564685583115, + "loss_ib": 0.0015587401576340199, + "step": 3216 + }, + { + "ce_ib": 2.5356314182281494, + "ce_orig": 0.7129440307617188, + "epoch": 0.9248687899920914, + "kl_loss": 0.046991460025310516, + "loss_ib": 0.0007234777440316975, + "step": 3216 + }, + { + "ce_ib": 3.3925089836120605, + "ce_orig": 0.5885260105133057, + "epoch": 0.9248687899920914, + "kl_loss": 0.03457505255937576, + "loss_ib": 0.0006850014324299991, + "step": 3216 + }, + { + "ce_ib": 3.3447790145874023, + "ce_orig": 0.7477579712867737, + "epoch": 0.9251563735710691, + "kl_loss": 0.04752611368894577, + "loss_ib": 0.0008097390527836978, + "step": 3217 + }, + { + "ce_ib": 4.525654315948486, + "ce_orig": 1.2029683589935303, + "epoch": 0.9251563735710691, + "kl_loss": 0.05567536875605583, + "loss_ib": 0.0010093190940096974, + "step": 3217 + }, + { + "ce_ib": 3.3912973403930664, + "ce_orig": 0.8838808536529541, + "epoch": 0.9251563735710691, + "kl_loss": 0.05044254660606384, + "loss_ib": 0.0008435551426373422, + "step": 3217 + }, + { + "ce_ib": 4.427152156829834, + "ce_orig": 0.7208644151687622, + "epoch": 0.9251563735710691, + "kl_loss": 0.06333591043949127, + "loss_ib": 0.0010760743170976639, + "step": 3217 + }, + { + "ce_ib": 2.694065809249878, + "ce_orig": 0.718438446521759, + "epoch": 0.9254439571500467, + "kl_loss": 0.05223255231976509, + "loss_ib": 0.0007917320472188294, + "step": 3218 + }, + { + "ce_ib": 2.9602108001708984, + "ce_orig": 0.7896710634231567, + "epoch": 0.9254439571500467, + "kl_loss": 0.03617352247238159, + "loss_ib": 0.0006577562890015543, + "step": 3218 + }, + { + "ce_ib": 3.84252667427063, + "ce_orig": 0.8409265279769897, + "epoch": 0.9254439571500467, + "kl_loss": 0.05280134826898575, + "loss_ib": 0.0009122661431320012, + "step": 3218 + }, + { + "ce_ib": 3.409011125564575, + "ce_orig": 0.7351001501083374, + "epoch": 0.9254439571500467, + "kl_loss": 0.04310869425535202, + "loss_ib": 0.0007719880086369812, + "step": 3218 + }, + { + "ce_ib": 2.4305436611175537, + "ce_orig": 0.46342235803604126, + "epoch": 0.9257315407290244, + "kl_loss": 0.055696889758110046, + "loss_ib": 0.0008000232046470046, + "step": 3219 + }, + { + "ce_ib": 2.9163408279418945, + "ce_orig": 0.5470888614654541, + "epoch": 0.9257315407290244, + "kl_loss": 0.05336577817797661, + "loss_ib": 0.0008252918487414718, + "step": 3219 + }, + { + "ce_ib": 3.0183217525482178, + "ce_orig": 0.8398440480232239, + "epoch": 0.9257315407290244, + "kl_loss": 0.04797499626874924, + "loss_ib": 0.0007815821445547044, + "step": 3219 + }, + { + "ce_ib": 2.653970956802368, + "ce_orig": 0.7340614795684814, + "epoch": 0.9257315407290244, + "kl_loss": 0.02838675118982792, + "loss_ib": 0.000549264601431787, + "step": 3219 + }, + { + "epoch": 0.926019124308002, + "grad_norm": 0.10995731502771378, + "learning_rate": 4.053518125921365e-05, + "loss": 0.8681, + "step": 3220 + }, + { + "ce_ib": 3.5552496910095215, + "ce_orig": 0.7322179079055786, + "epoch": 0.926019124308002, + "kl_loss": 0.04620983451604843, + "loss_ib": 0.000817623280454427, + "step": 3220 + }, + { + "ce_ib": 2.087230920791626, + "ce_orig": 0.44652965664863586, + "epoch": 0.926019124308002, + "kl_loss": 0.022987481206655502, + "loss_ib": 0.00043859786819666624, + "step": 3220 + }, + { + "ce_ib": 3.8266453742980957, + "ce_orig": 0.9872012138366699, + "epoch": 0.926019124308002, + "kl_loss": 0.04546389728784561, + "loss_ib": 0.000837303523439914, + "step": 3220 + }, + { + "ce_ib": 2.2637953758239746, + "ce_orig": 0.6788160800933838, + "epoch": 0.926019124308002, + "kl_loss": 0.039346855133771896, + "loss_ib": 0.0006198480841703713, + "step": 3220 + }, + { + "ce_ib": 4.137264251708984, + "ce_orig": 1.099465012550354, + "epoch": 0.9263067078869797, + "kl_loss": 0.033056698739528656, + "loss_ib": 0.0007442933856509626, + "step": 3221 + }, + { + "ce_ib": 5.359578609466553, + "ce_orig": 1.184550404548645, + "epoch": 0.9263067078869797, + "kl_loss": 0.054851118475198746, + "loss_ib": 0.0010844690259546041, + "step": 3221 + }, + { + "ce_ib": 2.0666134357452393, + "ce_orig": 0.38470882177352905, + "epoch": 0.9263067078869797, + "kl_loss": 0.04622240737080574, + "loss_ib": 0.0006688854191452265, + "step": 3221 + }, + { + "ce_ib": 2.705644130706787, + "ce_orig": 0.8040753602981567, + "epoch": 0.9263067078869797, + "kl_loss": 0.05467110872268677, + "loss_ib": 0.000817275489680469, + "step": 3221 + }, + { + "ce_ib": 2.3943443298339844, + "ce_orig": 0.6045452356338501, + "epoch": 0.9265942914659573, + "kl_loss": 0.030139463022351265, + "loss_ib": 0.0005408290890045464, + "step": 3222 + }, + { + "ce_ib": 2.25039005279541, + "ce_orig": 0.588294506072998, + "epoch": 0.9265942914659573, + "kl_loss": 0.07637421041727066, + "loss_ib": 0.0009887811029329896, + "step": 3222 + }, + { + "ce_ib": 2.781264066696167, + "ce_orig": 0.8902842998504639, + "epoch": 0.9265942914659573, + "kl_loss": 0.033727094531059265, + "loss_ib": 0.0006153972935862839, + "step": 3222 + }, + { + "ce_ib": 3.7364885807037354, + "ce_orig": 0.9251511096954346, + "epoch": 0.9265942914659573, + "kl_loss": 0.051229327917099, + "loss_ib": 0.0008859421359375119, + "step": 3222 + }, + { + "ce_ib": 5.045575141906738, + "ce_orig": 0.9248849749565125, + "epoch": 0.9268818750449349, + "kl_loss": 0.10205075889825821, + "loss_ib": 0.001525064930319786, + "step": 3223 + }, + { + "ce_ib": 6.749456882476807, + "ce_orig": 1.659141182899475, + "epoch": 0.9268818750449349, + "kl_loss": 0.06276987493038177, + "loss_ib": 0.0013026442611590028, + "step": 3223 + }, + { + "ce_ib": 3.156900644302368, + "ce_orig": 1.0215963125228882, + "epoch": 0.9268818750449349, + "kl_loss": 0.03157768398523331, + "loss_ib": 0.0006314668571576476, + "step": 3223 + }, + { + "ce_ib": 3.9701812267303467, + "ce_orig": 0.8939017057418823, + "epoch": 0.9268818750449349, + "kl_loss": 0.05970665067434311, + "loss_ib": 0.0009940846357494593, + "step": 3223 + }, + { + "ce_ib": 3.3656532764434814, + "ce_orig": 0.8406651020050049, + "epoch": 0.9271694586239125, + "kl_loss": 0.07255869358778, + "loss_ib": 0.0010621522087603807, + "step": 3224 + }, + { + "ce_ib": 4.1461381912231445, + "ce_orig": 1.2900965213775635, + "epoch": 0.9271694586239125, + "kl_loss": 0.037554893642663956, + "loss_ib": 0.0007901627104729414, + "step": 3224 + }, + { + "ce_ib": 2.4146933555603027, + "ce_orig": 0.541534960269928, + "epoch": 0.9271694586239125, + "kl_loss": 0.04234131798148155, + "loss_ib": 0.0006648824783042073, + "step": 3224 + }, + { + "ce_ib": 3.462245464324951, + "ce_orig": 0.8030938506126404, + "epoch": 0.9271694586239125, + "kl_loss": 0.049987100064754486, + "loss_ib": 0.0008460954995825887, + "step": 3224 + }, + { + "epoch": 0.9274570422028903, + "grad_norm": 0.09512978047132492, + "learning_rate": 4.050475989804326e-05, + "loss": 0.8399, + "step": 3225 + }, + { + "ce_ib": 2.2694456577301025, + "ce_orig": 0.524649977684021, + "epoch": 0.9274570422028903, + "kl_loss": 0.02758478745818138, + "loss_ib": 0.0005027924198657274, + "step": 3225 + }, + { + "ce_ib": 2.4845540523529053, + "ce_orig": 0.4762313663959503, + "epoch": 0.9274570422028903, + "kl_loss": 0.04276333004236221, + "loss_ib": 0.000676088675390929, + "step": 3225 + }, + { + "ce_ib": 3.561732053756714, + "ce_orig": 1.0613120794296265, + "epoch": 0.9274570422028903, + "kl_loss": 0.04536009579896927, + "loss_ib": 0.0008097740937955678, + "step": 3225 + }, + { + "ce_ib": 2.4694700241088867, + "ce_orig": 0.5674365162849426, + "epoch": 0.9274570422028903, + "kl_loss": 0.048048246651887894, + "loss_ib": 0.0007274294621311128, + "step": 3225 + }, + { + "ce_ib": 3.2043373584747314, + "ce_orig": 0.6400473713874817, + "epoch": 0.9277446257818679, + "kl_loss": 0.06594367325305939, + "loss_ib": 0.0009798704413697124, + "step": 3226 + }, + { + "ce_ib": 3.901717185974121, + "ce_orig": 1.1094458103179932, + "epoch": 0.9277446257818679, + "kl_loss": 0.04496193677186966, + "loss_ib": 0.000839791027829051, + "step": 3226 + }, + { + "ce_ib": 5.601212978363037, + "ce_orig": 1.5129035711288452, + "epoch": 0.9277446257818679, + "kl_loss": 0.06589809060096741, + "loss_ib": 0.0012191020650789142, + "step": 3226 + }, + { + "ce_ib": 3.9336979389190674, + "ce_orig": 0.6749326586723328, + "epoch": 0.9277446257818679, + "kl_loss": 0.04369225725531578, + "loss_ib": 0.0008302924106828868, + "step": 3226 + }, + { + "ce_ib": 3.471466541290283, + "ce_orig": 0.8958057165145874, + "epoch": 0.9280322093608455, + "kl_loss": 0.06481394171714783, + "loss_ib": 0.0009952860418707132, + "step": 3227 + }, + { + "ce_ib": 3.2155487537384033, + "ce_orig": 0.4691512882709503, + "epoch": 0.9280322093608455, + "kl_loss": 0.11047933995723724, + "loss_ib": 0.0014263482298702002, + "step": 3227 + }, + { + "ce_ib": 5.475457668304443, + "ce_orig": 1.3795198202133179, + "epoch": 0.9280322093608455, + "kl_loss": 0.07338424026966095, + "loss_ib": 0.0012813881039619446, + "step": 3227 + }, + { + "ce_ib": 3.9982311725616455, + "ce_orig": 0.7133368849754333, + "epoch": 0.9280322093608455, + "kl_loss": 0.04868307709693909, + "loss_ib": 0.0008866538410075009, + "step": 3227 + }, + { + "ce_ib": 3.97926664352417, + "ce_orig": 1.213282823562622, + "epoch": 0.9283197929398231, + "kl_loss": 0.04336971044540405, + "loss_ib": 0.0008316237363032997, + "step": 3228 + }, + { + "ce_ib": 4.748617649078369, + "ce_orig": 1.3855459690093994, + "epoch": 0.9283197929398231, + "kl_loss": 0.05017848685383797, + "loss_ib": 0.000976646551862359, + "step": 3228 + }, + { + "ce_ib": 2.3713841438293457, + "ce_orig": 0.6072503328323364, + "epoch": 0.9283197929398231, + "kl_loss": 0.03135954961180687, + "loss_ib": 0.0005507338792085648, + "step": 3228 + }, + { + "ce_ib": 3.459848165512085, + "ce_orig": 0.7929607629776001, + "epoch": 0.9283197929398231, + "kl_loss": 0.0381624773144722, + "loss_ib": 0.0007276095566339791, + "step": 3228 + }, + { + "ce_ib": 4.483698844909668, + "ce_orig": 1.0117554664611816, + "epoch": 0.9286073765188008, + "kl_loss": 0.053129732608795166, + "loss_ib": 0.0009796671802178025, + "step": 3229 + }, + { + "ce_ib": 2.2883822917938232, + "ce_orig": 0.42074042558670044, + "epoch": 0.9286073765188008, + "kl_loss": 0.06638844311237335, + "loss_ib": 0.0008927226881496608, + "step": 3229 + }, + { + "ce_ib": 2.9463324546813965, + "ce_orig": 0.6557804942131042, + "epoch": 0.9286073765188008, + "kl_loss": 0.047216057777404785, + "loss_ib": 0.0007667937898077071, + "step": 3229 + }, + { + "ce_ib": 2.9608266353607178, + "ce_orig": 0.7813248038291931, + "epoch": 0.9286073765188008, + "kl_loss": 0.04123727232217789, + "loss_ib": 0.0007084553362801671, + "step": 3229 + }, + { + "epoch": 0.9288949600977784, + "grad_norm": 0.08753489702939987, + "learning_rate": 4.047430118230753e-05, + "loss": 0.9042, + "step": 3230 + }, + { + "ce_ib": 4.087707996368408, + "ce_orig": 0.6940314769744873, + "epoch": 0.9288949600977784, + "kl_loss": 0.03388824313879013, + "loss_ib": 0.0007476532482542098, + "step": 3230 + }, + { + "ce_ib": 1.5332108736038208, + "ce_orig": 0.2298368513584137, + "epoch": 0.9288949600977784, + "kl_loss": 0.13257810473442078, + "loss_ib": 0.0014791020657867193, + "step": 3230 + }, + { + "ce_ib": 0.6686152815818787, + "ce_orig": 0.050003793090581894, + "epoch": 0.9288949600977784, + "kl_loss": 0.11053842306137085, + "loss_ib": 0.0011722457129508257, + "step": 3230 + }, + { + "ce_ib": 2.86799955368042, + "ce_orig": 0.5686987638473511, + "epoch": 0.9288949600977784, + "kl_loss": 0.0539284348487854, + "loss_ib": 0.0008260842878371477, + "step": 3230 + }, + { + "ce_ib": 3.420051336288452, + "ce_orig": 0.8600316047668457, + "epoch": 0.929182543676756, + "kl_loss": 0.06116145849227905, + "loss_ib": 0.0009536196594126523, + "step": 3231 + }, + { + "ce_ib": 4.931880950927734, + "ce_orig": 1.196074366569519, + "epoch": 0.929182543676756, + "kl_loss": 0.051044873893260956, + "loss_ib": 0.0010036368621513247, + "step": 3231 + }, + { + "ce_ib": 3.0049993991851807, + "ce_orig": 0.6496173143386841, + "epoch": 0.929182543676756, + "kl_loss": 0.04673505946993828, + "loss_ib": 0.0007678504916839302, + "step": 3231 + }, + { + "ce_ib": 2.623401165008545, + "ce_orig": 0.8676499128341675, + "epoch": 0.929182543676756, + "kl_loss": 0.03240646794438362, + "loss_ib": 0.0005864048143848777, + "step": 3231 + }, + { + "ce_ib": 2.3586788177490234, + "ce_orig": 0.47213348746299744, + "epoch": 0.9294701272557337, + "kl_loss": 0.032453641295433044, + "loss_ib": 0.0005604042671620846, + "step": 3232 + }, + { + "ce_ib": 2.7672739028930664, + "ce_orig": 0.6446057558059692, + "epoch": 0.9294701272557337, + "kl_loss": 0.05416427180171013, + "loss_ib": 0.0008183701429516077, + "step": 3232 + }, + { + "ce_ib": 4.5081963539123535, + "ce_orig": 1.2143161296844482, + "epoch": 0.9294701272557337, + "kl_loss": 0.03503815457224846, + "loss_ib": 0.0008012011530809104, + "step": 3232 + }, + { + "ce_ib": 3.8661892414093018, + "ce_orig": 1.1210122108459473, + "epoch": 0.9294701272557337, + "kl_loss": 0.07318615913391113, + "loss_ib": 0.0011184804607182741, + "step": 3232 + }, + { + "ce_ib": 3.564380168914795, + "ce_orig": 0.5940566062927246, + "epoch": 0.9297577108347114, + "kl_loss": 0.04892442375421524, + "loss_ib": 0.000845682225190103, + "step": 3233 + }, + { + "ce_ib": 4.453423500061035, + "ce_orig": 1.2023130655288696, + "epoch": 0.9297577108347114, + "kl_loss": 0.03883010149002075, + "loss_ib": 0.0008336433675140142, + "step": 3233 + }, + { + "ce_ib": 2.0629260540008545, + "ce_orig": 0.6118950843811035, + "epoch": 0.9297577108347114, + "kl_loss": 0.04289861023426056, + "loss_ib": 0.0006352786440402269, + "step": 3233 + }, + { + "ce_ib": 3.143083095550537, + "ce_orig": 0.869044840335846, + "epoch": 0.9297577108347114, + "kl_loss": 0.041874222457408905, + "loss_ib": 0.0007330505177378654, + "step": 3233 + }, + { + "ce_ib": 2.1706831455230713, + "ce_orig": 0.6259910464286804, + "epoch": 0.930045294413689, + "kl_loss": 0.02655400149524212, + "loss_ib": 0.000482608302263543, + "step": 3234 + }, + { + "ce_ib": 3.7270991802215576, + "ce_orig": 0.8714501261711121, + "epoch": 0.930045294413689, + "kl_loss": 0.07683658599853516, + "loss_ib": 0.0011410757433623075, + "step": 3234 + }, + { + "ce_ib": 1.6648752689361572, + "ce_orig": 0.33603379130363464, + "epoch": 0.930045294413689, + "kl_loss": 0.07759728282690048, + "loss_ib": 0.0009424603777006269, + "step": 3234 + }, + { + "ce_ib": 2.589088201522827, + "ce_orig": 0.55193030834198, + "epoch": 0.930045294413689, + "kl_loss": 0.030965227633714676, + "loss_ib": 0.0005685610813088715, + "step": 3234 + }, + { + "epoch": 0.9303328779926666, + "grad_norm": 0.10029535740613937, + "learning_rate": 4.044380518538859e-05, + "loss": 0.8639, + "step": 3235 + }, + { + "ce_ib": 3.9607338905334473, + "ce_orig": 0.9423674941062927, + "epoch": 0.9303328779926666, + "kl_loss": 0.07429380714893341, + "loss_ib": 0.0011390114668756723, + "step": 3235 + }, + { + "ce_ib": 4.768397808074951, + "ce_orig": 1.0701603889465332, + "epoch": 0.9303328779926666, + "kl_loss": 0.04637531936168671, + "loss_ib": 0.000940592959523201, + "step": 3235 + }, + { + "ce_ib": 2.9296467304229736, + "ce_orig": 0.7952653169631958, + "epoch": 0.9303328779926666, + "kl_loss": 0.04060693830251694, + "loss_ib": 0.000699034018907696, + "step": 3235 + }, + { + "ce_ib": 4.7338433265686035, + "ce_orig": 0.9422643184661865, + "epoch": 0.9303328779926666, + "kl_loss": 0.03842431306838989, + "loss_ib": 0.0008576274267397821, + "step": 3235 + }, + { + "ce_ib": 3.524066209793091, + "ce_orig": 0.8587357997894287, + "epoch": 0.9306204615716442, + "kl_loss": 0.1098671555519104, + "loss_ib": 0.001451078220270574, + "step": 3236 + }, + { + "ce_ib": 3.5567362308502197, + "ce_orig": 0.7802704572677612, + "epoch": 0.9306204615716442, + "kl_loss": 0.06271684169769287, + "loss_ib": 0.0009828419424593449, + "step": 3236 + }, + { + "ce_ib": 3.362628698348999, + "ce_orig": 0.7254250049591064, + "epoch": 0.9306204615716442, + "kl_loss": 0.06188143417239189, + "loss_ib": 0.0009550771792419255, + "step": 3236 + }, + { + "ce_ib": 4.8726806640625, + "ce_orig": 1.4002543687820435, + "epoch": 0.9306204615716442, + "kl_loss": 0.034088484942913055, + "loss_ib": 0.0008281528716906905, + "step": 3236 + }, + { + "ce_ib": 2.802391529083252, + "ce_orig": 0.6201428174972534, + "epoch": 0.9309080451506219, + "kl_loss": 0.05495309457182884, + "loss_ib": 0.0008297701133415103, + "step": 3237 + }, + { + "ce_ib": 2.6268386840820312, + "ce_orig": 0.8270498514175415, + "epoch": 0.9309080451506219, + "kl_loss": 0.04986217990517616, + "loss_ib": 0.0007613056804984808, + "step": 3237 + }, + { + "ce_ib": 2.8021655082702637, + "ce_orig": 0.8092654943466187, + "epoch": 0.9309080451506219, + "kl_loss": 0.03407464176416397, + "loss_ib": 0.0006209629354998469, + "step": 3237 + }, + { + "ce_ib": 2.4964165687561035, + "ce_orig": 0.6533411145210266, + "epoch": 0.9309080451506219, + "kl_loss": 0.04676209017634392, + "loss_ib": 0.0007172625628300011, + "step": 3237 + }, + { + "ce_ib": 4.015262126922607, + "ce_orig": 1.2030055522918701, + "epoch": 0.9311956287295995, + "kl_loss": 0.03855182230472565, + "loss_ib": 0.000787044467870146, + "step": 3238 + }, + { + "ce_ib": 3.2557249069213867, + "ce_orig": 0.49613484740257263, + "epoch": 0.9311956287295995, + "kl_loss": 0.07908976078033447, + "loss_ib": 0.0011164700845256448, + "step": 3238 + }, + { + "ce_ib": 1.4681648015975952, + "ce_orig": 0.11420060694217682, + "epoch": 0.9311956287295995, + "kl_loss": 0.13136376440525055, + "loss_ib": 0.0014604540774598718, + "step": 3238 + }, + { + "ce_ib": 4.074934482574463, + "ce_orig": 0.7752663493156433, + "epoch": 0.9311956287295995, + "kl_loss": 0.03924975171685219, + "loss_ib": 0.0007999909576028585, + "step": 3238 + }, + { + "ce_ib": 3.0116655826568604, + "ce_orig": 0.5370876789093018, + "epoch": 0.9314832123085772, + "kl_loss": 0.041208527982234955, + "loss_ib": 0.0007132518221624196, + "step": 3239 + }, + { + "ce_ib": 3.96083402633667, + "ce_orig": 0.7530006170272827, + "epoch": 0.9314832123085772, + "kl_loss": 0.06927822530269623, + "loss_ib": 0.001088865683414042, + "step": 3239 + }, + { + "ce_ib": 3.099508762359619, + "ce_orig": 0.7702621221542358, + "epoch": 0.9314832123085772, + "kl_loss": 0.034944213926792145, + "loss_ib": 0.0006593929720111191, + "step": 3239 + }, + { + "ce_ib": 3.0499134063720703, + "ce_orig": 0.7247288823127747, + "epoch": 0.9314832123085772, + "kl_loss": 0.05009569972753525, + "loss_ib": 0.0008059483370743692, + "step": 3239 + }, + { + "epoch": 0.9317707958875548, + "grad_norm": 0.1005113422870636, + "learning_rate": 4.041327198075838e-05, + "loss": 0.8521, + "step": 3240 + }, + { + "ce_ib": 5.738753318786621, + "ce_orig": 1.817479133605957, + "epoch": 0.9317707958875548, + "kl_loss": 0.044933415949344635, + "loss_ib": 0.0010232094209641218, + "step": 3240 + }, + { + "ce_ib": 3.4428553581237793, + "ce_orig": 0.777876079082489, + "epoch": 0.9317707958875548, + "kl_loss": 0.051428548991680145, + "loss_ib": 0.0008585709729231894, + "step": 3240 + }, + { + "ce_ib": 4.363922119140625, + "ce_orig": 0.44698846340179443, + "epoch": 0.9317707958875548, + "kl_loss": 0.10155618190765381, + "loss_ib": 0.0014519538963213563, + "step": 3240 + }, + { + "ce_ib": 2.3516926765441895, + "ce_orig": 0.3442194163799286, + "epoch": 0.9317707958875548, + "kl_loss": 0.04490397125482559, + "loss_ib": 0.0006842089933343232, + "step": 3240 + }, + { + "ce_ib": 2.2444262504577637, + "ce_orig": 0.6283619403839111, + "epoch": 0.9320583794665325, + "kl_loss": 0.0628073588013649, + "loss_ib": 0.0008525162120349705, + "step": 3241 + }, + { + "ce_ib": 2.7037205696105957, + "ce_orig": 0.6561682820320129, + "epoch": 0.9320583794665325, + "kl_loss": 0.05329214036464691, + "loss_ib": 0.000803293427452445, + "step": 3241 + }, + { + "ce_ib": 4.949276447296143, + "ce_orig": 0.8255207538604736, + "epoch": 0.9320583794665325, + "kl_loss": 0.09010284394025803, + "loss_ib": 0.0013959561474621296, + "step": 3241 + }, + { + "ce_ib": 2.5918145179748535, + "ce_orig": 0.6372799277305603, + "epoch": 0.9320583794665325, + "kl_loss": 0.03509345278143883, + "loss_ib": 0.0006101159378886223, + "step": 3241 + }, + { + "ce_ib": 2.670269727706909, + "ce_orig": 0.5995690226554871, + "epoch": 0.9323459630455101, + "kl_loss": 0.0367879644036293, + "loss_ib": 0.000634906580671668, + "step": 3242 + }, + { + "ce_ib": 5.396104335784912, + "ce_orig": 1.3948787450790405, + "epoch": 0.9323459630455101, + "kl_loss": 0.060665324330329895, + "loss_ib": 0.0011462635593488812, + "step": 3242 + }, + { + "ce_ib": 3.684687376022339, + "ce_orig": 0.8870030641555786, + "epoch": 0.9323459630455101, + "kl_loss": 0.04940113425254822, + "loss_ib": 0.000862480083014816, + "step": 3242 + }, + { + "ce_ib": 2.930612564086914, + "ce_orig": 0.924166738986969, + "epoch": 0.9323459630455101, + "kl_loss": 0.03325837850570679, + "loss_ib": 0.0006256450433284044, + "step": 3242 + }, + { + "ce_ib": 5.932633399963379, + "ce_orig": 1.0794789791107178, + "epoch": 0.9326335466244877, + "kl_loss": 0.08274778723716736, + "loss_ib": 0.0014207412023097277, + "step": 3243 + }, + { + "ce_ib": 2.091975450515747, + "ce_orig": 0.6908469796180725, + "epoch": 0.9326335466244877, + "kl_loss": 0.03068387880921364, + "loss_ib": 0.0005160362925380468, + "step": 3243 + }, + { + "ce_ib": 2.574167013168335, + "ce_orig": 0.569635808467865, + "epoch": 0.9326335466244877, + "kl_loss": 0.051698241382837296, + "loss_ib": 0.0007743990863673389, + "step": 3243 + }, + { + "ce_ib": 3.462162733078003, + "ce_orig": 0.8295125365257263, + "epoch": 0.9326335466244877, + "kl_loss": 0.04302860423922539, + "loss_ib": 0.0007765023037791252, + "step": 3243 + }, + { + "ce_ib": 4.381130218505859, + "ce_orig": 1.091596007347107, + "epoch": 0.9329211302034653, + "kl_loss": 0.07047083973884583, + "loss_ib": 0.0011428213911131024, + "step": 3244 + }, + { + "ce_ib": 4.227360725402832, + "ce_orig": 1.1035395860671997, + "epoch": 0.9329211302034653, + "kl_loss": 0.04796527698636055, + "loss_ib": 0.000902388826943934, + "step": 3244 + }, + { + "ce_ib": 3.5261194705963135, + "ce_orig": 0.7544249892234802, + "epoch": 0.9329211302034653, + "kl_loss": 0.04382603242993355, + "loss_ib": 0.0007908722618594766, + "step": 3244 + }, + { + "ce_ib": 4.056575775146484, + "ce_orig": 0.6124538779258728, + "epoch": 0.9329211302034653, + "kl_loss": 0.06571513414382935, + "loss_ib": 0.0010628089075908065, + "step": 3244 + }, + { + "epoch": 0.9332087137824431, + "grad_norm": 0.08860474824905396, + "learning_rate": 4.038270164197847e-05, + "loss": 0.8181, + "step": 3245 + }, + { + "ce_ib": 3.7809832096099854, + "ce_orig": 0.7342736124992371, + "epoch": 0.9332087137824431, + "kl_loss": 0.06722591817378998, + "loss_ib": 0.001050357474014163, + "step": 3245 + }, + { + "ce_ib": 3.633622169494629, + "ce_orig": 0.6540010571479797, + "epoch": 0.9332087137824431, + "kl_loss": 0.039173468947410583, + "loss_ib": 0.0007550969021394849, + "step": 3245 + }, + { + "ce_ib": 3.4225475788116455, + "ce_orig": 0.45546528697013855, + "epoch": 0.9332087137824431, + "kl_loss": 0.05942396819591522, + "loss_ib": 0.0009364944417029619, + "step": 3245 + }, + { + "ce_ib": 4.257932662963867, + "ce_orig": 0.9519487619400024, + "epoch": 0.9332087137824431, + "kl_loss": 0.0636335015296936, + "loss_ib": 0.0010621282272040844, + "step": 3245 + }, + { + "ce_ib": 3.6302974224090576, + "ce_orig": 0.9387862682342529, + "epoch": 0.9334962973614207, + "kl_loss": 0.06199483573436737, + "loss_ib": 0.0009829780319705606, + "step": 3246 + }, + { + "ce_ib": 2.860386371612549, + "ce_orig": 0.539267361164093, + "epoch": 0.9334962973614207, + "kl_loss": 0.03404983878135681, + "loss_ib": 0.0006265370175242424, + "step": 3246 + }, + { + "ce_ib": 2.0345866680145264, + "ce_orig": 0.5178064703941345, + "epoch": 0.9334962973614207, + "kl_loss": 0.022885873913764954, + "loss_ib": 0.0004323174071032554, + "step": 3246 + }, + { + "ce_ib": 3.920335292816162, + "ce_orig": 0.9600411057472229, + "epoch": 0.9334962973614207, + "kl_loss": 0.05239801108837128, + "loss_ib": 0.0009160136105492711, + "step": 3246 + }, + { + "ce_ib": 4.891231536865234, + "ce_orig": 1.3451751470565796, + "epoch": 0.9337838809403983, + "kl_loss": 0.061683736741542816, + "loss_ib": 0.0011059604585170746, + "step": 3247 + }, + { + "ce_ib": 4.943645477294922, + "ce_orig": 1.1942909955978394, + "epoch": 0.9337838809403983, + "kl_loss": 0.0639951303601265, + "loss_ib": 0.001134315854869783, + "step": 3247 + }, + { + "ce_ib": 3.121521472930908, + "ce_orig": 0.6871943473815918, + "epoch": 0.9337838809403983, + "kl_loss": 0.04394255951046944, + "loss_ib": 0.0007515777251683176, + "step": 3247 + }, + { + "ce_ib": 2.497335195541382, + "ce_orig": 0.4274482727050781, + "epoch": 0.9337838809403983, + "kl_loss": 0.1175985187292099, + "loss_ib": 0.00142571865580976, + "step": 3247 + }, + { + "ce_ib": 6.635779857635498, + "ce_orig": 1.5496293306350708, + "epoch": 0.934071464519376, + "kl_loss": 0.0765993520617485, + "loss_ib": 0.0014295714208856225, + "step": 3248 + }, + { + "ce_ib": 3.1536669731140137, + "ce_orig": 0.5705724954605103, + "epoch": 0.934071464519376, + "kl_loss": 0.07461028546094894, + "loss_ib": 0.0010614695493131876, + "step": 3248 + }, + { + "ce_ib": 2.9785454273223877, + "ce_orig": 0.6393260955810547, + "epoch": 0.934071464519376, + "kl_loss": 0.08904185891151428, + "loss_ib": 0.0011882730759680271, + "step": 3248 + }, + { + "ce_ib": 6.610926151275635, + "ce_orig": 1.6991676092147827, + "epoch": 0.934071464519376, + "kl_loss": 0.06077202409505844, + "loss_ib": 0.0012688128044828773, + "step": 3248 + }, + { + "ce_ib": 3.622892141342163, + "ce_orig": 0.7513480186462402, + "epoch": 0.9343590480983536, + "kl_loss": 0.06594771146774292, + "loss_ib": 0.0010217663366347551, + "step": 3249 + }, + { + "ce_ib": 2.459463119506836, + "ce_orig": 0.48100462555885315, + "epoch": 0.9343590480983536, + "kl_loss": 0.03520045801997185, + "loss_ib": 0.0005979508860036731, + "step": 3249 + }, + { + "ce_ib": 5.2692036628723145, + "ce_orig": 0.8027289509773254, + "epoch": 0.9343590480983536, + "kl_loss": 0.08972782641649246, + "loss_ib": 0.0014241986209526658, + "step": 3249 + }, + { + "ce_ib": 2.4116857051849365, + "ce_orig": 0.4736078381538391, + "epoch": 0.9343590480983536, + "kl_loss": 0.05833032354712486, + "loss_ib": 0.0008244717610068619, + "step": 3249 + }, + { + "epoch": 0.9346466316773312, + "grad_norm": 0.09929206967353821, + "learning_rate": 4.03520942426999e-05, + "loss": 0.7864, + "step": 3250 + }, + { + "ce_ib": 4.388247489929199, + "ce_orig": 0.9640275835990906, + "epoch": 0.9346466316773312, + "kl_loss": 0.07194880396127701, + "loss_ib": 0.0011583127779886127, + "step": 3250 + }, + { + "ce_ib": 3.2167999744415283, + "ce_orig": 0.8224186301231384, + "epoch": 0.9346466316773312, + "kl_loss": 0.03330450505018234, + "loss_ib": 0.0006547249504365027, + "step": 3250 + }, + { + "ce_ib": 4.351608753204346, + "ce_orig": 1.2045364379882812, + "epoch": 0.9346466316773312, + "kl_loss": 0.044452182948589325, + "loss_ib": 0.0008796827169135213, + "step": 3250 + }, + { + "ce_ib": 3.0693705081939697, + "ce_orig": 0.23837508261203766, + "epoch": 0.9346466316773312, + "kl_loss": 0.11029164493083954, + "loss_ib": 0.0014098534593358636, + "step": 3250 + }, + { + "ce_ib": 4.597654819488525, + "ce_orig": 1.147627830505371, + "epoch": 0.9349342152563088, + "kl_loss": 0.055713921785354614, + "loss_ib": 0.0010169047163799405, + "step": 3251 + }, + { + "ce_ib": 3.126189947128296, + "ce_orig": 0.4266659617424011, + "epoch": 0.9349342152563088, + "kl_loss": 0.088340625166893, + "loss_ib": 0.0011960251722484827, + "step": 3251 + }, + { + "ce_ib": 2.5773377418518066, + "ce_orig": 0.6522459387779236, + "epoch": 0.9349342152563088, + "kl_loss": 0.0914529412984848, + "loss_ib": 0.0011722631752490997, + "step": 3251 + }, + { + "ce_ib": 3.258207082748413, + "ce_orig": 0.7760253548622131, + "epoch": 0.9349342152563088, + "kl_loss": 0.059476759284734726, + "loss_ib": 0.0009205882670357823, + "step": 3251 + }, + { + "ce_ib": 4.902854919433594, + "ce_orig": 1.2413547039031982, + "epoch": 0.9352217988352866, + "kl_loss": 0.06167033314704895, + "loss_ib": 0.001106988755054772, + "step": 3252 + }, + { + "ce_ib": 5.713639736175537, + "ce_orig": 0.7094220519065857, + "epoch": 0.9352217988352866, + "kl_loss": 0.20896190404891968, + "loss_ib": 0.0026609827764332294, + "step": 3252 + }, + { + "ce_ib": 2.4782555103302, + "ce_orig": 0.5297417640686035, + "epoch": 0.9352217988352866, + "kl_loss": 0.045263756066560745, + "loss_ib": 0.0007004631333984435, + "step": 3252 + }, + { + "ce_ib": 2.985384464263916, + "ce_orig": 0.7977446913719177, + "epoch": 0.9352217988352866, + "kl_loss": 0.06038983538746834, + "loss_ib": 0.0009024367900565267, + "step": 3252 + }, + { + "ce_ib": 2.7483973503112793, + "ce_orig": 0.7360705733299255, + "epoch": 0.9355093824142642, + "kl_loss": 0.056165315210819244, + "loss_ib": 0.000836492923554033, + "step": 3253 + }, + { + "ce_ib": 3.8470511436462402, + "ce_orig": 0.7542652487754822, + "epoch": 0.9355093824142642, + "kl_loss": 0.07235206663608551, + "loss_ib": 0.001108225667849183, + "step": 3253 + }, + { + "ce_ib": 3.7831430435180664, + "ce_orig": 0.9217150807380676, + "epoch": 0.9355093824142642, + "kl_loss": 0.06691661477088928, + "loss_ib": 0.0010474803857505322, + "step": 3253 + }, + { + "ce_ib": 3.2199559211730957, + "ce_orig": 0.915223240852356, + "epoch": 0.9355093824142642, + "kl_loss": 0.041290294378995895, + "loss_ib": 0.0007348984945565462, + "step": 3253 + }, + { + "ce_ib": 3.180293321609497, + "ce_orig": 0.4475858509540558, + "epoch": 0.9357969659932418, + "kl_loss": 0.06368955969810486, + "loss_ib": 0.0009549249662086368, + "step": 3254 + }, + { + "ce_ib": 3.773439645767212, + "ce_orig": 1.0185534954071045, + "epoch": 0.9357969659932418, + "kl_loss": 0.03532939776778221, + "loss_ib": 0.0007306378684006631, + "step": 3254 + }, + { + "ce_ib": 4.962161064147949, + "ce_orig": 0.6544047594070435, + "epoch": 0.9357969659932418, + "kl_loss": 0.062538281083107, + "loss_ib": 0.0011215988779440522, + "step": 3254 + }, + { + "ce_ib": 2.8568711280822754, + "ce_orig": 0.5057787299156189, + "epoch": 0.9357969659932418, + "kl_loss": 0.09722277522087097, + "loss_ib": 0.0012579148169606924, + "step": 3254 + }, + { + "epoch": 0.9360845495722194, + "grad_norm": 0.09952723234891891, + "learning_rate": 4.0321449856663004e-05, + "loss": 0.8039, + "step": 3255 + }, + { + "ce_ib": 4.032966136932373, + "ce_orig": 0.9997227787971497, + "epoch": 0.9360845495722194, + "kl_loss": 0.05295970290899277, + "loss_ib": 0.000932893599383533, + "step": 3255 + }, + { + "ce_ib": 5.823774337768555, + "ce_orig": 1.4013786315917969, + "epoch": 0.9360845495722194, + "kl_loss": 0.06264498829841614, + "loss_ib": 0.0012088273651897907, + "step": 3255 + }, + { + "ce_ib": 3.0922555923461914, + "ce_orig": 0.4924181401729584, + "epoch": 0.9360845495722194, + "kl_loss": 0.05715460330247879, + "loss_ib": 0.0008807715494185686, + "step": 3255 + }, + { + "ce_ib": 3.693826913833618, + "ce_orig": 0.9991262555122375, + "epoch": 0.9360845495722194, + "kl_loss": 0.05206336826086044, + "loss_ib": 0.0008900163811631501, + "step": 3255 + }, + { + "ce_ib": 3.8040175437927246, + "ce_orig": 0.8190496563911438, + "epoch": 0.936372133151197, + "kl_loss": 0.05435200035572052, + "loss_ib": 0.0009239217615686357, + "step": 3256 + }, + { + "ce_ib": 2.5517890453338623, + "ce_orig": 0.6043893098831177, + "epoch": 0.936372133151197, + "kl_loss": 0.027183786034584045, + "loss_ib": 0.0005270167603157461, + "step": 3256 + }, + { + "ce_ib": 2.953892707824707, + "ce_orig": 0.9828460812568665, + "epoch": 0.936372133151197, + "kl_loss": 0.034768976271152496, + "loss_ib": 0.0006430789944715798, + "step": 3256 + }, + { + "ce_ib": 4.206114292144775, + "ce_orig": 1.3629920482635498, + "epoch": 0.936372133151197, + "kl_loss": 0.04810355231165886, + "loss_ib": 0.0009016469120979309, + "step": 3256 + }, + { + "ce_ib": 2.4784271717071533, + "ce_orig": 0.4597042500972748, + "epoch": 0.9366597167301747, + "kl_loss": 0.03695060312747955, + "loss_ib": 0.0006173487054184079, + "step": 3257 + }, + { + "ce_ib": 5.158473968505859, + "ce_orig": 1.3763599395751953, + "epoch": 0.9366597167301747, + "kl_loss": 0.05518551170825958, + "loss_ib": 0.0010677024256438017, + "step": 3257 + }, + { + "ce_ib": 3.6272008419036865, + "ce_orig": 0.850065290927887, + "epoch": 0.9366597167301747, + "kl_loss": 0.06550037115812302, + "loss_ib": 0.0010177238145843148, + "step": 3257 + }, + { + "ce_ib": 6.125127792358398, + "ce_orig": 1.36961829662323, + "epoch": 0.9366597167301747, + "kl_loss": 0.07957610487937927, + "loss_ib": 0.001408273819833994, + "step": 3257 + }, + { + "ce_ib": 2.8931238651275635, + "ce_orig": 0.4628714323043823, + "epoch": 0.9369473003091523, + "kl_loss": 0.08703479915857315, + "loss_ib": 0.0011596602853387594, + "step": 3258 + }, + { + "ce_ib": 4.136826515197754, + "ce_orig": 1.1023378372192383, + "epoch": 0.9369473003091523, + "kl_loss": 0.06576324254274368, + "loss_ib": 0.001071315142326057, + "step": 3258 + }, + { + "ce_ib": 2.9564156532287598, + "ce_orig": 0.6920216679573059, + "epoch": 0.9369473003091523, + "kl_loss": 0.0706925094127655, + "loss_ib": 0.0010025666560977697, + "step": 3258 + }, + { + "ce_ib": 1.8523015975952148, + "ce_orig": 0.5179917812347412, + "epoch": 0.9369473003091523, + "kl_loss": 0.03907093405723572, + "loss_ib": 0.0005759394844062626, + "step": 3258 + }, + { + "ce_ib": 4.332699775695801, + "ce_orig": 0.7597174048423767, + "epoch": 0.93723488388813, + "kl_loss": 0.12678270041942596, + "loss_ib": 0.0017010968877002597, + "step": 3259 + }, + { + "ce_ib": 3.1173019409179688, + "ce_orig": 0.8939347267150879, + "epoch": 0.93723488388813, + "kl_loss": 0.04562094807624817, + "loss_ib": 0.0007679396658204496, + "step": 3259 + }, + { + "ce_ib": 2.358206272125244, + "ce_orig": 0.53092360496521, + "epoch": 0.93723488388813, + "kl_loss": 0.043030135333538055, + "loss_ib": 0.0006661219522356987, + "step": 3259 + }, + { + "ce_ib": 2.648858070373535, + "ce_orig": 0.6455047726631165, + "epoch": 0.93723488388813, + "kl_loss": 0.06114106625318527, + "loss_ib": 0.0008762964280322194, + "step": 3259 + }, + { + "epoch": 0.9375224674671077, + "grad_norm": 0.08763594925403595, + "learning_rate": 4.029076855769722e-05, + "loss": 0.8723, + "step": 3260 + }, + { + "ce_ib": 4.062323570251465, + "ce_orig": 1.0116933584213257, + "epoch": 0.9375224674671077, + "kl_loss": 0.0592157319188118, + "loss_ib": 0.0009983896743506193, + "step": 3260 + }, + { + "ce_ib": 4.005756378173828, + "ce_orig": 0.5376563668251038, + "epoch": 0.9375224674671077, + "kl_loss": 0.038566023111343384, + "loss_ib": 0.0007862358470447361, + "step": 3260 + }, + { + "ce_ib": 2.2874372005462646, + "ce_orig": 0.6352933645248413, + "epoch": 0.9375224674671077, + "kl_loss": 0.05938434600830078, + "loss_ib": 0.0008225871715694666, + "step": 3260 + }, + { + "ce_ib": 6.762615203857422, + "ce_orig": 1.2247246503829956, + "epoch": 0.9375224674671077, + "kl_loss": 0.09106147289276123, + "loss_ib": 0.0015868762275204062, + "step": 3260 + }, + { + "ce_ib": 4.901185512542725, + "ce_orig": 1.21113920211792, + "epoch": 0.9378100510460853, + "kl_loss": 0.04395221173763275, + "loss_ib": 0.0009296406642533839, + "step": 3261 + }, + { + "ce_ib": 2.481415033340454, + "ce_orig": 0.6176896691322327, + "epoch": 0.9378100510460853, + "kl_loss": 0.03603556379675865, + "loss_ib": 0.0006084971246309578, + "step": 3261 + }, + { + "ce_ib": 3.2344181537628174, + "ce_orig": 0.6254041194915771, + "epoch": 0.9378100510460853, + "kl_loss": 0.05740651488304138, + "loss_ib": 0.0008975068922154605, + "step": 3261 + }, + { + "ce_ib": 4.823166847229004, + "ce_orig": 1.0183454751968384, + "epoch": 0.9378100510460853, + "kl_loss": 0.0821479856967926, + "loss_ib": 0.001303796423599124, + "step": 3261 + }, + { + "ce_ib": 2.7587549686431885, + "ce_orig": 0.5027909874916077, + "epoch": 0.9380976346250629, + "kl_loss": 0.062475286424160004, + "loss_ib": 0.0009006283362396061, + "step": 3262 + }, + { + "ce_ib": 2.718191385269165, + "ce_orig": 0.5681697130203247, + "epoch": 0.9380976346250629, + "kl_loss": 0.06409807503223419, + "loss_ib": 0.0009127999073825777, + "step": 3262 + }, + { + "ce_ib": 2.5623319149017334, + "ce_orig": 0.6072449684143066, + "epoch": 0.9380976346250629, + "kl_loss": 0.05240299180150032, + "loss_ib": 0.0007802631007507443, + "step": 3262 + }, + { + "ce_ib": 2.6171376705169678, + "ce_orig": 0.6020799875259399, + "epoch": 0.9380976346250629, + "kl_loss": 0.0475970134139061, + "loss_ib": 0.0007376839057542384, + "step": 3262 + }, + { + "ce_ib": 6.006959438323975, + "ce_orig": 1.4623099565505981, + "epoch": 0.9383852182040405, + "kl_loss": 0.038336534053087234, + "loss_ib": 0.0009840612765401602, + "step": 3263 + }, + { + "ce_ib": 4.2758588790893555, + "ce_orig": 1.1469563245773315, + "epoch": 0.9383852182040405, + "kl_loss": 0.14640650153160095, + "loss_ib": 0.0018916508415713906, + "step": 3263 + }, + { + "ce_ib": 4.826613426208496, + "ce_orig": 0.9996064901351929, + "epoch": 0.9383852182040405, + "kl_loss": 0.054045744240283966, + "loss_ib": 0.0010231187334284186, + "step": 3263 + }, + { + "ce_ib": 1.0297999382019043, + "ce_orig": 0.1367565393447876, + "epoch": 0.9383852182040405, + "kl_loss": 0.12489070743322372, + "loss_ib": 0.0013518870109692216, + "step": 3263 + }, + { + "ce_ib": 2.8797497749328613, + "ce_orig": 0.7432836294174194, + "epoch": 0.9386728017830182, + "kl_loss": 0.048318732529878616, + "loss_ib": 0.0007711622747592628, + "step": 3264 + }, + { + "ce_ib": 3.507577419281006, + "ce_orig": 0.38912901282310486, + "epoch": 0.9386728017830182, + "kl_loss": 0.04426657408475876, + "loss_ib": 0.0007934235036373138, + "step": 3264 + }, + { + "ce_ib": 3.1882920265197754, + "ce_orig": 0.7648387551307678, + "epoch": 0.9386728017830182, + "kl_loss": 0.048240624368190765, + "loss_ib": 0.0008012354373931885, + "step": 3264 + }, + { + "ce_ib": 5.560983657836914, + "ce_orig": 1.4579476118087769, + "epoch": 0.9386728017830182, + "kl_loss": 0.06868137419223785, + "loss_ib": 0.0012429121416062117, + "step": 3264 + }, + { + "epoch": 0.9389603853619958, + "grad_norm": 0.08773885667324066, + "learning_rate": 4.026005041972092e-05, + "loss": 0.7977, + "step": 3265 + }, + { + "ce_ib": 3.635033130645752, + "ce_orig": 0.5259560346603394, + "epoch": 0.9389603853619958, + "kl_loss": 0.08063646405935287, + "loss_ib": 0.0011698679300025105, + "step": 3265 + }, + { + "ce_ib": 2.6595263481140137, + "ce_orig": 0.5746830701828003, + "epoch": 0.9389603853619958, + "kl_loss": 0.04737119376659393, + "loss_ib": 0.0007396645960398018, + "step": 3265 + }, + { + "ce_ib": 4.818488597869873, + "ce_orig": 1.2942172288894653, + "epoch": 0.9389603853619958, + "kl_loss": 0.055556777864694595, + "loss_ib": 0.0010374166304245591, + "step": 3265 + }, + { + "ce_ib": 2.9504611492156982, + "ce_orig": 0.5903036594390869, + "epoch": 0.9389603853619958, + "kl_loss": 0.061309389770030975, + "loss_ib": 0.0009081399766728282, + "step": 3265 + }, + { + "ce_ib": 3.5243844985961914, + "ce_orig": 0.8116549849510193, + "epoch": 0.9392479689409735, + "kl_loss": 0.04412811994552612, + "loss_ib": 0.0007937195478007197, + "step": 3266 + }, + { + "ce_ib": 3.790083646774292, + "ce_orig": 0.8370009660720825, + "epoch": 0.9392479689409735, + "kl_loss": 0.05706774815917015, + "loss_ib": 0.0009496858692727983, + "step": 3266 + }, + { + "ce_ib": 3.1404552459716797, + "ce_orig": 0.7820417284965515, + "epoch": 0.9392479689409735, + "kl_loss": 0.02391391061246395, + "loss_ib": 0.0005531845963560045, + "step": 3266 + }, + { + "ce_ib": 3.541536331176758, + "ce_orig": 0.8849738836288452, + "epoch": 0.9392479689409735, + "kl_loss": 0.07965423911809921, + "loss_ib": 0.00115069595631212, + "step": 3266 + }, + { + "ce_ib": 3.553405523300171, + "ce_orig": 0.9573114514350891, + "epoch": 0.9395355525199511, + "kl_loss": 0.06790369749069214, + "loss_ib": 0.0010343774920329452, + "step": 3267 + }, + { + "ce_ib": 2.6877026557922363, + "ce_orig": 0.6562243103981018, + "epoch": 0.9395355525199511, + "kl_loss": 0.0497559979557991, + "loss_ib": 0.0007663302239961922, + "step": 3267 + }, + { + "ce_ib": 2.993170738220215, + "ce_orig": 0.28969767689704895, + "epoch": 0.9395355525199511, + "kl_loss": 0.07268428802490234, + "loss_ib": 0.0010261599672958255, + "step": 3267 + }, + { + "ce_ib": 1.8621671199798584, + "ce_orig": 0.6166879534721375, + "epoch": 0.9395355525199511, + "kl_loss": 0.03071870654821396, + "loss_ib": 0.0004934037569910288, + "step": 3267 + }, + { + "ce_ib": 3.718642473220825, + "ce_orig": 0.6675646901130676, + "epoch": 0.9398231360989288, + "kl_loss": 0.08782445639371872, + "loss_ib": 0.0012501087039709091, + "step": 3268 + }, + { + "ce_ib": 3.275197744369507, + "ce_orig": 0.751190185546875, + "epoch": 0.9398231360989288, + "kl_loss": 0.025811471045017242, + "loss_ib": 0.0005856344942003489, + "step": 3268 + }, + { + "ce_ib": 5.628581523895264, + "ce_orig": 1.3010485172271729, + "epoch": 0.9398231360989288, + "kl_loss": 0.06280066072940826, + "loss_ib": 0.0011908647138625383, + "step": 3268 + }, + { + "ce_ib": 2.987203359603882, + "ce_orig": 0.6916788816452026, + "epoch": 0.9398231360989288, + "kl_loss": 0.044010989367961884, + "loss_ib": 0.0007388302474282682, + "step": 3268 + }, + { + "ce_ib": 2.9125301837921143, + "ce_orig": 0.8206856846809387, + "epoch": 0.9401107196779064, + "kl_loss": 0.041680775582790375, + "loss_ib": 0.0007080607465468347, + "step": 3269 + }, + { + "ce_ib": 3.71517014503479, + "ce_orig": 0.7414816617965698, + "epoch": 0.9401107196779064, + "kl_loss": 0.0692770928144455, + "loss_ib": 0.0010642879642546177, + "step": 3269 + }, + { + "ce_ib": 3.4450385570526123, + "ce_orig": 0.4895511865615845, + "epoch": 0.9401107196779064, + "kl_loss": 0.05064413323998451, + "loss_ib": 0.000850945187266916, + "step": 3269 + }, + { + "ce_ib": 3.4095778465270996, + "ce_orig": 0.498809814453125, + "epoch": 0.9401107196779064, + "kl_loss": 0.02982407435774803, + "loss_ib": 0.0006391985807567835, + "step": 3269 + }, + { + "epoch": 0.940398303256884, + "grad_norm": 0.09832839667797089, + "learning_rate": 4.022929551674122e-05, + "loss": 0.8068, + "step": 3270 + }, + { + "ce_ib": 3.4243719577789307, + "ce_orig": 0.6171739101409912, + "epoch": 0.940398303256884, + "kl_loss": 0.05062856152653694, + "loss_ib": 0.0008487227605655789, + "step": 3270 + }, + { + "ce_ib": 3.0421693325042725, + "ce_orig": 0.8669711947441101, + "epoch": 0.940398303256884, + "kl_loss": 0.025400152429938316, + "loss_ib": 0.0005582183948718011, + "step": 3270 + }, + { + "ce_ib": 4.280025959014893, + "ce_orig": 0.7242692708969116, + "epoch": 0.940398303256884, + "kl_loss": 0.06754213571548462, + "loss_ib": 0.0011034240014851093, + "step": 3270 + }, + { + "ce_ib": 1.9352058172225952, + "ce_orig": 0.4083940386772156, + "epoch": 0.940398303256884, + "kl_loss": 0.04688388481736183, + "loss_ib": 0.0006623594090342522, + "step": 3270 + }, + { + "ce_ib": 2.9155850410461426, + "ce_orig": 0.754269540309906, + "epoch": 0.9406858868358616, + "kl_loss": 0.06070413067936897, + "loss_ib": 0.0008985997410491109, + "step": 3271 + }, + { + "ce_ib": 2.6949846744537354, + "ce_orig": 0.5974329113960266, + "epoch": 0.9406858868358616, + "kl_loss": 0.07547459751367569, + "loss_ib": 0.0010242443531751633, + "step": 3271 + }, + { + "ce_ib": 4.344683647155762, + "ce_orig": 1.0943818092346191, + "epoch": 0.9406858868358616, + "kl_loss": 0.03444705158472061, + "loss_ib": 0.000778938818257302, + "step": 3271 + }, + { + "ce_ib": 3.5386812686920166, + "ce_orig": 0.7948712110519409, + "epoch": 0.9406858868358616, + "kl_loss": 0.05280698463320732, + "loss_ib": 0.0008819379727356136, + "step": 3271 + }, + { + "ce_ib": 4.5594801902771, + "ce_orig": 1.3952149152755737, + "epoch": 0.9409734704148394, + "kl_loss": 0.0743255466222763, + "loss_ib": 0.0011992034269496799, + "step": 3272 + }, + { + "ce_ib": 5.667575359344482, + "ce_orig": 1.393882155418396, + "epoch": 0.9409734704148394, + "kl_loss": 0.06027811020612717, + "loss_ib": 0.0011695385910570621, + "step": 3272 + }, + { + "ce_ib": 3.100421190261841, + "ce_orig": 0.9028326272964478, + "epoch": 0.9409734704148394, + "kl_loss": 0.05954264849424362, + "loss_ib": 0.0009054685942828655, + "step": 3272 + }, + { + "ce_ib": 4.434813022613525, + "ce_orig": 1.0326228141784668, + "epoch": 0.9409734704148394, + "kl_loss": 0.07949307560920715, + "loss_ib": 0.0012384119909256697, + "step": 3272 + }, + { + "ce_ib": 1.9392893314361572, + "ce_orig": 0.35471850633621216, + "epoch": 0.941261053993817, + "kl_loss": 0.053970806300640106, + "loss_ib": 0.0007336369599215686, + "step": 3273 + }, + { + "ce_ib": 4.119274139404297, + "ce_orig": 0.9748149514198303, + "epoch": 0.941261053993817, + "kl_loss": 0.08464686572551727, + "loss_ib": 0.001258396077901125, + "step": 3273 + }, + { + "ce_ib": 6.927652835845947, + "ce_orig": 1.5531851053237915, + "epoch": 0.941261053993817, + "kl_loss": 0.10389900207519531, + "loss_ib": 0.0017317552119493484, + "step": 3273 + }, + { + "ce_ib": 4.966390132904053, + "ce_orig": 1.2150561809539795, + "epoch": 0.941261053993817, + "kl_loss": 0.09248516708612442, + "loss_ib": 0.0014214905677363276, + "step": 3273 + }, + { + "ce_ib": 4.406932830810547, + "ce_orig": 0.864728569984436, + "epoch": 0.9415486375727946, + "kl_loss": 0.04978588595986366, + "loss_ib": 0.0009385521407239139, + "step": 3274 + }, + { + "ce_ib": 2.6330859661102295, + "ce_orig": 0.5154660940170288, + "epoch": 0.9415486375727946, + "kl_loss": 0.04499553143978119, + "loss_ib": 0.0007132638711482286, + "step": 3274 + }, + { + "ce_ib": 3.1190171241760254, + "ce_orig": 0.7988911271095276, + "epoch": 0.9415486375727946, + "kl_loss": 0.027819525450468063, + "loss_ib": 0.0005900969263166189, + "step": 3274 + }, + { + "ce_ib": 2.970581531524658, + "ce_orig": 0.6614795923233032, + "epoch": 0.9415486375727946, + "kl_loss": 0.028504766523838043, + "loss_ib": 0.0005821058293804526, + "step": 3274 + }, + { + "epoch": 0.9418362211517722, + "grad_norm": 0.09742450714111328, + "learning_rate": 4.0198503922853834e-05, + "loss": 0.8644, + "step": 3275 + }, + { + "ce_ib": 2.7119863033294678, + "ce_orig": 0.6062424182891846, + "epoch": 0.9418362211517722, + "kl_loss": 0.037062425166368484, + "loss_ib": 0.0006418228731490672, + "step": 3275 + }, + { + "ce_ib": 3.4598286151885986, + "ce_orig": 0.6057239770889282, + "epoch": 0.9418362211517722, + "kl_loss": 0.05692702904343605, + "loss_ib": 0.0009152531274594367, + "step": 3275 + }, + { + "ce_ib": 2.782773733139038, + "ce_orig": 0.6992740631103516, + "epoch": 0.9418362211517722, + "kl_loss": 0.02567465603351593, + "loss_ib": 0.0005350239225663245, + "step": 3275 + }, + { + "ce_ib": 3.085437059402466, + "ce_orig": 0.7386686205863953, + "epoch": 0.9418362211517722, + "kl_loss": 0.04181627184152603, + "loss_ib": 0.0007267063483595848, + "step": 3275 + }, + { + "ce_ib": 2.590521812438965, + "ce_orig": 0.6188504099845886, + "epoch": 0.9421238047307499, + "kl_loss": 0.0570480152964592, + "loss_ib": 0.0008295322768390179, + "step": 3276 + }, + { + "ce_ib": 2.6630992889404297, + "ce_orig": 0.4015752077102661, + "epoch": 0.9421238047307499, + "kl_loss": 0.05695410817861557, + "loss_ib": 0.0008358509512618184, + "step": 3276 + }, + { + "ce_ib": 4.727084636688232, + "ce_orig": 1.2302255630493164, + "epoch": 0.9421238047307499, + "kl_loss": 0.059814125299453735, + "loss_ib": 0.0010708497138693929, + "step": 3276 + }, + { + "ce_ib": 3.8293073177337646, + "ce_orig": 1.0864516496658325, + "epoch": 0.9421238047307499, + "kl_loss": 0.0528562031686306, + "loss_ib": 0.000911492679733783, + "step": 3276 + }, + { + "ce_ib": 2.6953721046447754, + "ce_orig": 0.6398144960403442, + "epoch": 0.9424113883097275, + "kl_loss": 0.06071915477514267, + "loss_ib": 0.0008767287363298237, + "step": 3277 + }, + { + "ce_ib": 4.795211315155029, + "ce_orig": 1.4989253282546997, + "epoch": 0.9424113883097275, + "kl_loss": 0.048788491636514664, + "loss_ib": 0.0009674060274846852, + "step": 3277 + }, + { + "ce_ib": 3.0874106884002686, + "ce_orig": 0.6418893337249756, + "epoch": 0.9424113883097275, + "kl_loss": 0.04572652652859688, + "loss_ib": 0.0007660062983632088, + "step": 3277 + }, + { + "ce_ib": 4.456676006317139, + "ce_orig": 0.9195656180381775, + "epoch": 0.9424113883097275, + "kl_loss": 0.043963439762592316, + "loss_ib": 0.0008853019680827856, + "step": 3277 + }, + { + "ce_ib": 2.5465097427368164, + "ce_orig": 0.3998510241508484, + "epoch": 0.9426989718887051, + "kl_loss": 0.030031787231564522, + "loss_ib": 0.000554968835785985, + "step": 3278 + }, + { + "ce_ib": 3.6275885105133057, + "ce_orig": 0.7704962491989136, + "epoch": 0.9426989718887051, + "kl_loss": 0.07474157214164734, + "loss_ib": 0.0011101745767518878, + "step": 3278 + }, + { + "ce_ib": 4.836483955383301, + "ce_orig": 1.5028083324432373, + "epoch": 0.9426989718887051, + "kl_loss": 0.03596413508057594, + "loss_ib": 0.0008432897157035768, + "step": 3278 + }, + { + "ce_ib": 7.909686088562012, + "ce_orig": 2.1076273918151855, + "epoch": 0.9426989718887051, + "kl_loss": 0.055470969527959824, + "loss_ib": 0.0013456782326102257, + "step": 3278 + }, + { + "ce_ib": 6.237913608551025, + "ce_orig": 1.8094598054885864, + "epoch": 0.9429865554676828, + "kl_loss": 0.055560920387506485, + "loss_ib": 0.0011794004822149873, + "step": 3279 + }, + { + "ce_ib": 5.546938896179199, + "ce_orig": 1.4272829294204712, + "epoch": 0.9429865554676828, + "kl_loss": 0.07414606213569641, + "loss_ib": 0.0012961545726284385, + "step": 3279 + }, + { + "ce_ib": 2.67374587059021, + "ce_orig": 0.5213585495948792, + "epoch": 0.9429865554676828, + "kl_loss": 0.05068342760205269, + "loss_ib": 0.0007742088637314737, + "step": 3279 + }, + { + "ce_ib": 3.3650062084198, + "ce_orig": 1.0867550373077393, + "epoch": 0.9429865554676828, + "kl_loss": 0.02576937898993492, + "loss_ib": 0.0005941943963989615, + "step": 3279 + }, + { + "epoch": 0.9432741390466605, + "grad_norm": 0.1059132069349289, + "learning_rate": 4.016767571224284e-05, + "loss": 0.8306, + "step": 3280 + }, + { + "ce_ib": 4.963431358337402, + "ce_orig": 1.0367685556411743, + "epoch": 0.9432741390466605, + "kl_loss": 0.052446819841861725, + "loss_ib": 0.0010208113817498088, + "step": 3280 + }, + { + "ce_ib": 3.9417903423309326, + "ce_orig": 1.1129251718521118, + "epoch": 0.9432741390466605, + "kl_loss": 0.03894646838307381, + "loss_ib": 0.0007836436852812767, + "step": 3280 + }, + { + "ce_ib": 2.4448885917663574, + "ce_orig": 0.3366677165031433, + "epoch": 0.9432741390466605, + "kl_loss": 0.06919825822114944, + "loss_ib": 0.0009364714496769011, + "step": 3280 + }, + { + "ce_ib": 7.5409064292907715, + "ce_orig": 1.104883074760437, + "epoch": 0.9432741390466605, + "kl_loss": 0.0685572475194931, + "loss_ib": 0.001439663115888834, + "step": 3280 + }, + { + "ce_ib": 4.3655524253845215, + "ce_orig": 0.6506245732307434, + "epoch": 0.9435617226256381, + "kl_loss": 0.056259818375110626, + "loss_ib": 0.000999153358861804, + "step": 3281 + }, + { + "ce_ib": 3.430297613143921, + "ce_orig": 0.6863912343978882, + "epoch": 0.9435617226256381, + "kl_loss": 0.05825527757406235, + "loss_ib": 0.0009255824843421578, + "step": 3281 + }, + { + "ce_ib": 4.273821830749512, + "ce_orig": 0.8653891682624817, + "epoch": 0.9435617226256381, + "kl_loss": 0.06018172949552536, + "loss_ib": 0.001029199454933405, + "step": 3281 + }, + { + "ce_ib": 3.2461485862731934, + "ce_orig": 0.5939798355102539, + "epoch": 0.9435617226256381, + "kl_loss": 0.04707356542348862, + "loss_ib": 0.0007953505264595151, + "step": 3281 + }, + { + "ce_ib": 3.852475643157959, + "ce_orig": 1.1470423936843872, + "epoch": 0.9438493062046157, + "kl_loss": 0.04727986454963684, + "loss_ib": 0.0008580461726523936, + "step": 3282 + }, + { + "ce_ib": 3.9140634536743164, + "ce_orig": 0.9188427329063416, + "epoch": 0.9438493062046157, + "kl_loss": 0.0605822429060936, + "loss_ib": 0.0009972287807613611, + "step": 3282 + }, + { + "ce_ib": 2.64831805229187, + "ce_orig": 0.7527708411216736, + "epoch": 0.9438493062046157, + "kl_loss": 0.03658728301525116, + "loss_ib": 0.0006307045696303248, + "step": 3282 + }, + { + "ce_ib": 5.81369161605835, + "ce_orig": 1.5208728313446045, + "epoch": 0.9438493062046157, + "kl_loss": 0.06719336658716202, + "loss_ib": 0.0012533027911558747, + "step": 3282 + }, + { + "ce_ib": 2.818305253982544, + "ce_orig": 0.6036056280136108, + "epoch": 0.9441368897835933, + "kl_loss": 0.05383435636758804, + "loss_ib": 0.000820174056570977, + "step": 3283 + }, + { + "ce_ib": 4.013901710510254, + "ce_orig": 0.7196105122566223, + "epoch": 0.9441368897835933, + "kl_loss": 0.07200074195861816, + "loss_ib": 0.0011213975958526134, + "step": 3283 + }, + { + "ce_ib": 3.3925983905792236, + "ce_orig": 0.6529005765914917, + "epoch": 0.9441368897835933, + "kl_loss": 0.08520869165658951, + "loss_ib": 0.0011913467897102237, + "step": 3283 + }, + { + "ce_ib": 3.47836971282959, + "ce_orig": 0.6212224364280701, + "epoch": 0.9441368897835933, + "kl_loss": 0.062376417219638824, + "loss_ib": 0.0009716011118143797, + "step": 3283 + }, + { + "ce_ib": 3.086313247680664, + "ce_orig": 0.45551884174346924, + "epoch": 0.944424473362571, + "kl_loss": 0.07807032763957977, + "loss_ib": 0.0010893346043303609, + "step": 3284 + }, + { + "ce_ib": 5.496944904327393, + "ce_orig": 1.6053745746612549, + "epoch": 0.944424473362571, + "kl_loss": 0.04798440262675285, + "loss_ib": 0.001029538456350565, + "step": 3284 + }, + { + "ce_ib": 3.901054859161377, + "ce_orig": 0.9791481494903564, + "epoch": 0.944424473362571, + "kl_loss": 0.05496632307767868, + "loss_ib": 0.0009397686808370054, + "step": 3284 + }, + { + "ce_ib": 4.648987293243408, + "ce_orig": 1.0164192914962769, + "epoch": 0.944424473362571, + "kl_loss": 0.033896442502737045, + "loss_ib": 0.0008038631058298051, + "step": 3284 + }, + { + "epoch": 0.9447120569415486, + "grad_norm": 0.10808689147233963, + "learning_rate": 4.013681095918057e-05, + "loss": 0.9069, + "step": 3285 + }, + { + "ce_ib": 4.11482048034668, + "ce_orig": 0.8914403319358826, + "epoch": 0.9447120569415486, + "kl_loss": 0.06166047602891922, + "loss_ib": 0.001028086873702705, + "step": 3285 + }, + { + "ce_ib": 3.0508224964141846, + "ce_orig": 0.7386616468429565, + "epoch": 0.9447120569415486, + "kl_loss": 0.06350376456975937, + "loss_ib": 0.0009401199058629572, + "step": 3285 + }, + { + "ce_ib": 2.0627572536468506, + "ce_orig": 0.5354618430137634, + "epoch": 0.9447120569415486, + "kl_loss": 0.035598963499069214, + "loss_ib": 0.0005622653407044709, + "step": 3285 + }, + { + "ce_ib": 3.658311367034912, + "ce_orig": 0.8815774917602539, + "epoch": 0.9447120569415486, + "kl_loss": 0.06119367480278015, + "loss_ib": 0.000977767864242196, + "step": 3285 + }, + { + "ce_ib": 3.9213857650756836, + "ce_orig": 0.7997374534606934, + "epoch": 0.9449996405205263, + "kl_loss": 0.07208641618490219, + "loss_ib": 0.0011130027705803514, + "step": 3286 + }, + { + "ce_ib": 4.217085361480713, + "ce_orig": 0.7809854745864868, + "epoch": 0.9449996405205263, + "kl_loss": 0.03671705350279808, + "loss_ib": 0.0007888790569268167, + "step": 3286 + }, + { + "ce_ib": 2.9208738803863525, + "ce_orig": 0.5876925587654114, + "epoch": 0.9449996405205263, + "kl_loss": 0.03902792930603027, + "loss_ib": 0.0006823666044510901, + "step": 3286 + }, + { + "ce_ib": 4.807227611541748, + "ce_orig": 1.4614475965499878, + "epoch": 0.9449996405205263, + "kl_loss": 0.04894338920712471, + "loss_ib": 0.0009701566305011511, + "step": 3286 + }, + { + "ce_ib": 3.661614418029785, + "ce_orig": 0.8591966032981873, + "epoch": 0.9452872240995039, + "kl_loss": 0.08183199912309647, + "loss_ib": 0.001184481312520802, + "step": 3287 + }, + { + "ce_ib": 1.328540325164795, + "ce_orig": 0.1682852804660797, + "epoch": 0.9452872240995039, + "kl_loss": 0.07902348041534424, + "loss_ib": 0.0009230888681486249, + "step": 3287 + }, + { + "ce_ib": 5.056491374969482, + "ce_orig": 1.3449504375457764, + "epoch": 0.9452872240995039, + "kl_loss": 0.05251432955265045, + "loss_ib": 0.0010307923657819629, + "step": 3287 + }, + { + "ce_ib": 3.034475803375244, + "ce_orig": 0.3816814422607422, + "epoch": 0.9452872240995039, + "kl_loss": 0.0776752158999443, + "loss_ib": 0.001080199726857245, + "step": 3287 + }, + { + "ce_ib": 2.2858030796051025, + "ce_orig": 0.6284030675888062, + "epoch": 0.9455748076784816, + "kl_loss": 0.022854149341583252, + "loss_ib": 0.00045712178689427674, + "step": 3288 + }, + { + "ce_ib": 2.1414477825164795, + "ce_orig": 0.5412955284118652, + "epoch": 0.9455748076784816, + "kl_loss": 0.032614000141620636, + "loss_ib": 0.0005402847891673446, + "step": 3288 + }, + { + "ce_ib": 4.246842861175537, + "ce_orig": 1.2586779594421387, + "epoch": 0.9455748076784816, + "kl_loss": 0.05077993869781494, + "loss_ib": 0.0009324836428277194, + "step": 3288 + }, + { + "ce_ib": 2.9112253189086914, + "ce_orig": 0.8126984238624573, + "epoch": 0.9455748076784816, + "kl_loss": 0.031020190566778183, + "loss_ib": 0.0006013244274072349, + "step": 3288 + }, + { + "ce_ib": 2.851895809173584, + "ce_orig": 0.5116419792175293, + "epoch": 0.9458623912574592, + "kl_loss": 0.044992055743932724, + "loss_ib": 0.0007351100794039667, + "step": 3289 + }, + { + "ce_ib": 5.0446319580078125, + "ce_orig": 1.6068451404571533, + "epoch": 0.9458623912574592, + "kl_loss": 0.03135033696889877, + "loss_ib": 0.0008179665310308337, + "step": 3289 + }, + { + "ce_ib": 2.5561108589172363, + "ce_orig": 0.4137911796569824, + "epoch": 0.9458623912574592, + "kl_loss": 0.03311869502067566, + "loss_ib": 0.0005867980071343482, + "step": 3289 + }, + { + "ce_ib": 4.91077184677124, + "ce_orig": 0.9101418852806091, + "epoch": 0.9458623912574592, + "kl_loss": 0.05761140212416649, + "loss_ib": 0.0010671912459656596, + "step": 3289 + }, + { + "epoch": 0.9461499748364368, + "grad_norm": 0.10230079293251038, + "learning_rate": 4.0105909738027365e-05, + "loss": 0.8957, + "step": 3290 + }, + { + "ce_ib": 3.265183687210083, + "ce_orig": 0.6869937181472778, + "epoch": 0.9461499748364368, + "kl_loss": 0.05828280746936798, + "loss_ib": 0.0009093464468605816, + "step": 3290 + }, + { + "ce_ib": 3.5313916206359863, + "ce_orig": 0.4313232898712158, + "epoch": 0.9461499748364368, + "kl_loss": 0.08415015041828156, + "loss_ib": 0.001194640644825995, + "step": 3290 + }, + { + "ce_ib": 5.279635906219482, + "ce_orig": 1.7049856185913086, + "epoch": 0.9461499748364368, + "kl_loss": 0.04410446807742119, + "loss_ib": 0.0009690081933513284, + "step": 3290 + }, + { + "ce_ib": 3.6000618934631348, + "ce_orig": 0.7894752621650696, + "epoch": 0.9461499748364368, + "kl_loss": 0.04084160178899765, + "loss_ib": 0.0007684221491217613, + "step": 3290 + }, + { + "ce_ib": 2.3205809593200684, + "ce_orig": 0.5372558832168579, + "epoch": 0.9464375584154144, + "kl_loss": 0.05397646874189377, + "loss_ib": 0.0007718227570876479, + "step": 3291 + }, + { + "ce_ib": 3.1585991382598877, + "ce_orig": 0.8557496666908264, + "epoch": 0.9464375584154144, + "kl_loss": 0.039819031953811646, + "loss_ib": 0.0007140501984395087, + "step": 3291 + }, + { + "ce_ib": 4.807711124420166, + "ce_orig": 0.7084754109382629, + "epoch": 0.9464375584154144, + "kl_loss": 0.056851230561733246, + "loss_ib": 0.0010492834262549877, + "step": 3291 + }, + { + "ce_ib": 3.6959214210510254, + "ce_orig": 0.8489532470703125, + "epoch": 0.9464375584154144, + "kl_loss": 0.11345624923706055, + "loss_ib": 0.0015041546430438757, + "step": 3291 + }, + { + "ce_ib": 4.306189060211182, + "ce_orig": 0.6962549090385437, + "epoch": 0.9467251419943922, + "kl_loss": 0.054715558886528015, + "loss_ib": 0.0009777744999155402, + "step": 3292 + }, + { + "ce_ib": 3.1382501125335693, + "ce_orig": 0.7339943051338196, + "epoch": 0.9467251419943922, + "kl_loss": 0.04771332070231438, + "loss_ib": 0.0007909582345746458, + "step": 3292 + }, + { + "ce_ib": 2.54524827003479, + "ce_orig": 0.5527498722076416, + "epoch": 0.9467251419943922, + "kl_loss": 0.056243255734443665, + "loss_ib": 0.000816957326605916, + "step": 3292 + }, + { + "ce_ib": 3.073535680770874, + "ce_orig": 0.8677868843078613, + "epoch": 0.9467251419943922, + "kl_loss": 0.03850387781858444, + "loss_ib": 0.0006923923501744866, + "step": 3292 + }, + { + "ce_ib": 2.394094705581665, + "ce_orig": 0.5575835108757019, + "epoch": 0.9470127255733698, + "kl_loss": 0.05685748904943466, + "loss_ib": 0.0008079843246378005, + "step": 3293 + }, + { + "ce_ib": 3.0511200428009033, + "ce_orig": 0.8538570404052734, + "epoch": 0.9470127255733698, + "kl_loss": 0.055004484951496124, + "loss_ib": 0.0008551568607799709, + "step": 3293 + }, + { + "ce_ib": 3.3267529010772705, + "ce_orig": 0.7014561295509338, + "epoch": 0.9470127255733698, + "kl_loss": 0.051591869443655014, + "loss_ib": 0.0008485940052196383, + "step": 3293 + }, + { + "ce_ib": 3.506239891052246, + "ce_orig": 0.49467790126800537, + "epoch": 0.9470127255733698, + "kl_loss": 0.08592334389686584, + "loss_ib": 0.0012098574079573154, + "step": 3293 + }, + { + "ce_ib": 1.1700360774993896, + "ce_orig": 0.22962629795074463, + "epoch": 0.9473003091523474, + "kl_loss": 0.12142083793878555, + "loss_ib": 0.0013312118826434016, + "step": 3294 + }, + { + "ce_ib": 4.708444118499756, + "ce_orig": 1.1794096231460571, + "epoch": 0.9473003091523474, + "kl_loss": 0.05958818644285202, + "loss_ib": 0.0010667262831702828, + "step": 3294 + }, + { + "ce_ib": 3.35907244682312, + "ce_orig": 0.33785155415534973, + "epoch": 0.9473003091523474, + "kl_loss": 0.09511297196149826, + "loss_ib": 0.001287036924622953, + "step": 3294 + }, + { + "ce_ib": 1.6268678903579712, + "ce_orig": 0.272732675075531, + "epoch": 0.9473003091523474, + "kl_loss": 0.05450838804244995, + "loss_ib": 0.000707770639564842, + "step": 3294 + }, + { + "epoch": 0.947587892731325, + "grad_norm": 0.09916996210813522, + "learning_rate": 4.0074972123231444e-05, + "loss": 0.7974, + "step": 3295 + }, + { + "ce_ib": 2.8014888763427734, + "ce_orig": 0.45114487409591675, + "epoch": 0.947587892731325, + "kl_loss": 0.05759420990943909, + "loss_ib": 0.0008560909773223102, + "step": 3295 + }, + { + "ce_ib": 2.391979694366455, + "ce_orig": 0.5708649158477783, + "epoch": 0.947587892731325, + "kl_loss": 0.0353318527340889, + "loss_ib": 0.0005925165023654699, + "step": 3295 + }, + { + "ce_ib": 2.789954662322998, + "ce_orig": 0.5837699770927429, + "epoch": 0.947587892731325, + "kl_loss": 0.0578540563583374, + "loss_ib": 0.000857536040712148, + "step": 3295 + }, + { + "ce_ib": 2.6346611976623535, + "ce_orig": 0.535047709941864, + "epoch": 0.947587892731325, + "kl_loss": 0.033943962305784225, + "loss_ib": 0.0006029057549312711, + "step": 3295 + }, + { + "ce_ib": 4.542697429656982, + "ce_orig": 0.8958317637443542, + "epoch": 0.9478754763103027, + "kl_loss": 0.07013791054487228, + "loss_ib": 0.001155648846179247, + "step": 3296 + }, + { + "ce_ib": 3.810680627822876, + "ce_orig": 0.978108823299408, + "epoch": 0.9478754763103027, + "kl_loss": 0.04993259534239769, + "loss_ib": 0.000880393956322223, + "step": 3296 + }, + { + "ce_ib": 2.272770881652832, + "ce_orig": 0.4798756539821625, + "epoch": 0.9478754763103027, + "kl_loss": 0.033107731491327286, + "loss_ib": 0.000558354367967695, + "step": 3296 + }, + { + "ce_ib": 2.6873667240142822, + "ce_orig": 0.8391938805580139, + "epoch": 0.9478754763103027, + "kl_loss": 0.04378986358642578, + "loss_ib": 0.0007066352409310639, + "step": 3296 + }, + { + "ce_ib": 2.380596160888672, + "ce_orig": 0.41917684674263, + "epoch": 0.9481630598892803, + "kl_loss": 0.03351770341396332, + "loss_ib": 0.0005732366116717458, + "step": 3297 + }, + { + "ce_ib": 5.507664203643799, + "ce_orig": 0.8728311657905579, + "epoch": 0.9481630598892803, + "kl_loss": 0.057845428586006165, + "loss_ib": 0.0011292207054793835, + "step": 3297 + }, + { + "ce_ib": 4.2253594398498535, + "ce_orig": 0.9812507033348083, + "epoch": 0.9481630598892803, + "kl_loss": 0.053717199712991714, + "loss_ib": 0.0009597079479135573, + "step": 3297 + }, + { + "ce_ib": 4.103152275085449, + "ce_orig": 0.7810499668121338, + "epoch": 0.9481630598892803, + "kl_loss": 0.060096852481365204, + "loss_ib": 0.0010112837189808488, + "step": 3297 + }, + { + "ce_ib": 3.8092172145843506, + "ce_orig": 0.9201093316078186, + "epoch": 0.9484506434682579, + "kl_loss": 0.08860339224338531, + "loss_ib": 0.0012669555144384503, + "step": 3298 + }, + { + "ce_ib": 2.6889936923980713, + "ce_orig": 0.6567607522010803, + "epoch": 0.9484506434682579, + "kl_loss": 0.026473306119441986, + "loss_ib": 0.0005336324102245271, + "step": 3298 + }, + { + "ce_ib": 3.8773696422576904, + "ce_orig": 0.8521678447723389, + "epoch": 0.9484506434682579, + "kl_loss": 0.06427246332168579, + "loss_ib": 0.0010304616298526525, + "step": 3298 + }, + { + "ce_ib": 5.202352523803711, + "ce_orig": 1.4557439088821411, + "epoch": 0.9484506434682579, + "kl_loss": 0.06263437122106552, + "loss_ib": 0.0011465789284557104, + "step": 3298 + }, + { + "ce_ib": 3.1693274974823, + "ce_orig": 0.6818715929985046, + "epoch": 0.9487382270472356, + "kl_loss": 0.10785868763923645, + "loss_ib": 0.0013955195900052786, + "step": 3299 + }, + { + "ce_ib": 3.531665802001953, + "ce_orig": 0.9299750328063965, + "epoch": 0.9487382270472356, + "kl_loss": 0.04641104117035866, + "loss_ib": 0.0008172769448719919, + "step": 3299 + }, + { + "ce_ib": 4.626594066619873, + "ce_orig": 0.824010968208313, + "epoch": 0.9487382270472356, + "kl_loss": 0.04838769882917404, + "loss_ib": 0.0009465363691560924, + "step": 3299 + }, + { + "ce_ib": 4.574785232543945, + "ce_orig": 1.3611829280853271, + "epoch": 0.9487382270472356, + "kl_loss": 0.045428283512592316, + "loss_ib": 0.0009117613662965596, + "step": 3299 + }, + { + "epoch": 0.9490258106262133, + "grad_norm": 0.09769188612699509, + "learning_rate": 4.004399818932871e-05, + "loss": 0.8781, + "step": 3300 + }, + { + "ce_ib": 3.54878830909729, + "ce_orig": 0.8025414347648621, + "epoch": 0.9490258106262133, + "kl_loss": 0.0462782122194767, + "loss_ib": 0.000817660940811038, + "step": 3300 + }, + { + "ce_ib": 4.008702278137207, + "ce_orig": 0.8101702332496643, + "epoch": 0.9490258106262133, + "kl_loss": 0.06594837456941605, + "loss_ib": 0.00106035394128412, + "step": 3300 + }, + { + "ce_ib": 2.4106993675231934, + "ce_orig": 0.49510183930397034, + "epoch": 0.9490258106262133, + "kl_loss": 0.03897685930132866, + "loss_ib": 0.0006308385054580867, + "step": 3300 + }, + { + "ce_ib": 2.5253922939300537, + "ce_orig": 0.567979633808136, + "epoch": 0.9490258106262133, + "kl_loss": 0.02974310703575611, + "loss_ib": 0.000549970252905041, + "step": 3300 + }, + { + "ce_ib": 2.909071683883667, + "ce_orig": 0.710472583770752, + "epoch": 0.9493133942051909, + "kl_loss": 0.022930527105927467, + "loss_ib": 0.0005202124011702836, + "step": 3301 + }, + { + "ce_ib": 3.6505203247070312, + "ce_orig": 0.4833335280418396, + "epoch": 0.9493133942051909, + "kl_loss": 0.04591567441821098, + "loss_ib": 0.0008242087787948549, + "step": 3301 + }, + { + "ce_ib": 5.42113733291626, + "ce_orig": 0.6891701817512512, + "epoch": 0.9493133942051909, + "kl_loss": 0.06730972230434418, + "loss_ib": 0.0012152108829468489, + "step": 3301 + }, + { + "ce_ib": 2.963268756866455, + "ce_orig": 0.8665474057197571, + "epoch": 0.9493133942051909, + "kl_loss": 0.037072502076625824, + "loss_ib": 0.0006670518778264523, + "step": 3301 + }, + { + "ce_ib": 4.508488655090332, + "ce_orig": 1.2703129053115845, + "epoch": 0.9496009777841685, + "kl_loss": 0.05268280953168869, + "loss_ib": 0.0009776769438758492, + "step": 3302 + }, + { + "ce_ib": 1.8635623455047607, + "ce_orig": 0.3678600490093231, + "epoch": 0.9496009777841685, + "kl_loss": 0.04336881637573242, + "loss_ib": 0.0006200443604029715, + "step": 3302 + }, + { + "ce_ib": 4.2653279304504395, + "ce_orig": 0.978579580783844, + "epoch": 0.9496009777841685, + "kl_loss": 0.0525507926940918, + "loss_ib": 0.0009520406601950526, + "step": 3302 + }, + { + "ce_ib": 3.730602741241455, + "ce_orig": 1.0672338008880615, + "epoch": 0.9496009777841685, + "kl_loss": 0.031448520720005035, + "loss_ib": 0.0006875454564578831, + "step": 3302 + }, + { + "ce_ib": 5.148703098297119, + "ce_orig": 1.1076338291168213, + "epoch": 0.9498885613631461, + "kl_loss": 0.059449367225170135, + "loss_ib": 0.0011093639768660069, + "step": 3303 + }, + { + "ce_ib": 3.765798807144165, + "ce_orig": 1.0432746410369873, + "epoch": 0.9498885613631461, + "kl_loss": 0.04122539609670639, + "loss_ib": 0.000788833771366626, + "step": 3303 + }, + { + "ce_ib": 3.133355140686035, + "ce_orig": 0.7185429930686951, + "epoch": 0.9498885613631461, + "kl_loss": 0.044256359338760376, + "loss_ib": 0.0007558990619145334, + "step": 3303 + }, + { + "ce_ib": 5.634961128234863, + "ce_orig": 1.6078860759735107, + "epoch": 0.9498885613631461, + "kl_loss": 0.05524230748414993, + "loss_ib": 0.0011159192072227597, + "step": 3303 + }, + { + "ce_ib": 2.7411389350891113, + "ce_orig": 0.4367649257183075, + "epoch": 0.9501761449421238, + "kl_loss": 0.05487235635519028, + "loss_ib": 0.0008228374063037336, + "step": 3304 + }, + { + "ce_ib": 4.53033447265625, + "ce_orig": 1.3492614030838013, + "epoch": 0.9501761449421238, + "kl_loss": 0.04831996187567711, + "loss_ib": 0.0009362330893054605, + "step": 3304 + }, + { + "ce_ib": 4.517089366912842, + "ce_orig": 1.0524044036865234, + "epoch": 0.9501761449421238, + "kl_loss": 0.04575635492801666, + "loss_ib": 0.0009092724067158997, + "step": 3304 + }, + { + "ce_ib": 3.9811737537384033, + "ce_orig": 0.7069242596626282, + "epoch": 0.9501761449421238, + "kl_loss": 0.042103830724954605, + "loss_ib": 0.0008191557135432959, + "step": 3304 + }, + { + "epoch": 0.9504637285211014, + "grad_norm": 0.09712526947259903, + "learning_rate": 4.001298801094254e-05, + "loss": 0.8088, + "step": 3305 + }, + { + "ce_ib": 2.476193904876709, + "ce_orig": 0.733921468257904, + "epoch": 0.9504637285211014, + "kl_loss": 0.054108455777168274, + "loss_ib": 0.0007887039100751281, + "step": 3305 + }, + { + "ce_ib": 1.9120566844940186, + "ce_orig": 0.5141152143478394, + "epoch": 0.9504637285211014, + "kl_loss": 0.2110135853290558, + "loss_ib": 0.0023013416212052107, + "step": 3305 + }, + { + "ce_ib": 5.655270576477051, + "ce_orig": 1.2823801040649414, + "epoch": 0.9504637285211014, + "kl_loss": 0.06729704141616821, + "loss_ib": 0.0012384974397718906, + "step": 3305 + }, + { + "ce_ib": 6.001824378967285, + "ce_orig": 1.9320709705352783, + "epoch": 0.9504637285211014, + "kl_loss": 0.04201715439558029, + "loss_ib": 0.0010203539859503508, + "step": 3305 + }, + { + "ce_ib": 2.16082763671875, + "ce_orig": 0.5519272685050964, + "epoch": 0.9507513121000791, + "kl_loss": 0.03434494137763977, + "loss_ib": 0.000559532199986279, + "step": 3306 + }, + { + "ce_ib": 1.578354835510254, + "ce_orig": 0.38614076375961304, + "epoch": 0.9507513121000791, + "kl_loss": 0.02829236537218094, + "loss_ib": 0.0004407591186463833, + "step": 3306 + }, + { + "ce_ib": 4.007180690765381, + "ce_orig": 1.0192790031433105, + "epoch": 0.9507513121000791, + "kl_loss": 0.07709333300590515, + "loss_ib": 0.0011716514127328992, + "step": 3306 + }, + { + "ce_ib": 5.014314651489258, + "ce_orig": 1.4488232135772705, + "epoch": 0.9507513121000791, + "kl_loss": 0.07264548540115356, + "loss_ib": 0.0012278862996026874, + "step": 3306 + }, + { + "ce_ib": 2.1881558895111084, + "ce_orig": 0.5628747344017029, + "epoch": 0.9510388956790568, + "kl_loss": 0.041781194508075714, + "loss_ib": 0.0006366274901665747, + "step": 3307 + }, + { + "ce_ib": 3.5604522228240967, + "ce_orig": 0.8984759449958801, + "epoch": 0.9510388956790568, + "kl_loss": 0.026902128010988235, + "loss_ib": 0.0006250664591789246, + "step": 3307 + }, + { + "ce_ib": 2.771876335144043, + "ce_orig": 0.685585618019104, + "epoch": 0.9510388956790568, + "kl_loss": 0.10162356495857239, + "loss_ib": 0.0012934233527630568, + "step": 3307 + }, + { + "ce_ib": 2.2143242359161377, + "ce_orig": 0.36391469836235046, + "epoch": 0.9510388956790568, + "kl_loss": 0.026044290512800217, + "loss_ib": 0.0004818752931896597, + "step": 3307 + }, + { + "ce_ib": 3.292417526245117, + "ce_orig": 0.6281090378761292, + "epoch": 0.9513264792580344, + "kl_loss": 0.06025203317403793, + "loss_ib": 0.0009317620424553752, + "step": 3308 + }, + { + "ce_ib": 4.653890609741211, + "ce_orig": 0.9383089542388916, + "epoch": 0.9513264792580344, + "kl_loss": 0.05439828708767891, + "loss_ib": 0.001009371830150485, + "step": 3308 + }, + { + "ce_ib": 2.4556615352630615, + "ce_orig": 0.5208185911178589, + "epoch": 0.9513264792580344, + "kl_loss": 0.03260594606399536, + "loss_ib": 0.0005716255982406437, + "step": 3308 + }, + { + "ce_ib": 2.8439786434173584, + "ce_orig": 0.8544073104858398, + "epoch": 0.9513264792580344, + "kl_loss": 0.052760664373636246, + "loss_ib": 0.00081200449494645, + "step": 3308 + }, + { + "ce_ib": 3.6638870239257812, + "ce_orig": 0.6267813444137573, + "epoch": 0.951614062837012, + "kl_loss": 0.07843577861785889, + "loss_ib": 0.0011507464805617929, + "step": 3309 + }, + { + "ce_ib": 3.8434250354766846, + "ce_orig": 1.0797085762023926, + "epoch": 0.951614062837012, + "kl_loss": 0.04456816986203194, + "loss_ib": 0.0008300241897813976, + "step": 3309 + }, + { + "ce_ib": 4.628203868865967, + "ce_orig": 1.0802152156829834, + "epoch": 0.951614062837012, + "kl_loss": 0.04243587329983711, + "loss_ib": 0.0008871790487319231, + "step": 3309 + }, + { + "ce_ib": 4.152618885040283, + "ce_orig": 0.999318540096283, + "epoch": 0.951614062837012, + "kl_loss": 0.038692906498909, + "loss_ib": 0.0008021908579394221, + "step": 3309 + }, + { + "epoch": 0.9519016464159896, + "grad_norm": 0.09750837832689285, + "learning_rate": 3.9981941662783674e-05, + "loss": 0.8422, + "step": 3310 + }, + { + "ce_ib": 4.082005977630615, + "ce_orig": 1.1080774068832397, + "epoch": 0.9519016464159896, + "kl_loss": 0.044805530458688736, + "loss_ib": 0.0008562558796256781, + "step": 3310 + }, + { + "ce_ib": 2.400665044784546, + "ce_orig": 0.06553597003221512, + "epoch": 0.9519016464159896, + "kl_loss": 0.15606895089149475, + "loss_ib": 0.0018007559701800346, + "step": 3310 + }, + { + "ce_ib": 2.839587450027466, + "ce_orig": 0.7130232453346252, + "epoch": 0.9519016464159896, + "kl_loss": 0.044987551867961884, + "loss_ib": 0.0007338342838920653, + "step": 3310 + }, + { + "ce_ib": 2.428694248199463, + "ce_orig": 0.6310423016548157, + "epoch": 0.9519016464159896, + "kl_loss": 0.037184737622737885, + "loss_ib": 0.0006147167878225446, + "step": 3310 + }, + { + "ce_ib": 3.0369863510131836, + "ce_orig": 0.6304922699928284, + "epoch": 0.9521892299949672, + "kl_loss": 0.037453219294548035, + "loss_ib": 0.0006782308337278664, + "step": 3311 + }, + { + "ce_ib": 5.273410320281982, + "ce_orig": 1.0739023685455322, + "epoch": 0.9521892299949672, + "kl_loss": 0.048696160316467285, + "loss_ib": 0.0010143026011064649, + "step": 3311 + }, + { + "ce_ib": 4.207910537719727, + "ce_orig": 1.0850634574890137, + "epoch": 0.9521892299949672, + "kl_loss": 0.03672610595822334, + "loss_ib": 0.0007880520424805582, + "step": 3311 + }, + { + "ce_ib": 2.525604009628296, + "ce_orig": 0.5841779112815857, + "epoch": 0.9521892299949672, + "kl_loss": 0.04431122541427612, + "loss_ib": 0.0006956726429052651, + "step": 3311 + }, + { + "ce_ib": 3.133317470550537, + "ce_orig": 0.5294228792190552, + "epoch": 0.952476813573945, + "kl_loss": 0.024207277223467827, + "loss_ib": 0.0005554044619202614, + "step": 3312 + }, + { + "ce_ib": 3.7946438789367676, + "ce_orig": 1.018708348274231, + "epoch": 0.952476813573945, + "kl_loss": 0.0480000376701355, + "loss_ib": 0.0008594646933488548, + "step": 3312 + }, + { + "ce_ib": 2.9256629943847656, + "ce_orig": 0.481580913066864, + "epoch": 0.952476813573945, + "kl_loss": 0.04695947468280792, + "loss_ib": 0.0007621609838679433, + "step": 3312 + }, + { + "ce_ib": 3.940520763397217, + "ce_orig": 0.7947948575019836, + "epoch": 0.952476813573945, + "kl_loss": 0.0690661072731018, + "loss_ib": 0.0010847131488844752, + "step": 3312 + }, + { + "ce_ib": 2.4490578174591064, + "ce_orig": 0.7714560627937317, + "epoch": 0.9527643971529226, + "kl_loss": 0.03453740477561951, + "loss_ib": 0.000590279814787209, + "step": 3313 + }, + { + "ce_ib": 4.263663291931152, + "ce_orig": 1.1804726123809814, + "epoch": 0.9527643971529226, + "kl_loss": 0.039490707218647, + "loss_ib": 0.0008212733664549887, + "step": 3313 + }, + { + "ce_ib": 4.0822224617004395, + "ce_orig": 1.0177274942398071, + "epoch": 0.9527643971529226, + "kl_loss": 0.03333413600921631, + "loss_ib": 0.0007415635627694428, + "step": 3313 + }, + { + "ce_ib": 4.801853656768799, + "ce_orig": 0.688370406627655, + "epoch": 0.9527643971529226, + "kl_loss": 0.039018820971250534, + "loss_ib": 0.0008703735074959695, + "step": 3313 + }, + { + "ce_ib": 3.643350124359131, + "ce_orig": 0.9955623745918274, + "epoch": 0.9530519807319002, + "kl_loss": 0.04966704919934273, + "loss_ib": 0.0008610054501332343, + "step": 3314 + }, + { + "ce_ib": 4.51703405380249, + "ce_orig": 1.0993504524230957, + "epoch": 0.9530519807319002, + "kl_loss": 0.06643283367156982, + "loss_ib": 0.0011160316644236445, + "step": 3314 + }, + { + "ce_ib": 2.551372528076172, + "ce_orig": 0.46226391196250916, + "epoch": 0.9530519807319002, + "kl_loss": 0.03572455048561096, + "loss_ib": 0.0006123827188275754, + "step": 3314 + }, + { + "ce_ib": 4.109044551849365, + "ce_orig": 0.9915465712547302, + "epoch": 0.9530519807319002, + "kl_loss": 0.05518002808094025, + "loss_ib": 0.0009627047111280262, + "step": 3314 + }, + { + "epoch": 0.9533395643108779, + "grad_norm": 0.11257080733776093, + "learning_rate": 3.995085921964996e-05, + "loss": 0.8259, + "step": 3315 + }, + { + "ce_ib": 2.3552000522613525, + "ce_orig": 0.5548798441886902, + "epoch": 0.9533395643108779, + "kl_loss": 0.038683995604515076, + "loss_ib": 0.0006223599193617702, + "step": 3315 + }, + { + "ce_ib": 3.5698506832122803, + "ce_orig": 0.9729869365692139, + "epoch": 0.9533395643108779, + "kl_loss": 0.042190831154584885, + "loss_ib": 0.0007788933580741286, + "step": 3315 + }, + { + "ce_ib": 3.868140697479248, + "ce_orig": 0.8765325546264648, + "epoch": 0.9533395643108779, + "kl_loss": 0.0563768669962883, + "loss_ib": 0.000950582732912153, + "step": 3315 + }, + { + "ce_ib": 2.426764726638794, + "ce_orig": 0.5583599209785461, + "epoch": 0.9533395643108779, + "kl_loss": 0.055946916341781616, + "loss_ib": 0.0008021455723792315, + "step": 3315 + }, + { + "ce_ib": 4.986082077026367, + "ce_orig": 1.2603331804275513, + "epoch": 0.9536271478898555, + "kl_loss": 0.06214907765388489, + "loss_ib": 0.001120098982937634, + "step": 3316 + }, + { + "ce_ib": 4.413356781005859, + "ce_orig": 0.7372375130653381, + "epoch": 0.9536271478898555, + "kl_loss": 0.05779525637626648, + "loss_ib": 0.0010192882036790252, + "step": 3316 + }, + { + "ce_ib": 3.378216505050659, + "ce_orig": 1.0423929691314697, + "epoch": 0.9536271478898555, + "kl_loss": 0.04202656447887421, + "loss_ib": 0.0007580873207189143, + "step": 3316 + }, + { + "ce_ib": 4.0613884925842285, + "ce_orig": 0.9954362511634827, + "epoch": 0.9536271478898555, + "kl_loss": 0.05593804270029068, + "loss_ib": 0.0009655192261561751, + "step": 3316 + }, + { + "ce_ib": 4.268248081207275, + "ce_orig": 0.9730849266052246, + "epoch": 0.9539147314688331, + "kl_loss": 0.048742957413196564, + "loss_ib": 0.0009142543422058225, + "step": 3317 + }, + { + "ce_ib": 4.943183422088623, + "ce_orig": 1.150309681892395, + "epoch": 0.9539147314688331, + "kl_loss": 0.08406402170658112, + "loss_ib": 0.0013349584769457579, + "step": 3317 + }, + { + "ce_ib": 3.910717248916626, + "ce_orig": 0.9329489469528198, + "epoch": 0.9539147314688331, + "kl_loss": 0.05913088843226433, + "loss_ib": 0.0009823805885389447, + "step": 3317 + }, + { + "ce_ib": 3.420924186706543, + "ce_orig": 0.9745855927467346, + "epoch": 0.9539147314688331, + "kl_loss": 0.06179948151111603, + "loss_ib": 0.0009600871708244085, + "step": 3317 + }, + { + "ce_ib": 2.2824413776397705, + "ce_orig": 0.5687135457992554, + "epoch": 0.9542023150478107, + "kl_loss": 0.04373429715633392, + "loss_ib": 0.0006655870820395648, + "step": 3318 + }, + { + "ce_ib": 3.1705446243286133, + "ce_orig": 0.5858557224273682, + "epoch": 0.9542023150478107, + "kl_loss": 0.07257789373397827, + "loss_ib": 0.0010428334353491664, + "step": 3318 + }, + { + "ce_ib": 7.135806083679199, + "ce_orig": 1.6343177556991577, + "epoch": 0.9542023150478107, + "kl_loss": 0.040018126368522644, + "loss_ib": 0.0011137619148939848, + "step": 3318 + }, + { + "ce_ib": 4.6126179695129395, + "ce_orig": 0.9051647782325745, + "epoch": 0.9542023150478107, + "kl_loss": 0.0463215708732605, + "loss_ib": 0.0009244774701073766, + "step": 3318 + }, + { + "ce_ib": 3.4054434299468994, + "ce_orig": 0.8011272549629211, + "epoch": 0.9544898986267885, + "kl_loss": 0.047224149107933044, + "loss_ib": 0.0008127857581712306, + "step": 3319 + }, + { + "ce_ib": 4.043957710266113, + "ce_orig": 1.1306073665618896, + "epoch": 0.9544898986267885, + "kl_loss": 0.05992526561021805, + "loss_ib": 0.0010036483872681856, + "step": 3319 + }, + { + "ce_ib": 3.8109560012817383, + "ce_orig": 0.9334621429443359, + "epoch": 0.9544898986267885, + "kl_loss": 0.0353146456182003, + "loss_ib": 0.0007342420285567641, + "step": 3319 + }, + { + "ce_ib": 4.583680629730225, + "ce_orig": 1.048335075378418, + "epoch": 0.9544898986267885, + "kl_loss": 0.051085665822029114, + "loss_ib": 0.0009692247258499265, + "step": 3319 + }, + { + "epoch": 0.9547774822057661, + "grad_norm": 0.09274253249168396, + "learning_rate": 3.991974075642621e-05, + "loss": 0.9065, + "step": 3320 + }, + { + "ce_ib": 3.0579445362091064, + "ce_orig": 0.6927348375320435, + "epoch": 0.9547774822057661, + "kl_loss": 0.041871294379234314, + "loss_ib": 0.0007245073793455958, + "step": 3320 + }, + { + "ce_ib": 3.790905475616455, + "ce_orig": 0.3857501447200775, + "epoch": 0.9547774822057661, + "kl_loss": 0.04291866347193718, + "loss_ib": 0.0008082771091721952, + "step": 3320 + }, + { + "ce_ib": 4.10317325592041, + "ce_orig": 0.9034391045570374, + "epoch": 0.9547774822057661, + "kl_loss": 0.08684119582176208, + "loss_ib": 0.0012787292944267392, + "step": 3320 + }, + { + "ce_ib": 4.353842735290527, + "ce_orig": 0.9078669548034668, + "epoch": 0.9547774822057661, + "kl_loss": 0.04098047688603401, + "loss_ib": 0.0008451889734715223, + "step": 3320 + }, + { + "ce_ib": 3.7112479209899902, + "ce_orig": 0.7183680534362793, + "epoch": 0.9550650657847437, + "kl_loss": 0.069898821413517, + "loss_ib": 0.001070113037712872, + "step": 3321 + }, + { + "ce_ib": 5.073823928833008, + "ce_orig": 1.4471426010131836, + "epoch": 0.9550650657847437, + "kl_loss": 0.0471402108669281, + "loss_ib": 0.0009787845192477107, + "step": 3321 + }, + { + "ce_ib": 2.801347255706787, + "ce_orig": 0.49451500177383423, + "epoch": 0.9550650657847437, + "kl_loss": 0.04747430235147476, + "loss_ib": 0.0007548777502961457, + "step": 3321 + }, + { + "ce_ib": 3.6581597328186035, + "ce_orig": 0.7139821648597717, + "epoch": 0.9550650657847437, + "kl_loss": 0.06738510727882385, + "loss_ib": 0.0010396670550107956, + "step": 3321 + }, + { + "ce_ib": 2.8410069942474365, + "ce_orig": 0.5353663563728333, + "epoch": 0.9553526493637213, + "kl_loss": 0.05403266102075577, + "loss_ib": 0.000824427290353924, + "step": 3322 + }, + { + "ce_ib": 3.3238749504089355, + "ce_orig": 1.027436375617981, + "epoch": 0.9553526493637213, + "kl_loss": 0.04390087351202965, + "loss_ib": 0.000771396211348474, + "step": 3322 + }, + { + "ce_ib": 3.425027370452881, + "ce_orig": 0.7209321856498718, + "epoch": 0.9553526493637213, + "kl_loss": 0.039335377514362335, + "loss_ib": 0.0007358564762398601, + "step": 3322 + }, + { + "ce_ib": 5.376399040222168, + "ce_orig": 1.3109036684036255, + "epoch": 0.9553526493637213, + "kl_loss": 0.06592723727226257, + "loss_ib": 0.001196912257000804, + "step": 3322 + }, + { + "ce_ib": 4.134073734283447, + "ce_orig": 1.0804953575134277, + "epoch": 0.955640232942699, + "kl_loss": 0.12415983527898788, + "loss_ib": 0.0016550056170672178, + "step": 3323 + }, + { + "ce_ib": 5.448294162750244, + "ce_orig": 1.3745142221450806, + "epoch": 0.955640232942699, + "kl_loss": 0.05508885532617569, + "loss_ib": 0.0010957180056720972, + "step": 3323 + }, + { + "ce_ib": 3.5783255100250244, + "ce_orig": 0.7792356014251709, + "epoch": 0.955640232942699, + "kl_loss": 0.0573904886841774, + "loss_ib": 0.0009317374206148088, + "step": 3323 + }, + { + "ce_ib": 1.6077038049697876, + "ce_orig": 0.2617127597332001, + "epoch": 0.955640232942699, + "kl_loss": 0.101617231965065, + "loss_ib": 0.001176942721940577, + "step": 3323 + }, + { + "ce_ib": 2.7071220874786377, + "ce_orig": 0.5779938101768494, + "epoch": 0.9559278165216766, + "kl_loss": 0.04898412525653839, + "loss_ib": 0.0007605534628964961, + "step": 3324 + }, + { + "ce_ib": 2.2469284534454346, + "ce_orig": 0.6813217997550964, + "epoch": 0.9559278165216766, + "kl_loss": 0.0363704115152359, + "loss_ib": 0.0005883969133719802, + "step": 3324 + }, + { + "ce_ib": 3.3160507678985596, + "ce_orig": 0.6790832877159119, + "epoch": 0.9559278165216766, + "kl_loss": 0.0579163134098053, + "loss_ib": 0.0009107681689783931, + "step": 3324 + }, + { + "ce_ib": 3.6831657886505127, + "ce_orig": 0.8561438918113708, + "epoch": 0.9559278165216766, + "kl_loss": 0.05446704477071762, + "loss_ib": 0.0009129869868047535, + "step": 3324 + }, + { + "epoch": 0.9562154001006542, + "grad_norm": 0.10077639669179916, + "learning_rate": 3.9888586348084034e-05, + "loss": 0.8726, + "step": 3325 + }, + { + "ce_ib": 4.091396808624268, + "ce_orig": 0.854426920413971, + "epoch": 0.9562154001006542, + "kl_loss": 0.04942527413368225, + "loss_ib": 0.0009033923852257431, + "step": 3325 + }, + { + "ce_ib": 3.1576430797576904, + "ce_orig": 0.7895709872245789, + "epoch": 0.9562154001006542, + "kl_loss": 0.04490992799401283, + "loss_ib": 0.0007648635655641556, + "step": 3325 + }, + { + "ce_ib": 3.5209295749664307, + "ce_orig": 0.664503276348114, + "epoch": 0.9562154001006542, + "kl_loss": 0.07233166694641113, + "loss_ib": 0.001075409585610032, + "step": 3325 + }, + { + "ce_ib": 1.647375464439392, + "ce_orig": 0.25897958874702454, + "epoch": 0.9562154001006542, + "kl_loss": 0.124590203166008, + "loss_ib": 0.001410639495588839, + "step": 3325 + }, + { + "ce_ib": 3.9388020038604736, + "ce_orig": 0.49382826685905457, + "epoch": 0.9565029836796319, + "kl_loss": 0.06026426702737808, + "loss_ib": 0.0009965228382498026, + "step": 3326 + }, + { + "ce_ib": 2.996541738510132, + "ce_orig": 0.841587245464325, + "epoch": 0.9565029836796319, + "kl_loss": 0.11470133066177368, + "loss_ib": 0.0014466674765571952, + "step": 3326 + }, + { + "ce_ib": 3.3112118244171143, + "ce_orig": 0.9793955087661743, + "epoch": 0.9565029836796319, + "kl_loss": 0.042807336896657944, + "loss_ib": 0.0007591944886371493, + "step": 3326 + }, + { + "ce_ib": 3.4789364337921143, + "ce_orig": 1.0738499164581299, + "epoch": 0.9565029836796319, + "kl_loss": 0.04971793293952942, + "loss_ib": 0.0008450730238109827, + "step": 3326 + }, + { + "ce_ib": 2.8763680458068848, + "ce_orig": 0.7529290914535522, + "epoch": 0.9567905672586096, + "kl_loss": 0.03855911269783974, + "loss_ib": 0.0006732278852723539, + "step": 3327 + }, + { + "ce_ib": 5.180582523345947, + "ce_orig": 1.174481987953186, + "epoch": 0.9567905672586096, + "kl_loss": 0.07014869153499603, + "loss_ib": 0.0012195451417937875, + "step": 3327 + }, + { + "ce_ib": 3.249824047088623, + "ce_orig": 0.6468319892883301, + "epoch": 0.9567905672586096, + "kl_loss": 0.05747479572892189, + "loss_ib": 0.0008997303084470332, + "step": 3327 + }, + { + "ce_ib": 2.7419235706329346, + "ce_orig": 0.8708519339561462, + "epoch": 0.9567905672586096, + "kl_loss": 0.05307390168309212, + "loss_ib": 0.0008049313328228891, + "step": 3327 + }, + { + "ce_ib": 5.055845260620117, + "ce_orig": 0.8993709683418274, + "epoch": 0.9570781508375872, + "kl_loss": 0.07151477038860321, + "loss_ib": 0.001220732112415135, + "step": 3328 + }, + { + "ce_ib": 3.354062795639038, + "ce_orig": 0.808264434337616, + "epoch": 0.9570781508375872, + "kl_loss": 0.07154849171638489, + "loss_ib": 0.0010508912382647395, + "step": 3328 + }, + { + "ce_ib": 2.1190896034240723, + "ce_orig": 0.504993736743927, + "epoch": 0.9570781508375872, + "kl_loss": 0.06880832463502884, + "loss_ib": 0.0008999921847134829, + "step": 3328 + }, + { + "ce_ib": 3.119743824005127, + "ce_orig": 0.5982366800308228, + "epoch": 0.9570781508375872, + "kl_loss": 0.04411579668521881, + "loss_ib": 0.0007531323353759944, + "step": 3328 + }, + { + "ce_ib": 3.388723611831665, + "ce_orig": 0.7691148519515991, + "epoch": 0.9573657344165648, + "kl_loss": 0.06492412090301514, + "loss_ib": 0.000988113577477634, + "step": 3329 + }, + { + "ce_ib": 2.756649971008301, + "ce_orig": 0.4655248820781708, + "epoch": 0.9573657344165648, + "kl_loss": 0.03863568603992462, + "loss_ib": 0.000662021862808615, + "step": 3329 + }, + { + "ce_ib": 4.649877548217773, + "ce_orig": 1.3403980731964111, + "epoch": 0.9573657344165648, + "kl_loss": 0.046752020716667175, + "loss_ib": 0.0009325079154223204, + "step": 3329 + }, + { + "ce_ib": 5.6523823738098145, + "ce_orig": 1.5786595344543457, + "epoch": 0.9573657344165648, + "kl_loss": 0.0321899950504303, + "loss_ib": 0.0008871381287463009, + "step": 3329 + }, + { + "epoch": 0.9576533179955424, + "grad_norm": 0.10159970074892044, + "learning_rate": 3.985739606968163e-05, + "loss": 0.8311, + "step": 3330 + }, + { + "ce_ib": 5.156787872314453, + "ce_orig": 1.1215494871139526, + "epoch": 0.9576533179955424, + "kl_loss": 0.047608524560928345, + "loss_ib": 0.0009917640127241611, + "step": 3330 + }, + { + "ce_ib": 3.5255985260009766, + "ce_orig": 0.998247504234314, + "epoch": 0.9576533179955424, + "kl_loss": 0.04764954000711441, + "loss_ib": 0.0008290552068501711, + "step": 3330 + }, + { + "ce_ib": 3.941854476928711, + "ce_orig": 1.043226718902588, + "epoch": 0.9576533179955424, + "kl_loss": 0.05710917338728905, + "loss_ib": 0.000965277140494436, + "step": 3330 + }, + { + "ce_ib": 3.6915926933288574, + "ce_orig": 0.8356548547744751, + "epoch": 0.9576533179955424, + "kl_loss": 0.050601616501808167, + "loss_ib": 0.0008751754066906869, + "step": 3330 + }, + { + "ce_ib": 3.700395345687866, + "ce_orig": 0.8902326822280884, + "epoch": 0.95794090157452, + "kl_loss": 0.058099761605262756, + "loss_ib": 0.0009510371019132435, + "step": 3331 + }, + { + "ce_ib": 2.5921545028686523, + "ce_orig": 0.369320809841156, + "epoch": 0.95794090157452, + "kl_loss": 0.04174943268299103, + "loss_ib": 0.0006767097511328757, + "step": 3331 + }, + { + "ce_ib": 2.2487707138061523, + "ce_orig": 0.49278783798217773, + "epoch": 0.95794090157452, + "kl_loss": 0.044621601700782776, + "loss_ib": 0.0006710930611006916, + "step": 3331 + }, + { + "ce_ib": 3.253249406814575, + "ce_orig": 0.8103417158126831, + "epoch": 0.95794090157452, + "kl_loss": 0.058646537363529205, + "loss_ib": 0.0009117902372963727, + "step": 3331 + }, + { + "ce_ib": 2.683657646179199, + "ce_orig": 0.6383242607116699, + "epoch": 0.9582284851534978, + "kl_loss": 0.036067377775907516, + "loss_ib": 0.0006290395394898951, + "step": 3332 + }, + { + "ce_ib": 2.5859100818634033, + "ce_orig": 0.5156968832015991, + "epoch": 0.9582284851534978, + "kl_loss": 0.04130181297659874, + "loss_ib": 0.0006716091302223504, + "step": 3332 + }, + { + "ce_ib": 2.206023931503296, + "ce_orig": 0.6124736070632935, + "epoch": 0.9582284851534978, + "kl_loss": 0.04382312297821045, + "loss_ib": 0.0006588335963897407, + "step": 3332 + }, + { + "ce_ib": 2.8198978900909424, + "ce_orig": 0.7826844453811646, + "epoch": 0.9582284851534978, + "kl_loss": 0.058420680463314056, + "loss_ib": 0.00086619658395648, + "step": 3332 + }, + { + "ce_ib": 2.7328617572784424, + "ce_orig": 0.8686415553092957, + "epoch": 0.9585160687324754, + "kl_loss": 0.04470367729663849, + "loss_ib": 0.0007203229470178485, + "step": 3333 + }, + { + "ce_ib": 2.499614953994751, + "ce_orig": 0.3908523917198181, + "epoch": 0.9585160687324754, + "kl_loss": 0.06546856462955475, + "loss_ib": 0.0009046471095643938, + "step": 3333 + }, + { + "ce_ib": 4.297549247741699, + "ce_orig": 1.0018614530563354, + "epoch": 0.9585160687324754, + "kl_loss": 0.05088311433792114, + "loss_ib": 0.0009385860757902265, + "step": 3333 + }, + { + "ce_ib": 3.402106285095215, + "ce_orig": 1.023643970489502, + "epoch": 0.9585160687324754, + "kl_loss": 0.03950139507651329, + "loss_ib": 0.0007352245156653225, + "step": 3333 + }, + { + "ce_ib": 2.5492565631866455, + "ce_orig": 0.5887293815612793, + "epoch": 0.958803652311453, + "kl_loss": 0.03511230647563934, + "loss_ib": 0.0006060486775822937, + "step": 3334 + }, + { + "ce_ib": 3.9251368045806885, + "ce_orig": 0.964262843132019, + "epoch": 0.958803652311453, + "kl_loss": 0.05698467046022415, + "loss_ib": 0.0009623603546060622, + "step": 3334 + }, + { + "ce_ib": 3.1321535110473633, + "ce_orig": 0.9762880206108093, + "epoch": 0.958803652311453, + "kl_loss": 0.026099463924765587, + "loss_ib": 0.0005742099601775408, + "step": 3334 + }, + { + "ce_ib": 4.240932464599609, + "ce_orig": 1.405016303062439, + "epoch": 0.958803652311453, + "kl_loss": 0.068762868642807, + "loss_ib": 0.0011117218527942896, + "step": 3334 + }, + { + "epoch": 0.9590912358904307, + "grad_norm": 0.10540676862001419, + "learning_rate": 3.982616999636362e-05, + "loss": 0.8311, + "step": 3335 + }, + { + "ce_ib": 7.3248372077941895, + "ce_orig": 1.072213888168335, + "epoch": 0.9590912358904307, + "kl_loss": 0.058634333312511444, + "loss_ib": 0.0013188269222155213, + "step": 3335 + }, + { + "ce_ib": 6.613680362701416, + "ce_orig": 1.8567731380462646, + "epoch": 0.9590912358904307, + "kl_loss": 0.07156942784786224, + "loss_ib": 0.0013770621735602617, + "step": 3335 + }, + { + "ce_ib": 2.3205060958862305, + "ce_orig": 0.6802839636802673, + "epoch": 0.9590912358904307, + "kl_loss": 0.05234700068831444, + "loss_ib": 0.000755520595703274, + "step": 3335 + }, + { + "ce_ib": 4.143457889556885, + "ce_orig": 1.0678775310516357, + "epoch": 0.9590912358904307, + "kl_loss": 0.04179341346025467, + "loss_ib": 0.0008322799112647772, + "step": 3335 + }, + { + "ce_ib": 3.1172640323638916, + "ce_orig": 0.5829916000366211, + "epoch": 0.9593788194694083, + "kl_loss": 0.05598611384630203, + "loss_ib": 0.0008715875446796417, + "step": 3336 + }, + { + "ce_ib": 3.1662604808807373, + "ce_orig": 0.845909833908081, + "epoch": 0.9593788194694083, + "kl_loss": 0.08611778914928436, + "loss_ib": 0.0011778039624914527, + "step": 3336 + }, + { + "ce_ib": 6.501156806945801, + "ce_orig": 0.9109855890274048, + "epoch": 0.9593788194694083, + "kl_loss": 0.05555303394794464, + "loss_ib": 0.0012056459672749043, + "step": 3336 + }, + { + "ce_ib": 3.238650321960449, + "ce_orig": 0.7006947994232178, + "epoch": 0.9593788194694083, + "kl_loss": 0.04661693051457405, + "loss_ib": 0.0007900343625806272, + "step": 3336 + }, + { + "ce_ib": 4.6133317947387695, + "ce_orig": 1.4189668893814087, + "epoch": 0.9596664030483859, + "kl_loss": 0.06520473957061768, + "loss_ib": 0.0011133805382996798, + "step": 3337 + }, + { + "ce_ib": 5.174102783203125, + "ce_orig": 1.4966472387313843, + "epoch": 0.9596664030483859, + "kl_loss": 0.06517688930034637, + "loss_ib": 0.0011691791005432606, + "step": 3337 + }, + { + "ce_ib": 5.029361724853516, + "ce_orig": 1.0786433219909668, + "epoch": 0.9596664030483859, + "kl_loss": 0.061418723315000534, + "loss_ib": 0.0011171232908964157, + "step": 3337 + }, + { + "ce_ib": 3.3337676525115967, + "ce_orig": 0.7570855021476746, + "epoch": 0.9596664030483859, + "kl_loss": 0.06008101999759674, + "loss_ib": 0.0009341869154013693, + "step": 3337 + }, + { + "ce_ib": 5.406108379364014, + "ce_orig": 1.1876932382583618, + "epoch": 0.9599539866273635, + "kl_loss": 0.05668395012617111, + "loss_ib": 0.0011074502253904939, + "step": 3338 + }, + { + "ce_ib": 3.9392709732055664, + "ce_orig": 1.065526008605957, + "epoch": 0.9599539866273635, + "kl_loss": 0.04638552665710449, + "loss_ib": 0.0008577823173254728, + "step": 3338 + }, + { + "ce_ib": 3.9322025775909424, + "ce_orig": 0.8414658904075623, + "epoch": 0.9599539866273635, + "kl_loss": 0.08149340748786926, + "loss_ib": 0.0012081542517989874, + "step": 3338 + }, + { + "ce_ib": 3.918971538543701, + "ce_orig": 0.899490237236023, + "epoch": 0.9599539866273635, + "kl_loss": 0.04330256208777428, + "loss_ib": 0.0008249227539636195, + "step": 3338 + }, + { + "ce_ib": 2.2589356899261475, + "ce_orig": 0.5814642906188965, + "epoch": 0.9602415702063413, + "kl_loss": 0.1286752074956894, + "loss_ib": 0.0015126456273719668, + "step": 3339 + }, + { + "ce_ib": 2.155461072921753, + "ce_orig": 0.6305063962936401, + "epoch": 0.9602415702063413, + "kl_loss": 0.04834376275539398, + "loss_ib": 0.0006989837274886668, + "step": 3339 + }, + { + "ce_ib": 5.204251289367676, + "ce_orig": 0.9730063080787659, + "epoch": 0.9602415702063413, + "kl_loss": 0.06975814700126648, + "loss_ib": 0.0012180065969005227, + "step": 3339 + }, + { + "ce_ib": 4.3086018562316895, + "ce_orig": 1.195293664932251, + "epoch": 0.9602415702063413, + "kl_loss": 0.053185708820819855, + "loss_ib": 0.0009627172257751226, + "step": 3339 + }, + { + "epoch": 0.9605291537853189, + "grad_norm": 0.0926448330283165, + "learning_rate": 3.979490820336086e-05, + "loss": 0.8774, + "step": 3340 + }, + { + "ce_ib": 3.4135327339172363, + "ce_orig": 0.7904138565063477, + "epoch": 0.9605291537853189, + "kl_loss": 0.027383502572774887, + "loss_ib": 0.0006151882698759437, + "step": 3340 + }, + { + "ce_ib": 2.5959103107452393, + "ce_orig": 0.48054423928260803, + "epoch": 0.9605291537853189, + "kl_loss": 0.06197637692093849, + "loss_ib": 0.0008793547749519348, + "step": 3340 + }, + { + "ce_ib": 4.194213390350342, + "ce_orig": 1.0387638807296753, + "epoch": 0.9605291537853189, + "kl_loss": 0.0657658576965332, + "loss_ib": 0.001077079912647605, + "step": 3340 + }, + { + "ce_ib": 3.3722870349884033, + "ce_orig": 0.7806622982025146, + "epoch": 0.9605291537853189, + "kl_loss": 0.05386227369308472, + "loss_ib": 0.000875851430464536, + "step": 3340 + }, + { + "ce_ib": 2.5884876251220703, + "ce_orig": 0.6688674688339233, + "epoch": 0.9608167373642965, + "kl_loss": 0.03787892311811447, + "loss_ib": 0.0006376379751600325, + "step": 3341 + }, + { + "ce_ib": 2.88690185546875, + "ce_orig": 0.42770901322364807, + "epoch": 0.9608167373642965, + "kl_loss": 0.06752348691225052, + "loss_ib": 0.0009639250347390771, + "step": 3341 + }, + { + "ce_ib": 4.1650309562683105, + "ce_orig": 1.0898003578186035, + "epoch": 0.9608167373642965, + "kl_loss": 0.041075557470321655, + "loss_ib": 0.0008272586856037378, + "step": 3341 + }, + { + "ce_ib": 3.475825548171997, + "ce_orig": 0.7106524705886841, + "epoch": 0.9608167373642965, + "kl_loss": 0.07777051627635956, + "loss_ib": 0.0011252877302467823, + "step": 3341 + }, + { + "ce_ib": 2.9376754760742188, + "ce_orig": 0.7748451828956604, + "epoch": 0.9611043209432741, + "kl_loss": 0.033583179116249084, + "loss_ib": 0.0006295993225648999, + "step": 3342 + }, + { + "ce_ib": 1.9801921844482422, + "ce_orig": 0.3960556089878082, + "epoch": 0.9611043209432741, + "kl_loss": 0.08568327128887177, + "loss_ib": 0.0010548519203439355, + "step": 3342 + }, + { + "ce_ib": 2.8099565505981445, + "ce_orig": 0.8014640808105469, + "epoch": 0.9611043209432741, + "kl_loss": 0.027320977300405502, + "loss_ib": 0.000554205384105444, + "step": 3342 + }, + { + "ce_ib": 1.9894520044326782, + "ce_orig": 0.5663857460021973, + "epoch": 0.9611043209432741, + "kl_loss": 0.04090287908911705, + "loss_ib": 0.0006079740123823285, + "step": 3342 + }, + { + "ce_ib": 2.4003565311431885, + "ce_orig": 0.4763808846473694, + "epoch": 0.9613919045222518, + "kl_loss": 0.053983211517333984, + "loss_ib": 0.0007798677543178201, + "step": 3343 + }, + { + "ce_ib": 5.4083356857299805, + "ce_orig": 1.5040303468704224, + "epoch": 0.9613919045222518, + "kl_loss": 0.08639071136713028, + "loss_ib": 0.0014047406148165464, + "step": 3343 + }, + { + "ce_ib": 2.911696434020996, + "ce_orig": 0.6955206990242004, + "epoch": 0.9613919045222518, + "kl_loss": 0.033352240920066833, + "loss_ib": 0.0006246920675039291, + "step": 3343 + }, + { + "ce_ib": 3.2360827922821045, + "ce_orig": 0.8621702790260315, + "epoch": 0.9613919045222518, + "kl_loss": 0.05269714444875717, + "loss_ib": 0.0008505797013640404, + "step": 3343 + }, + { + "ce_ib": 2.2117185592651367, + "ce_orig": 0.33964866399765015, + "epoch": 0.9616794881012294, + "kl_loss": 0.040155887603759766, + "loss_ib": 0.0006227307021617889, + "step": 3344 + }, + { + "ce_ib": 3.220088005065918, + "ce_orig": 0.6639916300773621, + "epoch": 0.9616794881012294, + "kl_loss": 0.05606253445148468, + "loss_ib": 0.0008826341363601387, + "step": 3344 + }, + { + "ce_ib": 4.038930892944336, + "ce_orig": 1.195138692855835, + "epoch": 0.9616794881012294, + "kl_loss": 0.04244042932987213, + "loss_ib": 0.0008282973431050777, + "step": 3344 + }, + { + "ce_ib": 1.5997883081436157, + "ce_orig": 0.2635568380355835, + "epoch": 0.9616794881012294, + "kl_loss": 0.11964649707078934, + "loss_ib": 0.0013564437394961715, + "step": 3344 + }, + { + "epoch": 0.961967071680207, + "grad_norm": 0.10729533433914185, + "learning_rate": 3.976361076599027e-05, + "loss": 0.7348, + "step": 3345 + }, + { + "ce_ib": 3.780261278152466, + "ce_orig": 1.0877994298934937, + "epoch": 0.961967071680207, + "kl_loss": 0.05290921404957771, + "loss_ib": 0.0009071181993931532, + "step": 3345 + }, + { + "ce_ib": 4.33595609664917, + "ce_orig": 0.9496873021125793, + "epoch": 0.961967071680207, + "kl_loss": 0.07676616311073303, + "loss_ib": 0.0012012572260573506, + "step": 3345 + }, + { + "ce_ib": 3.680032253265381, + "ce_orig": 0.8047893047332764, + "epoch": 0.961967071680207, + "kl_loss": 0.0551472008228302, + "loss_ib": 0.000919475220143795, + "step": 3345 + }, + { + "ce_ib": 1.5459262132644653, + "ce_orig": 0.2555871605873108, + "epoch": 0.961967071680207, + "kl_loss": 0.12944656610488892, + "loss_ib": 0.001449058298021555, + "step": 3345 + }, + { + "ce_ib": 2.8079259395599365, + "ce_orig": 0.5921444296836853, + "epoch": 0.9622546552591847, + "kl_loss": 0.029385477304458618, + "loss_ib": 0.0005746473325416446, + "step": 3346 + }, + { + "ce_ib": 5.49817419052124, + "ce_orig": 1.5889277458190918, + "epoch": 0.9622546552591847, + "kl_loss": 0.04106473922729492, + "loss_ib": 0.0009604647639207542, + "step": 3346 + }, + { + "ce_ib": 2.834416151046753, + "ce_orig": 0.7552313804626465, + "epoch": 0.9622546552591847, + "kl_loss": 0.03181444853544235, + "loss_ib": 0.0006015860708430409, + "step": 3346 + }, + { + "ce_ib": 4.6220598220825195, + "ce_orig": 0.7681835293769836, + "epoch": 0.9622546552591847, + "kl_loss": 0.07607610523700714, + "loss_ib": 0.0012229670537635684, + "step": 3346 + }, + { + "ce_ib": 2.6737377643585205, + "ce_orig": 0.47497236728668213, + "epoch": 0.9625422388381624, + "kl_loss": 0.018736571073532104, + "loss_ib": 0.00045473946374841034, + "step": 3347 + }, + { + "ce_ib": 3.85270619392395, + "ce_orig": 1.0948556661605835, + "epoch": 0.9625422388381624, + "kl_loss": 0.06850361824035645, + "loss_ib": 0.001070306752808392, + "step": 3347 + }, + { + "ce_ib": 3.34810471534729, + "ce_orig": 0.9159621000289917, + "epoch": 0.9625422388381624, + "kl_loss": 0.040910504758358, + "loss_ib": 0.0007439155015163124, + "step": 3347 + }, + { + "ce_ib": 2.048241138458252, + "ce_orig": 0.7210181951522827, + "epoch": 0.9625422388381624, + "kl_loss": 0.04600673168897629, + "loss_ib": 0.000664891442283988, + "step": 3347 + }, + { + "ce_ib": 4.084762096405029, + "ce_orig": 1.1376248598098755, + "epoch": 0.96282982241714, + "kl_loss": 0.05280522629618645, + "loss_ib": 0.0009365284349769354, + "step": 3348 + }, + { + "ce_ib": 4.75372314453125, + "ce_orig": 1.2947173118591309, + "epoch": 0.96282982241714, + "kl_loss": 0.0739760622382164, + "loss_ib": 0.0012151328846812248, + "step": 3348 + }, + { + "ce_ib": 4.99955940246582, + "ce_orig": 1.4171351194381714, + "epoch": 0.96282982241714, + "kl_loss": 0.06584668159484863, + "loss_ib": 0.0011584226740524173, + "step": 3348 + }, + { + "ce_ib": 2.260032892227173, + "ce_orig": 0.6960614919662476, + "epoch": 0.96282982241714, + "kl_loss": 0.025567088276147842, + "loss_ib": 0.00048167412751354277, + "step": 3348 + }, + { + "ce_ib": 4.31295108795166, + "ce_orig": 0.9280511736869812, + "epoch": 0.9631174059961176, + "kl_loss": 0.06880709528923035, + "loss_ib": 0.0011193660320714116, + "step": 3349 + }, + { + "ce_ib": 3.5666568279266357, + "ce_orig": 0.8861562609672546, + "epoch": 0.9631174059961176, + "kl_loss": 0.057769279927015305, + "loss_ib": 0.0009343584533780813, + "step": 3349 + }, + { + "ce_ib": 2.3724160194396973, + "ce_orig": 0.6814833283424377, + "epoch": 0.9631174059961176, + "kl_loss": 0.037839896976947784, + "loss_ib": 0.0006156405434012413, + "step": 3349 + }, + { + "ce_ib": 3.018800973892212, + "ce_orig": 0.8840894103050232, + "epoch": 0.9631174059961176, + "kl_loss": 0.03519047796726227, + "loss_ib": 0.0006537848385050893, + "step": 3349 + }, + { + "epoch": 0.9634049895750952, + "grad_norm": 0.08868249505758286, + "learning_rate": 3.973227775965464e-05, + "loss": 0.8154, + "step": 3350 + }, + { + "ce_ib": 2.883789300918579, + "ce_orig": 0.767935574054718, + "epoch": 0.9634049895750952, + "kl_loss": 0.04296034574508667, + "loss_ib": 0.000717982358764857, + "step": 3350 + }, + { + "ce_ib": 3.5050995349884033, + "ce_orig": 0.6611614227294922, + "epoch": 0.9634049895750952, + "kl_loss": 0.06343105435371399, + "loss_ib": 0.0009848204208537936, + "step": 3350 + }, + { + "ce_ib": 4.619152545928955, + "ce_orig": 1.207182765007019, + "epoch": 0.9634049895750952, + "kl_loss": 0.05661490932106972, + "loss_ib": 0.0010280642891302705, + "step": 3350 + }, + { + "ce_ib": 3.3195383548736572, + "ce_orig": 0.8764815926551819, + "epoch": 0.9634049895750952, + "kl_loss": 0.03692171350121498, + "loss_ib": 0.0007011709967628121, + "step": 3350 + }, + { + "ce_ib": 5.452233791351318, + "ce_orig": 1.2644139528274536, + "epoch": 0.9636925731540729, + "kl_loss": 0.04642695188522339, + "loss_ib": 0.001009492902085185, + "step": 3351 + }, + { + "ce_ib": 3.836470365524292, + "ce_orig": 0.6802058815956116, + "epoch": 0.9636925731540729, + "kl_loss": 0.0774318128824234, + "loss_ib": 0.0011579651618376374, + "step": 3351 + }, + { + "ce_ib": 4.7805657386779785, + "ce_orig": 1.2558231353759766, + "epoch": 0.9636925731540729, + "kl_loss": 0.03516290336847305, + "loss_ib": 0.0008296855958178639, + "step": 3351 + }, + { + "ce_ib": 3.0156381130218506, + "ce_orig": 0.6509935259819031, + "epoch": 0.9636925731540729, + "kl_loss": 0.06700599193572998, + "loss_ib": 0.0009716236963868141, + "step": 3351 + }, + { + "ce_ib": 3.9334118366241455, + "ce_orig": 0.6742110252380371, + "epoch": 0.9639801567330506, + "kl_loss": 0.07898014783859253, + "loss_ib": 0.001183142652735114, + "step": 3352 + }, + { + "ce_ib": 3.0437123775482178, + "ce_orig": 0.5326513051986694, + "epoch": 0.9639801567330506, + "kl_loss": 0.08668769896030426, + "loss_ib": 0.0011712482664734125, + "step": 3352 + }, + { + "ce_ib": 4.731832981109619, + "ce_orig": 1.0474146604537964, + "epoch": 0.9639801567330506, + "kl_loss": 0.05051310360431671, + "loss_ib": 0.0009783142013475299, + "step": 3352 + }, + { + "ce_ib": 5.551700592041016, + "ce_orig": 1.5688461065292358, + "epoch": 0.9639801567330506, + "kl_loss": 0.05408947914838791, + "loss_ib": 0.0010960648069158196, + "step": 3352 + }, + { + "ce_ib": 3.163296699523926, + "ce_orig": 0.9335260987281799, + "epoch": 0.9642677403120282, + "kl_loss": 0.056603800505399704, + "loss_ib": 0.0008823676034808159, + "step": 3353 + }, + { + "ce_ib": 3.8752682209014893, + "ce_orig": 1.1972798109054565, + "epoch": 0.9642677403120282, + "kl_loss": 0.04552801698446274, + "loss_ib": 0.0008428069995716214, + "step": 3353 + }, + { + "ce_ib": 3.2930490970611572, + "ce_orig": 0.5720186233520508, + "epoch": 0.9642677403120282, + "kl_loss": 0.05484800040721893, + "loss_ib": 0.0008777849143370986, + "step": 3353 + }, + { + "ce_ib": 5.264178276062012, + "ce_orig": 1.551714301109314, + "epoch": 0.9642677403120282, + "kl_loss": 0.05401364713907242, + "loss_ib": 0.0010665543377399445, + "step": 3353 + }, + { + "ce_ib": 4.134627342224121, + "ce_orig": 1.3236181735992432, + "epoch": 0.9645553238910058, + "kl_loss": 0.0404668003320694, + "loss_ib": 0.0008181307348422706, + "step": 3354 + }, + { + "ce_ib": 3.8535053730010986, + "ce_orig": 1.0325125455856323, + "epoch": 0.9645553238910058, + "kl_loss": 0.05717087537050247, + "loss_ib": 0.0009570592665113509, + "step": 3354 + }, + { + "ce_ib": 2.9462685585021973, + "ce_orig": 0.6164489984512329, + "epoch": 0.9645553238910058, + "kl_loss": 0.058323901146650314, + "loss_ib": 0.0008778658229857683, + "step": 3354 + }, + { + "ce_ib": 3.0439765453338623, + "ce_orig": 0.7960587739944458, + "epoch": 0.9645553238910058, + "kl_loss": 0.05859425663948059, + "loss_ib": 0.0008903401903808117, + "step": 3354 + }, + { + "epoch": 0.9648429074699835, + "grad_norm": 0.10244198143482208, + "learning_rate": 3.970090925984244e-05, + "loss": 0.8571, + "step": 3355 + }, + { + "ce_ib": 4.336809158325195, + "ce_orig": 0.5738304853439331, + "epoch": 0.9648429074699835, + "kl_loss": 0.19906944036483765, + "loss_ib": 0.0024243751540780067, + "step": 3355 + }, + { + "ce_ib": 2.538280487060547, + "ce_orig": 0.661300003528595, + "epoch": 0.9648429074699835, + "kl_loss": 0.021813757717609406, + "loss_ib": 0.00047196558443829417, + "step": 3355 + }, + { + "ce_ib": 2.114743947982788, + "ce_orig": 0.428050696849823, + "epoch": 0.9648429074699835, + "kl_loss": 0.05265691876411438, + "loss_ib": 0.0007380435708910227, + "step": 3355 + }, + { + "ce_ib": 2.4689409732818604, + "ce_orig": 0.4102499783039093, + "epoch": 0.9648429074699835, + "kl_loss": 0.034295983612537384, + "loss_ib": 0.0005898539093323052, + "step": 3355 + }, + { + "ce_ib": 3.7140073776245117, + "ce_orig": 0.9849492311477661, + "epoch": 0.9651304910489611, + "kl_loss": 0.052072733640670776, + "loss_ib": 0.0008921280386857688, + "step": 3356 + }, + { + "ce_ib": 4.014986515045166, + "ce_orig": 1.064766764640808, + "epoch": 0.9651304910489611, + "kl_loss": 0.09494619071483612, + "loss_ib": 0.0013509605778381228, + "step": 3356 + }, + { + "ce_ib": 5.070929050445557, + "ce_orig": 1.3919395208358765, + "epoch": 0.9651304910489611, + "kl_loss": 0.10326479375362396, + "loss_ib": 0.0015397408278658986, + "step": 3356 + }, + { + "ce_ib": 5.039867401123047, + "ce_orig": 0.8158395290374756, + "epoch": 0.9651304910489611, + "kl_loss": 0.07736895978450775, + "loss_ib": 0.0012776763178408146, + "step": 3356 + }, + { + "ce_ib": 4.627206325531006, + "ce_orig": 0.8550837635993958, + "epoch": 0.9654180746279387, + "kl_loss": 0.06824640929698944, + "loss_ib": 0.001145184738561511, + "step": 3357 + }, + { + "ce_ib": 3.121575117111206, + "ce_orig": 0.5235661864280701, + "epoch": 0.9654180746279387, + "kl_loss": 0.08226372301578522, + "loss_ib": 0.001134794671088457, + "step": 3357 + }, + { + "ce_ib": 4.61481237411499, + "ce_orig": 1.272113561630249, + "epoch": 0.9654180746279387, + "kl_loss": 0.04803900420665741, + "loss_ib": 0.0009418712579645216, + "step": 3357 + }, + { + "ce_ib": 4.91301965713501, + "ce_orig": 0.9060989022254944, + "epoch": 0.9654180746279387, + "kl_loss": 0.035317204892635345, + "loss_ib": 0.0008444739505648613, + "step": 3357 + }, + { + "ce_ib": 3.4786646366119385, + "ce_orig": 0.8254556655883789, + "epoch": 0.9657056582069163, + "kl_loss": 0.04652174562215805, + "loss_ib": 0.0008130839560180902, + "step": 3358 + }, + { + "ce_ib": 3.021854877471924, + "ce_orig": 0.5959189534187317, + "epoch": 0.9657056582069163, + "kl_loss": 0.044642120599746704, + "loss_ib": 0.0007486066897399724, + "step": 3358 + }, + { + "ce_ib": 3.172344207763672, + "ce_orig": 0.8166502714157104, + "epoch": 0.9657056582069163, + "kl_loss": 0.0498468279838562, + "loss_ib": 0.0008157026604749262, + "step": 3358 + }, + { + "ce_ib": 3.2488515377044678, + "ce_orig": 0.9457082748413086, + "epoch": 0.9657056582069163, + "kl_loss": 0.03869285807013512, + "loss_ib": 0.0007118136854842305, + "step": 3358 + }, + { + "ce_ib": 5.843242168426514, + "ce_orig": 1.226638674736023, + "epoch": 0.9659932417858941, + "kl_loss": 0.07126612961292267, + "loss_ib": 0.0012969854287803173, + "step": 3359 + }, + { + "ce_ib": 4.095675468444824, + "ce_orig": 1.02019202709198, + "epoch": 0.9659932417858941, + "kl_loss": 0.06362830847501755, + "loss_ib": 0.0010458506876602769, + "step": 3359 + }, + { + "ce_ib": 4.333505630493164, + "ce_orig": 1.029388666152954, + "epoch": 0.9659932417858941, + "kl_loss": 0.047386534512043, + "loss_ib": 0.0009072159300558269, + "step": 3359 + }, + { + "ce_ib": 2.5133466720581055, + "ce_orig": 0.657985270023346, + "epoch": 0.9659932417858941, + "kl_loss": 0.027748355641961098, + "loss_ib": 0.000528818229213357, + "step": 3359 + }, + { + "epoch": 0.9662808253648717, + "grad_norm": 0.08720304816961288, + "learning_rate": 3.96695053421277e-05, + "loss": 0.7861, + "step": 3360 + }, + { + "ce_ib": 3.356675386428833, + "ce_orig": 0.6929931044578552, + "epoch": 0.9662808253648717, + "kl_loss": 0.10418205708265305, + "loss_ib": 0.0013774880208075047, + "step": 3360 + }, + { + "ce_ib": 1.887808918952942, + "ce_orig": 0.33290207386016846, + "epoch": 0.9662808253648717, + "kl_loss": 0.04626142978668213, + "loss_ib": 0.0006513951811939478, + "step": 3360 + }, + { + "ce_ib": 4.880937099456787, + "ce_orig": 1.0303484201431274, + "epoch": 0.9662808253648717, + "kl_loss": 0.06860557943582535, + "loss_ib": 0.0011741494527086616, + "step": 3360 + }, + { + "ce_ib": 3.2255148887634277, + "ce_orig": 0.6196760535240173, + "epoch": 0.9662808253648717, + "kl_loss": 0.05414970964193344, + "loss_ib": 0.0008640486048534513, + "step": 3360 + }, + { + "ce_ib": 2.304076671600342, + "ce_orig": 0.745291531085968, + "epoch": 0.9665684089438493, + "kl_loss": 0.03307715058326721, + "loss_ib": 0.0005611791275441647, + "step": 3361 + }, + { + "ce_ib": 3.4262847900390625, + "ce_orig": 0.791449248790741, + "epoch": 0.9665684089438493, + "kl_loss": 0.0671778991818428, + "loss_ib": 0.001014407374896109, + "step": 3361 + }, + { + "ce_ib": 2.88778018951416, + "ce_orig": 0.7963044047355652, + "epoch": 0.9665684089438493, + "kl_loss": 0.06068507209420204, + "loss_ib": 0.0008956287056207657, + "step": 3361 + }, + { + "ce_ib": 3.3939061164855957, + "ce_orig": 0.8229256868362427, + "epoch": 0.9665684089438493, + "kl_loss": 0.04666994512081146, + "loss_ib": 0.0008060900727286935, + "step": 3361 + }, + { + "ce_ib": 4.620193958282471, + "ce_orig": 1.3735333681106567, + "epoch": 0.966855992522827, + "kl_loss": 0.06901101022958755, + "loss_ib": 0.0011521294945850968, + "step": 3362 + }, + { + "ce_ib": 3.181936502456665, + "ce_orig": 0.8887683749198914, + "epoch": 0.966855992522827, + "kl_loss": 0.053670674562454224, + "loss_ib": 0.0008549003978259861, + "step": 3362 + }, + { + "ce_ib": 3.1666641235351562, + "ce_orig": 0.702185869216919, + "epoch": 0.966855992522827, + "kl_loss": 0.07639321684837341, + "loss_ib": 0.001080598565749824, + "step": 3362 + }, + { + "ce_ib": 4.05635404586792, + "ce_orig": 1.147905945777893, + "epoch": 0.966855992522827, + "kl_loss": 0.04421716555953026, + "loss_ib": 0.0008478070376440883, + "step": 3362 + }, + { + "ce_ib": 5.538776397705078, + "ce_orig": 1.534177303314209, + "epoch": 0.9671435761018046, + "kl_loss": 0.043559640645980835, + "loss_ib": 0.0009894740069285035, + "step": 3363 + }, + { + "ce_ib": 4.246890068054199, + "ce_orig": 0.6940446496009827, + "epoch": 0.9671435761018046, + "kl_loss": 0.05846807360649109, + "loss_ib": 0.0010093697346746922, + "step": 3363 + }, + { + "ce_ib": 3.4719276428222656, + "ce_orig": 1.2756035327911377, + "epoch": 0.9671435761018046, + "kl_loss": 0.05451950430870056, + "loss_ib": 0.0008923877612687647, + "step": 3363 + }, + { + "ce_ib": 3.3598310947418213, + "ce_orig": 0.5790072083473206, + "epoch": 0.9671435761018046, + "kl_loss": 0.08288251608610153, + "loss_ib": 0.0011648082872852683, + "step": 3363 + }, + { + "ce_ib": 4.624600410461426, + "ce_orig": 1.3707916736602783, + "epoch": 0.9674311596807822, + "kl_loss": 0.04321231320500374, + "loss_ib": 0.0008945831214077771, + "step": 3364 + }, + { + "ce_ib": 4.06861686706543, + "ce_orig": 1.1260889768600464, + "epoch": 0.9674311596807822, + "kl_loss": 0.06933875381946564, + "loss_ib": 0.001100249239243567, + "step": 3364 + }, + { + "ce_ib": 5.093267917633057, + "ce_orig": 1.3767344951629639, + "epoch": 0.9674311596807822, + "kl_loss": 0.0714779868721962, + "loss_ib": 0.0012241066433489323, + "step": 3364 + }, + { + "ce_ib": 3.8294947147369385, + "ce_orig": 0.5699147582054138, + "epoch": 0.9674311596807822, + "kl_loss": 0.05788327753543854, + "loss_ib": 0.0009617821779102087, + "step": 3364 + }, + { + "epoch": 0.9677187432597598, + "grad_norm": 0.0905143991112709, + "learning_rate": 3.9638066082169714e-05, + "loss": 0.8659, + "step": 3365 + }, + { + "ce_ib": 4.183934688568115, + "ce_orig": 1.1519842147827148, + "epoch": 0.9677187432597598, + "kl_loss": 0.06414973735809326, + "loss_ib": 0.0010598908411338925, + "step": 3365 + }, + { + "ce_ib": 5.24158239364624, + "ce_orig": 0.735165536403656, + "epoch": 0.9677187432597598, + "kl_loss": 0.0648014172911644, + "loss_ib": 0.0011721723712980747, + "step": 3365 + }, + { + "ce_ib": 2.660879611968994, + "ce_orig": 0.6677335500717163, + "epoch": 0.9677187432597598, + "kl_loss": 0.03655746206641197, + "loss_ib": 0.0006316626095212996, + "step": 3365 + }, + { + "ce_ib": 3.862239122390747, + "ce_orig": 1.19265615940094, + "epoch": 0.9677187432597598, + "kl_loss": 0.04409963637590408, + "loss_ib": 0.000827220210339874, + "step": 3365 + }, + { + "ce_ib": 1.9621267318725586, + "ce_orig": 0.3970629870891571, + "epoch": 0.9680063268387376, + "kl_loss": 0.034881435334682465, + "loss_ib": 0.0005450270255096257, + "step": 3366 + }, + { + "ce_ib": 2.8291494846343994, + "ce_orig": 0.7547534108161926, + "epoch": 0.9680063268387376, + "kl_loss": 0.059298451989889145, + "loss_ib": 0.0008758994517847896, + "step": 3366 + }, + { + "ce_ib": 4.875083923339844, + "ce_orig": 0.8186026215553284, + "epoch": 0.9680063268387376, + "kl_loss": 0.06181326508522034, + "loss_ib": 0.0011056408984586596, + "step": 3366 + }, + { + "ce_ib": 2.546844720840454, + "ce_orig": 0.5316460132598877, + "epoch": 0.9680063268387376, + "kl_loss": 0.05481356382369995, + "loss_ib": 0.0008028200827538967, + "step": 3366 + }, + { + "ce_ib": 3.3093385696411133, + "ce_orig": 0.6802007555961609, + "epoch": 0.9682939104177152, + "kl_loss": 0.05539091303944588, + "loss_ib": 0.0008848430006764829, + "step": 3367 + }, + { + "ce_ib": 2.059659719467163, + "ce_orig": 0.42995232343673706, + "epoch": 0.9682939104177152, + "kl_loss": 0.04071637988090515, + "loss_ib": 0.0006131297559477389, + "step": 3367 + }, + { + "ce_ib": 3.360013961791992, + "ce_orig": 0.6571654677391052, + "epoch": 0.9682939104177152, + "kl_loss": 0.08995699882507324, + "loss_ib": 0.0012355713406577706, + "step": 3367 + }, + { + "ce_ib": 2.225694417953491, + "ce_orig": 0.26404765248298645, + "epoch": 0.9682939104177152, + "kl_loss": 0.03467220813035965, + "loss_ib": 0.0005692915292456746, + "step": 3367 + }, + { + "ce_ib": 2.267570972442627, + "ce_orig": 0.6617668867111206, + "epoch": 0.9685814939966928, + "kl_loss": 0.054607562720775604, + "loss_ib": 0.0007728327182121575, + "step": 3368 + }, + { + "ce_ib": 2.9766204357147217, + "ce_orig": 0.7438018321990967, + "epoch": 0.9685814939966928, + "kl_loss": 0.050778307020664215, + "loss_ib": 0.0008054450736381114, + "step": 3368 + }, + { + "ce_ib": 5.197218418121338, + "ce_orig": 1.232897162437439, + "epoch": 0.9685814939966928, + "kl_loss": 0.04297186806797981, + "loss_ib": 0.000949440523982048, + "step": 3368 + }, + { + "ce_ib": 4.855482578277588, + "ce_orig": 1.1969223022460938, + "epoch": 0.9685814939966928, + "kl_loss": 0.04397144168615341, + "loss_ib": 0.0009252626914530993, + "step": 3368 + }, + { + "ce_ib": 4.271852493286133, + "ce_orig": 1.0062099695205688, + "epoch": 0.9688690775756704, + "kl_loss": 0.04764661192893982, + "loss_ib": 0.0009036512929014862, + "step": 3369 + }, + { + "ce_ib": 3.798287868499756, + "ce_orig": 0.8255025148391724, + "epoch": 0.9688690775756704, + "kl_loss": 0.020740263164043427, + "loss_ib": 0.0005872313631698489, + "step": 3369 + }, + { + "ce_ib": 3.057612895965576, + "ce_orig": 0.8486059904098511, + "epoch": 0.9688690775756704, + "kl_loss": 0.04820967838168144, + "loss_ib": 0.0007878580363467336, + "step": 3369 + }, + { + "ce_ib": 3.165860652923584, + "ce_orig": 0.8535991907119751, + "epoch": 0.9688690775756704, + "kl_loss": 0.04077392816543579, + "loss_ib": 0.0007243253057822585, + "step": 3369 + }, + { + "epoch": 0.969156661154648, + "grad_norm": 0.11928434669971466, + "learning_rate": 3.960659155571296e-05, + "loss": 0.8499, + "step": 3370 + }, + { + "ce_ib": 5.549121856689453, + "ce_orig": 1.4033098220825195, + "epoch": 0.969156661154648, + "kl_loss": 0.044381268322467804, + "loss_ib": 0.000998724834062159, + "step": 3370 + }, + { + "ce_ib": 4.654242038726807, + "ce_orig": 0.8281747102737427, + "epoch": 0.969156661154648, + "kl_loss": 0.0841580405831337, + "loss_ib": 0.0013070047134533525, + "step": 3370 + }, + { + "ce_ib": 4.556210517883301, + "ce_orig": 0.9151393175125122, + "epoch": 0.969156661154648, + "kl_loss": 0.11826199293136597, + "loss_ib": 0.0016382408794015646, + "step": 3370 + }, + { + "ce_ib": 3.472912311553955, + "ce_orig": 0.46384677290916443, + "epoch": 0.969156661154648, + "kl_loss": 0.06671790778636932, + "loss_ib": 0.0010144702391698956, + "step": 3370 + }, + { + "ce_ib": 2.5201849937438965, + "ce_orig": 0.7376601099967957, + "epoch": 0.9694442447336257, + "kl_loss": 0.04521144554018974, + "loss_ib": 0.0007041328935883939, + "step": 3371 + }, + { + "ce_ib": 2.155625581741333, + "ce_orig": 0.71832674741745, + "epoch": 0.9694442447336257, + "kl_loss": 0.029247112572193146, + "loss_ib": 0.0005080336704850197, + "step": 3371 + }, + { + "ce_ib": 4.439027786254883, + "ce_orig": 1.0642496347427368, + "epoch": 0.9694442447336257, + "kl_loss": 0.055872172117233276, + "loss_ib": 0.0010026245145127177, + "step": 3371 + }, + { + "ce_ib": 1.8665393590927124, + "ce_orig": 0.4782293140888214, + "epoch": 0.9694442447336257, + "kl_loss": 0.03760138154029846, + "loss_ib": 0.0005626677884720266, + "step": 3371 + }, + { + "ce_ib": 1.8575193881988525, + "ce_orig": 0.41999948024749756, + "epoch": 0.9697318283126034, + "kl_loss": 0.044626425951719284, + "loss_ib": 0.0006320162210613489, + "step": 3372 + }, + { + "ce_ib": 4.586336612701416, + "ce_orig": 1.1336785554885864, + "epoch": 0.9697318283126034, + "kl_loss": 0.04441531002521515, + "loss_ib": 0.0009027866763062775, + "step": 3372 + }, + { + "ce_ib": 4.716530799865723, + "ce_orig": 0.9703569412231445, + "epoch": 0.9697318283126034, + "kl_loss": 0.05281326547265053, + "loss_ib": 0.000999785726889968, + "step": 3372 + }, + { + "ce_ib": 3.309823989868164, + "ce_orig": 0.5298031568527222, + "epoch": 0.9697318283126034, + "kl_loss": 0.04603356868028641, + "loss_ib": 0.0007913180743344128, + "step": 3372 + }, + { + "ce_ib": 2.4833123683929443, + "ce_orig": 0.5250624418258667, + "epoch": 0.970019411891581, + "kl_loss": 0.08196993917226791, + "loss_ib": 0.0010680306004360318, + "step": 3373 + }, + { + "ce_ib": 2.7807772159576416, + "ce_orig": 0.5330173969268799, + "epoch": 0.970019411891581, + "kl_loss": 0.0508434996008873, + "loss_ib": 0.0007865126826800406, + "step": 3373 + }, + { + "ce_ib": 3.707029342651367, + "ce_orig": 0.7523142695426941, + "epoch": 0.970019411891581, + "kl_loss": 0.04005305469036102, + "loss_ib": 0.0007712334045208991, + "step": 3373 + }, + { + "ce_ib": 2.163090944290161, + "ce_orig": 0.5214024782180786, + "epoch": 0.970019411891581, + "kl_loss": 0.04227645695209503, + "loss_ib": 0.0006390736671164632, + "step": 3373 + }, + { + "ce_ib": 3.473311185836792, + "ce_orig": 0.4851108193397522, + "epoch": 0.9703069954705587, + "kl_loss": 0.06966482102870941, + "loss_ib": 0.001043979311361909, + "step": 3374 + }, + { + "ce_ib": 3.3559556007385254, + "ce_orig": 0.7620459198951721, + "epoch": 0.9703069954705587, + "kl_loss": 0.07831843197345734, + "loss_ib": 0.001118779880926013, + "step": 3374 + }, + { + "ce_ib": 2.7457938194274902, + "ce_orig": 0.3768070638179779, + "epoch": 0.9703069954705587, + "kl_loss": 0.061746686697006226, + "loss_ib": 0.0008920461987145245, + "step": 3374 + }, + { + "ce_ib": 4.647453784942627, + "ce_orig": 1.1427996158599854, + "epoch": 0.9703069954705587, + "kl_loss": 0.0634344294667244, + "loss_ib": 0.001099089626222849, + "step": 3374 + }, + { + "epoch": 0.9705945790495363, + "grad_norm": 0.08858075737953186, + "learning_rate": 3.95750818385869e-05, + "loss": 0.8281, + "step": 3375 + }, + { + "ce_ib": 3.9454386234283447, + "ce_orig": 1.261613130569458, + "epoch": 0.9705945790495363, + "kl_loss": 0.04398772865533829, + "loss_ib": 0.00083442113827914, + "step": 3375 + }, + { + "ce_ib": 2.8799824714660645, + "ce_orig": 0.770415723323822, + "epoch": 0.9705945790495363, + "kl_loss": 0.0518430694937706, + "loss_ib": 0.0008064288995228708, + "step": 3375 + }, + { + "ce_ib": 5.475050449371338, + "ce_orig": 1.6461435556411743, + "epoch": 0.9705945790495363, + "kl_loss": 0.07450875639915466, + "loss_ib": 0.0012925926130264997, + "step": 3375 + }, + { + "ce_ib": 4.572258472442627, + "ce_orig": 0.8091452121734619, + "epoch": 0.9705945790495363, + "kl_loss": 0.06803585588932037, + "loss_ib": 0.0011375844478607178, + "step": 3375 + }, + { + "ce_ib": 5.470289707183838, + "ce_orig": 0.710455596446991, + "epoch": 0.9708821626285139, + "kl_loss": 0.07021665573120117, + "loss_ib": 0.0012491954257711768, + "step": 3376 + }, + { + "ce_ib": 2.7532570362091064, + "ce_orig": 0.7078081965446472, + "epoch": 0.9708821626285139, + "kl_loss": 0.0353759229183197, + "loss_ib": 0.0006290849414654076, + "step": 3376 + }, + { + "ce_ib": 3.312706232070923, + "ce_orig": 0.7317742705345154, + "epoch": 0.9708821626285139, + "kl_loss": 0.040551699697971344, + "loss_ib": 0.0007367876241914928, + "step": 3376 + }, + { + "ce_ib": 2.8109917640686035, + "ce_orig": 0.3781137466430664, + "epoch": 0.9708821626285139, + "kl_loss": 0.0401696041226387, + "loss_ib": 0.000682795187458396, + "step": 3376 + }, + { + "ce_ib": 3.306851625442505, + "ce_orig": 0.6813018918037415, + "epoch": 0.9711697462074915, + "kl_loss": 0.07717133313417435, + "loss_ib": 0.0011023984989151359, + "step": 3377 + }, + { + "ce_ib": 3.256883144378662, + "ce_orig": 0.6706445813179016, + "epoch": 0.9711697462074915, + "kl_loss": 0.057154327630996704, + "loss_ib": 0.0008972315117716789, + "step": 3377 + }, + { + "ce_ib": 3.6167614459991455, + "ce_orig": 0.9355749487876892, + "epoch": 0.9711697462074915, + "kl_loss": 0.03484819084405899, + "loss_ib": 0.0007101580267772079, + "step": 3377 + }, + { + "ce_ib": 4.504331588745117, + "ce_orig": 1.287433385848999, + "epoch": 0.9711697462074915, + "kl_loss": 0.03468716889619827, + "loss_ib": 0.0007973047904670238, + "step": 3377 + }, + { + "ce_ib": 4.766756057739258, + "ce_orig": 0.8159705400466919, + "epoch": 0.9714573297864691, + "kl_loss": 0.05872156471014023, + "loss_ib": 0.0010638912208378315, + "step": 3378 + }, + { + "ce_ib": 2.3252670764923096, + "ce_orig": 0.7131567001342773, + "epoch": 0.9714573297864691, + "kl_loss": 0.04298853129148483, + "loss_ib": 0.0006624120287597179, + "step": 3378 + }, + { + "ce_ib": 3.4335408210754395, + "ce_orig": 1.0226856470108032, + "epoch": 0.9714573297864691, + "kl_loss": 0.046836286783218384, + "loss_ib": 0.0008117168908938766, + "step": 3378 + }, + { + "ce_ib": 5.727025985717773, + "ce_orig": 0.9097737669944763, + "epoch": 0.9714573297864691, + "kl_loss": 0.079706110060215, + "loss_ib": 0.0013697636313736439, + "step": 3378 + }, + { + "ce_ib": 3.3879945278167725, + "ce_orig": 0.7782193422317505, + "epoch": 0.9717449133654469, + "kl_loss": 0.045109037309885025, + "loss_ib": 0.0007898898329585791, + "step": 3379 + }, + { + "ce_ib": 2.185497522354126, + "ce_orig": 0.46620097756385803, + "epoch": 0.9717449133654469, + "kl_loss": 0.03712323307991028, + "loss_ib": 0.000589782081078738, + "step": 3379 + }, + { + "ce_ib": 4.949954509735107, + "ce_orig": 1.000083327293396, + "epoch": 0.9717449133654469, + "kl_loss": 0.05341742932796478, + "loss_ib": 0.0010291696526110172, + "step": 3379 + }, + { + "ce_ib": 3.5659053325653076, + "ce_orig": 0.7260215282440186, + "epoch": 0.9717449133654469, + "kl_loss": 0.06616528332233429, + "loss_ib": 0.0010182433761656284, + "step": 3379 + }, + { + "epoch": 0.9720324969444245, + "grad_norm": 0.09920408576726913, + "learning_rate": 3.9543537006705736e-05, + "loss": 0.8283, + "step": 3380 + }, + { + "ce_ib": 3.591614007949829, + "ce_orig": 0.929044246673584, + "epoch": 0.9720324969444245, + "kl_loss": 0.03367080166935921, + "loss_ib": 0.0006958693847991526, + "step": 3380 + }, + { + "ce_ib": 2.2868082523345947, + "ce_orig": 0.6594815254211426, + "epoch": 0.9720324969444245, + "kl_loss": 0.03374612331390381, + "loss_ib": 0.0005661420291289687, + "step": 3380 + }, + { + "ce_ib": 3.341381549835205, + "ce_orig": 0.630258321762085, + "epoch": 0.9720324969444245, + "kl_loss": 0.04352226108312607, + "loss_ib": 0.0007693608058616519, + "step": 3380 + }, + { + "ce_ib": 3.4534220695495605, + "ce_orig": 0.4366798996925354, + "epoch": 0.9720324969444245, + "kl_loss": 0.0652756541967392, + "loss_ib": 0.000998098636046052, + "step": 3380 + }, + { + "ce_ib": 4.314418792724609, + "ce_orig": 0.800651490688324, + "epoch": 0.9723200805234021, + "kl_loss": 0.10016003251075745, + "loss_ib": 0.0014330422272905707, + "step": 3381 + }, + { + "ce_ib": 3.63533091545105, + "ce_orig": 1.0679596662521362, + "epoch": 0.9723200805234021, + "kl_loss": 0.05142171308398247, + "loss_ib": 0.0008777502225711942, + "step": 3381 + }, + { + "ce_ib": 1.9154787063598633, + "ce_orig": 0.4521268606185913, + "epoch": 0.9723200805234021, + "kl_loss": 0.038041844964027405, + "loss_ib": 0.0005719662876799703, + "step": 3381 + }, + { + "ce_ib": 3.274620771408081, + "ce_orig": 0.6064826250076294, + "epoch": 0.9723200805234021, + "kl_loss": 0.04096993803977966, + "loss_ib": 0.00073716149199754, + "step": 3381 + }, + { + "ce_ib": 2.2083253860473633, + "ce_orig": 0.46807512640953064, + "epoch": 0.9726076641023798, + "kl_loss": 0.06450428068637848, + "loss_ib": 0.0008658753358758986, + "step": 3382 + }, + { + "ce_ib": 2.0176658630371094, + "ce_orig": 0.48706743121147156, + "epoch": 0.9726076641023798, + "kl_loss": 0.0402679480612278, + "loss_ib": 0.0006044460460543633, + "step": 3382 + }, + { + "ce_ib": 4.507049083709717, + "ce_orig": 0.5687429308891296, + "epoch": 0.9726076641023798, + "kl_loss": 0.05529942363500595, + "loss_ib": 0.0010036991443485022, + "step": 3382 + }, + { + "ce_ib": 3.262524127960205, + "ce_orig": 0.7375885248184204, + "epoch": 0.9726076641023798, + "kl_loss": 0.05859196186065674, + "loss_ib": 0.0009121720213443041, + "step": 3382 + }, + { + "ce_ib": 2.7250161170959473, + "ce_orig": 0.6582308411598206, + "epoch": 0.9728952476813574, + "kl_loss": 0.05252271890640259, + "loss_ib": 0.0007977287750691175, + "step": 3383 + }, + { + "ce_ib": 4.385551929473877, + "ce_orig": 0.8902696967124939, + "epoch": 0.9728952476813574, + "kl_loss": 0.058292075991630554, + "loss_ib": 0.001021475880406797, + "step": 3383 + }, + { + "ce_ib": 2.9774234294891357, + "ce_orig": 0.7149662375450134, + "epoch": 0.9728952476813574, + "kl_loss": 0.037182144820690155, + "loss_ib": 0.000669563771225512, + "step": 3383 + }, + { + "ce_ib": 3.3775970935821533, + "ce_orig": 0.5289864540100098, + "epoch": 0.9728952476813574, + "kl_loss": 0.03479178249835968, + "loss_ib": 0.000685677514411509, + "step": 3383 + }, + { + "ce_ib": 2.874274253845215, + "ce_orig": 0.5062094926834106, + "epoch": 0.973182831260335, + "kl_loss": 0.028743689879775047, + "loss_ib": 0.0005748642724938691, + "step": 3384 + }, + { + "ce_ib": 5.489131927490234, + "ce_orig": 1.4226300716400146, + "epoch": 0.973182831260335, + "kl_loss": 0.04301943629980087, + "loss_ib": 0.0009791075717657804, + "step": 3384 + }, + { + "ce_ib": 3.7587778568267822, + "ce_orig": 1.0795695781707764, + "epoch": 0.973182831260335, + "kl_loss": 0.05067563056945801, + "loss_ib": 0.0008826340781524777, + "step": 3384 + }, + { + "ce_ib": 3.009881019592285, + "ce_orig": 0.5565769076347351, + "epoch": 0.973182831260335, + "kl_loss": 0.04467542842030525, + "loss_ib": 0.0007477423641830683, + "step": 3384 + }, + { + "epoch": 0.9734704148393126, + "grad_norm": 0.1014367863535881, + "learning_rate": 3.9511957136068294e-05, + "loss": 0.7612, + "step": 3385 + }, + { + "ce_ib": 4.01570987701416, + "ce_orig": 1.054984450340271, + "epoch": 0.9734704148393126, + "kl_loss": 0.02965484745800495, + "loss_ib": 0.0006981194601394236, + "step": 3385 + }, + { + "ce_ib": 3.085203170776367, + "ce_orig": 0.7177781462669373, + "epoch": 0.9734704148393126, + "kl_loss": 0.037259723991155624, + "loss_ib": 0.000681117526255548, + "step": 3385 + }, + { + "ce_ib": 5.091061592102051, + "ce_orig": 1.4056142568588257, + "epoch": 0.9734704148393126, + "kl_loss": 0.04537427797913551, + "loss_ib": 0.0009628489497117698, + "step": 3385 + }, + { + "ce_ib": 3.0631344318389893, + "ce_orig": 0.7659196257591248, + "epoch": 0.9734704148393126, + "kl_loss": 0.04602132365107536, + "loss_ib": 0.0007665266748517752, + "step": 3385 + }, + { + "ce_ib": 2.0161337852478027, + "ce_orig": 0.49425482749938965, + "epoch": 0.9737579984182904, + "kl_loss": 0.04137665033340454, + "loss_ib": 0.0006153798894956708, + "step": 3386 + }, + { + "ce_ib": 2.8662846088409424, + "ce_orig": 0.497906357049942, + "epoch": 0.9737579984182904, + "kl_loss": 0.06164783239364624, + "loss_ib": 0.0009031067602336407, + "step": 3386 + }, + { + "ce_ib": 4.743332862854004, + "ce_orig": 1.268319845199585, + "epoch": 0.9737579984182904, + "kl_loss": 0.04099158197641373, + "loss_ib": 0.0008842490497045219, + "step": 3386 + }, + { + "ce_ib": 4.052933692932129, + "ce_orig": 0.9525508284568787, + "epoch": 0.9737579984182904, + "kl_loss": 0.03325255960226059, + "loss_ib": 0.0007378188893198967, + "step": 3386 + }, + { + "ce_ib": 2.6232597827911377, + "ce_orig": 0.704663872718811, + "epoch": 0.974045581997268, + "kl_loss": 0.04101421684026718, + "loss_ib": 0.0006724681006744504, + "step": 3387 + }, + { + "ce_ib": 3.770958185195923, + "ce_orig": 0.94870924949646, + "epoch": 0.974045581997268, + "kl_loss": 0.09048379957675934, + "loss_ib": 0.0012819337425753474, + "step": 3387 + }, + { + "ce_ib": 6.341409206390381, + "ce_orig": 1.4821487665176392, + "epoch": 0.974045581997268, + "kl_loss": 0.05449676886200905, + "loss_ib": 0.0011791086290031672, + "step": 3387 + }, + { + "ce_ib": 4.0210371017456055, + "ce_orig": 0.6943937540054321, + "epoch": 0.974045581997268, + "kl_loss": 0.1008497029542923, + "loss_ib": 0.0014106006128713489, + "step": 3387 + }, + { + "ce_ib": 3.383434534072876, + "ce_orig": 0.76865154504776, + "epoch": 0.9743331655762456, + "kl_loss": 0.056762196123600006, + "loss_ib": 0.000905965396668762, + "step": 3388 + }, + { + "ce_ib": 4.282547950744629, + "ce_orig": 1.0022825002670288, + "epoch": 0.9743331655762456, + "kl_loss": 0.029506124556064606, + "loss_ib": 0.0007233160431496799, + "step": 3388 + }, + { + "ce_ib": 3.189754009246826, + "ce_orig": 0.6301044821739197, + "epoch": 0.9743331655762456, + "kl_loss": 0.058918777853250504, + "loss_ib": 0.0009081631433218718, + "step": 3388 + }, + { + "ce_ib": 3.1425912380218506, + "ce_orig": 0.6244577765464783, + "epoch": 0.9743331655762456, + "kl_loss": 0.050169773399829865, + "loss_ib": 0.0008159568533301353, + "step": 3388 + }, + { + "ce_ib": 3.6036899089813232, + "ce_orig": 1.0157524347305298, + "epoch": 0.9746207491552232, + "kl_loss": 0.04388787969946861, + "loss_ib": 0.0007992477621883154, + "step": 3389 + }, + { + "ce_ib": 2.711430788040161, + "ce_orig": 0.8219795823097229, + "epoch": 0.9746207491552232, + "kl_loss": 0.03123094141483307, + "loss_ib": 0.0005834525218233466, + "step": 3389 + }, + { + "ce_ib": 3.8745806217193604, + "ce_orig": 0.7343238592147827, + "epoch": 0.9746207491552232, + "kl_loss": 0.07326146215200424, + "loss_ib": 0.001120072673074901, + "step": 3389 + }, + { + "ce_ib": 6.593411922454834, + "ce_orig": 1.8092128038406372, + "epoch": 0.9746207491552232, + "kl_loss": 0.058812640607357025, + "loss_ib": 0.0012474674731492996, + "step": 3389 + }, + { + "epoch": 0.9749083327342009, + "grad_norm": 0.09428082406520844, + "learning_rate": 3.948034230275781e-05, + "loss": 0.8401, + "step": 3390 + }, + { + "ce_ib": 4.755552291870117, + "ce_orig": 1.1338934898376465, + "epoch": 0.9749083327342009, + "kl_loss": 0.0519314706325531, + "loss_ib": 0.000994869857095182, + "step": 3390 + }, + { + "ce_ib": 5.265138626098633, + "ce_orig": 1.3423395156860352, + "epoch": 0.9749083327342009, + "kl_loss": 0.0627739429473877, + "loss_ib": 0.0011542532593011856, + "step": 3390 + }, + { + "ce_ib": 6.114412307739258, + "ce_orig": 1.8133049011230469, + "epoch": 0.9749083327342009, + "kl_loss": 0.04707774519920349, + "loss_ib": 0.0010822186013683677, + "step": 3390 + }, + { + "ce_ib": 4.268710136413574, + "ce_orig": 1.0042667388916016, + "epoch": 0.9749083327342009, + "kl_loss": 0.04544149711728096, + "loss_ib": 0.0008812859305180609, + "step": 3390 + }, + { + "ce_ib": 2.875953197479248, + "ce_orig": 0.6280257701873779, + "epoch": 0.9751959163131785, + "kl_loss": 0.06487835943698883, + "loss_ib": 0.0009363788994960487, + "step": 3391 + }, + { + "ce_ib": 3.662858247756958, + "ce_orig": 0.7146333456039429, + "epoch": 0.9751959163131785, + "kl_loss": 0.07842400670051575, + "loss_ib": 0.0011505258735269308, + "step": 3391 + }, + { + "ce_ib": 2.7801873683929443, + "ce_orig": 0.605349063873291, + "epoch": 0.9751959163131785, + "kl_loss": 0.04001060873270035, + "loss_ib": 0.0006781247793696821, + "step": 3391 + }, + { + "ce_ib": 4.17471170425415, + "ce_orig": 0.7705010175704956, + "epoch": 0.9751959163131785, + "kl_loss": 0.07598719000816345, + "loss_ib": 0.0011773430742323399, + "step": 3391 + }, + { + "ce_ib": 4.8470587730407715, + "ce_orig": 1.4331470727920532, + "epoch": 0.9754834998921561, + "kl_loss": 0.026217173784971237, + "loss_ib": 0.0007468776311725378, + "step": 3392 + }, + { + "ce_ib": 1.7044763565063477, + "ce_orig": 0.3341567814350128, + "epoch": 0.9754834998921561, + "kl_loss": 0.11656466871500015, + "loss_ib": 0.0013360942248255014, + "step": 3392 + }, + { + "ce_ib": 2.6666388511657715, + "ce_orig": 0.4729032516479492, + "epoch": 0.9754834998921561, + "kl_loss": 0.07231856137514114, + "loss_ib": 0.0009898494463413954, + "step": 3392 + }, + { + "ce_ib": 2.301236629486084, + "ce_orig": 0.6356844305992126, + "epoch": 0.9754834998921561, + "kl_loss": 0.03169391676783562, + "loss_ib": 0.0005470628384500742, + "step": 3392 + }, + { + "ce_ib": 3.8742101192474365, + "ce_orig": 1.0545234680175781, + "epoch": 0.9757710834711338, + "kl_loss": 0.052199237048625946, + "loss_ib": 0.0009094133856706321, + "step": 3393 + }, + { + "ce_ib": 1.9848967790603638, + "ce_orig": 0.6846683621406555, + "epoch": 0.9757710834711338, + "kl_loss": 0.052478719502687454, + "loss_ib": 0.0007232768693938851, + "step": 3393 + }, + { + "ce_ib": 4.041388511657715, + "ce_orig": 0.7862762808799744, + "epoch": 0.9757710834711338, + "kl_loss": 0.06300417333841324, + "loss_ib": 0.001034180517308414, + "step": 3393 + }, + { + "ce_ib": 0.953490674495697, + "ce_orig": 0.17216064035892487, + "epoch": 0.9757710834711338, + "kl_loss": 0.071382537484169, + "loss_ib": 0.0008091744384728372, + "step": 3393 + }, + { + "ce_ib": 3.714715003967285, + "ce_orig": 0.6284633874893188, + "epoch": 0.9760586670501115, + "kl_loss": 0.05702289938926697, + "loss_ib": 0.0009417004766874015, + "step": 3394 + }, + { + "ce_ib": 2.1801559925079346, + "ce_orig": 0.47296616435050964, + "epoch": 0.9760586670501115, + "kl_loss": 0.03793312981724739, + "loss_ib": 0.000597346865106374, + "step": 3394 + }, + { + "ce_ib": 2.9090540409088135, + "ce_orig": 0.7436097264289856, + "epoch": 0.9760586670501115, + "kl_loss": 0.040312424302101135, + "loss_ib": 0.0006940296152606606, + "step": 3394 + }, + { + "ce_ib": 2.944882392883301, + "ce_orig": 0.7850603461265564, + "epoch": 0.9760586670501115, + "kl_loss": 0.04071471095085144, + "loss_ib": 0.0007016353192739189, + "step": 3394 + }, + { + "epoch": 0.9763462506290891, + "grad_norm": 0.09468042850494385, + "learning_rate": 3.944869258294177e-05, + "loss": 0.8388, + "step": 3395 + }, + { + "ce_ib": 3.911958694458008, + "ce_orig": 0.6422404646873474, + "epoch": 0.9763462506290891, + "kl_loss": 0.06114010512828827, + "loss_ib": 0.0010025969240814447, + "step": 3395 + }, + { + "ce_ib": 3.84257435798645, + "ce_orig": 0.983860969543457, + "epoch": 0.9763462506290891, + "kl_loss": 0.04758327454328537, + "loss_ib": 0.00086009013466537, + "step": 3395 + }, + { + "ce_ib": 2.9802863597869873, + "ce_orig": 0.6126695275306702, + "epoch": 0.9763462506290891, + "kl_loss": 0.053004179149866104, + "loss_ib": 0.0008280703914351761, + "step": 3395 + }, + { + "ce_ib": 2.636300802230835, + "ce_orig": 0.8622342944145203, + "epoch": 0.9763462506290891, + "kl_loss": 0.04540564492344856, + "loss_ib": 0.0007176865474320948, + "step": 3395 + }, + { + "ce_ib": 4.240979194641113, + "ce_orig": 0.9760212898254395, + "epoch": 0.9766338342080667, + "kl_loss": 0.07260515540838242, + "loss_ib": 0.0011501495027914643, + "step": 3396 + }, + { + "ce_ib": 4.079285144805908, + "ce_orig": 0.8656591176986694, + "epoch": 0.9766338342080667, + "kl_loss": 0.06458354741334915, + "loss_ib": 0.001053763902746141, + "step": 3396 + }, + { + "ce_ib": 3.496347188949585, + "ce_orig": 0.8289207816123962, + "epoch": 0.9766338342080667, + "kl_loss": 0.05426404997706413, + "loss_ib": 0.0008922751876525581, + "step": 3396 + }, + { + "ce_ib": 3.956249475479126, + "ce_orig": 1.233169674873352, + "epoch": 0.9766338342080667, + "kl_loss": 0.052083589136600494, + "loss_ib": 0.0009164608200080693, + "step": 3396 + }, + { + "ce_ib": 1.6313753128051758, + "ce_orig": 0.44085487723350525, + "epoch": 0.9769214177870443, + "kl_loss": 0.03197624906897545, + "loss_ib": 0.0004828999808523804, + "step": 3397 + }, + { + "ce_ib": 1.400808334350586, + "ce_orig": 0.27738049626350403, + "epoch": 0.9769214177870443, + "kl_loss": 0.10487571358680725, + "loss_ib": 0.0011888379231095314, + "step": 3397 + }, + { + "ce_ib": 4.163665771484375, + "ce_orig": 0.9515032768249512, + "epoch": 0.9769214177870443, + "kl_loss": 0.04650726169347763, + "loss_ib": 0.0008814391330815852, + "step": 3397 + }, + { + "ce_ib": 6.090035915374756, + "ce_orig": 1.4091548919677734, + "epoch": 0.9769214177870443, + "kl_loss": 0.045905701816082, + "loss_ib": 0.0010680605191737413, + "step": 3397 + }, + { + "ce_ib": 2.73146915435791, + "ce_orig": 0.8002219200134277, + "epoch": 0.977209001366022, + "kl_loss": 0.02151009812951088, + "loss_ib": 0.0004882478679064661, + "step": 3398 + }, + { + "ce_ib": 2.369126796722412, + "ce_orig": 0.5310594439506531, + "epoch": 0.977209001366022, + "kl_loss": 0.044837385416030884, + "loss_ib": 0.0006852865335531533, + "step": 3398 + }, + { + "ce_ib": 2.9237051010131836, + "ce_orig": 0.552635669708252, + "epoch": 0.977209001366022, + "kl_loss": 0.03639254719018936, + "loss_ib": 0.0006562959752045572, + "step": 3398 + }, + { + "ce_ib": 3.9580461978912354, + "ce_orig": 0.8662056922912598, + "epoch": 0.977209001366022, + "kl_loss": 0.05084938555955887, + "loss_ib": 0.0009042983874678612, + "step": 3398 + }, + { + "ce_ib": 3.8470852375030518, + "ce_orig": 0.9433568120002747, + "epoch": 0.9774965849449997, + "kl_loss": 0.09434588253498077, + "loss_ib": 0.0013281672727316618, + "step": 3399 + }, + { + "ce_ib": 4.601534366607666, + "ce_orig": 1.174981713294983, + "epoch": 0.9774965849449997, + "kl_loss": 0.09997980296611786, + "loss_ib": 0.001459951396100223, + "step": 3399 + }, + { + "ce_ib": 4.426039695739746, + "ce_orig": 0.6772770285606384, + "epoch": 0.9774965849449997, + "kl_loss": 0.08328292518854141, + "loss_ib": 0.0012754332274198532, + "step": 3399 + }, + { + "ce_ib": 4.252315044403076, + "ce_orig": 1.2236363887786865, + "epoch": 0.9774965849449997, + "kl_loss": 0.061110563576221466, + "loss_ib": 0.001036337111145258, + "step": 3399 + }, + { + "epoch": 0.9777841685239773, + "grad_norm": 0.09008466452360153, + "learning_rate": 3.941700805287168e-05, + "loss": 0.8209, + "step": 3400 + }, + { + "ce_ib": 4.481536865234375, + "ce_orig": 1.0568243265151978, + "epoch": 0.9777841685239773, + "kl_loss": 0.06087416782975197, + "loss_ib": 0.0010568953584879637, + "step": 3400 + }, + { + "ce_ib": 4.6185688972473145, + "ce_orig": 1.255655288696289, + "epoch": 0.9777841685239773, + "kl_loss": 0.0476287305355072, + "loss_ib": 0.0009381441632285714, + "step": 3400 + }, + { + "ce_ib": 7.018340110778809, + "ce_orig": 0.6878707408905029, + "epoch": 0.9777841685239773, + "kl_loss": 0.053394950926303864, + "loss_ib": 0.0012357834493741393, + "step": 3400 + }, + { + "ce_ib": 2.9197745323181152, + "ce_orig": 0.49462270736694336, + "epoch": 0.9777841685239773, + "kl_loss": 0.060124777257442474, + "loss_ib": 0.0008932251948863268, + "step": 3400 + }, + { + "ce_ib": 4.9746599197387695, + "ce_orig": 1.1269397735595703, + "epoch": 0.9780717521029549, + "kl_loss": 0.0547599270939827, + "loss_ib": 0.0010450652334839106, + "step": 3401 + }, + { + "ce_ib": 2.6885178089141846, + "ce_orig": 0.6414985060691833, + "epoch": 0.9780717521029549, + "kl_loss": 0.04327268898487091, + "loss_ib": 0.0007015786832198501, + "step": 3401 + }, + { + "ce_ib": 5.920187473297119, + "ce_orig": 1.63614022731781, + "epoch": 0.9780717521029549, + "kl_loss": 0.049609582871198654, + "loss_ib": 0.0010881144553422928, + "step": 3401 + }, + { + "ce_ib": 3.5471584796905518, + "ce_orig": 0.7214102745056152, + "epoch": 0.9780717521029549, + "kl_loss": 0.09060128778219223, + "loss_ib": 0.0012607286917045712, + "step": 3401 + }, + { + "ce_ib": 4.789700031280518, + "ce_orig": 1.3651149272918701, + "epoch": 0.9783593356819326, + "kl_loss": 0.05958143249154091, + "loss_ib": 0.0010747843189164996, + "step": 3402 + }, + { + "ce_ib": 2.661999225616455, + "ce_orig": 0.4930724799633026, + "epoch": 0.9783593356819326, + "kl_loss": 0.0558992438018322, + "loss_ib": 0.0008251923136413097, + "step": 3402 + }, + { + "ce_ib": 6.017880439758301, + "ce_orig": 1.5432002544403076, + "epoch": 0.9783593356819326, + "kl_loss": 0.05903343856334686, + "loss_ib": 0.0011921223485842347, + "step": 3402 + }, + { + "ce_ib": 3.370652675628662, + "ce_orig": 1.0866745710372925, + "epoch": 0.9783593356819326, + "kl_loss": 0.038518913090229034, + "loss_ib": 0.0007222543354146183, + "step": 3402 + }, + { + "ce_ib": 3.3767664432525635, + "ce_orig": 0.89601069688797, + "epoch": 0.9786469192609102, + "kl_loss": 0.06166306883096695, + "loss_ib": 0.000954307324718684, + "step": 3403 + }, + { + "ce_ib": 3.015432834625244, + "ce_orig": 0.7082258462905884, + "epoch": 0.9786469192609102, + "kl_loss": 0.05137673020362854, + "loss_ib": 0.0008153105154633522, + "step": 3403 + }, + { + "ce_ib": 4.21713399887085, + "ce_orig": 1.160141944885254, + "epoch": 0.9786469192609102, + "kl_loss": 0.05765974149107933, + "loss_ib": 0.0009983107447624207, + "step": 3403 + }, + { + "ce_ib": 2.9023633003234863, + "ce_orig": 0.35061073303222656, + "epoch": 0.9786469192609102, + "kl_loss": 0.07838621735572815, + "loss_ib": 0.0010740985162556171, + "step": 3403 + }, + { + "ce_ib": 3.678698778152466, + "ce_orig": 0.9121930003166199, + "epoch": 0.9789345028398878, + "kl_loss": 0.059946458786726, + "loss_ib": 0.0009673344902694225, + "step": 3404 + }, + { + "ce_ib": 2.9247424602508545, + "ce_orig": 0.6886409521102905, + "epoch": 0.9789345028398878, + "kl_loss": 0.03884799778461456, + "loss_ib": 0.0006809541955590248, + "step": 3404 + }, + { + "ce_ib": 3.3685085773468018, + "ce_orig": 0.6699627041816711, + "epoch": 0.9789345028398878, + "kl_loss": 0.051445066928863525, + "loss_ib": 0.0008513015345670283, + "step": 3404 + }, + { + "ce_ib": 4.289220809936523, + "ce_orig": 0.8374332189559937, + "epoch": 0.9789345028398878, + "kl_loss": 0.057595394551754, + "loss_ib": 0.0010048759868368506, + "step": 3404 + }, + { + "epoch": 0.9792220864188654, + "grad_norm": 0.08760187029838562, + "learning_rate": 3.9385288788882934e-05, + "loss": 0.8355, + "step": 3405 + }, + { + "ce_ib": 2.932321071624756, + "ce_orig": 0.8101611137390137, + "epoch": 0.9792220864188654, + "kl_loss": 0.05835634469985962, + "loss_ib": 0.0008767955587245524, + "step": 3405 + }, + { + "ce_ib": 2.95937180519104, + "ce_orig": 0.6598889231681824, + "epoch": 0.9792220864188654, + "kl_loss": 0.05448542535305023, + "loss_ib": 0.0008407914428971708, + "step": 3405 + }, + { + "ce_ib": 5.156567573547363, + "ce_orig": 1.1813021898269653, + "epoch": 0.9792220864188654, + "kl_loss": 0.046954069286584854, + "loss_ib": 0.0009851973736658692, + "step": 3405 + }, + { + "ce_ib": 3.366668462753296, + "ce_orig": 0.6808117032051086, + "epoch": 0.9792220864188654, + "kl_loss": 0.038930539041757584, + "loss_ib": 0.0007259721751324832, + "step": 3405 + }, + { + "ce_ib": 3.7419755458831787, + "ce_orig": 0.885158121585846, + "epoch": 0.9795096699978432, + "kl_loss": 0.028596173971891403, + "loss_ib": 0.0006601592758670449, + "step": 3406 + }, + { + "ce_ib": 3.752126932144165, + "ce_orig": 0.532223105430603, + "epoch": 0.9795096699978432, + "kl_loss": 0.044371869415044785, + "loss_ib": 0.0008189313812181354, + "step": 3406 + }, + { + "ce_ib": 2.200015068054199, + "ce_orig": 0.5620361566543579, + "epoch": 0.9795096699978432, + "kl_loss": 0.03614785522222519, + "loss_ib": 0.0005814800388179719, + "step": 3406 + }, + { + "ce_ib": 4.990869045257568, + "ce_orig": 1.0989603996276855, + "epoch": 0.9795096699978432, + "kl_loss": 0.07399295270442963, + "loss_ib": 0.0012390164192765951, + "step": 3406 + }, + { + "ce_ib": 5.599062442779541, + "ce_orig": 1.2580463886260986, + "epoch": 0.9797972535768208, + "kl_loss": 0.06516215205192566, + "loss_ib": 0.0012115277349948883, + "step": 3407 + }, + { + "ce_ib": 2.755896806716919, + "ce_orig": 0.707417905330658, + "epoch": 0.9797972535768208, + "kl_loss": 0.06591945141553879, + "loss_ib": 0.0009347841842100024, + "step": 3407 + }, + { + "ce_ib": 1.311057686805725, + "ce_orig": 0.24325121939182281, + "epoch": 0.9797972535768208, + "kl_loss": 0.07417318969964981, + "loss_ib": 0.0008728376706130803, + "step": 3407 + }, + { + "ce_ib": 2.5397486686706543, + "ce_orig": 0.6433539986610413, + "epoch": 0.9797972535768208, + "kl_loss": 0.02904733084142208, + "loss_ib": 0.0005444482085295022, + "step": 3407 + }, + { + "ce_ib": 2.499586820602417, + "ce_orig": 0.6240347027778625, + "epoch": 0.9800848371557984, + "kl_loss": 0.05145413428544998, + "loss_ib": 0.000764500058721751, + "step": 3408 + }, + { + "ce_ib": 5.142812728881836, + "ce_orig": 1.234322190284729, + "epoch": 0.9800848371557984, + "kl_loss": 0.04036469757556915, + "loss_ib": 0.0009179281769320369, + "step": 3408 + }, + { + "ce_ib": 2.5257251262664795, + "ce_orig": 0.5402239561080933, + "epoch": 0.9800848371557984, + "kl_loss": 0.03709716349840164, + "loss_ib": 0.0006235441542230546, + "step": 3408 + }, + { + "ce_ib": 3.609818458557129, + "ce_orig": 0.9034485220909119, + "epoch": 0.9800848371557984, + "kl_loss": 0.04773581773042679, + "loss_ib": 0.0008383399690501392, + "step": 3408 + }, + { + "ce_ib": 5.104525089263916, + "ce_orig": 1.538753628730774, + "epoch": 0.980372420734776, + "kl_loss": 0.04519902914762497, + "loss_ib": 0.0009624427184462547, + "step": 3409 + }, + { + "ce_ib": 3.7493033409118652, + "ce_orig": 1.1129980087280273, + "epoch": 0.980372420734776, + "kl_loss": 0.04620293155312538, + "loss_ib": 0.0008369596325792372, + "step": 3409 + }, + { + "ce_ib": 5.739827632904053, + "ce_orig": 1.5960121154785156, + "epoch": 0.980372420734776, + "kl_loss": 0.051887620240449905, + "loss_ib": 0.0010928588453680277, + "step": 3409 + }, + { + "ce_ib": 1.3854008913040161, + "ce_orig": 0.2695290446281433, + "epoch": 0.980372420734776, + "kl_loss": 0.1215955913066864, + "loss_ib": 0.001354495994746685, + "step": 3409 + }, + { + "epoch": 0.9806600043137537, + "grad_norm": 0.11622066050767899, + "learning_rate": 3.935353486739459e-05, + "loss": 0.811, + "step": 3410 + }, + { + "ce_ib": 2.893108606338501, + "ce_orig": 0.58641517162323, + "epoch": 0.9806600043137537, + "kl_loss": 0.06327028572559357, + "loss_ib": 0.0009220136562362313, + "step": 3410 + }, + { + "ce_ib": 5.124874114990234, + "ce_orig": 0.8214781284332275, + "epoch": 0.9806600043137537, + "kl_loss": 0.0790015459060669, + "loss_ib": 0.001302502816542983, + "step": 3410 + }, + { + "ce_ib": 4.367970943450928, + "ce_orig": 0.9674836993217468, + "epoch": 0.9806600043137537, + "kl_loss": 0.08242417871952057, + "loss_ib": 0.0012610387057065964, + "step": 3410 + }, + { + "ce_ib": 3.71256947517395, + "ce_orig": 0.8088080883026123, + "epoch": 0.9806600043137537, + "kl_loss": 0.056849658489227295, + "loss_ib": 0.000939753488637507, + "step": 3410 + }, + { + "ce_ib": 3.83404541015625, + "ce_orig": 0.9434438943862915, + "epoch": 0.9809475878927313, + "kl_loss": 0.046707406640052795, + "loss_ib": 0.0008504785364493728, + "step": 3411 + }, + { + "ce_ib": 2.75834584236145, + "ce_orig": 0.3686833679676056, + "epoch": 0.9809475878927313, + "kl_loss": 0.07439719140529633, + "loss_ib": 0.0010198064846917987, + "step": 3411 + }, + { + "ce_ib": 2.5420117378234863, + "ce_orig": 0.7895163893699646, + "epoch": 0.9809475878927313, + "kl_loss": 0.05984421446919441, + "loss_ib": 0.0008526432793587446, + "step": 3411 + }, + { + "ce_ib": 2.866010904312134, + "ce_orig": 0.7906258702278137, + "epoch": 0.9809475878927313, + "kl_loss": 0.03968210518360138, + "loss_ib": 0.0006834221421740949, + "step": 3411 + }, + { + "ce_ib": 3.662569522857666, + "ce_orig": 0.5504709482192993, + "epoch": 0.9812351714717089, + "kl_loss": 0.07833759486675262, + "loss_ib": 0.0011496328515931964, + "step": 3412 + }, + { + "ce_ib": 3.596046209335327, + "ce_orig": 0.7804595828056335, + "epoch": 0.9812351714717089, + "kl_loss": 0.043503716588020325, + "loss_ib": 0.0007946417899802327, + "step": 3412 + }, + { + "ce_ib": 3.0477256774902344, + "ce_orig": 0.6912084221839905, + "epoch": 0.9812351714717089, + "kl_loss": 0.05307678133249283, + "loss_ib": 0.0008355403551831841, + "step": 3412 + }, + { + "ce_ib": 1.6923394203186035, + "ce_orig": 0.36845818161964417, + "epoch": 0.9812351714717089, + "kl_loss": 0.10795637965202332, + "loss_ib": 0.001248797751031816, + "step": 3412 + }, + { + "ce_ib": 3.6533775329589844, + "ce_orig": 0.9199095368385315, + "epoch": 0.9815227550506866, + "kl_loss": 0.03801426291465759, + "loss_ib": 0.00074548035627231, + "step": 3413 + }, + { + "ce_ib": 4.6588311195373535, + "ce_orig": 0.7217375040054321, + "epoch": 0.9815227550506866, + "kl_loss": 0.07210160791873932, + "loss_ib": 0.0011868991423398256, + "step": 3413 + }, + { + "ce_ib": 2.1884853839874268, + "ce_orig": 0.4975893795490265, + "epoch": 0.9815227550506866, + "kl_loss": 0.03459617868065834, + "loss_ib": 0.0005648102960549295, + "step": 3413 + }, + { + "ce_ib": 3.248147964477539, + "ce_orig": 0.5835769772529602, + "epoch": 0.9815227550506866, + "kl_loss": 0.05765879154205322, + "loss_ib": 0.000901402672752738, + "step": 3413 + }, + { + "ce_ib": 2.7867836952209473, + "ce_orig": 0.707822322845459, + "epoch": 0.9818103386296643, + "kl_loss": 0.029751006513834, + "loss_ib": 0.0005761883803643286, + "step": 3414 + }, + { + "ce_ib": 3.1237475872039795, + "ce_orig": 0.8491817712783813, + "epoch": 0.9818103386296643, + "kl_loss": 0.027817467227578163, + "loss_ib": 0.0005905493744648993, + "step": 3414 + }, + { + "ce_ib": 4.662054061889648, + "ce_orig": 1.127539873123169, + "epoch": 0.9818103386296643, + "kl_loss": 0.057929255068302155, + "loss_ib": 0.0010454979492351413, + "step": 3414 + }, + { + "ce_ib": 3.374859094619751, + "ce_orig": 1.0061651468276978, + "epoch": 0.9818103386296643, + "kl_loss": 0.04872145131230354, + "loss_ib": 0.00082470040069893, + "step": 3414 + }, + { + "epoch": 0.9820979222086419, + "grad_norm": 0.09429129958152771, + "learning_rate": 3.932174636490924e-05, + "loss": 0.8078, + "step": 3415 + }, + { + "ce_ib": 2.7404356002807617, + "ce_orig": 0.5420524477958679, + "epoch": 0.9820979222086419, + "kl_loss": 0.032064616680145264, + "loss_ib": 0.0005946896853856742, + "step": 3415 + }, + { + "ce_ib": 2.2213377952575684, + "ce_orig": 0.4873940348625183, + "epoch": 0.9820979222086419, + "kl_loss": 0.03619909659028053, + "loss_ib": 0.0005841247038915753, + "step": 3415 + }, + { + "ce_ib": 3.9498045444488525, + "ce_orig": 1.126607060432434, + "epoch": 0.9820979222086419, + "kl_loss": 0.047605715692043304, + "loss_ib": 0.0008710375986993313, + "step": 3415 + }, + { + "ce_ib": 2.5502398014068604, + "ce_orig": 0.6420960426330566, + "epoch": 0.9820979222086419, + "kl_loss": 0.04537252336740494, + "loss_ib": 0.0007087492267601192, + "step": 3415 + }, + { + "ce_ib": 5.744671821594238, + "ce_orig": 1.4978188276290894, + "epoch": 0.9823855057876195, + "kl_loss": 0.10068126022815704, + "loss_ib": 0.00158127979375422, + "step": 3416 + }, + { + "ce_ib": 4.035065174102783, + "ce_orig": 0.8775240182876587, + "epoch": 0.9823855057876195, + "kl_loss": 0.03704428672790527, + "loss_ib": 0.0007739493157714605, + "step": 3416 + }, + { + "ce_ib": 3.9445180892944336, + "ce_orig": 1.1564276218414307, + "epoch": 0.9823855057876195, + "kl_loss": 0.07207819819450378, + "loss_ib": 0.0011152337538078427, + "step": 3416 + }, + { + "ce_ib": 3.5835795402526855, + "ce_orig": 0.80580073595047, + "epoch": 0.9823855057876195, + "kl_loss": 0.034236256033182144, + "loss_ib": 0.0007007205276750028, + "step": 3416 + }, + { + "ce_ib": 4.270029544830322, + "ce_orig": 1.087890386581421, + "epoch": 0.9826730893665971, + "kl_loss": 0.043809160590171814, + "loss_ib": 0.0008650944801047444, + "step": 3417 + }, + { + "ce_ib": 2.1943299770355225, + "ce_orig": 0.600295901298523, + "epoch": 0.9826730893665971, + "kl_loss": 0.04569563269615173, + "loss_ib": 0.000676389317959547, + "step": 3417 + }, + { + "ce_ib": 2.770430326461792, + "ce_orig": 0.8739529848098755, + "epoch": 0.9826730893665971, + "kl_loss": 0.04645788297057152, + "loss_ib": 0.0007416218286380172, + "step": 3417 + }, + { + "ce_ib": 3.265794515609741, + "ce_orig": 0.6529867053031921, + "epoch": 0.9826730893665971, + "kl_loss": 0.030858900398015976, + "loss_ib": 0.0006351683987304568, + "step": 3417 + }, + { + "ce_ib": 6.087667465209961, + "ce_orig": 1.6721917390823364, + "epoch": 0.9829606729455748, + "kl_loss": 0.19553926587104797, + "loss_ib": 0.002564159221947193, + "step": 3418 + }, + { + "ce_ib": 3.787785291671753, + "ce_orig": 0.5474222302436829, + "epoch": 0.9829606729455748, + "kl_loss": 0.07519771158695221, + "loss_ib": 0.0011307556414976716, + "step": 3418 + }, + { + "ce_ib": 2.722478151321411, + "ce_orig": 0.7371563911437988, + "epoch": 0.9829606729455748, + "kl_loss": 0.03365577012300491, + "loss_ib": 0.0006088054506108165, + "step": 3418 + }, + { + "ce_ib": 4.584744930267334, + "ce_orig": 1.164749026298523, + "epoch": 0.9829606729455748, + "kl_loss": 0.0393453985452652, + "loss_ib": 0.0008519284310750663, + "step": 3418 + }, + { + "ce_ib": 3.253147840499878, + "ce_orig": 0.825004518032074, + "epoch": 0.9832482565245525, + "kl_loss": 0.04100780934095383, + "loss_ib": 0.0007353928522206843, + "step": 3419 + }, + { + "ce_ib": 5.444496154785156, + "ce_orig": 1.375298261642456, + "epoch": 0.9832482565245525, + "kl_loss": 0.058414526283741, + "loss_ib": 0.0011285948567092419, + "step": 3419 + }, + { + "ce_ib": 4.236307144165039, + "ce_orig": 1.1394888162612915, + "epoch": 0.9832482565245525, + "kl_loss": 0.11670450121164322, + "loss_ib": 0.001590675674378872, + "step": 3419 + }, + { + "ce_ib": 2.9098737239837646, + "ce_orig": 0.6856614947319031, + "epoch": 0.9832482565245525, + "kl_loss": 0.05850968509912491, + "loss_ib": 0.0008760842029005289, + "step": 3419 + }, + { + "epoch": 0.9835358401035301, + "grad_norm": 0.09456254541873932, + "learning_rate": 3.928992335801274e-05, + "loss": 0.924, + "step": 3420 + }, + { + "ce_ib": 4.988696575164795, + "ce_orig": 1.0984859466552734, + "epoch": 0.9835358401035301, + "kl_loss": 0.05324678495526314, + "loss_ib": 0.0010313375387340784, + "step": 3420 + }, + { + "ce_ib": 3.5616891384124756, + "ce_orig": 0.9819644689559937, + "epoch": 0.9835358401035301, + "kl_loss": 0.03205649554729462, + "loss_ib": 0.0006767338491044939, + "step": 3420 + }, + { + "ce_ib": 4.546084880828857, + "ce_orig": 1.113185167312622, + "epoch": 0.9835358401035301, + "kl_loss": 0.05867822468280792, + "loss_ib": 0.0010413907002657652, + "step": 3420 + }, + { + "ce_ib": 3.4951608180999756, + "ce_orig": 0.8271716833114624, + "epoch": 0.9835358401035301, + "kl_loss": 0.04364980384707451, + "loss_ib": 0.0007860141340643167, + "step": 3420 + }, + { + "ce_ib": 3.5183680057525635, + "ce_orig": 0.7413480281829834, + "epoch": 0.9838234236825077, + "kl_loss": 0.05160044878721237, + "loss_ib": 0.00086784124141559, + "step": 3421 + }, + { + "ce_ib": 4.245126247406006, + "ce_orig": 1.2950626611709595, + "epoch": 0.9838234236825077, + "kl_loss": 0.052705101668834686, + "loss_ib": 0.0009515636484138668, + "step": 3421 + }, + { + "ce_ib": 3.4105184078216553, + "ce_orig": 0.9416351318359375, + "epoch": 0.9838234236825077, + "kl_loss": 0.0363130047917366, + "loss_ib": 0.0007041818462312222, + "step": 3421 + }, + { + "ce_ib": 6.282189846038818, + "ce_orig": 1.3055906295776367, + "epoch": 0.9838234236825077, + "kl_loss": 0.06515402346849442, + "loss_ib": 0.0012797591043636203, + "step": 3421 + }, + { + "ce_ib": 2.091475486755371, + "ce_orig": 0.3109883666038513, + "epoch": 0.9841110072614854, + "kl_loss": 0.045421116054058075, + "loss_ib": 0.0006633587181568146, + "step": 3422 + }, + { + "ce_ib": 4.895898818969727, + "ce_orig": 1.2436254024505615, + "epoch": 0.9841110072614854, + "kl_loss": 0.06631450355052948, + "loss_ib": 0.001152734854258597, + "step": 3422 + }, + { + "ce_ib": 3.7550928592681885, + "ce_orig": 0.8317878842353821, + "epoch": 0.9841110072614854, + "kl_loss": 0.04114842042326927, + "loss_ib": 0.0007869934779591858, + "step": 3422 + }, + { + "ce_ib": 4.216629981994629, + "ce_orig": 0.8660778999328613, + "epoch": 0.9841110072614854, + "kl_loss": 0.053204234689474106, + "loss_ib": 0.000953705282881856, + "step": 3422 + }, + { + "ce_ib": 2.356192111968994, + "ce_orig": 0.2930636405944824, + "epoch": 0.984398590840463, + "kl_loss": 0.05603047087788582, + "loss_ib": 0.0007959238719195127, + "step": 3423 + }, + { + "ce_ib": 3.392427444458008, + "ce_orig": 0.8753440380096436, + "epoch": 0.984398590840463, + "kl_loss": 0.062163691967725754, + "loss_ib": 0.0009608796099200845, + "step": 3423 + }, + { + "ce_ib": 3.7692055702209473, + "ce_orig": 0.44614988565444946, + "epoch": 0.984398590840463, + "kl_loss": 0.044463180005550385, + "loss_ib": 0.000821552355773747, + "step": 3423 + }, + { + "ce_ib": 3.3417680263519287, + "ce_orig": 0.4606160819530487, + "epoch": 0.984398590840463, + "kl_loss": 0.09305062890052795, + "loss_ib": 0.0012646829709410667, + "step": 3423 + }, + { + "ce_ib": 4.351094722747803, + "ce_orig": 1.3032828569412231, + "epoch": 0.9846861744194406, + "kl_loss": 0.0584663487970829, + "loss_ib": 0.0010197729570791125, + "step": 3424 + }, + { + "ce_ib": 3.492950916290283, + "ce_orig": 0.5468161702156067, + "epoch": 0.9846861744194406, + "kl_loss": 0.04881613701581955, + "loss_ib": 0.0008374564349651337, + "step": 3424 + }, + { + "ce_ib": 3.811612606048584, + "ce_orig": 1.0475687980651855, + "epoch": 0.9846861744194406, + "kl_loss": 0.0779574066400528, + "loss_ib": 0.0011607352644205093, + "step": 3424 + }, + { + "ce_ib": 6.180762767791748, + "ce_orig": 0.8750653862953186, + "epoch": 0.9846861744194406, + "kl_loss": 0.07549341022968292, + "loss_ib": 0.0013730102218687534, + "step": 3424 + }, + { + "epoch": 0.9849737579984182, + "grad_norm": 0.09162870049476624, + "learning_rate": 3.92580659233741e-05, + "loss": 0.8453, + "step": 3425 + }, + { + "ce_ib": 3.7294366359710693, + "ce_orig": 0.9720034003257751, + "epoch": 0.9849737579984182, + "kl_loss": 0.07249654829502106, + "loss_ib": 0.001097909058444202, + "step": 3425 + }, + { + "ce_ib": 4.903485298156738, + "ce_orig": 1.266768455505371, + "epoch": 0.9849737579984182, + "kl_loss": 0.053871989250183105, + "loss_ib": 0.0010290683712810278, + "step": 3425 + }, + { + "ce_ib": 5.0146331787109375, + "ce_orig": 1.3153990507125854, + "epoch": 0.9849737579984182, + "kl_loss": 0.0704491138458252, + "loss_ib": 0.0012059543514624238, + "step": 3425 + }, + { + "ce_ib": 2.3359742164611816, + "ce_orig": 0.5247592329978943, + "epoch": 0.9849737579984182, + "kl_loss": 0.07304543256759644, + "loss_ib": 0.0009640517528168857, + "step": 3425 + }, + { + "ce_ib": 6.408204078674316, + "ce_orig": 1.928930640220642, + "epoch": 0.985261341577396, + "kl_loss": 0.0691898912191391, + "loss_ib": 0.0013327193446457386, + "step": 3426 + }, + { + "ce_ib": 3.803339719772339, + "ce_orig": 0.861953616142273, + "epoch": 0.985261341577396, + "kl_loss": 0.04884524643421173, + "loss_ib": 0.0008687864174135029, + "step": 3426 + }, + { + "ce_ib": 2.5035550594329834, + "ce_orig": 0.6376609802246094, + "epoch": 0.985261341577396, + "kl_loss": 0.044983502477407455, + "loss_ib": 0.0007001904887147248, + "step": 3426 + }, + { + "ce_ib": 3.6437177658081055, + "ce_orig": 0.7032880187034607, + "epoch": 0.985261341577396, + "kl_loss": 0.1112765520811081, + "loss_ib": 0.0014771372079849243, + "step": 3426 + }, + { + "ce_ib": 3.323763370513916, + "ce_orig": 0.9334019422531128, + "epoch": 0.9855489251563736, + "kl_loss": 0.07020312547683716, + "loss_ib": 0.0010344076436012983, + "step": 3427 + }, + { + "ce_ib": 3.832538604736328, + "ce_orig": 1.0161114931106567, + "epoch": 0.9855489251563736, + "kl_loss": 0.044170111417770386, + "loss_ib": 0.0008249549428001046, + "step": 3427 + }, + { + "ce_ib": 2.1767475605010986, + "ce_orig": 0.4689085781574249, + "epoch": 0.9855489251563736, + "kl_loss": 0.04826788604259491, + "loss_ib": 0.0007003535865806043, + "step": 3427 + }, + { + "ce_ib": 2.9601845741271973, + "ce_orig": 0.6213151216506958, + "epoch": 0.9855489251563736, + "kl_loss": 0.09554338455200195, + "loss_ib": 0.0012514522532001138, + "step": 3427 + }, + { + "ce_ib": 4.131033897399902, + "ce_orig": 0.7956253290176392, + "epoch": 0.9858365087353512, + "kl_loss": 0.05199294164776802, + "loss_ib": 0.0009330327739007771, + "step": 3428 + }, + { + "ce_ib": 2.5064117908477783, + "ce_orig": 0.4851904511451721, + "epoch": 0.9858365087353512, + "kl_loss": 0.05646359920501709, + "loss_ib": 0.0008152771624736488, + "step": 3428 + }, + { + "ce_ib": 4.742499351501465, + "ce_orig": 1.2302857637405396, + "epoch": 0.9858365087353512, + "kl_loss": 0.044394563883543015, + "loss_ib": 0.0009181955247186124, + "step": 3428 + }, + { + "ce_ib": 8.047045707702637, + "ce_orig": 1.8854610919952393, + "epoch": 0.9858365087353512, + "kl_loss": 0.07056616246700287, + "loss_ib": 0.0015103662153705955, + "step": 3428 + }, + { + "ce_ib": 4.795273780822754, + "ce_orig": 0.7366772890090942, + "epoch": 0.9861240923143288, + "kl_loss": 0.07816097140312195, + "loss_ib": 0.0012611370766535401, + "step": 3429 + }, + { + "ce_ib": 2.7732372283935547, + "ce_orig": 0.7814673185348511, + "epoch": 0.9861240923143288, + "kl_loss": 0.04094009846448898, + "loss_ib": 0.0006867246702313423, + "step": 3429 + }, + { + "ce_ib": 3.523151397705078, + "ce_orig": 0.9650356769561768, + "epoch": 0.9861240923143288, + "kl_loss": 0.06631806492805481, + "loss_ib": 0.0010154957417398691, + "step": 3429 + }, + { + "ce_ib": 3.612175464630127, + "ce_orig": 0.6373445391654968, + "epoch": 0.9861240923143288, + "kl_loss": 0.058754097670316696, + "loss_ib": 0.0009487585048191249, + "step": 3429 + }, + { + "epoch": 0.9864116758933065, + "grad_norm": 0.09105050563812256, + "learning_rate": 3.92261741377453e-05, + "loss": 0.8552, + "step": 3430 + }, + { + "ce_ib": 4.709369659423828, + "ce_orig": 1.0598469972610474, + "epoch": 0.9864116758933065, + "kl_loss": 0.06642981618642807, + "loss_ib": 0.0011352350702509284, + "step": 3430 + }, + { + "ce_ib": 3.560187816619873, + "ce_orig": 0.671776294708252, + "epoch": 0.9864116758933065, + "kl_loss": 0.04234986752271652, + "loss_ib": 0.0007795174024067819, + "step": 3430 + }, + { + "ce_ib": 4.203542709350586, + "ce_orig": 1.115645408630371, + "epoch": 0.9864116758933065, + "kl_loss": 0.04233662784099579, + "loss_ib": 0.0008437205688096583, + "step": 3430 + }, + { + "ce_ib": 4.311371803283691, + "ce_orig": 1.2724013328552246, + "epoch": 0.9864116758933065, + "kl_loss": 0.0304342582821846, + "loss_ib": 0.0007354796980507672, + "step": 3430 + }, + { + "ce_ib": 4.727351188659668, + "ce_orig": 1.3935604095458984, + "epoch": 0.9866992594722841, + "kl_loss": 0.06512244045734406, + "loss_ib": 0.001123959431424737, + "step": 3431 + }, + { + "ce_ib": 4.0213470458984375, + "ce_orig": 0.9495340585708618, + "epoch": 0.9866992594722841, + "kl_loss": 0.04507549852132797, + "loss_ib": 0.0008528896723873913, + "step": 3431 + }, + { + "ce_ib": 3.5492780208587646, + "ce_orig": 0.9811987280845642, + "epoch": 0.9866992594722841, + "kl_loss": 0.04147239774465561, + "loss_ib": 0.0007696517277508974, + "step": 3431 + }, + { + "ce_ib": 3.074599266052246, + "ce_orig": 0.864536702632904, + "epoch": 0.9866992594722841, + "kl_loss": 0.041498973965644836, + "loss_ib": 0.0007224496221169829, + "step": 3431 + }, + { + "ce_ib": 3.154947519302368, + "ce_orig": 0.7349036931991577, + "epoch": 0.9869868430512617, + "kl_loss": 0.0613805390894413, + "loss_ib": 0.0009293001494370401, + "step": 3432 + }, + { + "ce_ib": 3.8694992065429688, + "ce_orig": 0.8757373690605164, + "epoch": 0.9869868430512617, + "kl_loss": 0.04977969080209732, + "loss_ib": 0.0008847468416206539, + "step": 3432 + }, + { + "ce_ib": 5.644591808319092, + "ce_orig": 0.40621328353881836, + "epoch": 0.9869868430512617, + "kl_loss": 0.22473973035812378, + "loss_ib": 0.0028118565678596497, + "step": 3432 + }, + { + "ce_ib": 4.0780744552612305, + "ce_orig": 0.9947366714477539, + "epoch": 0.9869868430512617, + "kl_loss": 0.044539548456668854, + "loss_ib": 0.0008532029460184276, + "step": 3432 + }, + { + "ce_ib": 3.291077136993408, + "ce_orig": 0.7105340957641602, + "epoch": 0.9872744266302395, + "kl_loss": 0.051800042390823364, + "loss_ib": 0.0008471081382595003, + "step": 3433 + }, + { + "ce_ib": 2.9023373126983643, + "ce_orig": 0.6944644451141357, + "epoch": 0.9872744266302395, + "kl_loss": 0.08329605311155319, + "loss_ib": 0.0011231942335143685, + "step": 3433 + }, + { + "ce_ib": 4.093806266784668, + "ce_orig": 1.23722505569458, + "epoch": 0.9872744266302395, + "kl_loss": 0.03759396821260452, + "loss_ib": 0.0007853202405385673, + "step": 3433 + }, + { + "ce_ib": 3.2734439373016357, + "ce_orig": 0.9720627665519714, + "epoch": 0.9872744266302395, + "kl_loss": 0.034376081079244614, + "loss_ib": 0.0006711052265018225, + "step": 3433 + }, + { + "ce_ib": 3.8029491901397705, + "ce_orig": 0.9233096241950989, + "epoch": 0.9875620102092171, + "kl_loss": 0.043700024485588074, + "loss_ib": 0.0008172951638698578, + "step": 3434 + }, + { + "ce_ib": 2.95619797706604, + "ce_orig": 0.4798144996166229, + "epoch": 0.9875620102092171, + "kl_loss": 0.04346360266208649, + "loss_ib": 0.000730255851522088, + "step": 3434 + }, + { + "ce_ib": 3.6348421573638916, + "ce_orig": 0.8616621494293213, + "epoch": 0.9875620102092171, + "kl_loss": 0.024929504841566086, + "loss_ib": 0.000612779229413718, + "step": 3434 + }, + { + "ce_ib": 7.092808723449707, + "ce_orig": 1.6720117330551147, + "epoch": 0.9875620102092171, + "kl_loss": 0.042614370584487915, + "loss_ib": 0.0011354245943948627, + "step": 3434 + }, + { + "epoch": 0.9878495937881947, + "grad_norm": 0.10067583620548248, + "learning_rate": 3.9194248077961024e-05, + "loss": 0.8594, + "step": 3435 + }, + { + "ce_ib": 3.929966926574707, + "ce_orig": 0.7381854057312012, + "epoch": 0.9878495937881947, + "kl_loss": 0.07295821607112885, + "loss_ib": 0.0011225788621231914, + "step": 3435 + }, + { + "ce_ib": 4.30954122543335, + "ce_orig": 0.9585177302360535, + "epoch": 0.9878495937881947, + "kl_loss": 0.05805773288011551, + "loss_ib": 0.0010115314507856965, + "step": 3435 + }, + { + "ce_ib": 2.953012228012085, + "ce_orig": 0.7930155992507935, + "epoch": 0.9878495937881947, + "kl_loss": 0.030735082924365997, + "loss_ib": 0.0006026520277373493, + "step": 3435 + }, + { + "ce_ib": 3.7427875995635986, + "ce_orig": 0.8570645451545715, + "epoch": 0.9878495937881947, + "kl_loss": 0.057331010699272156, + "loss_ib": 0.0009475888218730688, + "step": 3435 + }, + { + "ce_ib": 3.925123929977417, + "ce_orig": 1.0239731073379517, + "epoch": 0.9881371773671723, + "kl_loss": 0.06407254934310913, + "loss_ib": 0.0010332379024475813, + "step": 3436 + }, + { + "ce_ib": 3.8872671127319336, + "ce_orig": 0.9069960713386536, + "epoch": 0.9881371773671723, + "kl_loss": 0.0503213107585907, + "loss_ib": 0.0008919398533180356, + "step": 3436 + }, + { + "ce_ib": 1.912251591682434, + "ce_orig": 0.4745296239852905, + "epoch": 0.9881371773671723, + "kl_loss": 0.033110346645116806, + "loss_ib": 0.0005223285988904536, + "step": 3436 + }, + { + "ce_ib": 3.911626100540161, + "ce_orig": 1.092907190322876, + "epoch": 0.9881371773671723, + "kl_loss": 0.08734211325645447, + "loss_ib": 0.0012645837850868702, + "step": 3436 + }, + { + "ce_ib": 3.791231393814087, + "ce_orig": 0.905414342880249, + "epoch": 0.98842476094615, + "kl_loss": 0.04484890401363373, + "loss_ib": 0.0008276121225208044, + "step": 3437 + }, + { + "ce_ib": 2.490657329559326, + "ce_orig": 0.710720956325531, + "epoch": 0.98842476094615, + "kl_loss": 0.021468427032232285, + "loss_ib": 0.00046374998055398464, + "step": 3437 + }, + { + "ce_ib": 4.260788440704346, + "ce_orig": 0.7033459544181824, + "epoch": 0.98842476094615, + "kl_loss": 0.03820253908634186, + "loss_ib": 0.0008081042324192822, + "step": 3437 + }, + { + "ce_ib": 2.499873399734497, + "ce_orig": 0.27996861934661865, + "epoch": 0.98842476094615, + "kl_loss": 0.05297142267227173, + "loss_ib": 0.0007797015132382512, + "step": 3437 + }, + { + "ce_ib": 4.0182085037231445, + "ce_orig": 0.8581237196922302, + "epoch": 0.9887123445251276, + "kl_loss": 0.06531090289354324, + "loss_ib": 0.0010549298021942377, + "step": 3438 + }, + { + "ce_ib": 4.872376441955566, + "ce_orig": 1.2637253999710083, + "epoch": 0.9887123445251276, + "kl_loss": 0.07020969688892365, + "loss_ib": 0.001189334550872445, + "step": 3438 + }, + { + "ce_ib": 2.4558887481689453, + "ce_orig": 0.7552817463874817, + "epoch": 0.9887123445251276, + "kl_loss": 0.03152139112353325, + "loss_ib": 0.0005608027568086982, + "step": 3438 + }, + { + "ce_ib": 3.2423133850097656, + "ce_orig": 0.44526582956314087, + "epoch": 0.9887123445251276, + "kl_loss": 0.04407679662108421, + "loss_ib": 0.0007649993058294058, + "step": 3438 + }, + { + "ce_ib": 3.887590169906616, + "ce_orig": 0.6071950793266296, + "epoch": 0.9889999281041053, + "kl_loss": 0.06489145755767822, + "loss_ib": 0.0010376735590398312, + "step": 3439 + }, + { + "ce_ib": 3.3216934204101562, + "ce_orig": 0.5003530383110046, + "epoch": 0.9889999281041053, + "kl_loss": 0.05628707632422447, + "loss_ib": 0.0008950401097536087, + "step": 3439 + }, + { + "ce_ib": 5.116591453552246, + "ce_orig": 1.5382431745529175, + "epoch": 0.9889999281041053, + "kl_loss": 0.041428014636039734, + "loss_ib": 0.0009259392390958965, + "step": 3439 + }, + { + "ce_ib": 2.496459722518921, + "ce_orig": 0.7462438344955444, + "epoch": 0.9889999281041053, + "kl_loss": 0.03733265772461891, + "loss_ib": 0.0006229725549928844, + "step": 3439 + }, + { + "epoch": 0.9892875116830829, + "grad_norm": 0.09468097984790802, + "learning_rate": 3.916228782093857e-05, + "loss": 0.8462, + "step": 3440 + }, + { + "ce_ib": 2.828047752380371, + "ce_orig": 0.7906096577644348, + "epoch": 0.9892875116830829, + "kl_loss": 0.06147558614611626, + "loss_ib": 0.0008975606178864837, + "step": 3440 + }, + { + "ce_ib": 3.5401079654693604, + "ce_orig": 0.9321709275245667, + "epoch": 0.9892875116830829, + "kl_loss": 0.0536063015460968, + "loss_ib": 0.0008900737739168108, + "step": 3440 + }, + { + "ce_ib": 3.2336206436157227, + "ce_orig": 0.7132374048233032, + "epoch": 0.9892875116830829, + "kl_loss": 0.045046616345644, + "loss_ib": 0.0007738281856290996, + "step": 3440 + }, + { + "ce_ib": 4.753568172454834, + "ce_orig": 1.1024781465530396, + "epoch": 0.9892875116830829, + "kl_loss": 0.0801345705986023, + "loss_ib": 0.0012767025036737323, + "step": 3440 + }, + { + "ce_ib": 1.767022967338562, + "ce_orig": 0.23304495215415955, + "epoch": 0.9895750952620606, + "kl_loss": 0.05063556134700775, + "loss_ib": 0.0006830578786320984, + "step": 3441 + }, + { + "ce_ib": 3.197521209716797, + "ce_orig": 0.6084330081939697, + "epoch": 0.9895750952620606, + "kl_loss": 0.040591876953840256, + "loss_ib": 0.0007256708922795951, + "step": 3441 + }, + { + "ce_ib": 3.0511090755462646, + "ce_orig": 0.5883503556251526, + "epoch": 0.9895750952620606, + "kl_loss": 0.049159251153469086, + "loss_ib": 0.0007967033889144659, + "step": 3441 + }, + { + "ce_ib": 3.083144426345825, + "ce_orig": 0.7462989687919617, + "epoch": 0.9895750952620606, + "kl_loss": 0.026355527341365814, + "loss_ib": 0.0005718696629628539, + "step": 3441 + }, + { + "ce_ib": 4.880331516265869, + "ce_orig": 1.1034767627716064, + "epoch": 0.9898626788410382, + "kl_loss": 0.06887216120958328, + "loss_ib": 0.0011767547111958265, + "step": 3442 + }, + { + "ce_ib": 5.246883869171143, + "ce_orig": 1.312873363494873, + "epoch": 0.9898626788410382, + "kl_loss": 0.031508736312389374, + "loss_ib": 0.0008397757192142308, + "step": 3442 + }, + { + "ce_ib": 3.246222496032715, + "ce_orig": 0.6260581016540527, + "epoch": 0.9898626788410382, + "kl_loss": 0.04803719371557236, + "loss_ib": 0.0008049941388890147, + "step": 3442 + }, + { + "ce_ib": 2.5490987300872803, + "ce_orig": 0.6290776133537292, + "epoch": 0.9898626788410382, + "kl_loss": 0.05237201601266861, + "loss_ib": 0.0007786299684084952, + "step": 3442 + }, + { + "ce_ib": 3.484151840209961, + "ce_orig": 0.8073164820671082, + "epoch": 0.9901502624200158, + "kl_loss": 0.04811576008796692, + "loss_ib": 0.0008295727893710136, + "step": 3443 + }, + { + "ce_ib": 4.382771968841553, + "ce_orig": 1.0428630113601685, + "epoch": 0.9901502624200158, + "kl_loss": 0.05623029172420502, + "loss_ib": 0.001000580145046115, + "step": 3443 + }, + { + "ce_ib": 4.044766902923584, + "ce_orig": 1.2350075244903564, + "epoch": 0.9901502624200158, + "kl_loss": 0.053867340087890625, + "loss_ib": 0.0009431500802747905, + "step": 3443 + }, + { + "ce_ib": 3.7394869327545166, + "ce_orig": 0.8747358322143555, + "epoch": 0.9901502624200158, + "kl_loss": 0.04009704291820526, + "loss_ib": 0.0007749190554022789, + "step": 3443 + }, + { + "ce_ib": 5.05189323425293, + "ce_orig": 1.411061406135559, + "epoch": 0.9904378459989934, + "kl_loss": 0.0737898126244545, + "loss_ib": 0.0012430873466655612, + "step": 3444 + }, + { + "ce_ib": 4.462912082672119, + "ce_orig": 0.920249342918396, + "epoch": 0.9904378459989934, + "kl_loss": 0.0444297194480896, + "loss_ib": 0.0008905883878469467, + "step": 3444 + }, + { + "ce_ib": 7.42014217376709, + "ce_orig": 1.992401123046875, + "epoch": 0.9904378459989934, + "kl_loss": 0.04747633635997772, + "loss_ib": 0.001216777483932674, + "step": 3444 + }, + { + "ce_ib": 2.6822385787963867, + "ce_orig": 0.6728799939155579, + "epoch": 0.9904378459989934, + "kl_loss": 0.04701351374387741, + "loss_ib": 0.0007383589399978518, + "step": 3444 + }, + { + "epoch": 0.990725429577971, + "grad_norm": 0.09810478985309601, + "learning_rate": 3.913029344367763e-05, + "loss": 0.8663, + "step": 3445 + }, + { + "ce_ib": 3.5804824829101562, + "ce_orig": 0.895319402217865, + "epoch": 0.990725429577971, + "kl_loss": 0.03604265674948692, + "loss_ib": 0.000718474795576185, + "step": 3445 + }, + { + "ce_ib": 2.303917407989502, + "ce_orig": 0.5378848314285278, + "epoch": 0.990725429577971, + "kl_loss": 0.06062307953834534, + "loss_ib": 0.0008366225520148873, + "step": 3445 + }, + { + "ce_ib": 4.856667995452881, + "ce_orig": 1.3812352418899536, + "epoch": 0.990725429577971, + "kl_loss": 0.06683209538459778, + "loss_ib": 0.0011539877159520984, + "step": 3445 + }, + { + "ce_ib": 5.542900562286377, + "ce_orig": 1.2340792417526245, + "epoch": 0.990725429577971, + "kl_loss": 0.051991067826747894, + "loss_ib": 0.0010742007289081812, + "step": 3445 + }, + { + "ce_ib": 3.088143825531006, + "ce_orig": 0.8538684248924255, + "epoch": 0.9910130131569488, + "kl_loss": 0.034372128546237946, + "loss_ib": 0.0006525357021018863, + "step": 3446 + }, + { + "ce_ib": 4.356167316436768, + "ce_orig": 0.8966936469078064, + "epoch": 0.9910130131569488, + "kl_loss": 0.07803063094615936, + "loss_ib": 0.0012159229954704642, + "step": 3446 + }, + { + "ce_ib": 1.857974886894226, + "ce_orig": 0.5274730324745178, + "epoch": 0.9910130131569488, + "kl_loss": 0.028964798897504807, + "loss_ib": 0.00047544544213451445, + "step": 3446 + }, + { + "ce_ib": 4.1628737449646, + "ce_orig": 1.3896363973617554, + "epoch": 0.9910130131569488, + "kl_loss": 0.07201114296913147, + "loss_ib": 0.0011363987578079104, + "step": 3446 + }, + { + "ce_ib": 1.8676449060440063, + "ce_orig": 0.38615328073501587, + "epoch": 0.9913005967359264, + "kl_loss": 0.029285583645105362, + "loss_ib": 0.00047962029930204153, + "step": 3447 + }, + { + "ce_ib": 3.591600179672241, + "ce_orig": 0.6133151054382324, + "epoch": 0.9913005967359264, + "kl_loss": 0.03319130092859268, + "loss_ib": 0.000691073015332222, + "step": 3447 + }, + { + "ce_ib": 4.673821449279785, + "ce_orig": 0.9161855578422546, + "epoch": 0.9913005967359264, + "kl_loss": 0.05968976020812988, + "loss_ib": 0.001064279698766768, + "step": 3447 + }, + { + "ce_ib": 2.5551836490631104, + "ce_orig": 0.6463584899902344, + "epoch": 0.9913005967359264, + "kl_loss": 0.07571309804916382, + "loss_ib": 0.0010126492707058787, + "step": 3447 + }, + { + "ce_ib": 1.8621231317520142, + "ce_orig": 0.45262980461120605, + "epoch": 0.991588180314904, + "kl_loss": 0.04513310268521309, + "loss_ib": 0.0006375433295033872, + "step": 3448 + }, + { + "ce_ib": 3.183131217956543, + "ce_orig": 0.6669974327087402, + "epoch": 0.991588180314904, + "kl_loss": 0.03869766369462013, + "loss_ib": 0.0007052897708490491, + "step": 3448 + }, + { + "ce_ib": 3.85578989982605, + "ce_orig": 0.954896092414856, + "epoch": 0.991588180314904, + "kl_loss": 0.03884454444050789, + "loss_ib": 0.0007740244036540389, + "step": 3448 + }, + { + "ce_ib": 5.848783493041992, + "ce_orig": 1.4860525131225586, + "epoch": 0.991588180314904, + "kl_loss": 0.05292316526174545, + "loss_ib": 0.0011141099967062473, + "step": 3448 + }, + { + "ce_ib": 4.834281921386719, + "ce_orig": 1.3096256256103516, + "epoch": 0.9918757638938817, + "kl_loss": 0.07472777366638184, + "loss_ib": 0.0012307058786973357, + "step": 3449 + }, + { + "ce_ib": 3.34675931930542, + "ce_orig": 0.9020187854766846, + "epoch": 0.9918757638938817, + "kl_loss": 0.07786903530359268, + "loss_ib": 0.001113366219215095, + "step": 3449 + }, + { + "ce_ib": 5.176205635070801, + "ce_orig": 1.4119946956634521, + "epoch": 0.9918757638938817, + "kl_loss": 0.04092024266719818, + "loss_ib": 0.0009268230060115457, + "step": 3449 + }, + { + "ce_ib": 1.804386019706726, + "ce_orig": 0.4502350687980652, + "epoch": 0.9918757638938817, + "kl_loss": 0.045783765614032745, + "loss_ib": 0.0006382762221619487, + "step": 3449 + }, + { + "epoch": 0.9921633474728593, + "grad_norm": 0.0901523008942604, + "learning_rate": 3.909826502326007e-05, + "loss": 0.8581, + "step": 3450 + }, + { + "ce_ib": 5.410490036010742, + "ce_orig": 0.9019655585289001, + "epoch": 0.9921633474728593, + "kl_loss": 0.0641641840338707, + "loss_ib": 0.0011826908448711038, + "step": 3450 + }, + { + "ce_ib": 2.720749616622925, + "ce_orig": 0.8362650275230408, + "epoch": 0.9921633474728593, + "kl_loss": 0.02852609194815159, + "loss_ib": 0.0005573358503170311, + "step": 3450 + }, + { + "ce_ib": 6.435689926147461, + "ce_orig": 1.5958024263381958, + "epoch": 0.9921633474728593, + "kl_loss": 0.03994029387831688, + "loss_ib": 0.0010429718531668186, + "step": 3450 + }, + { + "ce_ib": 4.040875434875488, + "ce_orig": 0.988314151763916, + "epoch": 0.9921633474728593, + "kl_loss": 0.058672018349170685, + "loss_ib": 0.0009908076608553529, + "step": 3450 + }, + { + "ce_ib": 2.1830766201019287, + "ce_orig": 0.5380333065986633, + "epoch": 0.9924509310518369, + "kl_loss": 0.08193057775497437, + "loss_ib": 0.0010376133723184466, + "step": 3451 + }, + { + "ce_ib": 3.3604519367218018, + "ce_orig": 0.8828903436660767, + "epoch": 0.9924509310518369, + "kl_loss": 0.035476479679346085, + "loss_ib": 0.0006908099167048931, + "step": 3451 + }, + { + "ce_ib": 2.3102197647094727, + "ce_orig": 0.45661938190460205, + "epoch": 0.9924509310518369, + "kl_loss": 0.043411992490291595, + "loss_ib": 0.0006651419098488986, + "step": 3451 + }, + { + "ce_ib": 1.8182941675186157, + "ce_orig": 0.5525387525558472, + "epoch": 0.9924509310518369, + "kl_loss": 0.02944095805287361, + "loss_ib": 0.00047623898717574775, + "step": 3451 + }, + { + "ce_ib": 3.437746286392212, + "ce_orig": 1.0252633094787598, + "epoch": 0.9927385146308145, + "kl_loss": 0.036957044154405594, + "loss_ib": 0.0007133450708352029, + "step": 3452 + }, + { + "ce_ib": 3.236767292022705, + "ce_orig": 0.7553039193153381, + "epoch": 0.9927385146308145, + "kl_loss": 0.06360477209091187, + "loss_ib": 0.0009597244788892567, + "step": 3452 + }, + { + "ce_ib": 2.9259660243988037, + "ce_orig": 0.6152380108833313, + "epoch": 0.9927385146308145, + "kl_loss": 0.05680835247039795, + "loss_ib": 0.0008606801275163889, + "step": 3452 + }, + { + "ce_ib": 3.320582151412964, + "ce_orig": 0.8609183430671692, + "epoch": 0.9927385146308145, + "kl_loss": 0.053610868752002716, + "loss_ib": 0.0008681669132784009, + "step": 3452 + }, + { + "ce_ib": 4.643490791320801, + "ce_orig": 1.086855173110962, + "epoch": 0.9930260982097923, + "kl_loss": 0.050058089196681976, + "loss_ib": 0.0009649298735894263, + "step": 3453 + }, + { + "ce_ib": 3.048644542694092, + "ce_orig": 0.8532254099845886, + "epoch": 0.9930260982097923, + "kl_loss": 0.04563391953706741, + "loss_ib": 0.0007612035842612386, + "step": 3453 + }, + { + "ce_ib": 4.405314922332764, + "ce_orig": 0.8490936160087585, + "epoch": 0.9930260982097923, + "kl_loss": 0.07260128855705261, + "loss_ib": 0.0011665443889796734, + "step": 3453 + }, + { + "ce_ib": 3.5886950492858887, + "ce_orig": 0.9884226322174072, + "epoch": 0.9930260982097923, + "kl_loss": 0.04368872940540314, + "loss_ib": 0.0007957567577250302, + "step": 3453 + }, + { + "ce_ib": 3.422978639602661, + "ce_orig": 0.8783181309700012, + "epoch": 0.9933136817887699, + "kl_loss": 0.04747480899095535, + "loss_ib": 0.0008170459186658263, + "step": 3454 + }, + { + "ce_ib": 3.7365972995758057, + "ce_orig": 0.9965710043907166, + "epoch": 0.9933136817887699, + "kl_loss": 0.040958479046821594, + "loss_ib": 0.0007832444971427321, + "step": 3454 + }, + { + "ce_ib": 3.4963181018829346, + "ce_orig": 0.8444181680679321, + "epoch": 0.9933136817887699, + "kl_loss": 0.049484286457300186, + "loss_ib": 0.0008444746490567923, + "step": 3454 + }, + { + "ce_ib": 2.4948034286499023, + "ce_orig": 0.47328728437423706, + "epoch": 0.9933136817887699, + "kl_loss": 0.07008591294288635, + "loss_ib": 0.0009503394248895347, + "step": 3454 + }, + { + "epoch": 0.9936012653677475, + "grad_norm": 0.08772442489862442, + "learning_rate": 3.906620263684979e-05, + "loss": 0.8596, + "step": 3455 + }, + { + "ce_ib": 1.6756864786148071, + "ce_orig": 0.4439273476600647, + "epoch": 0.9936012653677475, + "kl_loss": 0.0431634820997715, + "loss_ib": 0.00059920345665887, + "step": 3455 + }, + { + "ce_ib": 4.345160484313965, + "ce_orig": 1.3322697877883911, + "epoch": 0.9936012653677475, + "kl_loss": 0.07830852270126343, + "loss_ib": 0.0012176012387499213, + "step": 3455 + }, + { + "ce_ib": 4.659205436706543, + "ce_orig": 1.3788354396820068, + "epoch": 0.9936012653677475, + "kl_loss": 0.036883726716041565, + "loss_ib": 0.0008347578113898635, + "step": 3455 + }, + { + "ce_ib": 5.049861431121826, + "ce_orig": 1.291413426399231, + "epoch": 0.9936012653677475, + "kl_loss": 0.055611636489629745, + "loss_ib": 0.0010611023753881454, + "step": 3455 + }, + { + "ce_ib": 2.6232917308807373, + "ce_orig": 0.762520432472229, + "epoch": 0.9938888489467251, + "kl_loss": 0.03649001941084862, + "loss_ib": 0.0006272293394431472, + "step": 3456 + }, + { + "ce_ib": 2.2621147632598877, + "ce_orig": 0.5301852822303772, + "epoch": 0.9938888489467251, + "kl_loss": 0.055742859840393066, + "loss_ib": 0.0007836400764063001, + "step": 3456 + }, + { + "ce_ib": 2.738340139389038, + "ce_orig": 0.5304576754570007, + "epoch": 0.9938888489467251, + "kl_loss": 0.062160130590200424, + "loss_ib": 0.0008954352815635502, + "step": 3456 + }, + { + "ce_ib": 2.7375636100769043, + "ce_orig": 0.6412847638130188, + "epoch": 0.9938888489467251, + "kl_loss": 0.04484260827302933, + "loss_ib": 0.0007221823907457292, + "step": 3456 + }, + { + "ce_ib": 5.556692600250244, + "ce_orig": 1.5691579580307007, + "epoch": 0.9941764325257028, + "kl_loss": 0.043439727276563644, + "loss_ib": 0.0009900665609166026, + "step": 3457 + }, + { + "ce_ib": 4.02565336227417, + "ce_orig": 0.9721002578735352, + "epoch": 0.9941764325257028, + "kl_loss": 0.05704527348279953, + "loss_ib": 0.0009730180609039962, + "step": 3457 + }, + { + "ce_ib": 4.9383745193481445, + "ce_orig": 1.4034584760665894, + "epoch": 0.9941764325257028, + "kl_loss": 0.05957803875207901, + "loss_ib": 0.0010896178428083658, + "step": 3457 + }, + { + "ce_ib": 3.123121976852417, + "ce_orig": 0.5509228706359863, + "epoch": 0.9941764325257028, + "kl_loss": 0.0832476019859314, + "loss_ib": 0.0011447881115600467, + "step": 3457 + }, + { + "ce_ib": 4.849219799041748, + "ce_orig": 1.3060787916183472, + "epoch": 0.9944640161046804, + "kl_loss": 0.06657997518777847, + "loss_ib": 0.0011507216840982437, + "step": 3458 + }, + { + "ce_ib": 2.6626288890838623, + "ce_orig": 0.8333879113197327, + "epoch": 0.9944640161046804, + "kl_loss": 0.03073814883828163, + "loss_ib": 0.0005736443563364446, + "step": 3458 + }, + { + "ce_ib": 2.885624647140503, + "ce_orig": 0.5810627937316895, + "epoch": 0.9944640161046804, + "kl_loss": 0.0273594968020916, + "loss_ib": 0.0005621574237011373, + "step": 3458 + }, + { + "ce_ib": 4.464109897613525, + "ce_orig": 0.9117350578308105, + "epoch": 0.9944640161046804, + "kl_loss": 0.048911649733781815, + "loss_ib": 0.0009355274378322065, + "step": 3458 + }, + { + "ce_ib": 3.376535654067993, + "ce_orig": 0.43389424681663513, + "epoch": 0.9947515996836581, + "kl_loss": 0.050598714500665665, + "loss_ib": 0.000843640707898885, + "step": 3459 + }, + { + "ce_ib": 3.8142948150634766, + "ce_orig": 1.0237165689468384, + "epoch": 0.9947515996836581, + "kl_loss": 0.04889947921037674, + "loss_ib": 0.0008704242063686252, + "step": 3459 + }, + { + "ce_ib": 2.0450825691223145, + "ce_orig": 0.5880318880081177, + "epoch": 0.9947515996836581, + "kl_loss": 0.04308794438838959, + "loss_ib": 0.0006353876669891179, + "step": 3459 + }, + { + "ce_ib": 3.9194841384887695, + "ce_orig": 1.1675682067871094, + "epoch": 0.9947515996836581, + "kl_loss": 0.06648367643356323, + "loss_ib": 0.0010567851131781936, + "step": 3459 + }, + { + "epoch": 0.9950391832626357, + "grad_norm": 0.08963147550821304, + "learning_rate": 3.9034106361692524e-05, + "loss": 0.9084, + "step": 3460 + }, + { + "ce_ib": 3.707965612411499, + "ce_orig": 0.9606744647026062, + "epoch": 0.9950391832626357, + "kl_loss": 0.06846965849399567, + "loss_ib": 0.0010554931359365582, + "step": 3460 + }, + { + "ce_ib": 2.810591459274292, + "ce_orig": 0.47396987676620483, + "epoch": 0.9950391832626357, + "kl_loss": 0.06611964106559753, + "loss_ib": 0.0009422555449418724, + "step": 3460 + }, + { + "ce_ib": 3.6269736289978027, + "ce_orig": 1.0279651880264282, + "epoch": 0.9950391832626357, + "kl_loss": 0.033323440700769424, + "loss_ib": 0.0006959317252039909, + "step": 3460 + }, + { + "ce_ib": 2.5393474102020264, + "ce_orig": 0.6658011674880981, + "epoch": 0.9950391832626357, + "kl_loss": 0.04798753559589386, + "loss_ib": 0.0007338100695051253, + "step": 3460 + }, + { + "ce_ib": 3.7332332134246826, + "ce_orig": 0.9853635430335999, + "epoch": 0.9953267668416134, + "kl_loss": 0.035313937813043594, + "loss_ib": 0.0007264626328833401, + "step": 3461 + }, + { + "ce_ib": 4.529253959655762, + "ce_orig": 1.2174381017684937, + "epoch": 0.9953267668416134, + "kl_loss": 0.05432768911123276, + "loss_ib": 0.0009962022304534912, + "step": 3461 + }, + { + "ce_ib": 3.1022610664367676, + "ce_orig": 0.7883530259132385, + "epoch": 0.9953267668416134, + "kl_loss": 0.06949876248836517, + "loss_ib": 0.0010052137076854706, + "step": 3461 + }, + { + "ce_ib": 4.934890270233154, + "ce_orig": 1.4393254518508911, + "epoch": 0.9953267668416134, + "kl_loss": 0.05800695717334747, + "loss_ib": 0.0010735585819929838, + "step": 3461 + }, + { + "ce_ib": 2.6559536457061768, + "ce_orig": 0.5093846321105957, + "epoch": 0.995614350420591, + "kl_loss": 0.041361965239048004, + "loss_ib": 0.0006792150088585913, + "step": 3462 + }, + { + "ce_ib": 2.7522029876708984, + "ce_orig": 0.7478729486465454, + "epoch": 0.995614350420591, + "kl_loss": 0.038657113909721375, + "loss_ib": 0.0006617914186790586, + "step": 3462 + }, + { + "ce_ib": 2.52516508102417, + "ce_orig": 0.6546809673309326, + "epoch": 0.995614350420591, + "kl_loss": 0.03452007472515106, + "loss_ib": 0.0005977172404527664, + "step": 3462 + }, + { + "ce_ib": 3.6051855087280273, + "ce_orig": 0.9843088388442993, + "epoch": 0.995614350420591, + "kl_loss": 0.0691034197807312, + "loss_ib": 0.0010515527101233602, + "step": 3462 + }, + { + "ce_ib": 3.5773255825042725, + "ce_orig": 0.9991762042045593, + "epoch": 0.9959019339995686, + "kl_loss": 0.04599941521883011, + "loss_ib": 0.0008177266572602093, + "step": 3463 + }, + { + "ce_ib": 2.7009387016296387, + "ce_orig": 0.7166500091552734, + "epoch": 0.9959019339995686, + "kl_loss": 0.050448063760995865, + "loss_ib": 0.0007745744660496712, + "step": 3463 + }, + { + "ce_ib": 3.8591675758361816, + "ce_orig": 0.8044015765190125, + "epoch": 0.9959019339995686, + "kl_loss": 0.049357179552316666, + "loss_ib": 0.0008794885943643749, + "step": 3463 + }, + { + "ce_ib": 3.0666370391845703, + "ce_orig": 0.5440292954444885, + "epoch": 0.9959019339995686, + "kl_loss": 0.08147101104259491, + "loss_ib": 0.001121373730711639, + "step": 3463 + }, + { + "ce_ib": 3.11042857170105, + "ce_orig": 0.5294910073280334, + "epoch": 0.9961895175785462, + "kl_loss": 0.031036505475640297, + "loss_ib": 0.0006214079330675304, + "step": 3464 + }, + { + "ce_ib": 3.065765857696533, + "ce_orig": 0.6090077757835388, + "epoch": 0.9961895175785462, + "kl_loss": 0.06883904337882996, + "loss_ib": 0.0009949669474735856, + "step": 3464 + }, + { + "ce_ib": 4.309707164764404, + "ce_orig": 0.9806931018829346, + "epoch": 0.9961895175785462, + "kl_loss": 0.0571918711066246, + "loss_ib": 0.001002889359369874, + "step": 3464 + }, + { + "ce_ib": 5.051384449005127, + "ce_orig": 1.3726757764816284, + "epoch": 0.9961895175785462, + "kl_loss": 0.04143910855054855, + "loss_ib": 0.0009195294696837664, + "step": 3464 + }, + { + "epoch": 0.9964771011575239, + "grad_norm": 0.09849798679351807, + "learning_rate": 3.900197627511564e-05, + "loss": 0.8194, + "step": 3465 + }, + { + "ce_ib": 5.515732288360596, + "ce_orig": 0.7748374938964844, + "epoch": 0.9964771011575239, + "kl_loss": 0.10468035936355591, + "loss_ib": 0.0015983767807483673, + "step": 3465 + }, + { + "ce_ib": 4.5540266036987305, + "ce_orig": 1.2420388460159302, + "epoch": 0.9964771011575239, + "kl_loss": 0.04276282712817192, + "loss_ib": 0.0008830308797769248, + "step": 3465 + }, + { + "ce_ib": 6.7068634033203125, + "ce_orig": 1.8978383541107178, + "epoch": 0.9964771011575239, + "kl_loss": 0.04906290024518967, + "loss_ib": 0.0011613152455538511, + "step": 3465 + }, + { + "ce_ib": 2.4944825172424316, + "ce_orig": 0.21690130233764648, + "epoch": 0.9964771011575239, + "kl_loss": 0.05285106599330902, + "loss_ib": 0.0007779588922858238, + "step": 3465 + }, + { + "ce_ib": 3.5918729305267334, + "ce_orig": 0.8715946674346924, + "epoch": 0.9967646847365016, + "kl_loss": 0.06905907392501831, + "loss_ib": 0.0010497779585421085, + "step": 3466 + }, + { + "ce_ib": 2.643371820449829, + "ce_orig": 0.7108846306800842, + "epoch": 0.9967646847365016, + "kl_loss": 0.06182108446955681, + "loss_ib": 0.0008825479890219867, + "step": 3466 + }, + { + "ce_ib": 5.612440586090088, + "ce_orig": 1.5775142908096313, + "epoch": 0.9967646847365016, + "kl_loss": 0.04256472736597061, + "loss_ib": 0.000986891333013773, + "step": 3466 + }, + { + "ce_ib": 3.0789787769317627, + "ce_orig": 0.7832356095314026, + "epoch": 0.9967646847365016, + "kl_loss": 0.03204594552516937, + "loss_ib": 0.0006283572874963284, + "step": 3466 + }, + { + "ce_ib": 3.288980484008789, + "ce_orig": 0.8154432773590088, + "epoch": 0.9970522683154792, + "kl_loss": 0.07630325853824615, + "loss_ib": 0.0010919306660071015, + "step": 3467 + }, + { + "ce_ib": 2.080437183380127, + "ce_orig": 0.5076729655265808, + "epoch": 0.9970522683154792, + "kl_loss": 0.051662690937519073, + "loss_ib": 0.0007246705936267972, + "step": 3467 + }, + { + "ce_ib": 4.2996907234191895, + "ce_orig": 0.9800199270248413, + "epoch": 0.9970522683154792, + "kl_loss": 0.07720577716827393, + "loss_ib": 0.0012020268477499485, + "step": 3467 + }, + { + "ce_ib": 3.79672908782959, + "ce_orig": 0.8922995328903198, + "epoch": 0.9970522683154792, + "kl_loss": 0.0528997965157032, + "loss_ib": 0.000908670830540359, + "step": 3467 + }, + { + "ce_ib": 2.8989601135253906, + "ce_orig": 0.7261828184127808, + "epoch": 0.9973398518944568, + "kl_loss": 0.03781363368034363, + "loss_ib": 0.0006680323276668787, + "step": 3468 + }, + { + "ce_ib": 2.012991428375244, + "ce_orig": 0.5995684862136841, + "epoch": 0.9973398518944568, + "kl_loss": 0.023391880095005035, + "loss_ib": 0.00043521789484657347, + "step": 3468 + }, + { + "ce_ib": 3.426506757736206, + "ce_orig": 0.7930960059165955, + "epoch": 0.9973398518944568, + "kl_loss": 0.03904947265982628, + "loss_ib": 0.0007331453962251544, + "step": 3468 + }, + { + "ce_ib": 3.7603771686553955, + "ce_orig": 0.9430840015411377, + "epoch": 0.9973398518944568, + "kl_loss": 0.06677491962909698, + "loss_ib": 0.001043786876834929, + "step": 3468 + }, + { + "ce_ib": 5.618847846984863, + "ce_orig": 1.5754270553588867, + "epoch": 0.9976274354734345, + "kl_loss": 0.05957059562206268, + "loss_ib": 0.001157590770162642, + "step": 3469 + }, + { + "ce_ib": 2.3744776248931885, + "ce_orig": 0.5978808403015137, + "epoch": 0.9976274354734345, + "kl_loss": 0.04179412126541138, + "loss_ib": 0.0006553889834322035, + "step": 3469 + }, + { + "ce_ib": 6.707810401916504, + "ce_orig": 1.6467852592468262, + "epoch": 0.9976274354734345, + "kl_loss": 0.05001183599233627, + "loss_ib": 0.0011708993697538972, + "step": 3469 + }, + { + "ce_ib": 2.036198854446411, + "ce_orig": 0.6882242560386658, + "epoch": 0.9976274354734345, + "kl_loss": 0.019492721185088158, + "loss_ib": 0.000398547068471089, + "step": 3469 + }, + { + "epoch": 0.9979150190524121, + "grad_norm": 0.10505367815494537, + "learning_rate": 3.896981245452799e-05, + "loss": 0.8584, + "step": 3470 + }, + { + "ce_ib": 3.4588420391082764, + "ce_orig": 0.48028138279914856, + "epoch": 0.9979150190524121, + "kl_loss": 0.062381818890571594, + "loss_ib": 0.0009697023779153824, + "step": 3470 + }, + { + "ce_ib": 3.765451669692993, + "ce_orig": 0.9893690347671509, + "epoch": 0.9979150190524121, + "kl_loss": 0.029679911211133003, + "loss_ib": 0.0006733442423865199, + "step": 3470 + }, + { + "ce_ib": 3.534989833831787, + "ce_orig": 0.9606521725654602, + "epoch": 0.9979150190524121, + "kl_loss": 0.04881585016846657, + "loss_ib": 0.0008416575146839023, + "step": 3470 + }, + { + "ce_ib": 5.117006301879883, + "ce_orig": 1.3385289907455444, + "epoch": 0.9979150190524121, + "kl_loss": 0.04934654384851456, + "loss_ib": 0.0010051659774035215, + "step": 3470 + }, + { + "ce_ib": 2.3628742694854736, + "ce_orig": 0.6707727909088135, + "epoch": 0.9982026026313897, + "kl_loss": 0.03078833594918251, + "loss_ib": 0.0005441707908175886, + "step": 3471 + }, + { + "ce_ib": 5.0916595458984375, + "ce_orig": 1.3238857984542847, + "epoch": 0.9982026026313897, + "kl_loss": 0.07552658021450043, + "loss_ib": 0.0012644317466765642, + "step": 3471 + }, + { + "ce_ib": 2.8384923934936523, + "ce_orig": 0.7869274616241455, + "epoch": 0.9982026026313897, + "kl_loss": 0.041499268263578415, + "loss_ib": 0.0006988419336266816, + "step": 3471 + }, + { + "ce_ib": 4.440670967102051, + "ce_orig": 1.0307177305221558, + "epoch": 0.9982026026313897, + "kl_loss": 0.0917360708117485, + "loss_ib": 0.0013614277122542262, + "step": 3471 + }, + { + "ce_ib": 2.993041753768921, + "ce_orig": 0.84736168384552, + "epoch": 0.9984901862103673, + "kl_loss": 0.04757519066333771, + "loss_ib": 0.0007750560180284083, + "step": 3472 + }, + { + "ce_ib": 2.7880442142486572, + "ce_orig": 0.5999387502670288, + "epoch": 0.9984901862103673, + "kl_loss": 0.035583339631557465, + "loss_ib": 0.0006346377776935697, + "step": 3472 + }, + { + "ce_ib": 3.0447728633880615, + "ce_orig": 0.803627610206604, + "epoch": 0.9984901862103673, + "kl_loss": 0.04262303560972214, + "loss_ib": 0.0007307076011784375, + "step": 3472 + }, + { + "ce_ib": 4.0292253494262695, + "ce_orig": 1.0783096551895142, + "epoch": 0.9984901862103673, + "kl_loss": 0.07057057321071625, + "loss_ib": 0.0011086282320320606, + "step": 3472 + }, + { + "ce_ib": 4.486243724822998, + "ce_orig": 1.448228120803833, + "epoch": 0.9987777697893451, + "kl_loss": 0.03293859586119652, + "loss_ib": 0.0007780103478580713, + "step": 3473 + }, + { + "ce_ib": 2.7548906803131104, + "ce_orig": 0.633521556854248, + "epoch": 0.9987777697893451, + "kl_loss": 0.028193864971399307, + "loss_ib": 0.0005574277020059526, + "step": 3473 + }, + { + "ce_ib": 2.2053024768829346, + "ce_orig": 0.3219430446624756, + "epoch": 0.9987777697893451, + "kl_loss": 0.0724869966506958, + "loss_ib": 0.0009454002138227224, + "step": 3473 + }, + { + "ce_ib": 4.659140586853027, + "ce_orig": 1.3099058866500854, + "epoch": 0.9987777697893451, + "kl_loss": 0.048179466277360916, + "loss_ib": 0.0009477086714468896, + "step": 3473 + }, + { + "ce_ib": 3.718771457672119, + "ce_orig": 0.9185711741447449, + "epoch": 0.9990653533683227, + "kl_loss": 0.06936350464820862, + "loss_ib": 0.001065512071363628, + "step": 3474 + }, + { + "ce_ib": 2.3702540397644043, + "ce_orig": 0.6217535734176636, + "epoch": 0.9990653533683227, + "kl_loss": 0.03670823946595192, + "loss_ib": 0.0006041078013367951, + "step": 3474 + }, + { + "ce_ib": 4.165712833404541, + "ce_orig": 0.9141705632209778, + "epoch": 0.9990653533683227, + "kl_loss": 0.07073673605918884, + "loss_ib": 0.00112393859308213, + "step": 3474 + }, + { + "ce_ib": 3.2727913856506348, + "ce_orig": 0.6811156868934631, + "epoch": 0.9990653533683227, + "kl_loss": 0.05207725986838341, + "loss_ib": 0.0008480517426505685, + "step": 3474 + }, + { + "epoch": 0.9993529369473003, + "grad_norm": 0.09236595034599304, + "learning_rate": 3.8937614977419666e-05, + "loss": 0.8039, + "step": 3475 + }, + { + "ce_ib": 4.489645481109619, + "ce_orig": 1.0587085485458374, + "epoch": 0.9993529369473003, + "kl_loss": 0.06197848916053772, + "loss_ib": 0.0010687493486329913, + "step": 3475 + }, + { + "ce_ib": 4.786440849304199, + "ce_orig": 1.0300703048706055, + "epoch": 0.9993529369473003, + "kl_loss": 0.059286292642354965, + "loss_ib": 0.0010715069947764277, + "step": 3475 + }, + { + "ce_ib": 4.053666114807129, + "ce_orig": 1.1731735467910767, + "epoch": 0.9993529369473003, + "kl_loss": 0.045823145657777786, + "loss_ib": 0.0008635980193503201, + "step": 3475 + }, + { + "ce_ib": 4.674835681915283, + "ce_orig": 1.088647484779358, + "epoch": 0.9993529369473003, + "kl_loss": 0.04198300838470459, + "loss_ib": 0.000887313624843955, + "step": 3475 + }, + { + "ce_ib": 1.96013343334198, + "ce_orig": 0.5352150797843933, + "epoch": 0.9996405205262779, + "kl_loss": 0.09652666002511978, + "loss_ib": 0.0011612799717113376, + "step": 3476 + }, + { + "ce_ib": 4.7780890464782715, + "ce_orig": 1.152045488357544, + "epoch": 0.9996405205262779, + "kl_loss": 0.05365524813532829, + "loss_ib": 0.0010143613908439875, + "step": 3476 + }, + { + "ce_ib": 3.786515712738037, + "ce_orig": 1.0036417245864868, + "epoch": 0.9996405205262779, + "kl_loss": 0.03185952082276344, + "loss_ib": 0.000697246752679348, + "step": 3476 + }, + { + "ce_ib": 3.838087558746338, + "ce_orig": 0.5722286105155945, + "epoch": 0.9996405205262779, + "kl_loss": 0.052208758890628815, + "loss_ib": 0.0009058963041752577, + "step": 3476 + }, + { + "ce_ib": 2.2217862606048584, + "ce_orig": 0.6201798915863037, + "epoch": 0.9999281041052556, + "kl_loss": 0.02814691700041294, + "loss_ib": 0.0005036477814428508, + "step": 3477 + }, + { + "ce_ib": 3.1217870712280273, + "ce_orig": 0.8803791403770447, + "epoch": 1.0, + "kl_loss": 0.0353650227189064, + "loss_ib": 0.0006658288766629994, + "step": 3478 + }, + { + "ce_ib": 5.204699993133545, + "ce_orig": 1.0658543109893799, + "epoch": 1.0, + "kl_loss": 0.033031146973371506, + "loss_ib": 0.0008507815073244274, + "step": 3478 + }, + { + "ce_ib": 2.148297071456909, + "ce_orig": 0.4601636230945587, + "epoch": 1.0, + "kl_loss": 0.0354003943502903, + "loss_ib": 0.0005688336095772684, + "step": 3478 + }, + { + "ce_ib": 3.495579481124878, + "ce_orig": 0.7393850684165955, + "epoch": 1.0, + "kl_loss": 0.04701061174273491, + "loss_ib": 0.0008196640410460532, + "step": 3478 + }, + { + "ce_ib": 6.376699924468994, + "ce_orig": 1.6147390604019165, + "epoch": 1.0002875835789777, + "kl_loss": 0.052786048501729965, + "loss_ib": 0.0011655305279418826, + "step": 3479 + }, + { + "ce_ib": 3.1564862728118896, + "ce_orig": 1.07258939743042, + "epoch": 1.0002875835789777, + "kl_loss": 0.04286006838083267, + "loss_ib": 0.0007442492642439902, + "step": 3479 + }, + { + "ce_ib": 5.558323383331299, + "ce_orig": 1.0641130208969116, + "epoch": 1.0002875835789777, + "kl_loss": 0.07032541930675507, + "loss_ib": 0.0012590864207595587, + "step": 3479 + }, + { + "ce_ib": 3.883009433746338, + "ce_orig": 1.119279146194458, + "epoch": 1.0002875835789777, + "kl_loss": 0.06115012615919113, + "loss_ib": 0.0009998021414503455, + "step": 3479 + }, + { + "epoch": 1.0005751671579552, + "grad_norm": 0.09513700008392334, + "learning_rate": 3.890538392136188e-05, + "loss": 0.7653, + "step": 3480 + }, + { + "ce_ib": 3.1962125301361084, + "ce_orig": 0.5789076685905457, + "epoch": 1.0005751671579552, + "kl_loss": 0.03173927962779999, + "loss_ib": 0.000637014047242701, + "step": 3480 + }, + { + "ce_ib": 2.6145501136779785, + "ce_orig": 0.6884479522705078, + "epoch": 1.0005751671579552, + "kl_loss": 0.029563847929239273, + "loss_ib": 0.0005570934736169875, + "step": 3480 + }, + { + "ce_ib": 2.522592067718506, + "ce_orig": 0.569451630115509, + "epoch": 1.0005751671579552, + "kl_loss": 0.03524550050497055, + "loss_ib": 0.0006047142087481916, + "step": 3480 + }, + { + "ce_ib": 4.651681900024414, + "ce_orig": 1.2207874059677124, + "epoch": 1.0005751671579552, + "kl_loss": 0.05708366632461548, + "loss_ib": 0.0010360048618167639, + "step": 3480 + }, + { + "ce_ib": 2.6096320152282715, + "ce_orig": 0.30446338653564453, + "epoch": 1.000862750736933, + "kl_loss": 0.1119430810213089, + "loss_ib": 0.0013803938636556268, + "step": 3481 + }, + { + "ce_ib": 3.6993155479431152, + "ce_orig": 0.8483468294143677, + "epoch": 1.000862750736933, + "kl_loss": 0.04429346323013306, + "loss_ib": 0.000812866142950952, + "step": 3481 + }, + { + "ce_ib": 1.7519718408584595, + "ce_orig": 0.35416221618652344, + "epoch": 1.000862750736933, + "kl_loss": 0.032715532928705215, + "loss_ib": 0.0005023524863645434, + "step": 3481 + }, + { + "ce_ib": 3.180299997329712, + "ce_orig": 0.8753499388694763, + "epoch": 1.000862750736933, + "kl_loss": 0.04410572350025177, + "loss_ib": 0.0007590872119180858, + "step": 3481 + }, + { + "ce_ib": 2.5213780403137207, + "ce_orig": 0.7356521487236023, + "epoch": 1.0011503343159105, + "kl_loss": 0.03054107539355755, + "loss_ib": 0.000557548541110009, + "step": 3482 + }, + { + "ce_ib": 4.614668846130371, + "ce_orig": 0.9440299272537231, + "epoch": 1.0011503343159105, + "kl_loss": 0.0687004029750824, + "loss_ib": 0.0011484709102660418, + "step": 3482 + }, + { + "ce_ib": 2.4272403717041016, + "ce_orig": 0.7733532190322876, + "epoch": 1.0011503343159105, + "kl_loss": 0.027355430647730827, + "loss_ib": 0.000516278319992125, + "step": 3482 + }, + { + "ce_ib": 2.8763844966888428, + "ce_orig": 0.805410623550415, + "epoch": 1.0011503343159105, + "kl_loss": 0.04544409364461899, + "loss_ib": 0.0007420793990604579, + "step": 3482 + }, + { + "ce_ib": 4.437617778778076, + "ce_orig": 1.0573298931121826, + "epoch": 1.0014379178948882, + "kl_loss": 0.07187282294034958, + "loss_ib": 0.001162489876151085, + "step": 3483 + }, + { + "ce_ib": 2.639787197113037, + "ce_orig": 0.9021163582801819, + "epoch": 1.0014379178948882, + "kl_loss": 0.03561578691005707, + "loss_ib": 0.0006201365613378584, + "step": 3483 + }, + { + "ce_ib": 3.6344797611236572, + "ce_orig": 0.8005877137184143, + "epoch": 1.0014379178948882, + "kl_loss": 0.06507135182619095, + "loss_ib": 0.0010141615057364106, + "step": 3483 + }, + { + "ce_ib": 2.871980667114258, + "ce_orig": 0.781480073928833, + "epoch": 1.0014379178948882, + "kl_loss": 0.0394427552819252, + "loss_ib": 0.0006816255627200007, + "step": 3483 + }, + { + "ce_ib": 2.1367084980010986, + "ce_orig": 0.5119180679321289, + "epoch": 1.0017255014738657, + "kl_loss": 0.03662731498479843, + "loss_ib": 0.0005799439968541265, + "step": 3484 + }, + { + "ce_ib": 4.0718607902526855, + "ce_orig": 0.8389078378677368, + "epoch": 1.0017255014738657, + "kl_loss": 0.07544418424367905, + "loss_ib": 0.0011616279371082783, + "step": 3484 + }, + { + "ce_ib": 5.519287586212158, + "ce_orig": 1.7229344844818115, + "epoch": 1.0017255014738657, + "kl_loss": 0.050840795040130615, + "loss_ib": 0.0010603367118164897, + "step": 3484 + }, + { + "ce_ib": 5.793507099151611, + "ce_orig": 1.4051554203033447, + "epoch": 1.0017255014738657, + "kl_loss": 0.0710161030292511, + "loss_ib": 0.0012895117979496717, + "step": 3484 + }, + { + "epoch": 1.0020130850528435, + "grad_norm": 0.08581116795539856, + "learning_rate": 3.8873119364006715e-05, + "loss": 0.8534, + "step": 3485 + }, + { + "ce_ib": 3.569666862487793, + "ce_orig": 0.6216431856155396, + "epoch": 1.0020130850528435, + "kl_loss": 0.025664210319519043, + "loss_ib": 0.0006136087467893958, + "step": 3485 + }, + { + "ce_ib": 4.38763427734375, + "ce_orig": 0.5793853998184204, + "epoch": 1.0020130850528435, + "kl_loss": 0.0948626697063446, + "loss_ib": 0.001387390191666782, + "step": 3485 + }, + { + "ce_ib": 3.763390064239502, + "ce_orig": 0.8480707406997681, + "epoch": 1.0020130850528435, + "kl_loss": 0.031225591897964478, + "loss_ib": 0.000688594882376492, + "step": 3485 + }, + { + "ce_ib": 4.518955230712891, + "ce_orig": 0.9949951171875, + "epoch": 1.0020130850528435, + "kl_loss": 0.050026193261146545, + "loss_ib": 0.000952157424762845, + "step": 3485 + }, + { + "ce_ib": 2.2246084213256836, + "ce_orig": 0.5341277718544006, + "epoch": 1.0023006686318212, + "kl_loss": 0.043034423142671585, + "loss_ib": 0.0006528050289489329, + "step": 3486 + }, + { + "ce_ib": 4.662140369415283, + "ce_orig": 1.382905125617981, + "epoch": 1.0023006686318212, + "kl_loss": 0.04700121656060219, + "loss_ib": 0.0009362261625938118, + "step": 3486 + }, + { + "ce_ib": 5.43032693862915, + "ce_orig": 1.5406323671340942, + "epoch": 1.0023006686318212, + "kl_loss": 0.07013580203056335, + "loss_ib": 0.0012443907326087356, + "step": 3486 + }, + { + "ce_ib": 3.8879175186157227, + "ce_orig": 1.068693995475769, + "epoch": 1.0023006686318212, + "kl_loss": 0.04774565249681473, + "loss_ib": 0.0008662482723593712, + "step": 3486 + }, + { + "ce_ib": 3.9694018363952637, + "ce_orig": 0.8727822303771973, + "epoch": 1.0025882522107987, + "kl_loss": 0.04005637764930725, + "loss_ib": 0.0007975039188750088, + "step": 3487 + }, + { + "ce_ib": 4.849270820617676, + "ce_orig": 0.9985588192939758, + "epoch": 1.0025882522107987, + "kl_loss": 0.055257245898246765, + "loss_ib": 0.0010374995181337, + "step": 3487 + }, + { + "ce_ib": 5.458737850189209, + "ce_orig": 1.3375707864761353, + "epoch": 1.0025882522107987, + "kl_loss": 0.05526996776461601, + "loss_ib": 0.0010985734406858683, + "step": 3487 + }, + { + "ce_ib": 4.742007255554199, + "ce_orig": 1.0882726907730103, + "epoch": 1.0025882522107987, + "kl_loss": 0.0516485832631588, + "loss_ib": 0.0009906865889206529, + "step": 3487 + }, + { + "ce_ib": 1.7391777038574219, + "ce_orig": 0.42560136318206787, + "epoch": 1.0028758357897765, + "kl_loss": 0.04902967810630798, + "loss_ib": 0.0006642145453952253, + "step": 3488 + }, + { + "ce_ib": 3.3302927017211914, + "ce_orig": 0.33283430337905884, + "epoch": 1.0028758357897765, + "kl_loss": 0.05706590786576271, + "loss_ib": 0.000903688371181488, + "step": 3488 + }, + { + "ce_ib": 4.852448463439941, + "ce_orig": 1.2308381795883179, + "epoch": 1.0028758357897765, + "kl_loss": 0.044016096740961075, + "loss_ib": 0.0009254057658836246, + "step": 3488 + }, + { + "ce_ib": 5.321582317352295, + "ce_orig": 1.1843475103378296, + "epoch": 1.0028758357897765, + "kl_loss": 0.052840933203697205, + "loss_ib": 0.0010605674469843507, + "step": 3488 + }, + { + "ce_ib": 2.951997756958008, + "ce_orig": 0.7721894979476929, + "epoch": 1.003163419368754, + "kl_loss": 0.041105758398771286, + "loss_ib": 0.0007062573567964137, + "step": 3489 + }, + { + "ce_ib": 3.828460931777954, + "ce_orig": 0.8438724875450134, + "epoch": 1.003163419368754, + "kl_loss": 0.08373132348060608, + "loss_ib": 0.0012201592326164246, + "step": 3489 + }, + { + "ce_ib": 4.036995887756348, + "ce_orig": 0.8328161835670471, + "epoch": 1.003163419368754, + "kl_loss": 0.044933006167411804, + "loss_ib": 0.0008530296036042273, + "step": 3489 + }, + { + "ce_ib": 1.753504991531372, + "ce_orig": 0.3494964838027954, + "epoch": 1.003163419368754, + "kl_loss": 0.024257976561784744, + "loss_ib": 0.0004179302486591041, + "step": 3489 + }, + { + "epoch": 1.0034510029477317, + "grad_norm": 0.09945628046989441, + "learning_rate": 3.884082138308698e-05, + "loss": 0.8606, + "step": 3490 + }, + { + "ce_ib": 6.613028049468994, + "ce_orig": 1.4671602249145508, + "epoch": 1.0034510029477317, + "kl_loss": 0.06645874679088593, + "loss_ib": 0.001325890189036727, + "step": 3490 + }, + { + "ce_ib": 4.218664646148682, + "ce_orig": 0.9857509732246399, + "epoch": 1.0034510029477317, + "kl_loss": 0.04738400876522064, + "loss_ib": 0.000895706529263407, + "step": 3490 + }, + { + "ce_ib": 4.616494655609131, + "ce_orig": 0.9198637008666992, + "epoch": 1.0034510029477317, + "kl_loss": 0.06750920414924622, + "loss_ib": 0.001136741484515369, + "step": 3490 + }, + { + "ce_ib": 3.1467297077178955, + "ce_orig": 0.40523508191108704, + "epoch": 1.0034510029477317, + "kl_loss": 0.0433083139359951, + "loss_ib": 0.0007477560429833829, + "step": 3490 + }, + { + "ce_ib": 2.748985528945923, + "ce_orig": 0.6653023958206177, + "epoch": 1.0037385865267092, + "kl_loss": 0.06474436819553375, + "loss_ib": 0.0009223422384820879, + "step": 3491 + }, + { + "ce_ib": 3.7615132331848145, + "ce_orig": 1.0731277465820312, + "epoch": 1.0037385865267092, + "kl_loss": 0.0509662888944149, + "loss_ib": 0.0008858141954988241, + "step": 3491 + }, + { + "ce_ib": 3.8817269802093506, + "ce_orig": 1.306039810180664, + "epoch": 1.0037385865267092, + "kl_loss": 0.04653904214501381, + "loss_ib": 0.0008535630768164992, + "step": 3491 + }, + { + "ce_ib": 2.6638355255126953, + "ce_orig": 0.4497489333152771, + "epoch": 1.0037385865267092, + "kl_loss": 0.0399135947227478, + "loss_ib": 0.0006655195029452443, + "step": 3491 + }, + { + "ce_ib": 2.1066880226135254, + "ce_orig": 0.6016479730606079, + "epoch": 1.004026170105687, + "kl_loss": 0.03551546484231949, + "loss_ib": 0.0005658234586007893, + "step": 3492 + }, + { + "ce_ib": 4.768523216247559, + "ce_orig": 1.186694622039795, + "epoch": 1.004026170105687, + "kl_loss": 0.05557980388402939, + "loss_ib": 0.0010326503543183208, + "step": 3492 + }, + { + "ce_ib": 3.675201177597046, + "ce_orig": 0.650250256061554, + "epoch": 1.004026170105687, + "kl_loss": 0.06262960284948349, + "loss_ib": 0.0009938160656020045, + "step": 3492 + }, + { + "ce_ib": 2.8772988319396973, + "ce_orig": 0.6758449673652649, + "epoch": 1.004026170105687, + "kl_loss": 0.04616553708910942, + "loss_ib": 0.00074938521720469, + "step": 3492 + }, + { + "ce_ib": 4.895780086517334, + "ce_orig": 1.4394365549087524, + "epoch": 1.0043137536846647, + "kl_loss": 0.06927531957626343, + "loss_ib": 0.0011823311215266585, + "step": 3493 + }, + { + "ce_ib": 3.4792401790618896, + "ce_orig": 0.7709848284721375, + "epoch": 1.0043137536846647, + "kl_loss": 0.051810722798109055, + "loss_ib": 0.000866031157784164, + "step": 3493 + }, + { + "ce_ib": 4.63859224319458, + "ce_orig": 1.2791465520858765, + "epoch": 1.0043137536846647, + "kl_loss": 0.09240597486495972, + "loss_ib": 0.001387918833643198, + "step": 3493 + }, + { + "ce_ib": 2.6544766426086426, + "ce_orig": 0.7621646523475647, + "epoch": 1.0043137536846647, + "kl_loss": 0.0323474295437336, + "loss_ib": 0.0005889219464734197, + "step": 3493 + }, + { + "ce_ib": 4.037138938903809, + "ce_orig": 1.2835943698883057, + "epoch": 1.0046013372636422, + "kl_loss": 0.06223098188638687, + "loss_ib": 0.0010260236449539661, + "step": 3494 + }, + { + "ce_ib": 2.7622737884521484, + "ce_orig": 0.5555009841918945, + "epoch": 1.0046013372636422, + "kl_loss": 0.09795776009559631, + "loss_ib": 0.001255804905667901, + "step": 3494 + }, + { + "ce_ib": 4.136101245880127, + "ce_orig": 1.179409384727478, + "epoch": 1.0046013372636422, + "kl_loss": 0.04018595069646835, + "loss_ib": 0.0008154695970006287, + "step": 3494 + }, + { + "ce_ib": 5.493145942687988, + "ce_orig": 1.6712535619735718, + "epoch": 1.0046013372636422, + "kl_loss": 0.053868167102336884, + "loss_ib": 0.0010879961773753166, + "step": 3494 + }, + { + "epoch": 1.00488892084262, + "grad_norm": 0.09981932491064072, + "learning_rate": 3.880849005641601e-05, + "loss": 0.867, + "step": 3495 + }, + { + "ce_ib": 3.947739839553833, + "ce_orig": 0.938679575920105, + "epoch": 1.00488892084262, + "kl_loss": 0.06536565721035004, + "loss_ib": 0.0010484304511919618, + "step": 3495 + }, + { + "ce_ib": 4.479057312011719, + "ce_orig": 0.9831138849258423, + "epoch": 1.00488892084262, + "kl_loss": 0.06753776967525482, + "loss_ib": 0.001123283407650888, + "step": 3495 + }, + { + "ce_ib": 2.0934817790985107, + "ce_orig": 0.4791463017463684, + "epoch": 1.00488892084262, + "kl_loss": 0.04615895450115204, + "loss_ib": 0.0006709377048537135, + "step": 3495 + }, + { + "ce_ib": 2.984017848968506, + "ce_orig": 0.4215310215950012, + "epoch": 1.00488892084262, + "kl_loss": 0.030575215816497803, + "loss_ib": 0.0006041539600118995, + "step": 3495 + }, + { + "ce_ib": 4.007381439208984, + "ce_orig": 1.2127094268798828, + "epoch": 1.0051765044215974, + "kl_loss": 0.06214509531855583, + "loss_ib": 0.0010221890406683087, + "step": 3496 + }, + { + "ce_ib": 2.680040121078491, + "ce_orig": 0.8252348899841309, + "epoch": 1.0051765044215974, + "kl_loss": 0.031235462054610252, + "loss_ib": 0.000580358668230474, + "step": 3496 + }, + { + "ce_ib": 2.9528403282165527, + "ce_orig": 0.8510251641273499, + "epoch": 1.0051765044215974, + "kl_loss": 0.08737485110759735, + "loss_ib": 0.0011690325336530805, + "step": 3496 + }, + { + "ce_ib": 3.2930948734283447, + "ce_orig": 0.6317000389099121, + "epoch": 1.0051765044215974, + "kl_loss": 0.07271483540534973, + "loss_ib": 0.0010564577532932162, + "step": 3496 + }, + { + "ce_ib": 4.087261199951172, + "ce_orig": 0.8301107287406921, + "epoch": 1.0054640880005752, + "kl_loss": 0.04980958253145218, + "loss_ib": 0.0009068219223991036, + "step": 3497 + }, + { + "ce_ib": 1.966817021369934, + "ce_orig": 0.3362247943878174, + "epoch": 1.0054640880005752, + "kl_loss": 0.06378662586212158, + "loss_ib": 0.0008345479145646095, + "step": 3497 + }, + { + "ce_ib": 5.4667277336120605, + "ce_orig": 1.3160288333892822, + "epoch": 1.0054640880005752, + "kl_loss": 0.06437558680772781, + "loss_ib": 0.0011904285056516528, + "step": 3497 + }, + { + "ce_ib": 3.5420360565185547, + "ce_orig": 0.9508720636367798, + "epoch": 1.0054640880005752, + "kl_loss": 0.03304655849933624, + "loss_ib": 0.0006846691248938441, + "step": 3497 + }, + { + "ce_ib": 5.657104015350342, + "ce_orig": 1.5932313203811646, + "epoch": 1.0057516715795527, + "kl_loss": 0.04217857867479324, + "loss_ib": 0.0009874962270259857, + "step": 3498 + }, + { + "ce_ib": 6.0632829666137695, + "ce_orig": 1.557913899421692, + "epoch": 1.0057516715795527, + "kl_loss": 0.06823234260082245, + "loss_ib": 0.0012886516051366925, + "step": 3498 + }, + { + "ce_ib": 4.3896026611328125, + "ce_orig": 1.055946946144104, + "epoch": 1.0057516715795527, + "kl_loss": 0.062864750623703, + "loss_ib": 0.0010676077799871564, + "step": 3498 + }, + { + "ce_ib": 3.9787628650665283, + "ce_orig": 0.8661747574806213, + "epoch": 1.0057516715795527, + "kl_loss": 0.058237940073013306, + "loss_ib": 0.0009802556596696377, + "step": 3498 + }, + { + "ce_ib": 3.3973581790924072, + "ce_orig": 0.676597535610199, + "epoch": 1.0060392551585304, + "kl_loss": 0.063894122838974, + "loss_ib": 0.0009786770679056644, + "step": 3499 + }, + { + "ce_ib": 4.2062764167785645, + "ce_orig": 0.9785535335540771, + "epoch": 1.0060392551585304, + "kl_loss": 0.09486642479896545, + "loss_ib": 0.0013692918000742793, + "step": 3499 + }, + { + "ce_ib": 4.46420955657959, + "ce_orig": 0.9891757965087891, + "epoch": 1.0060392551585304, + "kl_loss": 0.047321341931819916, + "loss_ib": 0.0009196343598887324, + "step": 3499 + }, + { + "ce_ib": 3.480464220046997, + "ce_orig": 1.0476003885269165, + "epoch": 1.0060392551585304, + "kl_loss": 0.039398543536663055, + "loss_ib": 0.0007420318434014916, + "step": 3499 + }, + { + "epoch": 1.0063268387375082, + "grad_norm": 0.09801509976387024, + "learning_rate": 3.8776125461887484e-05, + "loss": 0.8702, + "step": 3500 + }, + { + "ce_ib": 3.2746918201446533, + "ce_orig": 0.9416418671607971, + "epoch": 1.0063268387375082, + "kl_loss": 0.04328889772295952, + "loss_ib": 0.0007603581179864705, + "step": 3500 + }, + { + "ce_ib": 2.9115726947784424, + "ce_orig": 0.6453126668930054, + "epoch": 1.0063268387375082, + "kl_loss": 0.06280487775802612, + "loss_ib": 0.0009192060679197311, + "step": 3500 + }, + { + "ce_ib": 2.3008792400360107, + "ce_orig": 0.6593748927116394, + "epoch": 1.0063268387375082, + "kl_loss": 0.043873678892850876, + "loss_ib": 0.0006688246503472328, + "step": 3500 + }, + { + "ce_ib": 3.833845853805542, + "ce_orig": 1.3658047914505005, + "epoch": 1.0063268387375082, + "kl_loss": 0.041546858847141266, + "loss_ib": 0.000798853172454983, + "step": 3500 + }, + { + "ce_ib": 3.430791139602661, + "ce_orig": 0.9476206302642822, + "epoch": 1.0066144223164857, + "kl_loss": 0.06293556094169617, + "loss_ib": 0.0009724347037263215, + "step": 3501 + }, + { + "ce_ib": 3.12231183052063, + "ce_orig": 0.9612237215042114, + "epoch": 1.0066144223164857, + "kl_loss": 0.03477845713496208, + "loss_ib": 0.0006600157939828932, + "step": 3501 + }, + { + "ce_ib": 3.0781803131103516, + "ce_orig": 0.3940429389476776, + "epoch": 1.0066144223164857, + "kl_loss": 0.0734548270702362, + "loss_ib": 0.001042366260662675, + "step": 3501 + }, + { + "ce_ib": 3.361724376678467, + "ce_orig": 0.8441818952560425, + "epoch": 1.0066144223164857, + "kl_loss": 0.09229312837123871, + "loss_ib": 0.001259103650227189, + "step": 3501 + }, + { + "ce_ib": 2.519059419631958, + "ce_orig": 0.5994945168495178, + "epoch": 1.0069020058954634, + "kl_loss": 0.04231453686952591, + "loss_ib": 0.0006750512984581292, + "step": 3502 + }, + { + "ce_ib": 5.507658004760742, + "ce_orig": 1.322513461112976, + "epoch": 1.0069020058954634, + "kl_loss": 0.08897671103477478, + "loss_ib": 0.0014405329711735249, + "step": 3502 + }, + { + "ce_ib": 4.995233535766602, + "ce_orig": 1.4125099182128906, + "epoch": 1.0069020058954634, + "kl_loss": 0.059860195964574814, + "loss_ib": 0.0010981252416968346, + "step": 3502 + }, + { + "ce_ib": 1.1978776454925537, + "ce_orig": 0.17743384838104248, + "epoch": 1.0069020058954634, + "kl_loss": 0.10109655559062958, + "loss_ib": 0.001130753313191235, + "step": 3502 + }, + { + "ce_ib": 2.4159021377563477, + "ce_orig": 0.43387648463249207, + "epoch": 1.007189589474441, + "kl_loss": 0.067983478307724, + "loss_ib": 0.0009214250021614134, + "step": 3503 + }, + { + "ce_ib": 2.849085807800293, + "ce_orig": 0.5789613723754883, + "epoch": 1.007189589474441, + "kl_loss": 0.04116221144795418, + "loss_ib": 0.0006965306820347905, + "step": 3503 + }, + { + "ce_ib": 4.334465026855469, + "ce_orig": 1.3988533020019531, + "epoch": 1.007189589474441, + "kl_loss": 0.03144382685422897, + "loss_ib": 0.0007478847983293235, + "step": 3503 + }, + { + "ce_ib": 3.164245367050171, + "ce_orig": 0.8929179310798645, + "epoch": 1.007189589474441, + "kl_loss": 0.038714826107025146, + "loss_ib": 0.0007035727612674236, + "step": 3503 + }, + { + "ce_ib": 6.2444233894348145, + "ce_orig": 1.659660816192627, + "epoch": 1.0074771730534187, + "kl_loss": 0.07854707539081573, + "loss_ib": 0.0014099129475653172, + "step": 3504 + }, + { + "ce_ib": 3.790290117263794, + "ce_orig": 0.9888508319854736, + "epoch": 1.0074771730534187, + "kl_loss": 0.053486645221710205, + "loss_ib": 0.00091389543376863, + "step": 3504 + }, + { + "ce_ib": 3.358497381210327, + "ce_orig": 0.7461373209953308, + "epoch": 1.0074771730534187, + "kl_loss": 0.05010291934013367, + "loss_ib": 0.0008368788985535502, + "step": 3504 + }, + { + "ce_ib": 4.6029887199401855, + "ce_orig": 0.9945048689842224, + "epoch": 1.0074771730534187, + "kl_loss": 0.05325685814023018, + "loss_ib": 0.0009928673971444368, + "step": 3504 + }, + { + "epoch": 1.0077647566323964, + "grad_norm": 0.09656573086977005, + "learning_rate": 3.874372767747521e-05, + "loss": 0.8466, + "step": 3505 + }, + { + "ce_ib": 3.0669960975646973, + "ce_orig": 0.6914551258087158, + "epoch": 1.0077647566323964, + "kl_loss": 0.0529046505689621, + "loss_ib": 0.0008357460610568523, + "step": 3505 + }, + { + "ce_ib": 2.7561309337615967, + "ce_orig": 0.9541803598403931, + "epoch": 1.0077647566323964, + "kl_loss": 0.02558140456676483, + "loss_ib": 0.0005314270965754986, + "step": 3505 + }, + { + "ce_ib": 3.6132960319519043, + "ce_orig": 0.6455292701721191, + "epoch": 1.0077647566323964, + "kl_loss": 0.04593859612941742, + "loss_ib": 0.000820715562440455, + "step": 3505 + }, + { + "ce_ib": 2.8230626583099365, + "ce_orig": 0.5261470079421997, + "epoch": 1.0077647566323964, + "kl_loss": 0.07252495735883713, + "loss_ib": 0.0010075558675453067, + "step": 3505 + }, + { + "ce_ib": 3.97116756439209, + "ce_orig": 0.9005761742591858, + "epoch": 1.008052340211374, + "kl_loss": 0.06590519845485687, + "loss_ib": 0.00105616869404912, + "step": 3506 + }, + { + "ce_ib": 3.786526679992676, + "ce_orig": 0.7054985761642456, + "epoch": 1.008052340211374, + "kl_loss": 0.05702609196305275, + "loss_ib": 0.0009489135118201375, + "step": 3506 + }, + { + "ce_ib": 3.847910165786743, + "ce_orig": 0.9562749862670898, + "epoch": 1.008052340211374, + "kl_loss": 0.08418093621730804, + "loss_ib": 0.001226600375957787, + "step": 3506 + }, + { + "ce_ib": 2.508593797683716, + "ce_orig": 0.8450639843940735, + "epoch": 1.008052340211374, + "kl_loss": 0.03912097215652466, + "loss_ib": 0.0006420690915547311, + "step": 3506 + }, + { + "ce_ib": 2.8947672843933105, + "ce_orig": 0.49838387966156006, + "epoch": 1.0083399237903516, + "kl_loss": 0.062076907604932785, + "loss_ib": 0.0009102457552216947, + "step": 3507 + }, + { + "ce_ib": 2.539588689804077, + "ce_orig": 0.6900296807289124, + "epoch": 1.0083399237903516, + "kl_loss": 0.03086687996983528, + "loss_ib": 0.0005626276833936572, + "step": 3507 + }, + { + "ce_ib": 3.7058184146881104, + "ce_orig": 0.7615242600440979, + "epoch": 1.0083399237903516, + "kl_loss": 0.034163329750299454, + "loss_ib": 0.0007122151437215507, + "step": 3507 + }, + { + "ce_ib": 4.673040390014648, + "ce_orig": 1.3054143190383911, + "epoch": 1.0083399237903516, + "kl_loss": 0.03824307397007942, + "loss_ib": 0.0008497347589582205, + "step": 3507 + }, + { + "ce_ib": 2.821753978729248, + "ce_orig": 0.4613170921802521, + "epoch": 1.0086275073693292, + "kl_loss": 0.05011730641126633, + "loss_ib": 0.0007833484560251236, + "step": 3508 + }, + { + "ce_ib": 2.9107513427734375, + "ce_orig": 0.5184391736984253, + "epoch": 1.0086275073693292, + "kl_loss": 0.03764684498310089, + "loss_ib": 0.0006675435579381883, + "step": 3508 + }, + { + "ce_ib": 3.8006322383880615, + "ce_orig": 0.9654508829116821, + "epoch": 1.0086275073693292, + "kl_loss": 0.04687844216823578, + "loss_ib": 0.0008488476159982383, + "step": 3508 + }, + { + "ce_ib": 1.789657473564148, + "ce_orig": 0.21296323835849762, + "epoch": 1.0086275073693292, + "kl_loss": 0.09116074442863464, + "loss_ib": 0.0010905731469392776, + "step": 3508 + }, + { + "ce_ib": 5.066526412963867, + "ce_orig": 1.5180283784866333, + "epoch": 1.008915090948307, + "kl_loss": 0.05886506289243698, + "loss_ib": 0.0010953032178804278, + "step": 3509 + }, + { + "ce_ib": 3.2504935264587402, + "ce_orig": 0.9009330868721008, + "epoch": 1.008915090948307, + "kl_loss": 0.02954292483627796, + "loss_ib": 0.000620478589553386, + "step": 3509 + }, + { + "ce_ib": 4.70527982711792, + "ce_orig": 1.0974478721618652, + "epoch": 1.008915090948307, + "kl_loss": 0.02727125957608223, + "loss_ib": 0.0007432405254803598, + "step": 3509 + }, + { + "ce_ib": 4.346827507019043, + "ce_orig": 1.0570945739746094, + "epoch": 1.008915090948307, + "kl_loss": 0.05437284708023071, + "loss_ib": 0.0009784111753106117, + "step": 3509 + }, + { + "epoch": 1.0092026745272844, + "grad_norm": 0.10429726541042328, + "learning_rate": 3.871129678123297e-05, + "loss": 0.8635, + "step": 3510 + }, + { + "ce_ib": 5.521762847900391, + "ce_orig": 1.6189700365066528, + "epoch": 1.0092026745272844, + "kl_loss": 0.03437218442559242, + "loss_ib": 0.0008958980906754732, + "step": 3510 + }, + { + "ce_ib": 2.8714184761047363, + "ce_orig": 0.7647817134857178, + "epoch": 1.0092026745272844, + "kl_loss": 0.08278017491102219, + "loss_ib": 0.00111494364682585, + "step": 3510 + }, + { + "ce_ib": 3.7021877765655518, + "ce_orig": 1.239917278289795, + "epoch": 1.0092026745272844, + "kl_loss": 0.030801944434642792, + "loss_ib": 0.0006782381678931415, + "step": 3510 + }, + { + "ce_ib": 3.726245164871216, + "ce_orig": 0.9230521321296692, + "epoch": 1.0092026745272844, + "kl_loss": 0.07594280689954758, + "loss_ib": 0.0011320525081828237, + "step": 3510 + }, + { + "ce_ib": 3.439164400100708, + "ce_orig": 1.013968586921692, + "epoch": 1.0094902581062621, + "kl_loss": 0.0520157553255558, + "loss_ib": 0.0008640739251859486, + "step": 3511 + }, + { + "ce_ib": 1.4422869682312012, + "ce_orig": 0.2478531450033188, + "epoch": 1.0094902581062621, + "kl_loss": 0.10997166484594345, + "loss_ib": 0.0012439453275874257, + "step": 3511 + }, + { + "ce_ib": 2.561573028564453, + "ce_orig": 0.8879328966140747, + "epoch": 1.0094902581062621, + "kl_loss": 0.03204359486699104, + "loss_ib": 0.0005765932146459818, + "step": 3511 + }, + { + "ce_ib": 2.042445659637451, + "ce_orig": 0.4318332076072693, + "epoch": 1.0094902581062621, + "kl_loss": 0.04360809922218323, + "loss_ib": 0.0006403255392797291, + "step": 3511 + }, + { + "ce_ib": 3.647951602935791, + "ce_orig": 0.935651421546936, + "epoch": 1.0097778416852399, + "kl_loss": 0.05218013375997543, + "loss_ib": 0.0008865964482538402, + "step": 3512 + }, + { + "ce_ib": 4.797001838684082, + "ce_orig": 1.1539461612701416, + "epoch": 1.0097778416852399, + "kl_loss": 0.035581402480602264, + "loss_ib": 0.000835514219943434, + "step": 3512 + }, + { + "ce_ib": 2.581408977508545, + "ce_orig": 0.441584050655365, + "epoch": 1.0097778416852399, + "kl_loss": 0.02581046149134636, + "loss_ib": 0.0005162454908713698, + "step": 3512 + }, + { + "ce_ib": 2.129483222961426, + "ce_orig": 0.49570176005363464, + "epoch": 1.0097778416852399, + "kl_loss": 0.04558973014354706, + "loss_ib": 0.0006688456051051617, + "step": 3512 + }, + { + "ce_ib": 4.094200611114502, + "ce_orig": 0.6194756627082825, + "epoch": 1.0100654252642174, + "kl_loss": 0.04923003539443016, + "loss_ib": 0.0009017203701660037, + "step": 3513 + }, + { + "ce_ib": 3.225900411605835, + "ce_orig": 0.4695965051651001, + "epoch": 1.0100654252642174, + "kl_loss": 0.07597126811742783, + "loss_ib": 0.0010823026532307267, + "step": 3513 + }, + { + "ce_ib": 2.33406138420105, + "ce_orig": 0.6270101070404053, + "epoch": 1.0100654252642174, + "kl_loss": 0.041087351739406586, + "loss_ib": 0.0006442796438932419, + "step": 3513 + }, + { + "ce_ib": 2.5634353160858154, + "ce_orig": 0.7794075608253479, + "epoch": 1.0100654252642174, + "kl_loss": 0.03490171581506729, + "loss_ib": 0.0006053606630302966, + "step": 3513 + }, + { + "ce_ib": 2.884528636932373, + "ce_orig": 0.7217035889625549, + "epoch": 1.0103530088431951, + "kl_loss": 0.05589378625154495, + "loss_ib": 0.0008473907364532351, + "step": 3514 + }, + { + "ce_ib": 3.2498316764831543, + "ce_orig": 0.7889646887779236, + "epoch": 1.0103530088431951, + "kl_loss": 0.046488307416439056, + "loss_ib": 0.0007898662006482482, + "step": 3514 + }, + { + "ce_ib": 3.7052600383758545, + "ce_orig": 0.9660313725471497, + "epoch": 1.0103530088431951, + "kl_loss": 0.05516698211431503, + "loss_ib": 0.0009221957297995687, + "step": 3514 + }, + { + "ce_ib": 2.99290132522583, + "ce_orig": 0.6242303848266602, + "epoch": 1.0103530088431951, + "kl_loss": 0.05120735242962837, + "loss_ib": 0.0008113635703921318, + "step": 3514 + }, + { + "epoch": 1.0106405924221726, + "grad_norm": 0.09735569357872009, + "learning_rate": 3.867883285129432e-05, + "loss": 0.8351, + "step": 3515 + }, + { + "ce_ib": 5.245660305023193, + "ce_orig": 1.1296411752700806, + "epoch": 1.0106405924221726, + "kl_loss": 0.0429779551923275, + "loss_ib": 0.0009543455671519041, + "step": 3515 + }, + { + "ce_ib": 4.983395576477051, + "ce_orig": 1.3141281604766846, + "epoch": 1.0106405924221726, + "kl_loss": 0.03970225155353546, + "loss_ib": 0.0008953620563261211, + "step": 3515 + }, + { + "ce_ib": 1.906148076057434, + "ce_orig": 0.5418247580528259, + "epoch": 1.0106405924221726, + "kl_loss": 0.023807702586054802, + "loss_ib": 0.0004286917974241078, + "step": 3515 + }, + { + "ce_ib": 4.480119228363037, + "ce_orig": 1.1777113676071167, + "epoch": 1.0106405924221726, + "kl_loss": 0.04912324622273445, + "loss_ib": 0.000939244288019836, + "step": 3515 + }, + { + "ce_ib": 2.487004518508911, + "ce_orig": 0.4762752056121826, + "epoch": 1.0109281760011504, + "kl_loss": 0.06191888824105263, + "loss_ib": 0.0008678893791511655, + "step": 3516 + }, + { + "ce_ib": 3.2154831886291504, + "ce_orig": 0.8472704291343689, + "epoch": 1.0109281760011504, + "kl_loss": 0.08649268746376038, + "loss_ib": 0.001186475157737732, + "step": 3516 + }, + { + "ce_ib": 2.5990867614746094, + "ce_orig": 0.5829936861991882, + "epoch": 1.0109281760011504, + "kl_loss": 0.036833517253398895, + "loss_ib": 0.000628243840765208, + "step": 3516 + }, + { + "ce_ib": 2.4829177856445312, + "ce_orig": 0.6318520307540894, + "epoch": 1.0109281760011504, + "kl_loss": 0.05862802639603615, + "loss_ib": 0.0008345720125362277, + "step": 3516 + }, + { + "ce_ib": 1.6604676246643066, + "ce_orig": 0.24250996112823486, + "epoch": 1.0112157595801279, + "kl_loss": 0.12039736658334732, + "loss_ib": 0.0013700203271582723, + "step": 3517 + }, + { + "ce_ib": 4.338295936584473, + "ce_orig": 1.2546982765197754, + "epoch": 1.0112157595801279, + "kl_loss": 0.05954781919717789, + "loss_ib": 0.001029307721182704, + "step": 3517 + }, + { + "ce_ib": 2.006023406982422, + "ce_orig": 0.41837647557258606, + "epoch": 1.0112157595801279, + "kl_loss": 0.12623214721679688, + "loss_ib": 0.0014629238285124302, + "step": 3517 + }, + { + "ce_ib": 5.521909713745117, + "ce_orig": 1.3993866443634033, + "epoch": 1.0112157595801279, + "kl_loss": 0.05159137398004532, + "loss_ib": 0.0010681046405807137, + "step": 3517 + }, + { + "ce_ib": 1.9439023733139038, + "ce_orig": 0.24501559138298035, + "epoch": 1.0115033431591056, + "kl_loss": 0.0626935139298439, + "loss_ib": 0.0008213252876885235, + "step": 3518 + }, + { + "ce_ib": 2.25468111038208, + "ce_orig": 0.6254292726516724, + "epoch": 1.0115033431591056, + "kl_loss": 0.06335578858852386, + "loss_ib": 0.00085902598220855, + "step": 3518 + }, + { + "ce_ib": 4.0141921043396, + "ce_orig": 1.178505301475525, + "epoch": 1.0115033431591056, + "kl_loss": 0.04689592495560646, + "loss_ib": 0.0008703784551471472, + "step": 3518 + }, + { + "ce_ib": 3.250542402267456, + "ce_orig": 0.8860410451889038, + "epoch": 1.0115033431591056, + "kl_loss": 0.039609238505363464, + "loss_ib": 0.0007211465854197741, + "step": 3518 + }, + { + "ce_ib": 5.379656791687012, + "ce_orig": 1.5136058330535889, + "epoch": 1.0117909267380834, + "kl_loss": 0.04747080057859421, + "loss_ib": 0.0010126736015081406, + "step": 3519 + }, + { + "ce_ib": 4.5696516036987305, + "ce_orig": 1.179471731185913, + "epoch": 1.0117909267380834, + "kl_loss": 0.05473952740430832, + "loss_ib": 0.001004360499791801, + "step": 3519 + }, + { + "ce_ib": 4.180464744567871, + "ce_orig": 0.9770216345787048, + "epoch": 1.0117909267380834, + "kl_loss": 0.052189238369464874, + "loss_ib": 0.0009399388800375164, + "step": 3519 + }, + { + "ce_ib": 2.884089469909668, + "ce_orig": 0.6335499882698059, + "epoch": 1.0117909267380834, + "kl_loss": 0.060752350836992264, + "loss_ib": 0.0008959324331954122, + "step": 3519 + }, + { + "epoch": 1.0120785103170609, + "grad_norm": 0.08841949701309204, + "learning_rate": 3.8646335965872414e-05, + "loss": 0.8656, + "step": 3520 + }, + { + "ce_ib": 5.495333671569824, + "ce_orig": 1.6632080078125, + "epoch": 1.0120785103170609, + "kl_loss": 0.06345373392105103, + "loss_ib": 0.0011840707156807184, + "step": 3520 + }, + { + "ce_ib": 3.8112356662750244, + "ce_orig": 0.9657891392707825, + "epoch": 1.0120785103170609, + "kl_loss": 0.049491036683321, + "loss_ib": 0.0008760339114814997, + "step": 3520 + }, + { + "ce_ib": 2.6290574073791504, + "ce_orig": 0.5929751992225647, + "epoch": 1.0120785103170609, + "kl_loss": 0.030237987637519836, + "loss_ib": 0.0005652856198139489, + "step": 3520 + }, + { + "ce_ib": 3.7165770530700684, + "ce_orig": 0.7792166471481323, + "epoch": 1.0120785103170609, + "kl_loss": 0.05593036860227585, + "loss_ib": 0.0009309613378718495, + "step": 3520 + }, + { + "ce_ib": 3.2693819999694824, + "ce_orig": 0.957035481929779, + "epoch": 1.0123660938960386, + "kl_loss": 0.038704391568899155, + "loss_ib": 0.0007139820954762399, + "step": 3521 + }, + { + "ce_ib": 3.6982297897338867, + "ce_orig": 0.940666913986206, + "epoch": 1.0123660938960386, + "kl_loss": 0.03861461952328682, + "loss_ib": 0.0007559691439382732, + "step": 3521 + }, + { + "ce_ib": 4.785062313079834, + "ce_orig": 1.1203783750534058, + "epoch": 1.0123660938960386, + "kl_loss": 0.04582422971725464, + "loss_ib": 0.0009367485181428492, + "step": 3521 + }, + { + "ce_ib": 5.662992000579834, + "ce_orig": 1.3854470252990723, + "epoch": 1.0123660938960386, + "kl_loss": 0.07250040769577026, + "loss_ib": 0.0012913033133372664, + "step": 3521 + }, + { + "ce_ib": 1.7148624658584595, + "ce_orig": 0.4418226182460785, + "epoch": 1.0126536774750161, + "kl_loss": 0.025599995627999306, + "loss_ib": 0.0004274862294550985, + "step": 3522 + }, + { + "ce_ib": 2.61496639251709, + "ce_orig": 0.6564294099807739, + "epoch": 1.0126536774750161, + "kl_loss": 0.03438282012939453, + "loss_ib": 0.0006053248071111739, + "step": 3522 + }, + { + "ce_ib": 3.565152406692505, + "ce_orig": 0.3700674772262573, + "epoch": 1.0126536774750161, + "kl_loss": 0.07156749814748764, + "loss_ib": 0.00107219023630023, + "step": 3522 + }, + { + "ce_ib": 5.348245143890381, + "ce_orig": 1.5939990282058716, + "epoch": 1.0126536774750161, + "kl_loss": 0.06042448431253433, + "loss_ib": 0.0011390693252906203, + "step": 3522 + }, + { + "ce_ib": 4.581168174743652, + "ce_orig": 0.9119672179222107, + "epoch": 1.0129412610539938, + "kl_loss": 0.05731882154941559, + "loss_ib": 0.0010313050588592887, + "step": 3523 + }, + { + "ce_ib": 3.5195393562316895, + "ce_orig": 0.9388973116874695, + "epoch": 1.0129412610539938, + "kl_loss": 0.0680910050868988, + "loss_ib": 0.0010328639764338732, + "step": 3523 + }, + { + "ce_ib": 2.9593143463134766, + "ce_orig": 0.7576556205749512, + "epoch": 1.0129412610539938, + "kl_loss": 0.06913589686155319, + "loss_ib": 0.0009872904047369957, + "step": 3523 + }, + { + "ce_ib": 2.578645706176758, + "ce_orig": 0.851932942867279, + "epoch": 1.0129412610539938, + "kl_loss": 0.02647005021572113, + "loss_ib": 0.0005225650384090841, + "step": 3523 + }, + { + "ce_ib": 3.3820409774780273, + "ce_orig": 0.6972578167915344, + "epoch": 1.0132288446329714, + "kl_loss": 0.05486653745174408, + "loss_ib": 0.0008868693839758635, + "step": 3524 + }, + { + "ce_ib": 1.6730341911315918, + "ce_orig": 0.2751365900039673, + "epoch": 1.0132288446329714, + "kl_loss": 0.0819329023361206, + "loss_ib": 0.0009866324253380299, + "step": 3524 + }, + { + "ce_ib": 2.9264495372772217, + "ce_orig": 0.5527572631835938, + "epoch": 1.0132288446329714, + "kl_loss": 0.05733140930533409, + "loss_ib": 0.0008659590384922922, + "step": 3524 + }, + { + "ce_ib": 4.583714008331299, + "ce_orig": 1.0962523221969604, + "epoch": 1.0132288446329714, + "kl_loss": 0.03882245719432831, + "loss_ib": 0.0008465959108434618, + "step": 3524 + }, + { + "epoch": 1.013516428211949, + "grad_norm": 0.0923343226313591, + "learning_rate": 3.861380620325978e-05, + "loss": 0.85, + "step": 3525 + }, + { + "ce_ib": 4.209800720214844, + "ce_orig": 0.7436109185218811, + "epoch": 1.013516428211949, + "kl_loss": 0.05775509774684906, + "loss_ib": 0.0009985310025513172, + "step": 3525 + }, + { + "ce_ib": 3.639486074447632, + "ce_orig": 0.9871081709861755, + "epoch": 1.013516428211949, + "kl_loss": 0.0543186292052269, + "loss_ib": 0.0009071348467841744, + "step": 3525 + }, + { + "ce_ib": 4.532528400421143, + "ce_orig": 1.1736747026443481, + "epoch": 1.013516428211949, + "kl_loss": 0.04363764822483063, + "loss_ib": 0.0008896292420104146, + "step": 3525 + }, + { + "ce_ib": 4.523370265960693, + "ce_orig": 1.048141360282898, + "epoch": 1.013516428211949, + "kl_loss": 0.060836516320705414, + "loss_ib": 0.0010607021395117044, + "step": 3525 + }, + { + "ce_ib": 5.851871967315674, + "ce_orig": 1.7926175594329834, + "epoch": 1.0138040117909268, + "kl_loss": 0.06683572381734848, + "loss_ib": 0.0012535443529486656, + "step": 3526 + }, + { + "ce_ib": 2.75905704498291, + "ce_orig": 0.6111860871315002, + "epoch": 1.0138040117909268, + "kl_loss": 0.031103016808629036, + "loss_ib": 0.0005869358428753912, + "step": 3526 + }, + { + "ce_ib": 2.7323391437530518, + "ce_orig": 0.47052955627441406, + "epoch": 1.0138040117909268, + "kl_loss": 0.06245305761694908, + "loss_ib": 0.0008977644611150026, + "step": 3526 + }, + { + "ce_ib": 2.887545585632324, + "ce_orig": 0.6808836460113525, + "epoch": 1.0138040117909268, + "kl_loss": 0.065492644906044, + "loss_ib": 0.0009436809923499823, + "step": 3526 + }, + { + "ce_ib": 2.536087989807129, + "ce_orig": 0.9040138125419617, + "epoch": 1.0140915953699043, + "kl_loss": 0.024871109053492546, + "loss_ib": 0.000502319831866771, + "step": 3527 + }, + { + "ce_ib": 3.4562602043151855, + "ce_orig": 0.5653513073921204, + "epoch": 1.0140915953699043, + "kl_loss": 0.04855810105800629, + "loss_ib": 0.0008312070276588202, + "step": 3527 + }, + { + "ce_ib": 4.121584415435791, + "ce_orig": 0.9641424417495728, + "epoch": 1.0140915953699043, + "kl_loss": 0.03849663957953453, + "loss_ib": 0.0007971248123794794, + "step": 3527 + }, + { + "ce_ib": 3.328770637512207, + "ce_orig": 0.8670457005500793, + "epoch": 1.0140915953699043, + "kl_loss": 0.051472365856170654, + "loss_ib": 0.00084760069148615, + "step": 3527 + }, + { + "ce_ib": 4.366908073425293, + "ce_orig": 1.2279895544052124, + "epoch": 1.014379178948882, + "kl_loss": 0.03440370783209801, + "loss_ib": 0.0007807278307154775, + "step": 3528 + }, + { + "ce_ib": 6.474099159240723, + "ce_orig": 1.8564848899841309, + "epoch": 1.014379178948882, + "kl_loss": 0.043709345161914825, + "loss_ib": 0.001084503368474543, + "step": 3528 + }, + { + "ce_ib": 4.674768924713135, + "ce_orig": 1.5020653009414673, + "epoch": 1.014379178948882, + "kl_loss": 0.04257417097687721, + "loss_ib": 0.0008932185592129827, + "step": 3528 + }, + { + "ce_ib": 3.119493007659912, + "ce_orig": 0.7256542444229126, + "epoch": 1.014379178948882, + "kl_loss": 0.04377235472202301, + "loss_ib": 0.0007496727630496025, + "step": 3528 + }, + { + "ce_ib": 3.3421430587768555, + "ce_orig": 0.75141441822052, + "epoch": 1.0146667625278596, + "kl_loss": 0.05349946767091751, + "loss_ib": 0.0008692090050317347, + "step": 3529 + }, + { + "ce_ib": 5.713838577270508, + "ce_orig": 1.0047261714935303, + "epoch": 1.0146667625278596, + "kl_loss": 0.021992705762386322, + "loss_ib": 0.0007913108565844595, + "step": 3529 + }, + { + "ce_ib": 3.1307854652404785, + "ce_orig": 0.6520746350288391, + "epoch": 1.0146667625278596, + "kl_loss": 0.07153204083442688, + "loss_ib": 0.001028398866765201, + "step": 3529 + }, + { + "ce_ib": 3.0607900619506836, + "ce_orig": 0.7437888383865356, + "epoch": 1.0146667625278596, + "kl_loss": 0.06000981852412224, + "loss_ib": 0.0009061771561391652, + "step": 3529 + }, + { + "epoch": 1.0149543461068373, + "grad_norm": 0.11784587800502777, + "learning_rate": 3.858124364182818e-05, + "loss": 0.8989, + "step": 3530 + }, + { + "ce_ib": 3.5247702598571777, + "ce_orig": 1.1304285526275635, + "epoch": 1.0149543461068373, + "kl_loss": 0.027026254683732986, + "loss_ib": 0.0006227395497262478, + "step": 3530 + }, + { + "ce_ib": 2.552347421646118, + "ce_orig": 0.7902625799179077, + "epoch": 1.0149543461068373, + "kl_loss": 0.023071518167853355, + "loss_ib": 0.0004859499167650938, + "step": 3530 + }, + { + "ce_ib": 3.5095834732055664, + "ce_orig": 0.6724838018417358, + "epoch": 1.0149543461068373, + "kl_loss": 0.025474997237324715, + "loss_ib": 0.0006057082791812718, + "step": 3530 + }, + { + "ce_ib": 3.727102518081665, + "ce_orig": 0.5957485437393188, + "epoch": 1.0149543461068373, + "kl_loss": 0.05838385969400406, + "loss_ib": 0.0009565487853251398, + "step": 3530 + }, + { + "ce_ib": 2.458192825317383, + "ce_orig": 0.5986446142196655, + "epoch": 1.0152419296858148, + "kl_loss": 0.03174370154738426, + "loss_ib": 0.0005632562679238617, + "step": 3531 + }, + { + "ce_ib": 3.5579867362976074, + "ce_orig": 0.9034735560417175, + "epoch": 1.0152419296858148, + "kl_loss": 0.07155481725931168, + "loss_ib": 0.0010713468072935939, + "step": 3531 + }, + { + "ce_ib": 5.134092330932617, + "ce_orig": 1.548168420791626, + "epoch": 1.0152419296858148, + "kl_loss": 0.04155896231532097, + "loss_ib": 0.000928998866584152, + "step": 3531 + }, + { + "ce_ib": 2.4204206466674805, + "ce_orig": 0.6618062853813171, + "epoch": 1.0152419296858148, + "kl_loss": 0.020159754902124405, + "loss_ib": 0.00044363958295434713, + "step": 3531 + }, + { + "ce_ib": 4.780149936676025, + "ce_orig": 1.6495331525802612, + "epoch": 1.0155295132647926, + "kl_loss": 0.04215395450592041, + "loss_ib": 0.0008995544631034136, + "step": 3532 + }, + { + "ce_ib": 4.704822540283203, + "ce_orig": 0.6018173694610596, + "epoch": 1.0155295132647926, + "kl_loss": 0.06469105184078217, + "loss_ib": 0.001117392792366445, + "step": 3532 + }, + { + "ce_ib": 4.0827107429504395, + "ce_orig": 1.3408362865447998, + "epoch": 1.0155295132647926, + "kl_loss": 0.039133429527282715, + "loss_ib": 0.0007996053900569677, + "step": 3532 + }, + { + "ce_ib": 4.2994384765625, + "ce_orig": 1.2707475423812866, + "epoch": 1.0155295132647926, + "kl_loss": 0.03573765605688095, + "loss_ib": 0.0007873204303905368, + "step": 3532 + }, + { + "ce_ib": 2.8976686000823975, + "ce_orig": 0.6916202902793884, + "epoch": 1.0158170968437703, + "kl_loss": 0.04321249574422836, + "loss_ib": 0.0007218918181024492, + "step": 3533 + }, + { + "ce_ib": 2.3599696159362793, + "ce_orig": 0.7108099460601807, + "epoch": 1.0158170968437703, + "kl_loss": 0.03518133983016014, + "loss_ib": 0.0005878103547729552, + "step": 3533 + }, + { + "ce_ib": 4.823136329650879, + "ce_orig": 1.4938325881958008, + "epoch": 1.0158170968437703, + "kl_loss": 0.0654711127281189, + "loss_ib": 0.0011370247229933739, + "step": 3533 + }, + { + "ce_ib": 5.30076265335083, + "ce_orig": 1.4792585372924805, + "epoch": 1.0158170968437703, + "kl_loss": 0.04187255725264549, + "loss_ib": 0.0009488017531111836, + "step": 3533 + }, + { + "ce_ib": 2.8534927368164062, + "ce_orig": 0.7740436792373657, + "epoch": 1.0161046804227478, + "kl_loss": 0.028477098792791367, + "loss_ib": 0.0005701202317140996, + "step": 3534 + }, + { + "ce_ib": 2.0174221992492676, + "ce_orig": 0.44525060057640076, + "epoch": 1.0161046804227478, + "kl_loss": 0.025039583444595337, + "loss_ib": 0.0004521380178630352, + "step": 3534 + }, + { + "ce_ib": 4.156040668487549, + "ce_orig": 0.9951539635658264, + "epoch": 1.0161046804227478, + "kl_loss": 0.04872440919280052, + "loss_ib": 0.0009028480853885412, + "step": 3534 + }, + { + "ce_ib": 4.648533821105957, + "ce_orig": 1.3048421144485474, + "epoch": 1.0161046804227478, + "kl_loss": 0.06376732885837555, + "loss_ib": 0.0011025265557691455, + "step": 3534 + }, + { + "epoch": 1.0163922640017256, + "grad_norm": 0.09088674187660217, + "learning_rate": 3.854864836002836e-05, + "loss": 0.897, + "step": 3535 + }, + { + "ce_ib": 2.3036248683929443, + "ce_orig": 0.6516978740692139, + "epoch": 1.0163922640017256, + "kl_loss": 0.04309766739606857, + "loss_ib": 0.0006613391451537609, + "step": 3535 + }, + { + "ce_ib": 5.087545394897461, + "ce_orig": 1.309414029121399, + "epoch": 1.0163922640017256, + "kl_loss": 0.04136291891336441, + "loss_ib": 0.0009223836241289973, + "step": 3535 + }, + { + "ce_ib": 2.074033260345459, + "ce_orig": 0.39200839400291443, + "epoch": 1.0163922640017256, + "kl_loss": 0.028663672506809235, + "loss_ib": 0.0004940400249324739, + "step": 3535 + }, + { + "ce_ib": 2.9380886554718018, + "ce_orig": 0.6031710505485535, + "epoch": 1.0163922640017256, + "kl_loss": 0.04036150500178337, + "loss_ib": 0.0006974238785915077, + "step": 3535 + }, + { + "ce_ib": 5.19828987121582, + "ce_orig": 0.9394057989120483, + "epoch": 1.016679847580703, + "kl_loss": 0.05017945542931557, + "loss_ib": 0.0010216234950348735, + "step": 3536 + }, + { + "ce_ib": 3.5423521995544434, + "ce_orig": 1.0132544040679932, + "epoch": 1.016679847580703, + "kl_loss": 0.028907326981425285, + "loss_ib": 0.0006433084490709007, + "step": 3536 + }, + { + "ce_ib": 3.363020420074463, + "ce_orig": 0.7194274663925171, + "epoch": 1.016679847580703, + "kl_loss": 0.06070081144571304, + "loss_ib": 0.0009433100931346416, + "step": 3536 + }, + { + "ce_ib": 4.369606971740723, + "ce_orig": 0.8810110092163086, + "epoch": 1.016679847580703, + "kl_loss": 0.052428096532821655, + "loss_ib": 0.0009612416033633053, + "step": 3536 + }, + { + "ce_ib": 4.357076644897461, + "ce_orig": 1.1262493133544922, + "epoch": 1.0169674311596808, + "kl_loss": 0.05377120524644852, + "loss_ib": 0.0009734196937642992, + "step": 3537 + }, + { + "ce_ib": 2.518993377685547, + "ce_orig": 0.6509769558906555, + "epoch": 1.0169674311596808, + "kl_loss": 0.02164934203028679, + "loss_ib": 0.00046839274000376463, + "step": 3537 + }, + { + "ce_ib": 3.7475805282592773, + "ce_orig": 0.9206858277320862, + "epoch": 1.0169674311596808, + "kl_loss": 0.043554700911045074, + "loss_ib": 0.0008103050058707595, + "step": 3537 + }, + { + "ce_ib": 2.230182409286499, + "ce_orig": 0.49084559082984924, + "epoch": 1.0169674311596808, + "kl_loss": 0.03004692867398262, + "loss_ib": 0.0005234875134192407, + "step": 3537 + }, + { + "ce_ib": 2.832866907119751, + "ce_orig": 0.7611285448074341, + "epoch": 1.0172550147386583, + "kl_loss": 0.04424768686294556, + "loss_ib": 0.0007257635588757694, + "step": 3538 + }, + { + "ce_ib": 2.1583480834960938, + "ce_orig": 0.6709778904914856, + "epoch": 1.0172550147386583, + "kl_loss": 0.026616398245096207, + "loss_ib": 0.0004819987516384572, + "step": 3538 + }, + { + "ce_ib": 4.296016216278076, + "ce_orig": 1.0486935377120972, + "epoch": 1.0172550147386583, + "kl_loss": 0.06224307045340538, + "loss_ib": 0.0010520322248339653, + "step": 3538 + }, + { + "ce_ib": 2.9729864597320557, + "ce_orig": 0.6563963294029236, + "epoch": 1.0172550147386583, + "kl_loss": 0.05017532408237457, + "loss_ib": 0.0007990518934093416, + "step": 3538 + }, + { + "ce_ib": 2.4274792671203613, + "ce_orig": 0.7781170010566711, + "epoch": 1.017542598317636, + "kl_loss": 0.054466526955366135, + "loss_ib": 0.0007874131551943719, + "step": 3539 + }, + { + "ce_ib": 3.397570848464966, + "ce_orig": 0.722222089767456, + "epoch": 1.017542598317636, + "kl_loss": 0.05890468508005142, + "loss_ib": 0.0009288039873354137, + "step": 3539 + }, + { + "ce_ib": 3.2580835819244385, + "ce_orig": 0.7731778025627136, + "epoch": 1.017542598317636, + "kl_loss": 0.05629704147577286, + "loss_ib": 0.000888778711669147, + "step": 3539 + }, + { + "ce_ib": 4.380607604980469, + "ce_orig": 1.1374305486679077, + "epoch": 1.017542598317636, + "kl_loss": 0.06860349327325821, + "loss_ib": 0.0011240956373512745, + "step": 3539 + }, + { + "epoch": 1.0178301818966138, + "grad_norm": 0.0846182182431221, + "learning_rate": 3.851602043638994e-05, + "loss": 0.8736, + "step": 3540 + }, + { + "ce_ib": 3.5554895401000977, + "ce_orig": 0.9878926277160645, + "epoch": 1.0178301818966138, + "kl_loss": 0.042743127793073654, + "loss_ib": 0.0007829801761545241, + "step": 3540 + }, + { + "ce_ib": 3.353058099746704, + "ce_orig": 0.8776978850364685, + "epoch": 1.0178301818966138, + "kl_loss": 0.10034279525279999, + "loss_ib": 0.0013387337094172835, + "step": 3540 + }, + { + "ce_ib": 5.372487545013428, + "ce_orig": 1.3118115663528442, + "epoch": 1.0178301818966138, + "kl_loss": 0.05573868378996849, + "loss_ib": 0.0010946355760097504, + "step": 3540 + }, + { + "ce_ib": 3.785259485244751, + "ce_orig": 0.874880313873291, + "epoch": 1.0178301818966138, + "kl_loss": 0.04093574732542038, + "loss_ib": 0.0007878833566792309, + "step": 3540 + }, + { + "ce_ib": 3.9464523792266846, + "ce_orig": 0.9874404668807983, + "epoch": 1.0181177654755913, + "kl_loss": 0.0484948456287384, + "loss_ib": 0.0008795936009846628, + "step": 3541 + }, + { + "ce_ib": 3.455965042114258, + "ce_orig": 1.0413661003112793, + "epoch": 1.0181177654755913, + "kl_loss": 0.04262690991163254, + "loss_ib": 0.0007718655397184193, + "step": 3541 + }, + { + "ce_ib": 1.5532541275024414, + "ce_orig": 0.2867189049720764, + "epoch": 1.0181177654755913, + "kl_loss": 0.04378765448927879, + "loss_ib": 0.0005932019557803869, + "step": 3541 + }, + { + "ce_ib": 4.409840106964111, + "ce_orig": 1.0915942192077637, + "epoch": 1.0181177654755913, + "kl_loss": 0.03981028497219086, + "loss_ib": 0.0008390868315473199, + "step": 3541 + }, + { + "ce_ib": 4.6741437911987305, + "ce_orig": 1.416200876235962, + "epoch": 1.018405349054569, + "kl_loss": 0.03901374340057373, + "loss_ib": 0.0008575518149882555, + "step": 3542 + }, + { + "ce_ib": 3.0933687686920166, + "ce_orig": 0.8415295481681824, + "epoch": 1.018405349054569, + "kl_loss": 0.0344962552189827, + "loss_ib": 0.0006542993942275643, + "step": 3542 + }, + { + "ce_ib": 3.396090269088745, + "ce_orig": 0.8171901106834412, + "epoch": 1.018405349054569, + "kl_loss": 0.05575314164161682, + "loss_ib": 0.0008971404167823493, + "step": 3542 + }, + { + "ce_ib": 4.055532932281494, + "ce_orig": 0.8264047503471375, + "epoch": 1.018405349054569, + "kl_loss": 0.05283770337700844, + "loss_ib": 0.0009339302778244019, + "step": 3542 + }, + { + "ce_ib": 1.9484378099441528, + "ce_orig": 0.6199613809585571, + "epoch": 1.0186929326335465, + "kl_loss": 0.02521868608891964, + "loss_ib": 0.0004470306448638439, + "step": 3543 + }, + { + "ce_ib": 2.782888174057007, + "ce_orig": 0.6803990006446838, + "epoch": 1.0186929326335465, + "kl_loss": 0.06721082329750061, + "loss_ib": 0.0009503969922661781, + "step": 3543 + }, + { + "ce_ib": 2.8672869205474854, + "ce_orig": 0.7082569003105164, + "epoch": 1.0186929326335465, + "kl_loss": 0.048754751682281494, + "loss_ib": 0.0007742761517874897, + "step": 3543 + }, + { + "ce_ib": 2.757755994796753, + "ce_orig": 0.48120415210723877, + "epoch": 1.0186929326335465, + "kl_loss": 0.03613551706075668, + "loss_ib": 0.0006371306953951716, + "step": 3543 + }, + { + "ce_ib": 2.9749279022216797, + "ce_orig": 0.7400362491607666, + "epoch": 1.0189805162125243, + "kl_loss": 0.04600103199481964, + "loss_ib": 0.0007575030322186649, + "step": 3544 + }, + { + "ce_ib": 4.183814525604248, + "ce_orig": 0.8834238052368164, + "epoch": 1.0189805162125243, + "kl_loss": 0.03764365613460541, + "loss_ib": 0.0007948179263621569, + "step": 3544 + }, + { + "ce_ib": 5.835658073425293, + "ce_orig": 1.6046383380889893, + "epoch": 1.0189805162125243, + "kl_loss": 0.05498512461781502, + "loss_ib": 0.0011334170121699572, + "step": 3544 + }, + { + "ce_ib": 5.030210971832275, + "ce_orig": 1.152401328086853, + "epoch": 1.0189805162125243, + "kl_loss": 0.06176890432834625, + "loss_ib": 0.0011207100469619036, + "step": 3544 + }, + { + "epoch": 1.019268099791502, + "grad_norm": 0.10465764999389648, + "learning_rate": 3.8483359949521155e-05, + "loss": 0.8657, + "step": 3545 + }, + { + "ce_ib": 3.477008104324341, + "ce_orig": 0.9352164268493652, + "epoch": 1.019268099791502, + "kl_loss": 0.0472695529460907, + "loss_ib": 0.0008203962934203446, + "step": 3545 + }, + { + "ce_ib": 3.5482094287872314, + "ce_orig": 0.9153587222099304, + "epoch": 1.019268099791502, + "kl_loss": 0.04930531606078148, + "loss_ib": 0.0008478740928694606, + "step": 3545 + }, + { + "ce_ib": 3.5438170433044434, + "ce_orig": 1.101389765739441, + "epoch": 1.019268099791502, + "kl_loss": 0.059494055807590485, + "loss_ib": 0.0009493222460150719, + "step": 3545 + }, + { + "ce_ib": 2.1454379558563232, + "ce_orig": 0.5471594333648682, + "epoch": 1.019268099791502, + "kl_loss": 0.040387846529483795, + "loss_ib": 0.000618422229308635, + "step": 3545 + }, + { + "ce_ib": 4.985386848449707, + "ce_orig": 1.2828898429870605, + "epoch": 1.0195556833704795, + "kl_loss": 0.05974052473902702, + "loss_ib": 0.0010959438513964415, + "step": 3546 + }, + { + "ce_ib": 5.61295223236084, + "ce_orig": 1.5996206998825073, + "epoch": 1.0195556833704795, + "kl_loss": 0.04980270192027092, + "loss_ib": 0.001059322152286768, + "step": 3546 + }, + { + "ce_ib": 4.760580062866211, + "ce_orig": 1.3475420475006104, + "epoch": 1.0195556833704795, + "kl_loss": 0.03411991149187088, + "loss_ib": 0.0008172571542672813, + "step": 3546 + }, + { + "ce_ib": 2.782515287399292, + "ce_orig": 0.7646498680114746, + "epoch": 1.0195556833704795, + "kl_loss": 0.051952049136161804, + "loss_ib": 0.0007977720233611763, + "step": 3546 + }, + { + "ce_ib": 3.3945229053497314, + "ce_orig": 0.37963438034057617, + "epoch": 1.0198432669494573, + "kl_loss": 0.043110139667987823, + "loss_ib": 0.0007705536554567516, + "step": 3547 + }, + { + "ce_ib": 2.6846225261688232, + "ce_orig": 0.612770140171051, + "epoch": 1.0198432669494573, + "kl_loss": 0.06635642796754837, + "loss_ib": 0.0009320264798589051, + "step": 3547 + }, + { + "ce_ib": 2.8640129566192627, + "ce_orig": 0.8299571871757507, + "epoch": 1.0198432669494573, + "kl_loss": 0.048955287784338, + "loss_ib": 0.0007759541040286422, + "step": 3547 + }, + { + "ce_ib": 2.6751456260681152, + "ce_orig": 0.6287849545478821, + "epoch": 1.0198432669494573, + "kl_loss": 0.04054811969399452, + "loss_ib": 0.000672995753120631, + "step": 3547 + }, + { + "ce_ib": 4.5140533447265625, + "ce_orig": 1.2698715925216675, + "epoch": 1.0201308505284348, + "kl_loss": 0.06725621223449707, + "loss_ib": 0.001123967464081943, + "step": 3548 + }, + { + "ce_ib": 3.1839213371276855, + "ce_orig": 0.9826879501342773, + "epoch": 1.0201308505284348, + "kl_loss": 0.040119342505931854, + "loss_ib": 0.0007195855723693967, + "step": 3548 + }, + { + "ce_ib": 4.057089328765869, + "ce_orig": 0.9146869778633118, + "epoch": 1.0201308505284348, + "kl_loss": 0.049978528171777725, + "loss_ib": 0.0009054942056536674, + "step": 3548 + }, + { + "ce_ib": 3.5000312328338623, + "ce_orig": 0.787490963935852, + "epoch": 1.0201308505284348, + "kl_loss": 0.07806839793920517, + "loss_ib": 0.0011306870728731155, + "step": 3548 + }, + { + "ce_ib": 2.7098915576934814, + "ce_orig": 0.5930418968200684, + "epoch": 1.0204184341074125, + "kl_loss": 0.05570743605494499, + "loss_ib": 0.0008280634647235274, + "step": 3549 + }, + { + "ce_ib": 4.381779670715332, + "ce_orig": 1.1374167203903198, + "epoch": 1.0204184341074125, + "kl_loss": 0.053785622119903564, + "loss_ib": 0.0009760341490618885, + "step": 3549 + }, + { + "ce_ib": 2.823387622833252, + "ce_orig": 0.6166263818740845, + "epoch": 1.0204184341074125, + "kl_loss": 0.0663260892033577, + "loss_ib": 0.000945599633269012, + "step": 3549 + }, + { + "ce_ib": 3.8083856105804443, + "ce_orig": 1.1757097244262695, + "epoch": 1.0204184341074125, + "kl_loss": 0.04020782187581062, + "loss_ib": 0.0007829167298041284, + "step": 3549 + }, + { + "epoch": 1.02070601768639, + "grad_norm": 0.09225630760192871, + "learning_rate": 3.8450666978108695e-05, + "loss": 0.822, + "step": 3550 + }, + { + "ce_ib": 2.471133232116699, + "ce_orig": 0.7471223473548889, + "epoch": 1.02070601768639, + "kl_loss": 0.03280818089842796, + "loss_ib": 0.0005751950666308403, + "step": 3550 + }, + { + "ce_ib": 2.292161703109741, + "ce_orig": 0.5095592737197876, + "epoch": 1.02070601768639, + "kl_loss": 0.04681180790066719, + "loss_ib": 0.0006973342387937009, + "step": 3550 + }, + { + "ce_ib": 2.63244366645813, + "ce_orig": 0.7882601022720337, + "epoch": 1.02070601768639, + "kl_loss": 0.028691386803984642, + "loss_ib": 0.0005501582636497915, + "step": 3550 + }, + { + "ce_ib": 2.5860931873321533, + "ce_orig": 0.657606840133667, + "epoch": 1.02070601768639, + "kl_loss": 0.0910106897354126, + "loss_ib": 0.0011687162332236767, + "step": 3550 + }, + { + "ce_ib": 5.255091190338135, + "ce_orig": 1.3643677234649658, + "epoch": 1.0209936012653678, + "kl_loss": 0.05103512853384018, + "loss_ib": 0.0010358603904023767, + "step": 3551 + }, + { + "ce_ib": 2.400710344314575, + "ce_orig": 0.6802031397819519, + "epoch": 1.0209936012653678, + "kl_loss": 0.04911172762513161, + "loss_ib": 0.0007311882800422609, + "step": 3551 + }, + { + "ce_ib": 5.172080993652344, + "ce_orig": 1.4760364294052124, + "epoch": 1.0209936012653678, + "kl_loss": 0.04739389196038246, + "loss_ib": 0.0009911470115184784, + "step": 3551 + }, + { + "ce_ib": 4.272568702697754, + "ce_orig": 0.6518140435218811, + "epoch": 1.0209936012653678, + "kl_loss": 0.057798128575086594, + "loss_ib": 0.0010052381549030542, + "step": 3551 + }, + { + "ce_ib": 4.570605278015137, + "ce_orig": 1.2575113773345947, + "epoch": 1.0212811848443455, + "kl_loss": 0.039187125861644745, + "loss_ib": 0.0008489317842759192, + "step": 3552 + }, + { + "ce_ib": 1.8817476034164429, + "ce_orig": 0.3653523325920105, + "epoch": 1.0212811848443455, + "kl_loss": 0.0335337370634079, + "loss_ib": 0.0005235121352598071, + "step": 3552 + }, + { + "ce_ib": 5.831960678100586, + "ce_orig": 0.9482642412185669, + "epoch": 1.0212811848443455, + "kl_loss": 0.07404851913452148, + "loss_ib": 0.001323681091889739, + "step": 3552 + }, + { + "ce_ib": 3.072333335876465, + "ce_orig": 0.7130284309387207, + "epoch": 1.0212811848443455, + "kl_loss": 0.05140194669365883, + "loss_ib": 0.0008212527609430254, + "step": 3552 + }, + { + "ce_ib": 2.720637321472168, + "ce_orig": 0.821586549282074, + "epoch": 1.021568768423323, + "kl_loss": 0.042272135615348816, + "loss_ib": 0.0006947850924916565, + "step": 3553 + }, + { + "ce_ib": 5.000968933105469, + "ce_orig": 1.1943717002868652, + "epoch": 1.021568768423323, + "kl_loss": 0.046796105802059174, + "loss_ib": 0.000968057953286916, + "step": 3553 + }, + { + "ce_ib": 2.4434814453125, + "ce_orig": 0.7117018103599548, + "epoch": 1.021568768423323, + "kl_loss": 0.07272669672966003, + "loss_ib": 0.0009716150816529989, + "step": 3553 + }, + { + "ce_ib": 4.8350372314453125, + "ce_orig": 0.9974102973937988, + "epoch": 1.021568768423323, + "kl_loss": 0.06867526471614838, + "loss_ib": 0.0011702562915161252, + "step": 3553 + }, + { + "ce_ib": 3.413789987564087, + "ce_orig": 0.8641055226325989, + "epoch": 1.0218563520023007, + "kl_loss": 0.0529111884534359, + "loss_ib": 0.0008704908541403711, + "step": 3554 + }, + { + "ce_ib": 4.350025653839111, + "ce_orig": 1.052445650100708, + "epoch": 1.0218563520023007, + "kl_loss": 0.035940852016210556, + "loss_ib": 0.0007944110548123717, + "step": 3554 + }, + { + "ce_ib": 3.2419424057006836, + "ce_orig": 0.9160780310630798, + "epoch": 1.0218563520023007, + "kl_loss": 0.06215660274028778, + "loss_ib": 0.0009457602864131331, + "step": 3554 + }, + { + "ce_ib": 4.077336311340332, + "ce_orig": 0.7794416546821594, + "epoch": 1.0218563520023007, + "kl_loss": 0.0614565908908844, + "loss_ib": 0.0010222995188087225, + "step": 3554 + }, + { + "epoch": 1.0221439355812783, + "grad_norm": 0.09639974683523178, + "learning_rate": 3.841794160091752e-05, + "loss": 0.8653, + "step": 3555 + }, + { + "ce_ib": 4.7350897789001465, + "ce_orig": 1.2945828437805176, + "epoch": 1.0221439355812783, + "kl_loss": 0.051687177270650864, + "loss_ib": 0.0009903807658702135, + "step": 3555 + }, + { + "ce_ib": 3.1550650596618652, + "ce_orig": 0.7857145667076111, + "epoch": 1.0221439355812783, + "kl_loss": 0.03110472857952118, + "loss_ib": 0.0006265537231229246, + "step": 3555 + }, + { + "ce_ib": 4.533540725708008, + "ce_orig": 1.2364715337753296, + "epoch": 1.0221439355812783, + "kl_loss": 0.06452664732933044, + "loss_ib": 0.0010986204724758863, + "step": 3555 + }, + { + "ce_ib": 2.8180291652679443, + "ce_orig": 0.9703078269958496, + "epoch": 1.0221439355812783, + "kl_loss": 0.04962961748242378, + "loss_ib": 0.0007780990563333035, + "step": 3555 + }, + { + "ce_ib": 3.1536879539489746, + "ce_orig": 0.4336634874343872, + "epoch": 1.022431519160256, + "kl_loss": 0.0435289703309536, + "loss_ib": 0.0007506585097871721, + "step": 3556 + }, + { + "ce_ib": 3.133599281311035, + "ce_orig": 0.7549418807029724, + "epoch": 1.022431519160256, + "kl_loss": 0.08435597270727158, + "loss_ib": 0.001156919519416988, + "step": 3556 + }, + { + "ce_ib": 3.978715419769287, + "ce_orig": 0.6332373023033142, + "epoch": 1.022431519160256, + "kl_loss": 0.06666333973407745, + "loss_ib": 0.001064504962414503, + "step": 3556 + }, + { + "ce_ib": 3.9137449264526367, + "ce_orig": 0.8832229971885681, + "epoch": 1.022431519160256, + "kl_loss": 0.03281189501285553, + "loss_ib": 0.0007194934878498316, + "step": 3556 + }, + { + "ce_ib": 0.9695588946342468, + "ce_orig": 0.21503321826457977, + "epoch": 1.0227191027392335, + "kl_loss": 0.08312824368476868, + "loss_ib": 0.0009282383252866566, + "step": 3557 + }, + { + "ce_ib": 4.349771022796631, + "ce_orig": 1.2249032258987427, + "epoch": 1.0227191027392335, + "kl_loss": 0.039640042930841446, + "loss_ib": 0.0008313774596899748, + "step": 3557 + }, + { + "ce_ib": 2.938762664794922, + "ce_orig": 0.8057246208190918, + "epoch": 1.0227191027392335, + "kl_loss": 0.040466271340847015, + "loss_ib": 0.0006985390209592879, + "step": 3557 + }, + { + "ce_ib": 1.5204527378082275, + "ce_orig": 0.2788340151309967, + "epoch": 1.0227191027392335, + "kl_loss": 0.08238863945007324, + "loss_ib": 0.0009759316453710198, + "step": 3557 + }, + { + "ce_ib": 3.168964147567749, + "ce_orig": 1.0755269527435303, + "epoch": 1.0230066863182112, + "kl_loss": 0.03245601803064346, + "loss_ib": 0.0006414565141312778, + "step": 3558 + }, + { + "ce_ib": 4.960775375366211, + "ce_orig": 1.2681890726089478, + "epoch": 1.0230066863182112, + "kl_loss": 0.057653918862342834, + "loss_ib": 0.001072616665624082, + "step": 3558 + }, + { + "ce_ib": 2.876882791519165, + "ce_orig": 0.9328356981277466, + "epoch": 1.0230066863182112, + "kl_loss": 0.04336677864193916, + "loss_ib": 0.0007213560165837407, + "step": 3558 + }, + { + "ce_ib": 3.829275369644165, + "ce_orig": 0.790223240852356, + "epoch": 1.0230066863182112, + "kl_loss": 0.07308025658130646, + "loss_ib": 0.0011137300170958042, + "step": 3558 + }, + { + "ce_ib": 4.046142578125, + "ce_orig": 0.9029895663261414, + "epoch": 1.023294269897189, + "kl_loss": 0.06397297233343124, + "loss_ib": 0.0010443440405651927, + "step": 3559 + }, + { + "ce_ib": 2.4900755882263184, + "ce_orig": 0.434610515832901, + "epoch": 1.023294269897189, + "kl_loss": 0.04377100616693497, + "loss_ib": 0.0006867176271043718, + "step": 3559 + }, + { + "ce_ib": 4.402655124664307, + "ce_orig": 0.938352108001709, + "epoch": 1.023294269897189, + "kl_loss": 0.042466871440410614, + "loss_ib": 0.0008649341762065887, + "step": 3559 + }, + { + "ce_ib": 3.8604683876037598, + "ce_orig": 0.9241869449615479, + "epoch": 1.023294269897189, + "kl_loss": 0.04098198562860489, + "loss_ib": 0.0007958666537888348, + "step": 3559 + }, + { + "epoch": 1.0235818534761665, + "grad_norm": 0.09390439838171005, + "learning_rate": 3.838518389679065e-05, + "loss": 0.8528, + "step": 3560 + }, + { + "ce_ib": 1.963196873664856, + "ce_orig": 0.5522909760475159, + "epoch": 1.0235818534761665, + "kl_loss": 0.030351057648658752, + "loss_ib": 0.0004998302320018411, + "step": 3560 + }, + { + "ce_ib": 4.090065956115723, + "ce_orig": 0.7107771635055542, + "epoch": 1.0235818534761665, + "kl_loss": 0.05989295244216919, + "loss_ib": 0.0010079360799863935, + "step": 3560 + }, + { + "ce_ib": 2.136536121368408, + "ce_orig": 0.3870089650154114, + "epoch": 1.0235818534761665, + "kl_loss": 0.03806373476982117, + "loss_ib": 0.0005942909047007561, + "step": 3560 + }, + { + "ce_ib": 3.831252098083496, + "ce_orig": 0.7347872257232666, + "epoch": 1.0235818534761665, + "kl_loss": 0.059852130711078644, + "loss_ib": 0.000981646473519504, + "step": 3560 + }, + { + "ce_ib": 3.723896026611328, + "ce_orig": 0.6812296509742737, + "epoch": 1.0238694370551442, + "kl_loss": 0.06170983612537384, + "loss_ib": 0.000989487860351801, + "step": 3561 + }, + { + "ce_ib": 3.649164915084839, + "ce_orig": 0.5597164034843445, + "epoch": 1.0238694370551442, + "kl_loss": 0.04550869017839432, + "loss_ib": 0.0008200033335015178, + "step": 3561 + }, + { + "ce_ib": 3.674551248550415, + "ce_orig": 0.9994924068450928, + "epoch": 1.0238694370551442, + "kl_loss": 0.03770698606967926, + "loss_ib": 0.0007445249357260764, + "step": 3561 + }, + { + "ce_ib": 3.5231943130493164, + "ce_orig": 0.7754204869270325, + "epoch": 1.0238694370551442, + "kl_loss": 0.0409528985619545, + "loss_ib": 0.000761848350521177, + "step": 3561 + }, + { + "ce_ib": 5.053725719451904, + "ce_orig": 1.4005855321884155, + "epoch": 1.0241570206341217, + "kl_loss": 0.043696243315935135, + "loss_ib": 0.0009423349401913583, + "step": 3562 + }, + { + "ce_ib": 2.3898210525512695, + "ce_orig": 0.3197745382785797, + "epoch": 1.0241570206341217, + "kl_loss": 0.04800371825695038, + "loss_ib": 0.0007190193282440305, + "step": 3562 + }, + { + "ce_ib": 3.0843183994293213, + "ce_orig": 0.48388442397117615, + "epoch": 1.0241570206341217, + "kl_loss": 0.03253699839115143, + "loss_ib": 0.0006338017992675304, + "step": 3562 + }, + { + "ce_ib": 2.044172525405884, + "ce_orig": 0.23009853065013885, + "epoch": 1.0241570206341217, + "kl_loss": 0.05785338953137398, + "loss_ib": 0.0007829510723240674, + "step": 3562 + }, + { + "ce_ib": 0.7811241149902344, + "ce_orig": 0.1461426168680191, + "epoch": 1.0244446042130995, + "kl_loss": 0.09776172786951065, + "loss_ib": 0.0010557296918705106, + "step": 3563 + }, + { + "ce_ib": 2.6766107082366943, + "ce_orig": 0.7128492593765259, + "epoch": 1.0244446042130995, + "kl_loss": 0.03271951526403427, + "loss_ib": 0.0005948562175035477, + "step": 3563 + }, + { + "ce_ib": 3.5025527477264404, + "ce_orig": 0.608837902545929, + "epoch": 1.0244446042130995, + "kl_loss": 0.07349291443824768, + "loss_ib": 0.0010851843981072307, + "step": 3563 + }, + { + "ce_ib": 3.261760711669922, + "ce_orig": 0.6621811985969543, + "epoch": 1.0244446042130995, + "kl_loss": 0.04642053321003914, + "loss_ib": 0.0007903813966549933, + "step": 3563 + }, + { + "ce_ib": 4.58663272857666, + "ce_orig": 1.4254804849624634, + "epoch": 1.024732187792077, + "kl_loss": 0.038910865783691406, + "loss_ib": 0.0008477718802168965, + "step": 3564 + }, + { + "ce_ib": 3.584197521209717, + "ce_orig": 0.7187772393226624, + "epoch": 1.024732187792077, + "kl_loss": 0.06125091761350632, + "loss_ib": 0.000970928929746151, + "step": 3564 + }, + { + "ce_ib": 2.8327364921569824, + "ce_orig": 0.7256253361701965, + "epoch": 1.024732187792077, + "kl_loss": 0.03876945748925209, + "loss_ib": 0.0006709682056680322, + "step": 3564 + }, + { + "ce_ib": 2.487344980239868, + "ce_orig": 0.6402795910835266, + "epoch": 1.024732187792077, + "kl_loss": 0.037686385214328766, + "loss_ib": 0.0006255983607843518, + "step": 3564 + }, + { + "epoch": 1.0250197713710547, + "grad_norm": 0.1026521623134613, + "learning_rate": 3.835239394464901e-05, + "loss": 0.8112, + "step": 3565 + }, + { + "ce_ib": 3.3165442943573, + "ce_orig": 0.46866852045059204, + "epoch": 1.0250197713710547, + "kl_loss": 0.06066048517823219, + "loss_ib": 0.0009382592397741973, + "step": 3565 + }, + { + "ce_ib": 2.7729718685150146, + "ce_orig": 0.6625040173530579, + "epoch": 1.0250197713710547, + "kl_loss": 0.026867084205150604, + "loss_ib": 0.0005459680105559528, + "step": 3565 + }, + { + "ce_ib": 3.2694051265716553, + "ce_orig": 0.762825608253479, + "epoch": 1.0250197713710547, + "kl_loss": 0.04691179841756821, + "loss_ib": 0.0007960584480315447, + "step": 3565 + }, + { + "ce_ib": 2.406704902648926, + "ce_orig": 0.6313830018043518, + "epoch": 1.0250197713710547, + "kl_loss": 0.027019277215003967, + "loss_ib": 0.0005108632612973452, + "step": 3565 + }, + { + "ce_ib": 3.078071117401123, + "ce_orig": 0.7458805441856384, + "epoch": 1.0253073549500324, + "kl_loss": 0.046481259167194366, + "loss_ib": 0.0007726196781732142, + "step": 3566 + }, + { + "ce_ib": 3.563765287399292, + "ce_orig": 1.034528374671936, + "epoch": 1.0253073549500324, + "kl_loss": 0.06623028963804245, + "loss_ib": 0.0010186794679611921, + "step": 3566 + }, + { + "ce_ib": 3.2480056285858154, + "ce_orig": 0.39844176173210144, + "epoch": 1.0253073549500324, + "kl_loss": 0.06726187467575073, + "loss_ib": 0.0009974193526431918, + "step": 3566 + }, + { + "ce_ib": 3.0906155109405518, + "ce_orig": 0.47611290216445923, + "epoch": 1.0253073549500324, + "kl_loss": 0.06404310464859009, + "loss_ib": 0.0009494925616309047, + "step": 3566 + }, + { + "ce_ib": 3.5579380989074707, + "ce_orig": 1.0548501014709473, + "epoch": 1.02559493852901, + "kl_loss": 0.0491693839430809, + "loss_ib": 0.0008474875940009952, + "step": 3567 + }, + { + "ce_ib": 3.144658088684082, + "ce_orig": 0.6858482360839844, + "epoch": 1.02559493852901, + "kl_loss": 0.0489940345287323, + "loss_ib": 0.000804406066890806, + "step": 3567 + }, + { + "ce_ib": 3.529839038848877, + "ce_orig": 0.7702025771141052, + "epoch": 1.02559493852901, + "kl_loss": 0.058084506541490555, + "loss_ib": 0.0009338289382867515, + "step": 3567 + }, + { + "ce_ib": 3.000044107437134, + "ce_orig": 0.5971007347106934, + "epoch": 1.02559493852901, + "kl_loss": 0.08064447343349457, + "loss_ib": 0.001106449170038104, + "step": 3567 + }, + { + "ce_ib": 5.2203369140625, + "ce_orig": 1.105143666267395, + "epoch": 1.0258825221079877, + "kl_loss": 0.10854849219322205, + "loss_ib": 0.0016075186431407928, + "step": 3568 + }, + { + "ce_ib": 4.62047004699707, + "ce_orig": 1.2176272869110107, + "epoch": 1.0258825221079877, + "kl_loss": 0.03914933651685715, + "loss_ib": 0.000853540375828743, + "step": 3568 + }, + { + "ce_ib": 2.914489269256592, + "ce_orig": 0.6351281404495239, + "epoch": 1.0258825221079877, + "kl_loss": 0.06338454782962799, + "loss_ib": 0.0009252943564206362, + "step": 3568 + }, + { + "ce_ib": 2.7700419425964355, + "ce_orig": 0.557800829410553, + "epoch": 1.0258825221079877, + "kl_loss": 0.05135209113359451, + "loss_ib": 0.0007905250531621277, + "step": 3568 + }, + { + "ce_ib": 3.105912685394287, + "ce_orig": 0.8371145129203796, + "epoch": 1.0261701056869652, + "kl_loss": 0.033654000610113144, + "loss_ib": 0.0006471312372013927, + "step": 3569 + }, + { + "ce_ib": 2.0282366275787354, + "ce_orig": 0.48269858956336975, + "epoch": 1.0261701056869652, + "kl_loss": 0.04728551208972931, + "loss_ib": 0.0006756787770427763, + "step": 3569 + }, + { + "ce_ib": 4.818871021270752, + "ce_orig": 1.2600271701812744, + "epoch": 1.0261701056869652, + "kl_loss": 0.0528026819229126, + "loss_ib": 0.0010099138598889112, + "step": 3569 + }, + { + "ce_ib": 5.122138977050781, + "ce_orig": 1.2076199054718018, + "epoch": 1.0261701056869652, + "kl_loss": 0.03979529067873955, + "loss_ib": 0.0009101667674258351, + "step": 3569 + }, + { + "epoch": 1.026457689265943, + "grad_norm": 0.10982489585876465, + "learning_rate": 3.8319571823491204e-05, + "loss": 0.7801, + "step": 3570 + }, + { + "ce_ib": 4.090981960296631, + "ce_orig": 1.1590675115585327, + "epoch": 1.026457689265943, + "kl_loss": 0.0461449921131134, + "loss_ib": 0.000870548072271049, + "step": 3570 + }, + { + "ce_ib": 3.890570878982544, + "ce_orig": 0.9651972651481628, + "epoch": 1.026457689265943, + "kl_loss": 0.03699313849210739, + "loss_ib": 0.0007589883753098547, + "step": 3570 + }, + { + "ce_ib": 4.015626430511475, + "ce_orig": 0.9711881875991821, + "epoch": 1.026457689265943, + "kl_loss": 0.024333223700523376, + "loss_ib": 0.0006448948406614363, + "step": 3570 + }, + { + "ce_ib": 4.103512763977051, + "ce_orig": 0.8914878368377686, + "epoch": 1.026457689265943, + "kl_loss": 0.04270799458026886, + "loss_ib": 0.0008374312310479581, + "step": 3570 + }, + { + "ce_ib": 3.2164061069488525, + "ce_orig": 0.6772381663322449, + "epoch": 1.0267452728449205, + "kl_loss": 0.035781510174274445, + "loss_ib": 0.0006794556975364685, + "step": 3571 + }, + { + "ce_ib": 4.679947853088379, + "ce_orig": 0.6389361619949341, + "epoch": 1.0267452728449205, + "kl_loss": 0.09924495220184326, + "loss_ib": 0.0014604442985728383, + "step": 3571 + }, + { + "ce_ib": 3.8916702270507812, + "ce_orig": 1.0594497919082642, + "epoch": 1.0267452728449205, + "kl_loss": 0.038972411304712296, + "loss_ib": 0.0007788911461830139, + "step": 3571 + }, + { + "ce_ib": 3.99271559715271, + "ce_orig": 1.1299808025360107, + "epoch": 1.0267452728449205, + "kl_loss": 0.05356227606534958, + "loss_ib": 0.0009348943131044507, + "step": 3571 + }, + { + "ce_ib": 5.124117851257324, + "ce_orig": 1.3650797605514526, + "epoch": 1.0270328564238982, + "kl_loss": 0.06827180087566376, + "loss_ib": 0.00119512970559299, + "step": 3572 + }, + { + "ce_ib": 4.671802520751953, + "ce_orig": 1.2391725778579712, + "epoch": 1.0270328564238982, + "kl_loss": 0.0578739158809185, + "loss_ib": 0.0010459193727001548, + "step": 3572 + }, + { + "ce_ib": 2.596900701522827, + "ce_orig": 0.8547714352607727, + "epoch": 1.0270328564238982, + "kl_loss": 0.02868867665529251, + "loss_ib": 0.0005465768044814467, + "step": 3572 + }, + { + "ce_ib": 3.3659098148345947, + "ce_orig": 1.2486971616744995, + "epoch": 1.0270328564238982, + "kl_loss": 0.03508555889129639, + "loss_ib": 0.0006874465616419911, + "step": 3572 + }, + { + "ce_ib": 6.165942668914795, + "ce_orig": 1.4642897844314575, + "epoch": 1.027320440002876, + "kl_loss": 0.06990306824445724, + "loss_ib": 0.0013156250352039933, + "step": 3573 + }, + { + "ce_ib": 3.614583969116211, + "ce_orig": 1.1023497581481934, + "epoch": 1.027320440002876, + "kl_loss": 0.02860543690621853, + "loss_ib": 0.0006475127302110195, + "step": 3573 + }, + { + "ce_ib": 2.7130379676818848, + "ce_orig": 0.6477078795433044, + "epoch": 1.027320440002876, + "kl_loss": 0.04281039908528328, + "loss_ib": 0.0006994077120907605, + "step": 3573 + }, + { + "ce_ib": 2.0054492950439453, + "ce_orig": 0.4214160740375519, + "epoch": 1.027320440002876, + "kl_loss": 0.06781715899705887, + "loss_ib": 0.0008787165279500186, + "step": 3573 + }, + { + "ce_ib": 4.047159194946289, + "ce_orig": 0.701964795589447, + "epoch": 1.0276080235818534, + "kl_loss": 0.04579994082450867, + "loss_ib": 0.0008627153001725674, + "step": 3574 + }, + { + "ce_ib": 2.6990809440612793, + "ce_orig": 0.5119542479515076, + "epoch": 1.0276080235818534, + "kl_loss": 0.05858040601015091, + "loss_ib": 0.0008557121036574244, + "step": 3574 + }, + { + "ce_ib": 2.6672544479370117, + "ce_orig": 0.6904184222221375, + "epoch": 1.0276080235818534, + "kl_loss": 0.018770616501569748, + "loss_ib": 0.000454431603429839, + "step": 3574 + }, + { + "ce_ib": 2.15702486038208, + "ce_orig": 0.4349527359008789, + "epoch": 1.0276080235818534, + "kl_loss": 0.04417489469051361, + "loss_ib": 0.0006574514554813504, + "step": 3574 + }, + { + "epoch": 1.0278956071608312, + "grad_norm": 0.10270065069198608, + "learning_rate": 3.828671761239333e-05, + "loss": 0.8808, + "step": 3575 + }, + { + "ce_ib": 2.4108364582061768, + "ce_orig": 0.7657090425491333, + "epoch": 1.0278956071608312, + "kl_loss": 0.04569092392921448, + "loss_ib": 0.0006979928584769368, + "step": 3575 + }, + { + "ce_ib": 3.7387943267822266, + "ce_orig": 0.9163857102394104, + "epoch": 1.0278956071608312, + "kl_loss": 0.056021373718976974, + "loss_ib": 0.0009340931428596377, + "step": 3575 + }, + { + "ce_ib": 5.330221176147461, + "ce_orig": 1.5500153303146362, + "epoch": 1.0278956071608312, + "kl_loss": 0.03487574681639671, + "loss_ib": 0.0008817795314826071, + "step": 3575 + }, + { + "ce_ib": 4.401991367340088, + "ce_orig": 1.2533031702041626, + "epoch": 1.0278956071608312, + "kl_loss": 0.053422313183546066, + "loss_ib": 0.0009744222043082118, + "step": 3575 + }, + { + "ce_ib": 2.776797294616699, + "ce_orig": 0.7083109617233276, + "epoch": 1.0281831907398087, + "kl_loss": 0.04645492881536484, + "loss_ib": 0.0007422289927490056, + "step": 3576 + }, + { + "ce_ib": 2.4480667114257812, + "ce_orig": 0.7078196406364441, + "epoch": 1.0281831907398087, + "kl_loss": 0.03597278520464897, + "loss_ib": 0.0006045345216989517, + "step": 3576 + }, + { + "ce_ib": 4.097442150115967, + "ce_orig": 0.9314661026000977, + "epoch": 1.0281831907398087, + "kl_loss": 0.062295544892549515, + "loss_ib": 0.0010326995979994535, + "step": 3576 + }, + { + "ce_ib": 3.3642489910125732, + "ce_orig": 0.7827569246292114, + "epoch": 1.0281831907398087, + "kl_loss": 0.0700402781367302, + "loss_ib": 0.0010368276853114367, + "step": 3576 + }, + { + "ce_ib": 4.516533851623535, + "ce_orig": 1.285496473312378, + "epoch": 1.0284707743187864, + "kl_loss": 0.06139099970459938, + "loss_ib": 0.0010655634105205536, + "step": 3577 + }, + { + "ce_ib": 4.157855033874512, + "ce_orig": 0.9340003132820129, + "epoch": 1.0284707743187864, + "kl_loss": 0.047978296875953674, + "loss_ib": 0.0008955684024840593, + "step": 3577 + }, + { + "ce_ib": 4.304104328155518, + "ce_orig": 0.9625415205955505, + "epoch": 1.0284707743187864, + "kl_loss": 0.04006422311067581, + "loss_ib": 0.0008310526609420776, + "step": 3577 + }, + { + "ce_ib": 2.5242373943328857, + "ce_orig": 0.6754941940307617, + "epoch": 1.0284707743187864, + "kl_loss": 0.037452150136232376, + "loss_ib": 0.0006269451696425676, + "step": 3577 + }, + { + "ce_ib": 2.240248203277588, + "ce_orig": 0.6072173714637756, + "epoch": 1.028758357897764, + "kl_loss": 0.03836070001125336, + "loss_ib": 0.0006076318095438182, + "step": 3578 + }, + { + "ce_ib": 2.428489923477173, + "ce_orig": 0.6293929815292358, + "epoch": 1.028758357897764, + "kl_loss": 0.043572284281253815, + "loss_ib": 0.0006785718142054975, + "step": 3578 + }, + { + "ce_ib": 3.3850114345550537, + "ce_orig": 0.6776675581932068, + "epoch": 1.028758357897764, + "kl_loss": 0.04642167687416077, + "loss_ib": 0.0008027179283089936, + "step": 3578 + }, + { + "ce_ib": 2.914384603500366, + "ce_orig": 0.5208145380020142, + "epoch": 1.028758357897764, + "kl_loss": 0.18314164876937866, + "loss_ib": 0.0021228548139333725, + "step": 3578 + }, + { + "ce_ib": 4.3343825340271, + "ce_orig": 1.0664641857147217, + "epoch": 1.0290459414767417, + "kl_loss": 0.04415259510278702, + "loss_ib": 0.0008749641128815711, + "step": 3579 + }, + { + "ce_ib": 5.426599502563477, + "ce_orig": 1.2049707174301147, + "epoch": 1.0290459414767417, + "kl_loss": 0.04317475110292435, + "loss_ib": 0.0009744074777700007, + "step": 3579 + }, + { + "ce_ib": 1.6258747577667236, + "ce_orig": 0.1614535003900528, + "epoch": 1.0290459414767417, + "kl_loss": 0.03221734240651131, + "loss_ib": 0.00048476087977178395, + "step": 3579 + }, + { + "ce_ib": 4.347473621368408, + "ce_orig": 1.0271704196929932, + "epoch": 1.0290459414767417, + "kl_loss": 0.05172445625066757, + "loss_ib": 0.0009519918821752071, + "step": 3579 + }, + { + "epoch": 1.0293335250557194, + "grad_norm": 0.10995268076658249, + "learning_rate": 3.825383139050881e-05, + "loss": 0.7543, + "step": 3580 + }, + { + "ce_ib": 3.5786664485931396, + "ce_orig": 0.7316752672195435, + "epoch": 1.0293335250557194, + "kl_loss": 0.05979565903544426, + "loss_ib": 0.0009558231686241925, + "step": 3580 + }, + { + "ce_ib": 5.343243598937988, + "ce_orig": 1.1627362966537476, + "epoch": 1.0293335250557194, + "kl_loss": 0.04959952086210251, + "loss_ib": 0.0010303194867447019, + "step": 3580 + }, + { + "ce_ib": 2.355755567550659, + "ce_orig": 0.5824925303459167, + "epoch": 1.0293335250557194, + "kl_loss": 0.042696841061115265, + "loss_ib": 0.0006625439273193479, + "step": 3580 + }, + { + "ce_ib": 3.4543652534484863, + "ce_orig": 0.8189288377761841, + "epoch": 1.0293335250557194, + "kl_loss": 0.04636920243501663, + "loss_ib": 0.0008091285708360374, + "step": 3580 + }, + { + "ce_ib": 3.968864917755127, + "ce_orig": 0.9918146729469299, + "epoch": 1.029621108634697, + "kl_loss": 0.04347557574510574, + "loss_ib": 0.0008316421881318092, + "step": 3581 + }, + { + "ce_ib": 4.188397407531738, + "ce_orig": 1.065015435218811, + "epoch": 1.029621108634697, + "kl_loss": 0.036595527082681656, + "loss_ib": 0.0007847949746064842, + "step": 3581 + }, + { + "ce_ib": 3.763664722442627, + "ce_orig": 1.3275141716003418, + "epoch": 1.029621108634697, + "kl_loss": 0.04813771694898605, + "loss_ib": 0.0008577436092309654, + "step": 3581 + }, + { + "ce_ib": 2.9152557849884033, + "ce_orig": 0.6171338558197021, + "epoch": 1.029621108634697, + "kl_loss": 0.0739806592464447, + "loss_ib": 0.0010313321836292744, + "step": 3581 + }, + { + "ce_ib": 3.7663490772247314, + "ce_orig": 0.7159185409545898, + "epoch": 1.0299086922136746, + "kl_loss": 0.05352495610713959, + "loss_ib": 0.0009118844172917306, + "step": 3582 + }, + { + "ce_ib": 6.124256134033203, + "ce_orig": 1.6890391111373901, + "epoch": 1.0299086922136746, + "kl_loss": 0.04323408380150795, + "loss_ib": 0.0010447663953527808, + "step": 3582 + }, + { + "ce_ib": 3.3787355422973633, + "ce_orig": 0.903659999370575, + "epoch": 1.0299086922136746, + "kl_loss": 0.046029675751924515, + "loss_ib": 0.0007981702219694853, + "step": 3582 + }, + { + "ce_ib": 3.509638547897339, + "ce_orig": 0.6592971086502075, + "epoch": 1.0299086922136746, + "kl_loss": 0.01784530282020569, + "loss_ib": 0.0005294168367981911, + "step": 3582 + }, + { + "ce_ib": 4.478092193603516, + "ce_orig": 1.3306900262832642, + "epoch": 1.0301962757926522, + "kl_loss": 0.05309266597032547, + "loss_ib": 0.000978735857643187, + "step": 3583 + }, + { + "ce_ib": 2.424105644226074, + "ce_orig": 0.6469182968139648, + "epoch": 1.0301962757926522, + "kl_loss": 0.03488641977310181, + "loss_ib": 0.0005912747583352029, + "step": 3583 + }, + { + "ce_ib": 4.564316749572754, + "ce_orig": 1.2848631143569946, + "epoch": 1.0301962757926522, + "kl_loss": 0.10715171694755554, + "loss_ib": 0.001527948770672083, + "step": 3583 + }, + { + "ce_ib": 4.965561866760254, + "ce_orig": 1.1101959943771362, + "epoch": 1.0301962757926522, + "kl_loss": 0.0547727569937706, + "loss_ib": 0.0010442837374284863, + "step": 3583 + }, + { + "ce_ib": 4.043630123138428, + "ce_orig": 0.8999270796775818, + "epoch": 1.03048385937163, + "kl_loss": 0.03790498897433281, + "loss_ib": 0.0007834128919057548, + "step": 3584 + }, + { + "ce_ib": 5.359075546264648, + "ce_orig": 0.6541309952735901, + "epoch": 1.03048385937163, + "kl_loss": 0.0548996701836586, + "loss_ib": 0.0010849041864275932, + "step": 3584 + }, + { + "ce_ib": 2.8569529056549072, + "ce_orig": 0.782235860824585, + "epoch": 1.03048385937163, + "kl_loss": 0.047278523445129395, + "loss_ib": 0.0007584805134683847, + "step": 3584 + }, + { + "ce_ib": 2.638105630874634, + "ce_orig": 0.6626614332199097, + "epoch": 1.03048385937163, + "kl_loss": 0.035461004823446274, + "loss_ib": 0.0006184205994941294, + "step": 3584 + }, + { + "epoch": 1.0307714429506074, + "grad_norm": 0.13087402284145355, + "learning_rate": 3.82209132370682e-05, + "loss": 0.8557, + "step": 3585 + }, + { + "ce_ib": 2.267580986022949, + "ce_orig": 0.5331100225448608, + "epoch": 1.0307714429506074, + "kl_loss": 0.03420677408576012, + "loss_ib": 0.000568825809750706, + "step": 3585 + }, + { + "ce_ib": 2.681579828262329, + "ce_orig": 0.38280951976776123, + "epoch": 1.0307714429506074, + "kl_loss": 0.04214192181825638, + "loss_ib": 0.0006895771366544068, + "step": 3585 + }, + { + "ce_ib": 3.1083645820617676, + "ce_orig": 0.7785444259643555, + "epoch": 1.0307714429506074, + "kl_loss": 0.05602356791496277, + "loss_ib": 0.000871072115842253, + "step": 3585 + }, + { + "ce_ib": 2.886650800704956, + "ce_orig": 0.4165860414505005, + "epoch": 1.0307714429506074, + "kl_loss": 0.04779762774705887, + "loss_ib": 0.0007666413439437747, + "step": 3585 + }, + { + "ce_ib": 1.9328622817993164, + "ce_orig": 0.30753466486930847, + "epoch": 1.0310590265295851, + "kl_loss": 0.02753199264407158, + "loss_ib": 0.0004686061292886734, + "step": 3586 + }, + { + "ce_ib": 3.9331185817718506, + "ce_orig": 1.1876693964004517, + "epoch": 1.0310590265295851, + "kl_loss": 0.0378912091255188, + "loss_ib": 0.0007722239242866635, + "step": 3586 + }, + { + "ce_ib": 4.489336013793945, + "ce_orig": 1.397709846496582, + "epoch": 1.0310590265295851, + "kl_loss": 0.0489925853908062, + "loss_ib": 0.0009388594189658761, + "step": 3586 + }, + { + "ce_ib": 3.829636573791504, + "ce_orig": 0.7913032174110413, + "epoch": 1.0310590265295851, + "kl_loss": 0.06323594599962234, + "loss_ib": 0.0010153230978175998, + "step": 3586 + }, + { + "ce_ib": 3.4776418209075928, + "ce_orig": 0.892713189125061, + "epoch": 1.0313466101085629, + "kl_loss": 0.027705375105142593, + "loss_ib": 0.0006248179124668241, + "step": 3587 + }, + { + "ce_ib": 2.381477117538452, + "ce_orig": 0.6283038854598999, + "epoch": 1.0313466101085629, + "kl_loss": 0.02786676399409771, + "loss_ib": 0.0005168153438717127, + "step": 3587 + }, + { + "ce_ib": 4.011752605438232, + "ce_orig": 1.2546874284744263, + "epoch": 1.0313466101085629, + "kl_loss": 0.06501898169517517, + "loss_ib": 0.0010513650486245751, + "step": 3587 + }, + { + "ce_ib": 3.4270079135894775, + "ce_orig": 0.6920791864395142, + "epoch": 1.0313466101085629, + "kl_loss": 0.05429008603096008, + "loss_ib": 0.0008856016211211681, + "step": 3587 + }, + { + "ce_ib": 2.159972906112671, + "ce_orig": 0.43653926253318787, + "epoch": 1.0316341936875404, + "kl_loss": 0.05386894941329956, + "loss_ib": 0.0007546867127530277, + "step": 3588 + }, + { + "ce_ib": 2.561514377593994, + "ce_orig": 0.6263435482978821, + "epoch": 1.0316341936875404, + "kl_loss": 0.04869657754898071, + "loss_ib": 0.0007431171834468842, + "step": 3588 + }, + { + "ce_ib": 3.1751554012298584, + "ce_orig": 0.7717872858047485, + "epoch": 1.0316341936875404, + "kl_loss": 0.05204232782125473, + "loss_ib": 0.0008379387436434627, + "step": 3588 + }, + { + "ce_ib": 2.4402432441711426, + "ce_orig": 0.4271167814731598, + "epoch": 1.0316341936875404, + "kl_loss": 0.05220402032136917, + "loss_ib": 0.0007660645060241222, + "step": 3588 + }, + { + "ce_ib": 4.49504280090332, + "ce_orig": 1.383724570274353, + "epoch": 1.0319217772665181, + "kl_loss": 0.038420870900154114, + "loss_ib": 0.0008337129256688058, + "step": 3589 + }, + { + "ce_ib": 2.6408467292785645, + "ce_orig": 0.6361181735992432, + "epoch": 1.0319217772665181, + "kl_loss": 0.03859659284353256, + "loss_ib": 0.0006500506424345076, + "step": 3589 + }, + { + "ce_ib": 1.936244249343872, + "ce_orig": 0.48896482586860657, + "epoch": 1.0319217772665181, + "kl_loss": 0.03489876911044121, + "loss_ib": 0.0005426121060736477, + "step": 3589 + }, + { + "ce_ib": 2.13387393951416, + "ce_orig": 0.4599680006504059, + "epoch": 1.0319217772665181, + "kl_loss": 0.0563708171248436, + "loss_ib": 0.0007770955562591553, + "step": 3589 + }, + { + "epoch": 1.0322093608454956, + "grad_norm": 0.10478094965219498, + "learning_rate": 3.818796323137896e-05, + "loss": 0.7905, + "step": 3590 + }, + { + "ce_ib": 1.3798397779464722, + "ce_orig": 0.1134706363081932, + "epoch": 1.0322093608454956, + "kl_loss": 0.05935706943273544, + "loss_ib": 0.0007315546390600502, + "step": 3590 + }, + { + "ce_ib": 2.657224416732788, + "ce_orig": 0.7158486843109131, + "epoch": 1.0322093608454956, + "kl_loss": 0.08902902901172638, + "loss_ib": 0.001156012644059956, + "step": 3590 + }, + { + "ce_ib": 3.088724374771118, + "ce_orig": 0.5631896257400513, + "epoch": 1.0322093608454956, + "kl_loss": 0.08623005449771881, + "loss_ib": 0.0011711729457601905, + "step": 3590 + }, + { + "ce_ib": 4.248992919921875, + "ce_orig": 0.9979557394981384, + "epoch": 1.0322093608454956, + "kl_loss": 0.06437885761260986, + "loss_ib": 0.0010686878813430667, + "step": 3590 + }, + { + "ce_ib": 3.677081823348999, + "ce_orig": 1.0307892560958862, + "epoch": 1.0324969444244734, + "kl_loss": 0.06089682877063751, + "loss_ib": 0.0009766764706000686, + "step": 3591 + }, + { + "ce_ib": 4.417055606842041, + "ce_orig": 1.2036021947860718, + "epoch": 1.0324969444244734, + "kl_loss": 0.053238824009895325, + "loss_ib": 0.000974093796685338, + "step": 3591 + }, + { + "ce_ib": 4.624338626861572, + "ce_orig": 0.7861464619636536, + "epoch": 1.0324969444244734, + "kl_loss": 0.04213865101337433, + "loss_ib": 0.0008838203502818942, + "step": 3591 + }, + { + "ce_ib": 3.162419319152832, + "ce_orig": 0.77120041847229, + "epoch": 1.0324969444244734, + "kl_loss": 0.03439852222800255, + "loss_ib": 0.00066022714599967, + "step": 3591 + }, + { + "ce_ib": 5.1668314933776855, + "ce_orig": 1.7181977033615112, + "epoch": 1.032784528003451, + "kl_loss": 0.06089058890938759, + "loss_ib": 0.0011255890130996704, + "step": 3592 + }, + { + "ce_ib": 3.9567830562591553, + "ce_orig": 0.8769131302833557, + "epoch": 1.032784528003451, + "kl_loss": 0.0661071389913559, + "loss_ib": 0.0010567496065050364, + "step": 3592 + }, + { + "ce_ib": 3.089304208755493, + "ce_orig": 0.5052276253700256, + "epoch": 1.032784528003451, + "kl_loss": 0.03897787630558014, + "loss_ib": 0.000698709161952138, + "step": 3592 + }, + { + "ce_ib": 2.883179187774658, + "ce_orig": 0.767508864402771, + "epoch": 1.032784528003451, + "kl_loss": 0.03739149495959282, + "loss_ib": 0.0006622328655794263, + "step": 3592 + }, + { + "ce_ib": 1.8884093761444092, + "ce_orig": 0.3744581937789917, + "epoch": 1.0330721115824286, + "kl_loss": 0.0493294894695282, + "loss_ib": 0.0006821358110755682, + "step": 3593 + }, + { + "ce_ib": 3.9837534427642822, + "ce_orig": 0.7617047429084778, + "epoch": 1.0330721115824286, + "kl_loss": 0.051087312400341034, + "loss_ib": 0.0009092484251596034, + "step": 3593 + }, + { + "ce_ib": 4.491486072540283, + "ce_orig": 1.1760692596435547, + "epoch": 1.0330721115824286, + "kl_loss": 0.06936737149953842, + "loss_ib": 0.001142822322435677, + "step": 3593 + }, + { + "ce_ib": 3.834545850753784, + "ce_orig": 0.922177791595459, + "epoch": 1.0330721115824286, + "kl_loss": 0.0470382496714592, + "loss_ib": 0.0008538370602764189, + "step": 3593 + }, + { + "ce_ib": 2.24745512008667, + "ce_orig": 0.5182161331176758, + "epoch": 1.0333596951614064, + "kl_loss": 0.0601687990128994, + "loss_ib": 0.0008264335338026285, + "step": 3594 + }, + { + "ce_ib": 3.677018642425537, + "ce_orig": 0.7645161747932434, + "epoch": 1.0333596951614064, + "kl_loss": 0.060694918036460876, + "loss_ib": 0.0009746510186232626, + "step": 3594 + }, + { + "ce_ib": 3.0350098609924316, + "ce_orig": 0.6393066644668579, + "epoch": 1.0333596951614064, + "kl_loss": 0.016856256872415543, + "loss_ib": 0.00047206354793161154, + "step": 3594 + }, + { + "ce_ib": 3.487734079360962, + "ce_orig": 0.6331994533538818, + "epoch": 1.0333596951614064, + "kl_loss": 0.06220905855298042, + "loss_ib": 0.0009708639699965715, + "step": 3594 + }, + { + "epoch": 1.0336472787403839, + "grad_norm": 0.1035182997584343, + "learning_rate": 3.81549814528253e-05, + "loss": 0.8261, + "step": 3595 + }, + { + "ce_ib": 2.6908936500549316, + "ce_orig": 0.7047996520996094, + "epoch": 1.0336472787403839, + "kl_loss": 0.03105604089796543, + "loss_ib": 0.0005796497571282089, + "step": 3595 + }, + { + "ce_ib": 3.5441250801086426, + "ce_orig": 1.038462519645691, + "epoch": 1.0336472787403839, + "kl_loss": 0.07812517881393433, + "loss_ib": 0.0011356642935425043, + "step": 3595 + }, + { + "ce_ib": 3.7690210342407227, + "ce_orig": 0.7313186526298523, + "epoch": 1.0336472787403839, + "kl_loss": 0.08400115370750427, + "loss_ib": 0.0012169135734438896, + "step": 3595 + }, + { + "ce_ib": 5.508296012878418, + "ce_orig": 1.6695680618286133, + "epoch": 1.0336472787403839, + "kl_loss": 0.071279376745224, + "loss_ib": 0.001263623358681798, + "step": 3595 + }, + { + "ce_ib": 1.7348804473876953, + "ce_orig": 0.5393964052200317, + "epoch": 1.0339348623193616, + "kl_loss": 0.03121275082230568, + "loss_ib": 0.0004856155428569764, + "step": 3596 + }, + { + "ce_ib": 5.395533561706543, + "ce_orig": 1.3044427633285522, + "epoch": 1.0339348623193616, + "kl_loss": 0.05559717118740082, + "loss_ib": 0.0010955249890685081, + "step": 3596 + }, + { + "ce_ib": 3.323347568511963, + "ce_orig": 1.0473276376724243, + "epoch": 1.0339348623193616, + "kl_loss": 0.04623971879482269, + "loss_ib": 0.0007947319536469877, + "step": 3596 + }, + { + "ce_ib": 4.782833099365234, + "ce_orig": 1.2478200197219849, + "epoch": 1.0339348623193616, + "kl_loss": 0.04498472437262535, + "loss_ib": 0.0009281305829063058, + "step": 3596 + }, + { + "ce_ib": 2.744231939315796, + "ce_orig": 0.4129769206047058, + "epoch": 1.0342224458983391, + "kl_loss": 0.023369355127215385, + "loss_ib": 0.0005081167328171432, + "step": 3597 + }, + { + "ce_ib": 3.894700288772583, + "ce_orig": 1.0107132196426392, + "epoch": 1.0342224458983391, + "kl_loss": 0.04650837555527687, + "loss_ib": 0.0008545537129975855, + "step": 3597 + }, + { + "ce_ib": 3.7046866416931152, + "ce_orig": 0.5257781147956848, + "epoch": 1.0342224458983391, + "kl_loss": 0.06912615895271301, + "loss_ib": 0.001061730203218758, + "step": 3597 + }, + { + "ce_ib": 3.155301094055176, + "ce_orig": 1.0376603603363037, + "epoch": 1.0342224458983391, + "kl_loss": 0.029596418142318726, + "loss_ib": 0.0006114942952990532, + "step": 3597 + }, + { + "ce_ib": 4.387679576873779, + "ce_orig": 1.2906910181045532, + "epoch": 1.0345100294773169, + "kl_loss": 0.032265108078718185, + "loss_ib": 0.0007614190108142793, + "step": 3598 + }, + { + "ce_ib": 5.433546543121338, + "ce_orig": 1.5139870643615723, + "epoch": 1.0345100294773169, + "kl_loss": 0.06404994428157806, + "loss_ib": 0.0011838540667667985, + "step": 3598 + }, + { + "ce_ib": 5.706398963928223, + "ce_orig": 1.646107792854309, + "epoch": 1.0345100294773169, + "kl_loss": 0.06291145086288452, + "loss_ib": 0.0011997544206678867, + "step": 3598 + }, + { + "ce_ib": 2.0374393463134766, + "ce_orig": 0.412244588136673, + "epoch": 1.0345100294773169, + "kl_loss": 0.04777456820011139, + "loss_ib": 0.0006814895896241069, + "step": 3598 + }, + { + "ce_ib": 4.818519592285156, + "ce_orig": 1.1033471822738647, + "epoch": 1.0347976130562946, + "kl_loss": 0.04692543298006058, + "loss_ib": 0.0009511062526144087, + "step": 3599 + }, + { + "ce_ib": 4.0627031326293945, + "ce_orig": 1.0711532831192017, + "epoch": 1.0347976130562946, + "kl_loss": 0.06061090528964996, + "loss_ib": 0.001012379303574562, + "step": 3599 + }, + { + "ce_ib": 3.770251512527466, + "ce_orig": 0.7806698083877563, + "epoch": 1.0347976130562946, + "kl_loss": 0.055300284177064896, + "loss_ib": 0.0009300279198214412, + "step": 3599 + }, + { + "ce_ib": 2.5852553844451904, + "ce_orig": 0.6225724816322327, + "epoch": 1.0347976130562946, + "kl_loss": 0.05388905853033066, + "loss_ib": 0.0007974160835146904, + "step": 3599 + }, + { + "epoch": 1.035085196635272, + "grad_norm": 0.10308659821748734, + "learning_rate": 3.812196798086799e-05, + "loss": 0.8902, + "step": 3600 + }, + { + "ce_ib": 3.4027211666107178, + "ce_orig": 0.8151189684867859, + "epoch": 1.035085196635272, + "kl_loss": 0.05086929351091385, + "loss_ib": 0.0008489650208503008, + "step": 3600 + }, + { + "ce_ib": 2.778602361679077, + "ce_orig": 0.5546197891235352, + "epoch": 1.035085196635272, + "kl_loss": 0.032826945185661316, + "loss_ib": 0.0006061297026462853, + "step": 3600 + }, + { + "ce_ib": 4.0890607833862305, + "ce_orig": 0.9329740405082703, + "epoch": 1.035085196635272, + "kl_loss": 0.04201565682888031, + "loss_ib": 0.0008290625992231071, + "step": 3600 + }, + { + "ce_ib": 2.747549295425415, + "ce_orig": 0.8813520073890686, + "epoch": 1.035085196635272, + "kl_loss": 0.032593950629234314, + "loss_ib": 0.0006006943876855075, + "step": 3600 + }, + { + "ce_ib": 3.7557926177978516, + "ce_orig": 0.7773083448410034, + "epoch": 1.0353727802142498, + "kl_loss": 0.05166550725698471, + "loss_ib": 0.0008922342676669359, + "step": 3601 + }, + { + "ce_ib": 2.9580605030059814, + "ce_orig": 0.6921283006668091, + "epoch": 1.0353727802142498, + "kl_loss": 0.09105478227138519, + "loss_ib": 0.0012063538888469338, + "step": 3601 + }, + { + "ce_ib": 2.9817988872528076, + "ce_orig": 0.8860995769500732, + "epoch": 1.0353727802142498, + "kl_loss": 0.04106082767248154, + "loss_ib": 0.0007087881676852703, + "step": 3601 + }, + { + "ce_ib": 2.3996951580047607, + "ce_orig": 0.6396622657775879, + "epoch": 1.0353727802142498, + "kl_loss": 0.09034931659698486, + "loss_ib": 0.0011434626067057252, + "step": 3601 + }, + { + "ce_ib": 3.931623935699463, + "ce_orig": 0.5657770037651062, + "epoch": 1.0356603637932273, + "kl_loss": 0.0618726871907711, + "loss_ib": 0.0010118893114849925, + "step": 3602 + }, + { + "ce_ib": 4.618503570556641, + "ce_orig": 1.5268518924713135, + "epoch": 1.0356603637932273, + "kl_loss": 0.03155171126127243, + "loss_ib": 0.000777367502450943, + "step": 3602 + }, + { + "ce_ib": 3.437845230102539, + "ce_orig": 0.7818020582199097, + "epoch": 1.0356603637932273, + "kl_loss": 0.05131831020116806, + "loss_ib": 0.0008569675264880061, + "step": 3602 + }, + { + "ce_ib": 4.836798667907715, + "ce_orig": 0.8746954798698425, + "epoch": 1.0356603637932273, + "kl_loss": 0.049870822578668594, + "loss_ib": 0.0009823880391195416, + "step": 3602 + }, + { + "ce_ib": 4.957702159881592, + "ce_orig": 1.3925421237945557, + "epoch": 1.035947947372205, + "kl_loss": 0.04842381179332733, + "loss_ib": 0.0009800082771107554, + "step": 3603 + }, + { + "ce_ib": 3.688215494155884, + "ce_orig": 1.0256110429763794, + "epoch": 1.035947947372205, + "kl_loss": 0.04517398774623871, + "loss_ib": 0.0008205614285543561, + "step": 3603 + }, + { + "ce_ib": 2.982611656188965, + "ce_orig": 0.6310313940048218, + "epoch": 1.035947947372205, + "kl_loss": 0.043983522802591324, + "loss_ib": 0.0007380963652394712, + "step": 3603 + }, + { + "ce_ib": 2.887369394302368, + "ce_orig": 0.8029597997665405, + "epoch": 1.035947947372205, + "kl_loss": 0.02396593615412712, + "loss_ib": 0.0005283962818793952, + "step": 3603 + }, + { + "ce_ib": 2.564685106277466, + "ce_orig": 0.646064043045044, + "epoch": 1.0362355309511826, + "kl_loss": 0.039854746311903, + "loss_ib": 0.00065501598874107, + "step": 3604 + }, + { + "ce_ib": 1.987933874130249, + "ce_orig": 0.48826688528060913, + "epoch": 1.0362355309511826, + "kl_loss": 0.05088270455598831, + "loss_ib": 0.0007076203473843634, + "step": 3604 + }, + { + "ce_ib": 2.771176815032959, + "ce_orig": 0.4655449390411377, + "epoch": 1.0362355309511826, + "kl_loss": 0.030887162312865257, + "loss_ib": 0.0005859892698936164, + "step": 3604 + }, + { + "ce_ib": 3.9471676349639893, + "ce_orig": 1.0572129487991333, + "epoch": 1.0362355309511826, + "kl_loss": 0.1389135867357254, + "loss_ib": 0.0017838525818660855, + "step": 3604 + }, + { + "epoch": 1.0365231145301603, + "grad_norm": 0.1419651210308075, + "learning_rate": 3.8088922895044144e-05, + "loss": 0.861, + "step": 3605 + }, + { + "ce_ib": 3.7197299003601074, + "ce_orig": 1.1572567224502563, + "epoch": 1.0365231145301603, + "kl_loss": 0.027837757021188736, + "loss_ib": 0.0006503505865111947, + "step": 3605 + }, + { + "ce_ib": 1.9779465198516846, + "ce_orig": 0.5843262672424316, + "epoch": 1.0365231145301603, + "kl_loss": 0.019389018416404724, + "loss_ib": 0.00039168482180684805, + "step": 3605 + }, + { + "ce_ib": 2.133751630783081, + "ce_orig": 0.5900054574012756, + "epoch": 1.0365231145301603, + "kl_loss": 0.025499528273940086, + "loss_ib": 0.00046837044646963477, + "step": 3605 + }, + { + "ce_ib": 4.125655174255371, + "ce_orig": 1.0119010210037231, + "epoch": 1.0365231145301603, + "kl_loss": 0.0635671317577362, + "loss_ib": 0.0010482367360964417, + "step": 3605 + }, + { + "ce_ib": 3.4761736392974854, + "ce_orig": 0.7168460488319397, + "epoch": 1.036810698109138, + "kl_loss": 0.04825027287006378, + "loss_ib": 0.0008301200577989221, + "step": 3606 + }, + { + "ce_ib": 2.97955060005188, + "ce_orig": 0.6805715560913086, + "epoch": 1.036810698109138, + "kl_loss": 0.05625569447875023, + "loss_ib": 0.0008605119655840099, + "step": 3606 + }, + { + "ce_ib": 2.7214300632476807, + "ce_orig": 0.617706835269928, + "epoch": 1.036810698109138, + "kl_loss": 0.04043085500597954, + "loss_ib": 0.0006764515419490635, + "step": 3606 + }, + { + "ce_ib": 4.196891784667969, + "ce_orig": 0.8351659178733826, + "epoch": 1.036810698109138, + "kl_loss": 0.09132044017314911, + "loss_ib": 0.0013328935019671917, + "step": 3606 + }, + { + "ce_ib": 3.0014312267303467, + "ce_orig": 0.7094451785087585, + "epoch": 1.0370982816881156, + "kl_loss": 0.03448671102523804, + "loss_ib": 0.0006450102082453668, + "step": 3607 + }, + { + "ce_ib": 3.6564881801605225, + "ce_orig": 0.8466569185256958, + "epoch": 1.0370982816881156, + "kl_loss": 0.04682295769453049, + "loss_ib": 0.0008338783518411219, + "step": 3607 + }, + { + "ce_ib": 3.906707286834717, + "ce_orig": 0.7018362879753113, + "epoch": 1.0370982816881156, + "kl_loss": 0.07703909277915955, + "loss_ib": 0.001161061692982912, + "step": 3607 + }, + { + "ce_ib": 2.9783284664154053, + "ce_orig": 0.603093683719635, + "epoch": 1.0370982816881156, + "kl_loss": 0.03242585062980652, + "loss_ib": 0.0006220913492143154, + "step": 3607 + }, + { + "ce_ib": 2.335784435272217, + "ce_orig": 0.6216042041778564, + "epoch": 1.0373858652670933, + "kl_loss": 0.038678109645843506, + "loss_ib": 0.0006203595548868179, + "step": 3608 + }, + { + "ce_ib": 2.3325743675231934, + "ce_orig": 0.7790809273719788, + "epoch": 1.0373858652670933, + "kl_loss": 0.038976527750492096, + "loss_ib": 0.0006230226717889309, + "step": 3608 + }, + { + "ce_ib": 5.970742702484131, + "ce_orig": 1.6511204242706299, + "epoch": 1.0373858652670933, + "kl_loss": 0.08288216590881348, + "loss_ib": 0.0014258959563449025, + "step": 3608 + }, + { + "ce_ib": 1.747352123260498, + "ce_orig": 0.3053806722164154, + "epoch": 1.0373858652670933, + "kl_loss": 0.11103884875774384, + "loss_ib": 0.0012851236388087273, + "step": 3608 + }, + { + "ce_ib": 3.4720077514648438, + "ce_orig": 1.163844108581543, + "epoch": 1.0376734488460708, + "kl_loss": 0.033084966242313385, + "loss_ib": 0.0006780504481866956, + "step": 3609 + }, + { + "ce_ib": 2.2469091415405273, + "ce_orig": 0.4879360496997833, + "epoch": 1.0376734488460708, + "kl_loss": 0.025770986452698708, + "loss_ib": 0.00048240076284855604, + "step": 3609 + }, + { + "ce_ib": 5.542665004730225, + "ce_orig": 1.019561767578125, + "epoch": 1.0376734488460708, + "kl_loss": 0.06906314939260483, + "loss_ib": 0.0012448979541659355, + "step": 3609 + }, + { + "ce_ib": 2.5678091049194336, + "ce_orig": 0.6793452501296997, + "epoch": 1.0376734488460708, + "kl_loss": 0.031630679965019226, + "loss_ib": 0.0005730877164751291, + "step": 3609 + }, + { + "epoch": 1.0379610324250486, + "grad_norm": 0.10306089371442795, + "learning_rate": 3.8055846274967035e-05, + "loss": 0.8215, + "step": 3610 + }, + { + "ce_ib": 2.712236166000366, + "ce_orig": 0.693924069404602, + "epoch": 1.0379610324250486, + "kl_loss": 0.04260249063372612, + "loss_ib": 0.0006972484989091754, + "step": 3610 + }, + { + "ce_ib": 4.071495056152344, + "ce_orig": 0.9989027380943298, + "epoch": 1.0379610324250486, + "kl_loss": 0.03810441493988037, + "loss_ib": 0.0007881936035118997, + "step": 3610 + }, + { + "ce_ib": 2.7596328258514404, + "ce_orig": 0.7015916109085083, + "epoch": 1.0379610324250486, + "kl_loss": 0.04699409008026123, + "loss_ib": 0.0007459041080437601, + "step": 3610 + }, + { + "ce_ib": 5.157138347625732, + "ce_orig": 0.9927470684051514, + "epoch": 1.0379610324250486, + "kl_loss": 0.08483567833900452, + "loss_ib": 0.0013640705728903413, + "step": 3610 + }, + { + "ce_ib": 3.886988878250122, + "ce_orig": 0.7631436586380005, + "epoch": 1.038248616004026, + "kl_loss": 0.03789599984884262, + "loss_ib": 0.000767658872064203, + "step": 3611 + }, + { + "ce_ib": 4.8831963539123535, + "ce_orig": 1.19000244140625, + "epoch": 1.038248616004026, + "kl_loss": 0.06785589456558228, + "loss_ib": 0.0011668785009533167, + "step": 3611 + }, + { + "ce_ib": 5.265395641326904, + "ce_orig": 1.5125983953475952, + "epoch": 1.038248616004026, + "kl_loss": 0.047468457370996475, + "loss_ib": 0.0010012240381911397, + "step": 3611 + }, + { + "ce_ib": 4.755459308624268, + "ce_orig": 1.1548701524734497, + "epoch": 1.038248616004026, + "kl_loss": 0.07104868441820145, + "loss_ib": 0.0011860326630994678, + "step": 3611 + }, + { + "ce_ib": 2.3522226810455322, + "ce_orig": 0.6383640170097351, + "epoch": 1.0385361995830038, + "kl_loss": 0.03793523088097572, + "loss_ib": 0.0006145745282992721, + "step": 3612 + }, + { + "ce_ib": 3.8853631019592285, + "ce_orig": 0.5492738485336304, + "epoch": 1.0385361995830038, + "kl_loss": 0.06005631387233734, + "loss_ib": 0.0009890993824228644, + "step": 3612 + }, + { + "ce_ib": 2.2236809730529785, + "ce_orig": 0.4304578900337219, + "epoch": 1.0385361995830038, + "kl_loss": 0.03225568309426308, + "loss_ib": 0.0005449249292723835, + "step": 3612 + }, + { + "ce_ib": 3.3218255043029785, + "ce_orig": 0.48818859457969666, + "epoch": 1.0385361995830038, + "kl_loss": 0.06201423704624176, + "loss_ib": 0.0009523248299956322, + "step": 3612 + }, + { + "ce_ib": 2.3003556728363037, + "ce_orig": 0.5365278720855713, + "epoch": 1.0388237831619815, + "kl_loss": 0.06027951091527939, + "loss_ib": 0.0008328306721523404, + "step": 3613 + }, + { + "ce_ib": 2.1123862266540527, + "ce_orig": 0.3945178985595703, + "epoch": 1.0388237831619815, + "kl_loss": 0.16722361743450165, + "loss_ib": 0.0018834746442735195, + "step": 3613 + }, + { + "ce_ib": 3.2565298080444336, + "ce_orig": 0.7597638964653015, + "epoch": 1.0388237831619815, + "kl_loss": 0.06314495950937271, + "loss_ib": 0.0009571025148034096, + "step": 3613 + }, + { + "ce_ib": 2.110288381576538, + "ce_orig": 0.41870951652526855, + "epoch": 1.0388237831619815, + "kl_loss": 0.04885657876729965, + "loss_ib": 0.0006995946168899536, + "step": 3613 + }, + { + "ce_ib": 4.944021701812744, + "ce_orig": 1.419245719909668, + "epoch": 1.039111366740959, + "kl_loss": 0.049839384853839874, + "loss_ib": 0.000992795918136835, + "step": 3614 + }, + { + "ce_ib": 2.4735636711120605, + "ce_orig": 0.6618853807449341, + "epoch": 1.039111366740959, + "kl_loss": 0.049515604972839355, + "loss_ib": 0.0007425124058499932, + "step": 3614 + }, + { + "ce_ib": 4.693879127502441, + "ce_orig": 1.3503361940383911, + "epoch": 1.039111366740959, + "kl_loss": 0.06188700720667839, + "loss_ib": 0.0010882579954341054, + "step": 3614 + }, + { + "ce_ib": 3.710273027420044, + "ce_orig": 0.8095992803573608, + "epoch": 1.039111366740959, + "kl_loss": 0.03896230831742287, + "loss_ib": 0.0007606503204442561, + "step": 3614 + }, + { + "epoch": 1.0393989503199368, + "grad_norm": 0.10750418156385422, + "learning_rate": 3.8022738200325916e-05, + "loss": 0.8036, + "step": 3615 + }, + { + "ce_ib": 2.892967462539673, + "ce_orig": 0.742710530757904, + "epoch": 1.0393989503199368, + "kl_loss": 0.04656871408224106, + "loss_ib": 0.0007549838628619909, + "step": 3615 + }, + { + "ce_ib": 2.2133047580718994, + "ce_orig": 0.688720166683197, + "epoch": 1.0393989503199368, + "kl_loss": 0.029572568833827972, + "loss_ib": 0.0005170561489649117, + "step": 3615 + }, + { + "ce_ib": 2.8131017684936523, + "ce_orig": 0.6869921684265137, + "epoch": 1.0393989503199368, + "kl_loss": 0.04670779034495354, + "loss_ib": 0.0007483880617655814, + "step": 3615 + }, + { + "ce_ib": 3.6766092777252197, + "ce_orig": 0.8958309888839722, + "epoch": 1.0393989503199368, + "kl_loss": 0.06198570877313614, + "loss_ib": 0.0009875179966911674, + "step": 3615 + }, + { + "ce_ib": 4.297173023223877, + "ce_orig": 1.4392874240875244, + "epoch": 1.0396865338989143, + "kl_loss": 0.045197587460279465, + "loss_ib": 0.0008816932095214725, + "step": 3616 + }, + { + "ce_ib": 3.8238303661346436, + "ce_orig": 0.5268951058387756, + "epoch": 1.0396865338989143, + "kl_loss": 0.20367519557476044, + "loss_ib": 0.002419134834781289, + "step": 3616 + }, + { + "ce_ib": 4.599557876586914, + "ce_orig": 0.6864416599273682, + "epoch": 1.0396865338989143, + "kl_loss": 0.05731828883290291, + "loss_ib": 0.001033138600178063, + "step": 3616 + }, + { + "ce_ib": 3.532172441482544, + "ce_orig": 0.7391554713249207, + "epoch": 1.0396865338989143, + "kl_loss": 0.04788043722510338, + "loss_ib": 0.0008320215856656432, + "step": 3616 + }, + { + "ce_ib": 3.121716260910034, + "ce_orig": 0.7330489754676819, + "epoch": 1.039974117477892, + "kl_loss": 0.05684217810630798, + "loss_ib": 0.0008805933175608516, + "step": 3617 + }, + { + "ce_ib": 2.477263927459717, + "ce_orig": 0.5373714566230774, + "epoch": 1.039974117477892, + "kl_loss": 0.07013529539108276, + "loss_ib": 0.0009490792872384191, + "step": 3617 + }, + { + "ce_ib": 7.0190300941467285, + "ce_orig": 1.87177312374115, + "epoch": 1.039974117477892, + "kl_loss": 0.0723486840724945, + "loss_ib": 0.0014253898989409208, + "step": 3617 + }, + { + "ce_ib": 4.1129255294799805, + "ce_orig": 0.8372676968574524, + "epoch": 1.039974117477892, + "kl_loss": 0.044847555458545685, + "loss_ib": 0.0008597680716775358, + "step": 3617 + }, + { + "ce_ib": 3.519713878631592, + "ce_orig": 0.5522962212562561, + "epoch": 1.0402617010568695, + "kl_loss": 0.07813972234725952, + "loss_ib": 0.0011333685833960772, + "step": 3618 + }, + { + "ce_ib": 3.80352783203125, + "ce_orig": 0.6375481486320496, + "epoch": 1.0402617010568695, + "kl_loss": 0.06987114250659943, + "loss_ib": 0.0010790640953928232, + "step": 3618 + }, + { + "ce_ib": 2.669717311859131, + "ce_orig": 0.49041619896888733, + "epoch": 1.0402617010568695, + "kl_loss": 0.03585274517536163, + "loss_ib": 0.0006254991749301553, + "step": 3618 + }, + { + "ce_ib": 2.5742738246917725, + "ce_orig": 0.8162478804588318, + "epoch": 1.0402617010568695, + "kl_loss": 0.051717109978199005, + "loss_ib": 0.0007745985058136284, + "step": 3618 + }, + { + "ce_ib": 3.1446115970611572, + "ce_orig": 0.5594590306282043, + "epoch": 1.0405492846358473, + "kl_loss": 0.06268759816884995, + "loss_ib": 0.0009413370862603188, + "step": 3619 + }, + { + "ce_ib": 1.7523574829101562, + "ce_orig": 0.33196285367012024, + "epoch": 1.0405492846358473, + "kl_loss": 0.07823987305164337, + "loss_ib": 0.0009576344164088368, + "step": 3619 + }, + { + "ce_ib": 3.011260509490967, + "ce_orig": 0.7199894189834595, + "epoch": 1.0405492846358473, + "kl_loss": 0.0323152169585228, + "loss_ib": 0.0006242781528271735, + "step": 3619 + }, + { + "ce_ib": 3.67082142829895, + "ce_orig": 0.8359575271606445, + "epoch": 1.0405492846358473, + "kl_loss": 0.05814158543944359, + "loss_ib": 0.0009484979091212153, + "step": 3619 + }, + { + "epoch": 1.040836868214825, + "grad_norm": 0.09567473828792572, + "learning_rate": 3.798959875088584e-05, + "loss": 0.7808, + "step": 3620 + }, + { + "ce_ib": 2.6143410205841064, + "ce_orig": 0.5230795741081238, + "epoch": 1.040836868214825, + "kl_loss": 0.043703511357307434, + "loss_ib": 0.0006984691717661917, + "step": 3620 + }, + { + "ce_ib": 2.7262003421783447, + "ce_orig": 0.753064751625061, + "epoch": 1.040836868214825, + "kl_loss": 0.03320806846022606, + "loss_ib": 0.0006047007045708597, + "step": 3620 + }, + { + "ce_ib": 5.375926971435547, + "ce_orig": 1.4827245473861694, + "epoch": 1.040836868214825, + "kl_loss": 0.05629035830497742, + "loss_ib": 0.0011004962725564837, + "step": 3620 + }, + { + "ce_ib": 6.387918472290039, + "ce_orig": 1.9825439453125, + "epoch": 1.040836868214825, + "kl_loss": 0.05101636052131653, + "loss_ib": 0.0011489554308354855, + "step": 3620 + }, + { + "ce_ib": 5.704512119293213, + "ce_orig": 1.5941660404205322, + "epoch": 1.0411244517938025, + "kl_loss": 0.09920705854892731, + "loss_ib": 0.0015625216765329242, + "step": 3621 + }, + { + "ce_ib": 3.190998077392578, + "ce_orig": 0.7211166024208069, + "epoch": 1.0411244517938025, + "kl_loss": 0.0535447858273983, + "loss_ib": 0.0008545476011931896, + "step": 3621 + }, + { + "ce_ib": 3.9762566089630127, + "ce_orig": 0.931994616985321, + "epoch": 1.0411244517938025, + "kl_loss": 0.043730031698942184, + "loss_ib": 0.0008349259151145816, + "step": 3621 + }, + { + "ce_ib": 2.9288504123687744, + "ce_orig": 0.6755293011665344, + "epoch": 1.0411244517938025, + "kl_loss": 0.06252827495336533, + "loss_ib": 0.0009181678178720176, + "step": 3621 + }, + { + "ce_ib": 4.109652996063232, + "ce_orig": 1.1996196508407593, + "epoch": 1.0414120353727803, + "kl_loss": 0.033221591264009476, + "loss_ib": 0.0007431812700815499, + "step": 3622 + }, + { + "ce_ib": 3.9353890419006348, + "ce_orig": 1.0686535835266113, + "epoch": 1.0414120353727803, + "kl_loss": 0.051682837307453156, + "loss_ib": 0.0009103671764023602, + "step": 3622 + }, + { + "ce_ib": 3.4715261459350586, + "ce_orig": 0.9573656916618347, + "epoch": 1.0414120353727803, + "kl_loss": 0.039960119873285294, + "loss_ib": 0.0007467538234777749, + "step": 3622 + }, + { + "ce_ib": 2.7878429889678955, + "ce_orig": 0.7607963681221008, + "epoch": 1.0414120353727803, + "kl_loss": 0.028826069086790085, + "loss_ib": 0.0005670450045727193, + "step": 3622 + }, + { + "ce_ib": 2.472879648208618, + "ce_orig": 0.726611316204071, + "epoch": 1.0416996189517578, + "kl_loss": 0.027999604120850563, + "loss_ib": 0.0005272839916869998, + "step": 3623 + }, + { + "ce_ib": 2.3498148918151855, + "ce_orig": 0.48682910203933716, + "epoch": 1.0416996189517578, + "kl_loss": 0.08405505120754242, + "loss_ib": 0.0010755319381132722, + "step": 3623 + }, + { + "ce_ib": 4.06371545791626, + "ce_orig": 0.7689616680145264, + "epoch": 1.0416996189517578, + "kl_loss": 0.045213401317596436, + "loss_ib": 0.0008585054893046618, + "step": 3623 + }, + { + "ce_ib": 3.8290748596191406, + "ce_orig": 1.0170845985412598, + "epoch": 1.0416996189517578, + "kl_loss": 0.03889855369925499, + "loss_ib": 0.0007718929555267096, + "step": 3623 + }, + { + "ce_ib": 2.6842286586761475, + "ce_orig": 0.5629074573516846, + "epoch": 1.0419872025307355, + "kl_loss": 0.038851384073495865, + "loss_ib": 0.0006569366669282317, + "step": 3624 + }, + { + "ce_ib": 5.507602691650391, + "ce_orig": 1.466698408126831, + "epoch": 1.0419872025307355, + "kl_loss": 0.0511295348405838, + "loss_ib": 0.0010620555840432644, + "step": 3624 + }, + { + "ce_ib": 2.6280016899108887, + "ce_orig": 0.6986632347106934, + "epoch": 1.0419872025307355, + "kl_loss": 0.07819140702486038, + "loss_ib": 0.0010447142412886024, + "step": 3624 + }, + { + "ce_ib": 2.186128854751587, + "ce_orig": 0.2689025104045868, + "epoch": 1.0419872025307355, + "kl_loss": 0.04641188681125641, + "loss_ib": 0.0006827316829003394, + "step": 3624 + }, + { + "epoch": 1.0422747861097132, + "grad_norm": 0.13598769903182983, + "learning_rate": 3.795642800648742e-05, + "loss": 0.901, + "step": 3625 + }, + { + "ce_ib": 2.807417869567871, + "ce_orig": 0.7151684165000916, + "epoch": 1.0422747861097132, + "kl_loss": 0.03819160908460617, + "loss_ib": 0.0006626578979194164, + "step": 3625 + }, + { + "ce_ib": 2.7378928661346436, + "ce_orig": 0.3452225625514984, + "epoch": 1.0422747861097132, + "kl_loss": 0.05908571928739548, + "loss_ib": 0.0008646463975310326, + "step": 3625 + }, + { + "ce_ib": 3.11708927154541, + "ce_orig": 0.7897409200668335, + "epoch": 1.0422747861097132, + "kl_loss": 0.038192883133888245, + "loss_ib": 0.0006936377030797303, + "step": 3625 + }, + { + "ce_ib": 3.23392653465271, + "ce_orig": 0.8662629723548889, + "epoch": 1.0422747861097132, + "kl_loss": 0.06220060959458351, + "loss_ib": 0.0009453987004235387, + "step": 3625 + }, + { + "ce_ib": 1.7571210861206055, + "ce_orig": 0.3375561833381653, + "epoch": 1.0425623696886908, + "kl_loss": 0.07331755757331848, + "loss_ib": 0.0009088876540772617, + "step": 3626 + }, + { + "ce_ib": 3.544811725616455, + "ce_orig": 0.5789409875869751, + "epoch": 1.0425623696886908, + "kl_loss": 0.04444526880979538, + "loss_ib": 0.0007989337900653481, + "step": 3626 + }, + { + "ce_ib": 2.8759164810180664, + "ce_orig": 0.890485942363739, + "epoch": 1.0425623696886908, + "kl_loss": 0.041880227625370026, + "loss_ib": 0.0007063939119689167, + "step": 3626 + }, + { + "ce_ib": 3.4575040340423584, + "ce_orig": 0.9408693909645081, + "epoch": 1.0425623696886908, + "kl_loss": 0.035401590168476105, + "loss_ib": 0.0006997662712819874, + "step": 3626 + }, + { + "ce_ib": 4.06010103225708, + "ce_orig": 0.6921137571334839, + "epoch": 1.0428499532676685, + "kl_loss": 0.06855036318302155, + "loss_ib": 0.0010915136663243175, + "step": 3627 + }, + { + "ce_ib": 5.9218950271606445, + "ce_orig": 1.4261964559555054, + "epoch": 1.0428499532676685, + "kl_loss": 0.05242403596639633, + "loss_ib": 0.0011164298048242927, + "step": 3627 + }, + { + "ce_ib": 3.5219109058380127, + "ce_orig": 0.8172938823699951, + "epoch": 1.0428499532676685, + "kl_loss": 0.06802525371313095, + "loss_ib": 0.0010324436007067561, + "step": 3627 + }, + { + "ce_ib": 3.3015754222869873, + "ce_orig": 0.4520440399646759, + "epoch": 1.0428499532676685, + "kl_loss": 0.05133380740880966, + "loss_ib": 0.0008434955379925668, + "step": 3627 + }, + { + "ce_ib": 5.102388858795166, + "ce_orig": 1.2287719249725342, + "epoch": 1.043137536846646, + "kl_loss": 0.08346442133188248, + "loss_ib": 0.0013448831159621477, + "step": 3628 + }, + { + "ce_ib": 2.7170727252960205, + "ce_orig": 0.8123228549957275, + "epoch": 1.043137536846646, + "kl_loss": 0.06176891177892685, + "loss_ib": 0.0008893963531590998, + "step": 3628 + }, + { + "ce_ib": 4.2741522789001465, + "ce_orig": 0.9109777808189392, + "epoch": 1.043137536846646, + "kl_loss": 0.04745590686798096, + "loss_ib": 0.0009019742719829082, + "step": 3628 + }, + { + "ce_ib": 3.3922388553619385, + "ce_orig": 0.9463043212890625, + "epoch": 1.043137536846646, + "kl_loss": 0.04285561293363571, + "loss_ib": 0.000767780002206564, + "step": 3628 + }, + { + "ce_ib": 3.612586736679077, + "ce_orig": 1.0834754705429077, + "epoch": 1.0434251204256237, + "kl_loss": 0.042049627751111984, + "loss_ib": 0.0007817549048922956, + "step": 3629 + }, + { + "ce_ib": 3.028153419494629, + "ce_orig": 0.856448769569397, + "epoch": 1.0434251204256237, + "kl_loss": 0.06612265110015869, + "loss_ib": 0.0009640418575145304, + "step": 3629 + }, + { + "ce_ib": 5.448115348815918, + "ce_orig": 1.5350550413131714, + "epoch": 1.0434251204256237, + "kl_loss": 0.07624796032905579, + "loss_ib": 0.0013072910951450467, + "step": 3629 + }, + { + "ce_ib": 4.196591854095459, + "ce_orig": 0.7640632390975952, + "epoch": 1.0434251204256237, + "kl_loss": 0.0314764566719532, + "loss_ib": 0.0007344236946664751, + "step": 3629 + }, + { + "epoch": 1.0437127040046013, + "grad_norm": 0.10995136201381683, + "learning_rate": 3.7923226047046684e-05, + "loss": 0.8802, + "step": 3630 + }, + { + "ce_ib": 4.970627784729004, + "ce_orig": 1.0242670774459839, + "epoch": 1.0437127040046013, + "kl_loss": 0.05441267043352127, + "loss_ib": 0.0010411894181743264, + "step": 3630 + }, + { + "ce_ib": 4.180978298187256, + "ce_orig": 0.8287485241889954, + "epoch": 1.0437127040046013, + "kl_loss": 0.10183551162481308, + "loss_ib": 0.0014364528469741344, + "step": 3630 + }, + { + "ce_ib": 3.7746195793151855, + "ce_orig": 1.1967008113861084, + "epoch": 1.0437127040046013, + "kl_loss": 0.0478089414536953, + "loss_ib": 0.0008555513340979815, + "step": 3630 + }, + { + "ce_ib": 2.803920030593872, + "ce_orig": 0.41638466715812683, + "epoch": 1.0437127040046013, + "kl_loss": 0.046573132276535034, + "loss_ib": 0.0007461233180947602, + "step": 3630 + }, + { + "ce_ib": 5.004895210266113, + "ce_orig": 1.0630742311477661, + "epoch": 1.044000287583579, + "kl_loss": 0.03657517209649086, + "loss_ib": 0.0008662412292324007, + "step": 3631 + }, + { + "ce_ib": 3.057567596435547, + "ce_orig": 0.5200394988059998, + "epoch": 1.044000287583579, + "kl_loss": 0.0765879899263382, + "loss_ib": 0.001071636681444943, + "step": 3631 + }, + { + "ce_ib": 1.960605263710022, + "ce_orig": 0.4294467270374298, + "epoch": 1.044000287583579, + "kl_loss": 0.03440064564347267, + "loss_ib": 0.0005400669178925455, + "step": 3631 + }, + { + "ce_ib": 4.651352882385254, + "ce_orig": 1.2456097602844238, + "epoch": 1.044000287583579, + "kl_loss": 0.06423888355493546, + "loss_ib": 0.0011075240327045321, + "step": 3631 + }, + { + "ce_ib": 2.041409492492676, + "ce_orig": 0.41636016964912415, + "epoch": 1.0442878711625567, + "kl_loss": 0.03881283849477768, + "loss_ib": 0.0005922693526372313, + "step": 3632 + }, + { + "ce_ib": 3.3624229431152344, + "ce_orig": 0.8059715032577515, + "epoch": 1.0442878711625567, + "kl_loss": 0.0423106923699379, + "loss_ib": 0.0007593491463921964, + "step": 3632 + }, + { + "ce_ib": 3.475355625152588, + "ce_orig": 1.2288874387741089, + "epoch": 1.0442878711625567, + "kl_loss": 0.037677377462387085, + "loss_ib": 0.0007243093568831682, + "step": 3632 + }, + { + "ce_ib": 4.359315872192383, + "ce_orig": 1.1523373126983643, + "epoch": 1.0442878711625567, + "kl_loss": 0.056098904460668564, + "loss_ib": 0.0009969206294044852, + "step": 3632 + }, + { + "ce_ib": 2.4092681407928467, + "ce_orig": 0.6447193026542664, + "epoch": 1.0445754547415342, + "kl_loss": 0.024396613240242004, + "loss_ib": 0.0004848929529543966, + "step": 3633 + }, + { + "ce_ib": 4.63482141494751, + "ce_orig": 1.1844241619110107, + "epoch": 1.0445754547415342, + "kl_loss": 0.05031919479370117, + "loss_ib": 0.0009666740661486983, + "step": 3633 + }, + { + "ce_ib": 4.875977516174316, + "ce_orig": 1.2848197221755981, + "epoch": 1.0445754547415342, + "kl_loss": 0.04757893830537796, + "loss_ib": 0.0009633870795369148, + "step": 3633 + }, + { + "ce_ib": 2.65511417388916, + "ce_orig": 0.7789871096611023, + "epoch": 1.0445754547415342, + "kl_loss": 0.05647018551826477, + "loss_ib": 0.0008302132482640445, + "step": 3633 + }, + { + "ce_ib": 2.92979097366333, + "ce_orig": 0.8390363454818726, + "epoch": 1.044863038320512, + "kl_loss": 0.050236836075782776, + "loss_ib": 0.0007953474414534867, + "step": 3634 + }, + { + "ce_ib": 3.9116086959838867, + "ce_orig": 0.9700295329093933, + "epoch": 1.044863038320512, + "kl_loss": 0.055648647248744965, + "loss_ib": 0.0009476473205722868, + "step": 3634 + }, + { + "ce_ib": 3.347156524658203, + "ce_orig": 1.0066274404525757, + "epoch": 1.044863038320512, + "kl_loss": 0.04431052505970001, + "loss_ib": 0.0007778209401294589, + "step": 3634 + }, + { + "ce_ib": 5.666679382324219, + "ce_orig": 1.6079931259155273, + "epoch": 1.044863038320512, + "kl_loss": 0.03260741010308266, + "loss_ib": 0.0008927419548854232, + "step": 3634 + }, + { + "epoch": 1.0451506218994895, + "grad_norm": 0.11585390567779541, + "learning_rate": 3.788999295255485e-05, + "loss": 0.9109, + "step": 3635 + }, + { + "ce_ib": 4.095139503479004, + "ce_orig": 1.172995686531067, + "epoch": 1.0451506218994895, + "kl_loss": 0.048590682446956635, + "loss_ib": 0.0008954207296483219, + "step": 3635 + }, + { + "ce_ib": 2.6400794982910156, + "ce_orig": 0.5572373867034912, + "epoch": 1.0451506218994895, + "kl_loss": 0.06826864182949066, + "loss_ib": 0.0009466943447478116, + "step": 3635 + }, + { + "ce_ib": 1.9755514860153198, + "ce_orig": 0.3601635992527008, + "epoch": 1.0451506218994895, + "kl_loss": 0.11089786887168884, + "loss_ib": 0.0013065338134765625, + "step": 3635 + }, + { + "ce_ib": 4.013950824737549, + "ce_orig": 0.7150952816009521, + "epoch": 1.0451506218994895, + "kl_loss": 0.03974919021129608, + "loss_ib": 0.0007988869911059737, + "step": 3635 + }, + { + "ce_ib": 2.4440765380859375, + "ce_orig": 0.4641151428222656, + "epoch": 1.0454382054784672, + "kl_loss": 0.04505058377981186, + "loss_ib": 0.0006949134985916317, + "step": 3636 + }, + { + "ce_ib": 3.9701151847839355, + "ce_orig": 0.9330031275749207, + "epoch": 1.0454382054784672, + "kl_loss": 0.0420176237821579, + "loss_ib": 0.0008171877125278115, + "step": 3636 + }, + { + "ce_ib": 5.350597858428955, + "ce_orig": 1.484035849571228, + "epoch": 1.0454382054784672, + "kl_loss": 0.06489565223455429, + "loss_ib": 0.0011840162333101034, + "step": 3636 + }, + { + "ce_ib": 2.972343921661377, + "ce_orig": 0.7671582698822021, + "epoch": 1.0454382054784672, + "kl_loss": 0.04743628948926926, + "loss_ib": 0.0007715973188169301, + "step": 3636 + }, + { + "ce_ib": 2.857139825820923, + "ce_orig": 0.7227497696876526, + "epoch": 1.0457257890574447, + "kl_loss": 0.03034188039600849, + "loss_ib": 0.0005891327164135873, + "step": 3637 + }, + { + "ce_ib": 2.7134296894073486, + "ce_orig": 0.6359072327613831, + "epoch": 1.0457257890574447, + "kl_loss": 0.06144963949918747, + "loss_ib": 0.0008858392830006778, + "step": 3637 + }, + { + "ce_ib": 3.5235695838928223, + "ce_orig": 0.9087072014808655, + "epoch": 1.0457257890574447, + "kl_loss": 0.05958765745162964, + "loss_ib": 0.0009482335299253464, + "step": 3637 + }, + { + "ce_ib": 2.7305185794830322, + "ce_orig": 0.9003462195396423, + "epoch": 1.0457257890574447, + "kl_loss": 0.02891278639435768, + "loss_ib": 0.0005621797172352672, + "step": 3637 + }, + { + "ce_ib": 4.093862533569336, + "ce_orig": 1.108447790145874, + "epoch": 1.0460133726364225, + "kl_loss": 0.10556355118751526, + "loss_ib": 0.0014650216326117516, + "step": 3638 + }, + { + "ce_ib": 2.7516162395477295, + "ce_orig": 0.7581051588058472, + "epoch": 1.0460133726364225, + "kl_loss": 0.027401432394981384, + "loss_ib": 0.0005491759511642158, + "step": 3638 + }, + { + "ce_ib": 3.191328287124634, + "ce_orig": 0.6972124576568604, + "epoch": 1.0460133726364225, + "kl_loss": 0.033356890082359314, + "loss_ib": 0.0006527017103508115, + "step": 3638 + }, + { + "ce_ib": 4.339853286743164, + "ce_orig": 1.2126308679580688, + "epoch": 1.0460133726364225, + "kl_loss": 0.07095906138420105, + "loss_ib": 0.0011435758788138628, + "step": 3638 + }, + { + "ce_ib": 3.8675448894500732, + "ce_orig": 0.9967195987701416, + "epoch": 1.0463009562154002, + "kl_loss": 0.07152704894542694, + "loss_ib": 0.0011020249221473932, + "step": 3639 + }, + { + "ce_ib": 3.424650192260742, + "ce_orig": 0.7717434167861938, + "epoch": 1.0463009562154002, + "kl_loss": 0.04904921352863312, + "loss_ib": 0.0008329571573995054, + "step": 3639 + }, + { + "ce_ib": 2.790005922317505, + "ce_orig": 0.6746125221252441, + "epoch": 1.0463009562154002, + "kl_loss": 0.03567248582839966, + "loss_ib": 0.0006357253878377378, + "step": 3639 + }, + { + "ce_ib": 3.2645316123962402, + "ce_orig": 0.7223042249679565, + "epoch": 1.0463009562154002, + "kl_loss": 0.07153773307800293, + "loss_ib": 0.0010418304009363055, + "step": 3639 + }, + { + "epoch": 1.0465885397943777, + "grad_norm": 0.10041658580303192, + "learning_rate": 3.785672880307817e-05, + "loss": 0.7914, + "step": 3640 + }, + { + "ce_ib": 3.2306859493255615, + "ce_orig": 0.807073712348938, + "epoch": 1.0465885397943777, + "kl_loss": 0.07360376417636871, + "loss_ib": 0.001059106201864779, + "step": 3640 + }, + { + "ce_ib": 2.0131120681762695, + "ce_orig": 0.4567253291606903, + "epoch": 1.0465885397943777, + "kl_loss": 0.03583042323589325, + "loss_ib": 0.0005596153787337244, + "step": 3640 + }, + { + "ce_ib": 4.8726677894592285, + "ce_orig": 1.2343615293502808, + "epoch": 1.0465885397943777, + "kl_loss": 0.04786884784698486, + "loss_ib": 0.0009659552597440779, + "step": 3640 + }, + { + "ce_ib": 4.247217655181885, + "ce_orig": 1.2049587965011597, + "epoch": 1.0465885397943777, + "kl_loss": 0.050897788256406784, + "loss_ib": 0.0009336996008642018, + "step": 3640 + }, + { + "ce_ib": 3.412778615951538, + "ce_orig": 0.7262548804283142, + "epoch": 1.0468761233733554, + "kl_loss": 0.08408211171627045, + "loss_ib": 0.0011820989893749356, + "step": 3641 + }, + { + "ce_ib": 4.2817702293396, + "ce_orig": 0.9752016663551331, + "epoch": 1.0468761233733554, + "kl_loss": 0.08376768976449966, + "loss_ib": 0.001265853876248002, + "step": 3641 + }, + { + "ce_ib": 3.975749969482422, + "ce_orig": 0.6159504055976868, + "epoch": 1.0468761233733554, + "kl_loss": 0.06302496045827866, + "loss_ib": 0.0010278245899826288, + "step": 3641 + }, + { + "ce_ib": 2.737459182739258, + "ce_orig": 0.6746302843093872, + "epoch": 1.0468761233733554, + "kl_loss": 0.042675189673900604, + "loss_ib": 0.000700497766956687, + "step": 3641 + }, + { + "ce_ib": 3.562891721725464, + "ce_orig": 0.9409930109977722, + "epoch": 1.047163706952333, + "kl_loss": 0.03894440084695816, + "loss_ib": 0.0007457331521436572, + "step": 3642 + }, + { + "ce_ib": 3.943392753601074, + "ce_orig": 0.8878598213195801, + "epoch": 1.047163706952333, + "kl_loss": 0.029983270913362503, + "loss_ib": 0.0006941719329915941, + "step": 3642 + }, + { + "ce_ib": 3.8673288822174072, + "ce_orig": 0.8687927722930908, + "epoch": 1.047163706952333, + "kl_loss": 0.03487969934940338, + "loss_ib": 0.0007355298730544746, + "step": 3642 + }, + { + "ce_ib": 4.095306873321533, + "ce_orig": 1.2094945907592773, + "epoch": 1.047163706952333, + "kl_loss": 0.03698510676622391, + "loss_ib": 0.0007793816621415317, + "step": 3642 + }, + { + "ce_ib": 5.579973220825195, + "ce_orig": 1.4262354373931885, + "epoch": 1.0474512905313107, + "kl_loss": 0.11353567242622375, + "loss_ib": 0.0016933538718149066, + "step": 3643 + }, + { + "ce_ib": 3.3365085124969482, + "ce_orig": 0.5759968161582947, + "epoch": 1.0474512905313107, + "kl_loss": 0.059710267931222916, + "loss_ib": 0.0009307534783147275, + "step": 3643 + }, + { + "ce_ib": 2.786882162094116, + "ce_orig": 0.5611968636512756, + "epoch": 1.0474512905313107, + "kl_loss": 0.06867191940546036, + "loss_ib": 0.0009654074092395604, + "step": 3643 + }, + { + "ce_ib": 4.217031478881836, + "ce_orig": 0.5523214340209961, + "epoch": 1.0474512905313107, + "kl_loss": 0.08359445631504059, + "loss_ib": 0.0012576476437970996, + "step": 3643 + }, + { + "ce_ib": 4.576821804046631, + "ce_orig": 1.2875370979309082, + "epoch": 1.0477388741102882, + "kl_loss": 0.04017610847949982, + "loss_ib": 0.0008594432147219777, + "step": 3644 + }, + { + "ce_ib": 2.4392101764678955, + "ce_orig": 0.7526944875717163, + "epoch": 1.0477388741102882, + "kl_loss": 0.050326868891716, + "loss_ib": 0.0007471896824426949, + "step": 3644 + }, + { + "ce_ib": 4.454152584075928, + "ce_orig": 0.9967886209487915, + "epoch": 1.0477388741102882, + "kl_loss": 0.049343667924404144, + "loss_ib": 0.0009388519683852792, + "step": 3644 + }, + { + "ce_ib": 7.156513690948486, + "ce_orig": 1.620667815208435, + "epoch": 1.0477388741102882, + "kl_loss": 0.02930317632853985, + "loss_ib": 0.001008683117106557, + "step": 3644 + }, + { + "epoch": 1.048026457689266, + "grad_norm": 0.1127745658159256, + "learning_rate": 3.7823433678757694e-05, + "loss": 0.8676, + "step": 3645 + }, + { + "ce_ib": 3.0248184204101562, + "ce_orig": 0.783523440361023, + "epoch": 1.048026457689266, + "kl_loss": 0.031140323728322983, + "loss_ib": 0.000613885058555752, + "step": 3645 + }, + { + "ce_ib": 3.2961788177490234, + "ce_orig": 0.7440508604049683, + "epoch": 1.048026457689266, + "kl_loss": 0.04656364396214485, + "loss_ib": 0.0007952542509883642, + "step": 3645 + }, + { + "ce_ib": 3.9006083011627197, + "ce_orig": 0.7859775424003601, + "epoch": 1.048026457689266, + "kl_loss": 0.060140930116176605, + "loss_ib": 0.0009914700640365481, + "step": 3645 + }, + { + "ce_ib": 4.86191463470459, + "ce_orig": 1.3570462465286255, + "epoch": 1.048026457689266, + "kl_loss": 0.05767148733139038, + "loss_ib": 0.0010629062307998538, + "step": 3645 + }, + { + "ce_ib": 5.313906669616699, + "ce_orig": 1.3836867809295654, + "epoch": 1.0483140412682437, + "kl_loss": 0.06429390609264374, + "loss_ib": 0.0011743296636268497, + "step": 3646 + }, + { + "ce_ib": 4.1280927658081055, + "ce_orig": 0.8106207251548767, + "epoch": 1.0483140412682437, + "kl_loss": 0.06057456508278847, + "loss_ib": 0.0010185547871515155, + "step": 3646 + }, + { + "ce_ib": 2.63730788230896, + "ce_orig": 0.7722344398498535, + "epoch": 1.0483140412682437, + "kl_loss": 0.04057418927550316, + "loss_ib": 0.0006694726762361825, + "step": 3646 + }, + { + "ce_ib": 2.120060443878174, + "ce_orig": 0.3812369108200073, + "epoch": 1.0483140412682437, + "kl_loss": 0.04337966442108154, + "loss_ib": 0.0006458027055487037, + "step": 3646 + }, + { + "ce_ib": 5.680984020233154, + "ce_orig": 1.5491982698440552, + "epoch": 1.0486016248472212, + "kl_loss": 0.045846354216337204, + "loss_ib": 0.0010265619494020939, + "step": 3647 + }, + { + "ce_ib": 4.409982681274414, + "ce_orig": 0.8213547468185425, + "epoch": 1.0486016248472212, + "kl_loss": 0.0664558857679367, + "loss_ib": 0.0011055570794269443, + "step": 3647 + }, + { + "ce_ib": 2.251716136932373, + "ce_orig": 0.48031890392303467, + "epoch": 1.0486016248472212, + "kl_loss": 0.03203342854976654, + "loss_ib": 0.0005455058417282999, + "step": 3647 + }, + { + "ce_ib": 2.3769264221191406, + "ce_orig": 0.41799673438072205, + "epoch": 1.0486016248472212, + "kl_loss": 0.058123879134655, + "loss_ib": 0.0008189314394257963, + "step": 3647 + }, + { + "ce_ib": 4.1477532386779785, + "ce_orig": 0.7237592339515686, + "epoch": 1.048889208426199, + "kl_loss": 0.07125545293092728, + "loss_ib": 0.0011273297714069486, + "step": 3648 + }, + { + "ce_ib": 4.411855697631836, + "ce_orig": 0.9174479842185974, + "epoch": 1.048889208426199, + "kl_loss": 0.05651026591658592, + "loss_ib": 0.0010062882211059332, + "step": 3648 + }, + { + "ce_ib": 2.346351146697998, + "ce_orig": 0.4891236424446106, + "epoch": 1.048889208426199, + "kl_loss": 0.048437707126140594, + "loss_ib": 0.0007190121687017381, + "step": 3648 + }, + { + "ce_ib": 4.425379753112793, + "ce_orig": 1.1339426040649414, + "epoch": 1.048889208426199, + "kl_loss": 0.025289878249168396, + "loss_ib": 0.0006954367272555828, + "step": 3648 + }, + { + "ce_ib": 3.410703659057617, + "ce_orig": 0.736789345741272, + "epoch": 1.0491767920051764, + "kl_loss": 0.06571392714977264, + "loss_ib": 0.000998209579847753, + "step": 3649 + }, + { + "ce_ib": 3.1541550159454346, + "ce_orig": 0.7866247892379761, + "epoch": 1.0491767920051764, + "kl_loss": 0.05484771728515625, + "loss_ib": 0.0008638926665298641, + "step": 3649 + }, + { + "ce_ib": 3.56309175491333, + "ce_orig": 0.4663054645061493, + "epoch": 1.0491767920051764, + "kl_loss": 0.06491746008396149, + "loss_ib": 0.0010054836748167872, + "step": 3649 + }, + { + "ce_ib": 2.589296817779541, + "ce_orig": 0.580349326133728, + "epoch": 1.0491767920051764, + "kl_loss": 0.041833505034446716, + "loss_ib": 0.0006772647029720247, + "step": 3649 + }, + { + "epoch": 1.0494643755841542, + "grad_norm": 0.10126332193613052, + "learning_rate": 3.7790107659809096e-05, + "loss": 0.7973, + "step": 3650 + }, + { + "ce_ib": 3.54872989654541, + "ce_orig": 0.5599799156188965, + "epoch": 1.0494643755841542, + "kl_loss": 0.05573710799217224, + "loss_ib": 0.000912244024220854, + "step": 3650 + }, + { + "ce_ib": 2.3563644886016846, + "ce_orig": 0.7037509083747864, + "epoch": 1.0494643755841542, + "kl_loss": 0.03355604037642479, + "loss_ib": 0.0005711968406103551, + "step": 3650 + }, + { + "ce_ib": 2.828415632247925, + "ce_orig": 0.8507262468338013, + "epoch": 1.0494643755841542, + "kl_loss": 0.0340384915471077, + "loss_ib": 0.0006232264568097889, + "step": 3650 + }, + { + "ce_ib": 2.604367256164551, + "ce_orig": 0.46803128719329834, + "epoch": 1.0494643755841542, + "kl_loss": 0.03190428018569946, + "loss_ib": 0.000579479499720037, + "step": 3650 + }, + { + "ce_ib": 4.420785427093506, + "ce_orig": 1.1841404438018799, + "epoch": 1.0497519591631317, + "kl_loss": 0.0670948475599289, + "loss_ib": 0.0011130269849672914, + "step": 3651 + }, + { + "ce_ib": 2.2194714546203613, + "ce_orig": 0.41681501269340515, + "epoch": 1.0497519591631317, + "kl_loss": 0.03235204517841339, + "loss_ib": 0.0005454675992950797, + "step": 3651 + }, + { + "ce_ib": 4.717986106872559, + "ce_orig": 1.491771936416626, + "epoch": 1.0497519591631317, + "kl_loss": 0.03783370554447174, + "loss_ib": 0.0008501356933265924, + "step": 3651 + }, + { + "ce_ib": 5.194962024688721, + "ce_orig": 1.4536749124526978, + "epoch": 1.0497519591631317, + "kl_loss": 0.05174877494573593, + "loss_ib": 0.0010369839146733284, + "step": 3651 + }, + { + "ce_ib": 3.138779401779175, + "ce_orig": 1.0165663957595825, + "epoch": 1.0500395427421094, + "kl_loss": 0.04649584740400314, + "loss_ib": 0.0007788364309817553, + "step": 3652 + }, + { + "ce_ib": 3.2306551933288574, + "ce_orig": 0.3935449421405792, + "epoch": 1.0500395427421094, + "kl_loss": 0.14702780544757843, + "loss_ib": 0.00179334357380867, + "step": 3652 + }, + { + "ce_ib": 4.91366720199585, + "ce_orig": 0.8664909601211548, + "epoch": 1.0500395427421094, + "kl_loss": 0.07394436001777649, + "loss_ib": 0.0012308103032410145, + "step": 3652 + }, + { + "ce_ib": 2.637369394302368, + "ce_orig": 0.6024738550186157, + "epoch": 1.0500395427421094, + "kl_loss": 0.04398774728178978, + "loss_ib": 0.0007036144379526377, + "step": 3652 + }, + { + "ce_ib": 3.1156792640686035, + "ce_orig": 0.7114083766937256, + "epoch": 1.0503271263210872, + "kl_loss": 0.048265621066093445, + "loss_ib": 0.0007942241500131786, + "step": 3653 + }, + { + "ce_ib": 4.210839748382568, + "ce_orig": 1.1064449548721313, + "epoch": 1.0503271263210872, + "kl_loss": 0.05263818800449371, + "loss_ib": 0.0009474658872932196, + "step": 3653 + }, + { + "ce_ib": 3.730048418045044, + "ce_orig": 0.6004230380058289, + "epoch": 1.0503271263210872, + "kl_loss": 0.05988619476556778, + "loss_ib": 0.0009718667715787888, + "step": 3653 + }, + { + "ce_ib": 2.718602180480957, + "ce_orig": 0.6198137998580933, + "epoch": 1.0503271263210872, + "kl_loss": 0.06833764910697937, + "loss_ib": 0.0009552366682328284, + "step": 3653 + }, + { + "ce_ib": 4.830807685852051, + "ce_orig": 1.0031254291534424, + "epoch": 1.0506147099000647, + "kl_loss": 0.05583444610238075, + "loss_ib": 0.001041425159201026, + "step": 3654 + }, + { + "ce_ib": 3.7683918476104736, + "ce_orig": 0.6636815071105957, + "epoch": 1.0506147099000647, + "kl_loss": 0.07972054183483124, + "loss_ib": 0.0011740445625036955, + "step": 3654 + }, + { + "ce_ib": 2.8456156253814697, + "ce_orig": 0.555304765701294, + "epoch": 1.0506147099000647, + "kl_loss": 0.044573474675416946, + "loss_ib": 0.0007302963058464229, + "step": 3654 + }, + { + "ce_ib": 4.056174278259277, + "ce_orig": 1.0819900035858154, + "epoch": 1.0506147099000647, + "kl_loss": 0.03806191682815552, + "loss_ib": 0.000786236603744328, + "step": 3654 + }, + { + "epoch": 1.0509022934790424, + "grad_norm": 0.10241714119911194, + "learning_rate": 3.77567508265225e-05, + "loss": 0.8514, + "step": 3655 + }, + { + "ce_ib": 3.8882076740264893, + "ce_orig": 0.9425865411758423, + "epoch": 1.0509022934790424, + "kl_loss": 0.06156750023365021, + "loss_ib": 0.0010044957743957639, + "step": 3655 + }, + { + "ce_ib": 3.649251699447632, + "ce_orig": 0.7713489532470703, + "epoch": 1.0509022934790424, + "kl_loss": 0.055497657507658005, + "loss_ib": 0.000919901707675308, + "step": 3655 + }, + { + "ce_ib": 3.884326457977295, + "ce_orig": 1.1952643394470215, + "epoch": 1.0509022934790424, + "kl_loss": 0.054467372596263885, + "loss_ib": 0.0009331063483841717, + "step": 3655 + }, + { + "ce_ib": 3.8399384021759033, + "ce_orig": 0.5683444738388062, + "epoch": 1.0509022934790424, + "kl_loss": 0.05621841549873352, + "loss_ib": 0.0009461779845878482, + "step": 3655 + }, + { + "ce_ib": 2.564403533935547, + "ce_orig": 0.5584840774536133, + "epoch": 1.05118987705802, + "kl_loss": 0.028128542006015778, + "loss_ib": 0.0005377257475629449, + "step": 3656 + }, + { + "ce_ib": 3.237281560897827, + "ce_orig": 1.0830272436141968, + "epoch": 1.05118987705802, + "kl_loss": 0.03475799411535263, + "loss_ib": 0.000671308021992445, + "step": 3656 + }, + { + "ce_ib": 3.7118589878082275, + "ce_orig": 0.995951235294342, + "epoch": 1.05118987705802, + "kl_loss": 0.056612398475408554, + "loss_ib": 0.0009373098146170378, + "step": 3656 + }, + { + "ce_ib": 4.339733123779297, + "ce_orig": 0.7990221977233887, + "epoch": 1.05118987705802, + "kl_loss": 0.126449316740036, + "loss_ib": 0.00169846648350358, + "step": 3656 + }, + { + "ce_ib": 2.8939874172210693, + "ce_orig": 0.48677435517311096, + "epoch": 1.0514774606369977, + "kl_loss": 0.0471079982817173, + "loss_ib": 0.0007604786660522223, + "step": 3657 + }, + { + "ce_ib": 3.407078742980957, + "ce_orig": 0.8044586181640625, + "epoch": 1.0514774606369977, + "kl_loss": 0.04451790079474449, + "loss_ib": 0.000785886833909899, + "step": 3657 + }, + { + "ce_ib": 3.863553285598755, + "ce_orig": 0.7740575671195984, + "epoch": 1.0514774606369977, + "kl_loss": 0.061129629611968994, + "loss_ib": 0.0009976516012102365, + "step": 3657 + }, + { + "ce_ib": 3.2150156497955322, + "ce_orig": 0.6634572744369507, + "epoch": 1.0514774606369977, + "kl_loss": 0.08074690401554108, + "loss_ib": 0.0011289705289527774, + "step": 3657 + }, + { + "ce_ib": 3.7192413806915283, + "ce_orig": 1.1445400714874268, + "epoch": 1.0517650442159752, + "kl_loss": 0.04079535976052284, + "loss_ib": 0.0007798777078278363, + "step": 3658 + }, + { + "ce_ib": 3.640854835510254, + "ce_orig": 0.9441388845443726, + "epoch": 1.0517650442159752, + "kl_loss": 0.08773277699947357, + "loss_ib": 0.001241413177922368, + "step": 3658 + }, + { + "ce_ib": 2.9406256675720215, + "ce_orig": 0.8134599328041077, + "epoch": 1.0517650442159752, + "kl_loss": 0.027888046577572823, + "loss_ib": 0.0005729430122300982, + "step": 3658 + }, + { + "ce_ib": 4.953153133392334, + "ce_orig": 1.1478837728500366, + "epoch": 1.0517650442159752, + "kl_loss": 0.04142272472381592, + "loss_ib": 0.0009095425484701991, + "step": 3658 + }, + { + "ce_ib": 2.8217084407806396, + "ce_orig": 0.5913548469543457, + "epoch": 1.052052627794953, + "kl_loss": 0.06900839507579803, + "loss_ib": 0.0009722547838464379, + "step": 3659 + }, + { + "ce_ib": 2.2726731300354004, + "ce_orig": 0.599726140499115, + "epoch": 1.052052627794953, + "kl_loss": 0.05035392940044403, + "loss_ib": 0.0007308065542019904, + "step": 3659 + }, + { + "ce_ib": 2.780005931854248, + "ce_orig": 0.375083327293396, + "epoch": 1.052052627794953, + "kl_loss": 0.05309094488620758, + "loss_ib": 0.0008089100592769682, + "step": 3659 + }, + { + "ce_ib": 2.9161465167999268, + "ce_orig": 0.5824180245399475, + "epoch": 1.052052627794953, + "kl_loss": 0.05656488239765167, + "loss_ib": 0.0008572635124437511, + "step": 3659 + }, + { + "epoch": 1.0523402113739306, + "grad_norm": 0.09179432690143585, + "learning_rate": 3.7723363259262254e-05, + "loss": 0.8138, + "step": 3660 + }, + { + "ce_ib": 3.3473877906799316, + "ce_orig": 0.3858797550201416, + "epoch": 1.0523402113739306, + "kl_loss": 0.03808792307972908, + "loss_ib": 0.000715617963578552, + "step": 3660 + }, + { + "ce_ib": 3.2206039428710938, + "ce_orig": 0.520172655582428, + "epoch": 1.0523402113739306, + "kl_loss": 0.0598440021276474, + "loss_ib": 0.0009205004316754639, + "step": 3660 + }, + { + "ce_ib": 3.350889205932617, + "ce_orig": 1.0310777425765991, + "epoch": 1.0523402113739306, + "kl_loss": 0.06622914224863052, + "loss_ib": 0.000997380237095058, + "step": 3660 + }, + { + "ce_ib": 2.792623519897461, + "ce_orig": 0.6698562502861023, + "epoch": 1.0523402113739306, + "kl_loss": 0.046085961163043976, + "loss_ib": 0.000740121933631599, + "step": 3660 + }, + { + "ce_ib": 2.7741899490356445, + "ce_orig": 0.6909371614456177, + "epoch": 1.0526277949529081, + "kl_loss": 0.02511490508913994, + "loss_ib": 0.00052856799447909, + "step": 3661 + }, + { + "ce_ib": 5.469202041625977, + "ce_orig": 1.0560603141784668, + "epoch": 1.0526277949529081, + "kl_loss": 0.05469090864062309, + "loss_ib": 0.001093829283490777, + "step": 3661 + }, + { + "ce_ib": 2.5387325286865234, + "ce_orig": 0.5429944396018982, + "epoch": 1.0526277949529081, + "kl_loss": 0.045528218150138855, + "loss_ib": 0.0007091553998179734, + "step": 3661 + }, + { + "ce_ib": 4.241623878479004, + "ce_orig": 1.048890233039856, + "epoch": 1.0526277949529081, + "kl_loss": 0.02995149791240692, + "loss_ib": 0.0007236773381009698, + "step": 3661 + }, + { + "ce_ib": 2.4421803951263428, + "ce_orig": 0.7574121356010437, + "epoch": 1.0529153785318859, + "kl_loss": 0.03922107815742493, + "loss_ib": 0.000636428827419877, + "step": 3662 + }, + { + "ce_ib": 2.9568214416503906, + "ce_orig": 0.7463662028312683, + "epoch": 1.0529153785318859, + "kl_loss": 0.03272746503353119, + "loss_ib": 0.0006229567807167768, + "step": 3662 + }, + { + "ce_ib": 3.9000113010406494, + "ce_orig": 0.795534074306488, + "epoch": 1.0529153785318859, + "kl_loss": 0.033542949706315994, + "loss_ib": 0.0007254306110553443, + "step": 3662 + }, + { + "ce_ib": 4.001974582672119, + "ce_orig": 1.021540880203247, + "epoch": 1.0529153785318859, + "kl_loss": 0.06132529303431511, + "loss_ib": 0.0010134503245353699, + "step": 3662 + }, + { + "ce_ib": 5.831356525421143, + "ce_orig": 1.9135109186172485, + "epoch": 1.0532029621108634, + "kl_loss": 0.04590475559234619, + "loss_ib": 0.0010421831393614411, + "step": 3663 + }, + { + "ce_ib": 4.355069160461426, + "ce_orig": 1.2525663375854492, + "epoch": 1.0532029621108634, + "kl_loss": 0.0537244975566864, + "loss_ib": 0.000972751819062978, + "step": 3663 + }, + { + "ce_ib": 3.246973991394043, + "ce_orig": 0.7350581288337708, + "epoch": 1.0532029621108634, + "kl_loss": 0.048420097678899765, + "loss_ib": 0.0008088983595371246, + "step": 3663 + }, + { + "ce_ib": 2.600611686706543, + "ce_orig": 0.6225153207778931, + "epoch": 1.0532029621108634, + "kl_loss": 0.04082493111491203, + "loss_ib": 0.0006683104438707232, + "step": 3663 + }, + { + "ce_ib": 5.618424892425537, + "ce_orig": 1.4586844444274902, + "epoch": 1.0534905456898411, + "kl_loss": 0.056203823536634445, + "loss_ib": 0.001123880734667182, + "step": 3664 + }, + { + "ce_ib": 3.0560669898986816, + "ce_orig": 0.4466637372970581, + "epoch": 1.0534905456898411, + "kl_loss": 0.07497067749500275, + "loss_ib": 0.0010553135070949793, + "step": 3664 + }, + { + "ce_ib": 3.519864797592163, + "ce_orig": 0.7380690574645996, + "epoch": 1.0534905456898411, + "kl_loss": 0.06470923125743866, + "loss_ib": 0.000999078736640513, + "step": 3664 + }, + { + "ce_ib": 3.2352406978607178, + "ce_orig": 1.0492075681686401, + "epoch": 1.0534905456898411, + "kl_loss": 0.0210820734500885, + "loss_ib": 0.000534344813786447, + "step": 3664 + }, + { + "epoch": 1.0537781292688186, + "grad_norm": 0.1049962267279625, + "learning_rate": 3.7689945038466764e-05, + "loss": 0.8314, + "step": 3665 + }, + { + "ce_ib": 4.739682674407959, + "ce_orig": 1.1902743577957153, + "epoch": 1.0537781292688186, + "kl_loss": 0.04239189624786377, + "loss_ib": 0.0008978871628642082, + "step": 3665 + }, + { + "ce_ib": 1.8663029670715332, + "ce_orig": 0.43287283182144165, + "epoch": 1.0537781292688186, + "kl_loss": 0.09675808995962143, + "loss_ib": 0.0011542111169546843, + "step": 3665 + }, + { + "ce_ib": 2.7203454971313477, + "ce_orig": 0.6366628408432007, + "epoch": 1.0537781292688186, + "kl_loss": 0.037621140480041504, + "loss_ib": 0.0006482459721155465, + "step": 3665 + }, + { + "ce_ib": 3.429405450820923, + "ce_orig": 0.8116710782051086, + "epoch": 1.0537781292688186, + "kl_loss": 0.05557671934366226, + "loss_ib": 0.0008987077162601054, + "step": 3665 + }, + { + "ce_ib": 2.5263123512268066, + "ce_orig": 0.2571970522403717, + "epoch": 1.0540657128477964, + "kl_loss": 0.1503526270389557, + "loss_ib": 0.0017561574932187796, + "step": 3666 + }, + { + "ce_ib": 4.882833003997803, + "ce_orig": 0.6565601825714111, + "epoch": 1.0540657128477964, + "kl_loss": 0.058380864560604095, + "loss_ib": 0.0010720918653532863, + "step": 3666 + }, + { + "ce_ib": 2.6065452098846436, + "ce_orig": 0.4149096608161926, + "epoch": 1.0540657128477964, + "kl_loss": 0.050992194563150406, + "loss_ib": 0.0007705764146521688, + "step": 3666 + }, + { + "ce_ib": 3.9167287349700928, + "ce_orig": 1.1685429811477661, + "epoch": 1.0540657128477964, + "kl_loss": 0.047382161021232605, + "loss_ib": 0.0008654944249428809, + "step": 3666 + }, + { + "ce_ib": 2.8686647415161133, + "ce_orig": 0.7044447660446167, + "epoch": 1.0543532964267741, + "kl_loss": 0.06527143716812134, + "loss_ib": 0.0009395807865075767, + "step": 3667 + }, + { + "ce_ib": 4.460022449493408, + "ce_orig": 1.2012908458709717, + "epoch": 1.0543532964267741, + "kl_loss": 0.08047366887331009, + "loss_ib": 0.0012507389765232801, + "step": 3667 + }, + { + "ce_ib": 3.637583017349243, + "ce_orig": 1.073083758354187, + "epoch": 1.0543532964267741, + "kl_loss": 0.048995986580848694, + "loss_ib": 0.0008537181420251727, + "step": 3667 + }, + { + "ce_ib": 3.623255968093872, + "ce_orig": 0.781750500202179, + "epoch": 1.0543532964267741, + "kl_loss": 0.02382178232073784, + "loss_ib": 0.000600543397013098, + "step": 3667 + }, + { + "ce_ib": 2.6515800952911377, + "ce_orig": 0.5314957499504089, + "epoch": 1.0546408800057516, + "kl_loss": 0.047321632504463196, + "loss_ib": 0.0007383742486126721, + "step": 3668 + }, + { + "ce_ib": 3.535130023956299, + "ce_orig": 0.8753983378410339, + "epoch": 1.0546408800057516, + "kl_loss": 0.0865277424454689, + "loss_ib": 0.0012187904212623835, + "step": 3668 + }, + { + "ce_ib": 2.304236888885498, + "ce_orig": 0.6749607920646667, + "epoch": 1.0546408800057516, + "kl_loss": 0.03889094293117523, + "loss_ib": 0.0006193330627866089, + "step": 3668 + }, + { + "ce_ib": 2.6649181842803955, + "ce_orig": 0.647301435470581, + "epoch": 1.0546408800057516, + "kl_loss": 0.045059043914079666, + "loss_ib": 0.0007170822354964912, + "step": 3668 + }, + { + "ce_ib": 3.0404982566833496, + "ce_orig": 0.6514320373535156, + "epoch": 1.0549284635847294, + "kl_loss": 0.04333079978823662, + "loss_ib": 0.0007373577682301402, + "step": 3669 + }, + { + "ce_ib": 2.0070912837982178, + "ce_orig": 0.19000153243541718, + "epoch": 1.0549284635847294, + "kl_loss": 0.1518150418996811, + "loss_ib": 0.0017188595375046134, + "step": 3669 + }, + { + "ce_ib": 4.117775917053223, + "ce_orig": 1.1342236995697021, + "epoch": 1.0549284635847294, + "kl_loss": 0.04494043439626694, + "loss_ib": 0.000861181877553463, + "step": 3669 + }, + { + "ce_ib": 4.953807830810547, + "ce_orig": 1.102846384048462, + "epoch": 1.0549284635847294, + "kl_loss": 0.06450831890106201, + "loss_ib": 0.001140463980846107, + "step": 3669 + }, + { + "epoch": 1.0552160471637069, + "grad_norm": 0.11109195649623871, + "learning_rate": 3.765649624464828e-05, + "loss": 0.8885, + "step": 3670 + }, + { + "ce_ib": 3.7985644340515137, + "ce_orig": 0.9075586795806885, + "epoch": 1.0552160471637069, + "kl_loss": 0.041818805038928986, + "loss_ib": 0.0007980444934219122, + "step": 3670 + }, + { + "ce_ib": 3.64703631401062, + "ce_orig": 0.7847633957862854, + "epoch": 1.0552160471637069, + "kl_loss": 0.04752729833126068, + "loss_ib": 0.0008399765938520432, + "step": 3670 + }, + { + "ce_ib": 3.4428138732910156, + "ce_orig": 1.0806078910827637, + "epoch": 1.0552160471637069, + "kl_loss": 0.0315890908241272, + "loss_ib": 0.0006601722561754286, + "step": 3670 + }, + { + "ce_ib": 4.58545446395874, + "ce_orig": 1.251474380493164, + "epoch": 1.0552160471637069, + "kl_loss": 0.040674127638339996, + "loss_ib": 0.0008652867400087416, + "step": 3670 + }, + { + "ce_ib": 2.0081613063812256, + "ce_orig": 0.42415735125541687, + "epoch": 1.0555036307426846, + "kl_loss": 0.040360577404499054, + "loss_ib": 0.0006044218898750842, + "step": 3671 + }, + { + "ce_ib": 5.161348819732666, + "ce_orig": 1.1166390180587769, + "epoch": 1.0555036307426846, + "kl_loss": 0.056542299687862396, + "loss_ib": 0.001081557828001678, + "step": 3671 + }, + { + "ce_ib": 4.340669631958008, + "ce_orig": 1.2088215351104736, + "epoch": 1.0555036307426846, + "kl_loss": 0.05241654813289642, + "loss_ib": 0.0009582323837094009, + "step": 3671 + }, + { + "ce_ib": 2.2467854022979736, + "ce_orig": 0.6623722314834595, + "epoch": 1.0555036307426846, + "kl_loss": 0.044464509934186935, + "loss_ib": 0.000669323664624244, + "step": 3671 + }, + { + "ce_ib": 3.455765962600708, + "ce_orig": 0.6643897891044617, + "epoch": 1.0557912143216623, + "kl_loss": 0.05630963295698166, + "loss_ib": 0.0009086729260161519, + "step": 3672 + }, + { + "ce_ib": 2.6855521202087402, + "ce_orig": 0.7186477780342102, + "epoch": 1.0557912143216623, + "kl_loss": 0.03668134659528732, + "loss_ib": 0.0006353686330839992, + "step": 3672 + }, + { + "ce_ib": 4.630824089050293, + "ce_orig": 0.846615195274353, + "epoch": 1.0557912143216623, + "kl_loss": 0.021223876625299454, + "loss_ib": 0.000675321149174124, + "step": 3672 + }, + { + "ce_ib": 3.002976179122925, + "ce_orig": 0.8057954907417297, + "epoch": 1.0557912143216623, + "kl_loss": 0.0487605556845665, + "loss_ib": 0.0007879031472839415, + "step": 3672 + }, + { + "ce_ib": 3.255660057067871, + "ce_orig": 0.7589721083641052, + "epoch": 1.0560787979006399, + "kl_loss": 0.02930726855993271, + "loss_ib": 0.0006186387035995722, + "step": 3673 + }, + { + "ce_ib": 2.27150821685791, + "ce_orig": 0.3241187632083893, + "epoch": 1.0560787979006399, + "kl_loss": 0.14293093979358673, + "loss_ib": 0.0016564601100981236, + "step": 3673 + }, + { + "ce_ib": 3.7225587368011475, + "ce_orig": 0.8391950130462646, + "epoch": 1.0560787979006399, + "kl_loss": 0.05237610638141632, + "loss_ib": 0.0008960169507190585, + "step": 3673 + }, + { + "ce_ib": 2.6998536586761475, + "ce_orig": 0.6229172348976135, + "epoch": 1.0560787979006399, + "kl_loss": 0.045523159205913544, + "loss_ib": 0.000725216930732131, + "step": 3673 + }, + { + "ce_ib": 4.566819190979004, + "ce_orig": 0.7314819097518921, + "epoch": 1.0563663814796176, + "kl_loss": 0.056959882378578186, + "loss_ib": 0.00102628068998456, + "step": 3674 + }, + { + "ce_ib": 2.4440877437591553, + "ce_orig": 0.7669122219085693, + "epoch": 1.0563663814796176, + "kl_loss": 0.03997616469860077, + "loss_ib": 0.0006441703881137073, + "step": 3674 + }, + { + "ce_ib": 3.1303441524505615, + "ce_orig": 0.7691506147384644, + "epoch": 1.0563663814796176, + "kl_loss": 0.05143791809678078, + "loss_ib": 0.0008274135761894286, + "step": 3674 + }, + { + "ce_ib": 2.9198455810546875, + "ce_orig": 0.7034056782722473, + "epoch": 1.0563663814796176, + "kl_loss": 0.034881625324487686, + "loss_ib": 0.0006408008048310876, + "step": 3674 + }, + { + "epoch": 1.056653965058595, + "grad_norm": 0.10221625119447708, + "learning_rate": 3.762301695839271e-05, + "loss": 0.7777, + "step": 3675 + }, + { + "ce_ib": 4.448761940002441, + "ce_orig": 1.091296911239624, + "epoch": 1.056653965058595, + "kl_loss": 0.07952187955379486, + "loss_ib": 0.0012400948908179998, + "step": 3675 + }, + { + "ce_ib": 5.219203948974609, + "ce_orig": 1.4943829774856567, + "epoch": 1.056653965058595, + "kl_loss": 0.052925415337085724, + "loss_ib": 0.0010511744767427444, + "step": 3675 + }, + { + "ce_ib": 3.0829086303710938, + "ce_orig": 0.47401872277259827, + "epoch": 1.056653965058595, + "kl_loss": 0.1734631508588791, + "loss_ib": 0.0020429224241524935, + "step": 3675 + }, + { + "ce_ib": 3.3494696617126465, + "ce_orig": 0.8144792914390564, + "epoch": 1.056653965058595, + "kl_loss": 0.12053229659795761, + "loss_ib": 0.0015402698190882802, + "step": 3675 + }, + { + "ce_ib": 3.684643507003784, + "ce_orig": 0.8997794985771179, + "epoch": 1.0569415486375728, + "kl_loss": 0.050926461815834045, + "loss_ib": 0.0008777289185672998, + "step": 3676 + }, + { + "ce_ib": 2.298492431640625, + "ce_orig": 0.3465374708175659, + "epoch": 1.0569415486375728, + "kl_loss": 0.051681630313396454, + "loss_ib": 0.0007466655224561691, + "step": 3676 + }, + { + "ce_ib": 4.038775444030762, + "ce_orig": 0.9432259202003479, + "epoch": 1.0569415486375728, + "kl_loss": 0.058206453919410706, + "loss_ib": 0.0009859419660642743, + "step": 3676 + }, + { + "ce_ib": 5.236411094665527, + "ce_orig": 1.4272518157958984, + "epoch": 1.0569415486375728, + "kl_loss": 0.05729703605175018, + "loss_ib": 0.001096611493267119, + "step": 3676 + }, + { + "ce_ib": 4.470858097076416, + "ce_orig": 1.202985405921936, + "epoch": 1.0572291322165503, + "kl_loss": 0.07941056042909622, + "loss_ib": 0.0012411914067342877, + "step": 3677 + }, + { + "ce_ib": 3.8794384002685547, + "ce_orig": 0.9199538230895996, + "epoch": 1.0572291322165503, + "kl_loss": 0.035884443670511246, + "loss_ib": 0.0007467882242053747, + "step": 3677 + }, + { + "ce_ib": 2.1235032081604004, + "ce_orig": 0.5607789754867554, + "epoch": 1.0572291322165503, + "kl_loss": 0.04505394771695137, + "loss_ib": 0.0006628897390328348, + "step": 3677 + }, + { + "ce_ib": 4.001328945159912, + "ce_orig": 0.9619510769844055, + "epoch": 1.0572291322165503, + "kl_loss": 0.060697250068187714, + "loss_ib": 0.0010071053402498364, + "step": 3677 + }, + { + "ce_ib": 4.48187255859375, + "ce_orig": 1.1055632829666138, + "epoch": 1.057516715795528, + "kl_loss": 0.049478210508823395, + "loss_ib": 0.0009429692872799933, + "step": 3678 + }, + { + "ce_ib": 2.6335551738739014, + "ce_orig": 0.5880091786384583, + "epoch": 1.057516715795528, + "kl_loss": 0.061469919979572296, + "loss_ib": 0.0008780547068454325, + "step": 3678 + }, + { + "ce_ib": 2.812020778656006, + "ce_orig": 0.7508968710899353, + "epoch": 1.057516715795528, + "kl_loss": 0.04203995317220688, + "loss_ib": 0.0007016015588305891, + "step": 3678 + }, + { + "ce_ib": 0.8179013729095459, + "ce_orig": 0.12118224054574966, + "epoch": 1.057516715795528, + "kl_loss": 0.09373552352190018, + "loss_ib": 0.0010191453620791435, + "step": 3678 + }, + { + "ce_ib": 2.962233543395996, + "ce_orig": 0.6554964780807495, + "epoch": 1.0578042993745058, + "kl_loss": 0.04361097142100334, + "loss_ib": 0.000732333050109446, + "step": 3679 + }, + { + "ce_ib": 4.400030612945557, + "ce_orig": 1.4292845726013184, + "epoch": 1.0578042993745058, + "kl_loss": 0.04836735129356384, + "loss_ib": 0.0009236765326932073, + "step": 3679 + }, + { + "ce_ib": 3.336416721343994, + "ce_orig": 0.8010415434837341, + "epoch": 1.0578042993745058, + "kl_loss": 0.056581661105155945, + "loss_ib": 0.0008994581876322627, + "step": 3679 + }, + { + "ce_ib": 2.988227128982544, + "ce_orig": 0.9342183470726013, + "epoch": 1.0578042993745058, + "kl_loss": 0.040720950812101364, + "loss_ib": 0.0007060322095640004, + "step": 3679 + }, + { + "epoch": 1.0580918829534833, + "grad_norm": 0.10495264828205109, + "learning_rate": 3.7589507260359404e-05, + "loss": 0.8692, + "step": 3680 + }, + { + "ce_ib": 4.899991989135742, + "ce_orig": 1.4539695978164673, + "epoch": 1.0580918829534833, + "kl_loss": 0.0421561636030674, + "loss_ib": 0.0009115607826970518, + "step": 3680 + }, + { + "ce_ib": 4.807260990142822, + "ce_orig": 1.107971429824829, + "epoch": 1.0580918829534833, + "kl_loss": 0.04448400437831879, + "loss_ib": 0.0009255660697817802, + "step": 3680 + }, + { + "ce_ib": 2.031446695327759, + "ce_orig": 0.49906399846076965, + "epoch": 1.0580918829534833, + "kl_loss": 0.03126487508416176, + "loss_ib": 0.0005157933919690549, + "step": 3680 + }, + { + "ce_ib": 2.4217941761016846, + "ce_orig": 0.657681405544281, + "epoch": 1.0580918829534833, + "kl_loss": 0.034055598080158234, + "loss_ib": 0.0005827354034408927, + "step": 3680 + }, + { + "ce_ib": 2.7908987998962402, + "ce_orig": 0.6811373233795166, + "epoch": 1.058379466532461, + "kl_loss": 0.025723662227392197, + "loss_ib": 0.000536326493602246, + "step": 3681 + }, + { + "ce_ib": 5.7509589195251465, + "ce_orig": 1.7880455255508423, + "epoch": 1.058379466532461, + "kl_loss": 0.03901144117116928, + "loss_ib": 0.0009652102598920465, + "step": 3681 + }, + { + "ce_ib": 3.313293933868408, + "ce_orig": 0.5443181395530701, + "epoch": 1.058379466532461, + "kl_loss": 0.06911510229110718, + "loss_ib": 0.0010224804282188416, + "step": 3681 + }, + { + "ce_ib": 3.6490979194641113, + "ce_orig": 1.0106916427612305, + "epoch": 1.058379466532461, + "kl_loss": 0.0459345281124115, + "loss_ib": 0.0008242550538852811, + "step": 3681 + }, + { + "ce_ib": 5.03627347946167, + "ce_orig": 1.1413508653640747, + "epoch": 1.0586670501114386, + "kl_loss": 0.04461669921875, + "loss_ib": 0.0009497943101450801, + "step": 3682 + }, + { + "ce_ib": 4.561119556427002, + "ce_orig": 1.2637429237365723, + "epoch": 1.0586670501114386, + "kl_loss": 0.04519205167889595, + "loss_ib": 0.000908032467123121, + "step": 3682 + }, + { + "ce_ib": 4.535766124725342, + "ce_orig": 1.2349954843521118, + "epoch": 1.0586670501114386, + "kl_loss": 0.04849420487880707, + "loss_ib": 0.0009385186131112278, + "step": 3682 + }, + { + "ce_ib": 1.3636974096298218, + "ce_orig": 0.18747535347938538, + "epoch": 1.0586670501114386, + "kl_loss": 0.05412765592336655, + "loss_ib": 0.0006776463123969734, + "step": 3682 + }, + { + "ce_ib": 2.718675374984741, + "ce_orig": 0.75033038854599, + "epoch": 1.0589546336904163, + "kl_loss": 0.04465499520301819, + "loss_ib": 0.0007184174610301852, + "step": 3683 + }, + { + "ce_ib": 3.4016952514648438, + "ce_orig": 0.9450106024742126, + "epoch": 1.0589546336904163, + "kl_loss": 0.049001194536685944, + "loss_ib": 0.0008301814668811858, + "step": 3683 + }, + { + "ce_ib": 3.0490410327911377, + "ce_orig": 0.6836897134780884, + "epoch": 1.0589546336904163, + "kl_loss": 0.030291328206658363, + "loss_ib": 0.0006078173755668104, + "step": 3683 + }, + { + "ce_ib": 2.6219420433044434, + "ce_orig": 0.6044001579284668, + "epoch": 1.0589546336904163, + "kl_loss": 0.03243771195411682, + "loss_ib": 0.0005865712882950902, + "step": 3683 + }, + { + "ce_ib": 6.675936698913574, + "ce_orig": 1.554236888885498, + "epoch": 1.0592422172693938, + "kl_loss": 0.0746510773897171, + "loss_ib": 0.0014141043648123741, + "step": 3684 + }, + { + "ce_ib": 4.078558444976807, + "ce_orig": 1.079607605934143, + "epoch": 1.0592422172693938, + "kl_loss": 0.042122431099414825, + "loss_ib": 0.000829080177936703, + "step": 3684 + }, + { + "ce_ib": 3.6670994758605957, + "ce_orig": 0.8324710726737976, + "epoch": 1.0592422172693938, + "kl_loss": 0.055521801114082336, + "loss_ib": 0.0009219279745593667, + "step": 3684 + }, + { + "ce_ib": 3.2533209323883057, + "ce_orig": 0.6892156600952148, + "epoch": 1.0592422172693938, + "kl_loss": 0.04384786635637283, + "loss_ib": 0.0007638107053935528, + "step": 3684 + }, + { + "epoch": 1.0595298008483716, + "grad_norm": 0.10328046977519989, + "learning_rate": 3.755596723128104e-05, + "loss": 0.8674, + "step": 3685 + }, + { + "ce_ib": 2.6600184440612793, + "ce_orig": 0.5273665189743042, + "epoch": 1.0595298008483716, + "kl_loss": 0.044452033936977386, + "loss_ib": 0.0007105221156962216, + "step": 3685 + }, + { + "ce_ib": 4.012338161468506, + "ce_orig": 0.8469489812850952, + "epoch": 1.0595298008483716, + "kl_loss": 0.03958763927221298, + "loss_ib": 0.0007971102022565901, + "step": 3685 + }, + { + "ce_ib": 4.061145305633545, + "ce_orig": 1.1600728034973145, + "epoch": 1.0595298008483716, + "kl_loss": 0.06959185749292374, + "loss_ib": 0.0011020330712199211, + "step": 3685 + }, + { + "ce_ib": 2.953584909439087, + "ce_orig": 0.35028061270713806, + "epoch": 1.0595298008483716, + "kl_loss": 0.049586519598960876, + "loss_ib": 0.0007912236033007503, + "step": 3685 + }, + { + "ce_ib": 2.939466953277588, + "ce_orig": 0.5338577032089233, + "epoch": 1.0598173844273493, + "kl_loss": 0.056437499821186066, + "loss_ib": 0.000858321669511497, + "step": 3686 + }, + { + "ce_ib": 3.489628314971924, + "ce_orig": 0.9289372563362122, + "epoch": 1.0598173844273493, + "kl_loss": 0.04607924073934555, + "loss_ib": 0.0008097551763057709, + "step": 3686 + }, + { + "ce_ib": 2.882937431335449, + "ce_orig": 0.6689295768737793, + "epoch": 1.0598173844273493, + "kl_loss": 0.05245010554790497, + "loss_ib": 0.0008127947803586721, + "step": 3686 + }, + { + "ce_ib": 5.002571105957031, + "ce_orig": 0.5956726670265198, + "epoch": 1.0598173844273493, + "kl_loss": 0.06932081282138824, + "loss_ib": 0.0011934651993215084, + "step": 3686 + }, + { + "ce_ib": 3.3611512184143066, + "ce_orig": 0.7554436922073364, + "epoch": 1.0601049680063268, + "kl_loss": 0.048443570733070374, + "loss_ib": 0.0008205507765524089, + "step": 3687 + }, + { + "ce_ib": 3.9511640071868896, + "ce_orig": 1.0463064908981323, + "epoch": 1.0601049680063268, + "kl_loss": 0.03996070474386215, + "loss_ib": 0.0007947234553284943, + "step": 3687 + }, + { + "ce_ib": 2.738276481628418, + "ce_orig": 0.5984033942222595, + "epoch": 1.0601049680063268, + "kl_loss": 0.03869478404521942, + "loss_ib": 0.0006607754621654749, + "step": 3687 + }, + { + "ce_ib": 3.1502413749694824, + "ce_orig": 0.9083443284034729, + "epoch": 1.0601049680063268, + "kl_loss": 0.03562174364924431, + "loss_ib": 0.0006712415488436818, + "step": 3687 + }, + { + "ce_ib": 1.8422422409057617, + "ce_orig": 0.5008044242858887, + "epoch": 1.0603925515853045, + "kl_loss": 0.03002787195146084, + "loss_ib": 0.00048450290341861546, + "step": 3688 + }, + { + "ce_ib": 4.526506423950195, + "ce_orig": 1.1721868515014648, + "epoch": 1.0603925515853045, + "kl_loss": 0.05101126432418823, + "loss_ib": 0.0009627632680349052, + "step": 3688 + }, + { + "ce_ib": 3.0578410625457764, + "ce_orig": 0.7119435667991638, + "epoch": 1.0603925515853045, + "kl_loss": 0.06112377345561981, + "loss_ib": 0.0009170218254439533, + "step": 3688 + }, + { + "ce_ib": 3.3911733627319336, + "ce_orig": 0.4263986349105835, + "epoch": 1.0603925515853045, + "kl_loss": 0.10300147533416748, + "loss_ib": 0.0013691320782527328, + "step": 3688 + }, + { + "ce_ib": 3.1446592807769775, + "ce_orig": 0.5902112126350403, + "epoch": 1.060680135164282, + "kl_loss": 0.048617344349622726, + "loss_ib": 0.0008006393909454346, + "step": 3689 + }, + { + "ce_ib": 3.6539978981018066, + "ce_orig": 0.8017173409461975, + "epoch": 1.060680135164282, + "kl_loss": 0.1542099267244339, + "loss_ib": 0.0019074990414083004, + "step": 3689 + }, + { + "ce_ib": 4.496782302856445, + "ce_orig": 1.1345494985580444, + "epoch": 1.060680135164282, + "kl_loss": 0.06044013798236847, + "loss_ib": 0.0010540796210989356, + "step": 3689 + }, + { + "ce_ib": 4.714128017425537, + "ce_orig": 1.1176278591156006, + "epoch": 1.060680135164282, + "kl_loss": 0.07505588233470917, + "loss_ib": 0.0012219715863466263, + "step": 3689 + }, + { + "epoch": 1.0609677187432598, + "grad_norm": 0.09247490018606186, + "learning_rate": 3.75223969519633e-05, + "loss": 0.8568, + "step": 3690 + }, + { + "ce_ib": 2.9835221767425537, + "ce_orig": 0.45489054918289185, + "epoch": 1.0609677187432598, + "kl_loss": 0.05852430313825607, + "loss_ib": 0.0008835952030494809, + "step": 3690 + }, + { + "ce_ib": 2.863848924636841, + "ce_orig": 0.7497650980949402, + "epoch": 1.0609677187432598, + "kl_loss": 0.03498966619372368, + "loss_ib": 0.0006362815038301051, + "step": 3690 + }, + { + "ce_ib": 3.559401035308838, + "ce_orig": 0.9418178200721741, + "epoch": 1.0609677187432598, + "kl_loss": 0.05258302018046379, + "loss_ib": 0.000881770218256861, + "step": 3690 + }, + { + "ce_ib": 4.729901313781738, + "ce_orig": 1.5082720518112183, + "epoch": 1.0609677187432598, + "kl_loss": 0.0521090030670166, + "loss_ib": 0.000994080095551908, + "step": 3690 + }, + { + "ce_ib": 3.990408182144165, + "ce_orig": 0.9030332565307617, + "epoch": 1.0612553023222373, + "kl_loss": 0.05291719734668732, + "loss_ib": 0.0009282127721235156, + "step": 3691 + }, + { + "ce_ib": 3.882002115249634, + "ce_orig": 0.9168003797531128, + "epoch": 1.0612553023222373, + "kl_loss": 0.0332847535610199, + "loss_ib": 0.000721047748811543, + "step": 3691 + }, + { + "ce_ib": 2.447305679321289, + "ce_orig": 0.5757784247398376, + "epoch": 1.0612553023222373, + "kl_loss": 0.04286641627550125, + "loss_ib": 0.0006733947084285319, + "step": 3691 + }, + { + "ce_ib": 4.0153374671936035, + "ce_orig": 0.8164312243461609, + "epoch": 1.0612553023222373, + "kl_loss": 0.057435739785432816, + "loss_ib": 0.0009758911328390241, + "step": 3691 + }, + { + "ce_ib": 2.8064372539520264, + "ce_orig": 0.6040792465209961, + "epoch": 1.061542885901215, + "kl_loss": 0.04719429463148117, + "loss_ib": 0.0007525866385549307, + "step": 3692 + }, + { + "ce_ib": 2.377333164215088, + "ce_orig": 0.7047256231307983, + "epoch": 1.061542885901215, + "kl_loss": 0.04248254746198654, + "loss_ib": 0.0006625588284805417, + "step": 3692 + }, + { + "ce_ib": 5.732911586761475, + "ce_orig": 1.3083707094192505, + "epoch": 1.061542885901215, + "kl_loss": 0.08464735746383667, + "loss_ib": 0.0014197647105902433, + "step": 3692 + }, + { + "ce_ib": 3.7843737602233887, + "ce_orig": 0.680205225944519, + "epoch": 1.061542885901215, + "kl_loss": 0.03703443333506584, + "loss_ib": 0.0007487817201763391, + "step": 3692 + }, + { + "ce_ib": 4.600966930389404, + "ce_orig": 1.2305763959884644, + "epoch": 1.0618304694801928, + "kl_loss": 0.05590960383415222, + "loss_ib": 0.001019192743115127, + "step": 3693 + }, + { + "ce_ib": 3.8989691734313965, + "ce_orig": 0.5433670282363892, + "epoch": 1.0618304694801928, + "kl_loss": 0.06563829630613327, + "loss_ib": 0.001046279794536531, + "step": 3693 + }, + { + "ce_ib": 3.0948891639709473, + "ce_orig": 0.8675826787948608, + "epoch": 1.0618304694801928, + "kl_loss": 0.038176506757736206, + "loss_ib": 0.000691253982950002, + "step": 3693 + }, + { + "ce_ib": 3.7700536251068115, + "ce_orig": 0.8774775266647339, + "epoch": 1.0618304694801928, + "kl_loss": 0.05009961128234863, + "loss_ib": 0.0008780014468356967, + "step": 3693 + }, + { + "ce_ib": 1.1209986209869385, + "ce_orig": 0.1617640256881714, + "epoch": 1.0621180530591703, + "kl_loss": 0.10183528065681458, + "loss_ib": 0.001130452612414956, + "step": 3694 + }, + { + "ce_ib": 3.7496824264526367, + "ce_orig": 0.5160831809043884, + "epoch": 1.0621180530591703, + "kl_loss": 0.05447410047054291, + "loss_ib": 0.0009197091567330062, + "step": 3694 + }, + { + "ce_ib": 5.842114448547363, + "ce_orig": 1.4586384296417236, + "epoch": 1.0621180530591703, + "kl_loss": 0.03636900335550308, + "loss_ib": 0.000947901455219835, + "step": 3694 + }, + { + "ce_ib": 3.1968984603881836, + "ce_orig": 0.4892879128456116, + "epoch": 1.0621180530591703, + "kl_loss": 0.03621745854616165, + "loss_ib": 0.0006818644469603896, + "step": 3694 + }, + { + "epoch": 1.062405636638148, + "grad_norm": 0.10530728101730347, + "learning_rate": 3.748879650328481e-05, + "loss": 0.8601, + "step": 3695 + }, + { + "ce_ib": 3.404590129852295, + "ce_orig": 0.986975371837616, + "epoch": 1.062405636638148, + "kl_loss": 0.047319117933511734, + "loss_ib": 0.0008136502001434565, + "step": 3695 + }, + { + "ce_ib": 2.2387783527374268, + "ce_orig": 0.4757207930088043, + "epoch": 1.062405636638148, + "kl_loss": 0.039888933300971985, + "loss_ib": 0.0006227671401575208, + "step": 3695 + }, + { + "ce_ib": 4.482672214508057, + "ce_orig": 0.9328665137290955, + "epoch": 1.062405636638148, + "kl_loss": 0.06312665343284607, + "loss_ib": 0.001079533714801073, + "step": 3695 + }, + { + "ce_ib": 4.075089931488037, + "ce_orig": 0.6844134330749512, + "epoch": 1.062405636638148, + "kl_loss": 0.04783547669649124, + "loss_ib": 0.0008858637302182615, + "step": 3695 + }, + { + "ce_ib": 5.517061710357666, + "ce_orig": 1.7405576705932617, + "epoch": 1.0626932202171255, + "kl_loss": 0.05002515763044357, + "loss_ib": 0.001051957719027996, + "step": 3696 + }, + { + "ce_ib": 4.401388168334961, + "ce_orig": 0.7444131970405579, + "epoch": 1.0626932202171255, + "kl_loss": 0.0378616601228714, + "loss_ib": 0.000818755361251533, + "step": 3696 + }, + { + "ce_ib": 1.8315520286560059, + "ce_orig": 0.3869491219520569, + "epoch": 1.0626932202171255, + "kl_loss": 0.0403301864862442, + "loss_ib": 0.0005864570266567171, + "step": 3696 + }, + { + "ce_ib": 5.125545024871826, + "ce_orig": 1.3555666208267212, + "epoch": 1.0626932202171255, + "kl_loss": 0.052022628486156464, + "loss_ib": 0.001032780739478767, + "step": 3696 + }, + { + "ce_ib": 4.553793907165527, + "ce_orig": 1.3202136754989624, + "epoch": 1.0629808037961033, + "kl_loss": 0.03519313782453537, + "loss_ib": 0.0008073107455857098, + "step": 3697 + }, + { + "ce_ib": 2.711043119430542, + "ce_orig": 0.6236414909362793, + "epoch": 1.0629808037961033, + "kl_loss": 0.02879771590232849, + "loss_ib": 0.0005590814398601651, + "step": 3697 + }, + { + "ce_ib": 2.618043899536133, + "ce_orig": 0.7031376361846924, + "epoch": 1.0629808037961033, + "kl_loss": 0.050141915678977966, + "loss_ib": 0.0007632235065102577, + "step": 3697 + }, + { + "ce_ib": 2.9226436614990234, + "ce_orig": 0.7934965491294861, + "epoch": 1.0629808037961033, + "kl_loss": 0.0488387867808342, + "loss_ib": 0.0007806522189639509, + "step": 3697 + }, + { + "ce_ib": 3.250351905822754, + "ce_orig": 0.5222002863883972, + "epoch": 1.0632683873750808, + "kl_loss": 0.07206441462039948, + "loss_ib": 0.0010456793243065476, + "step": 3698 + }, + { + "ce_ib": 3.465135097503662, + "ce_orig": 0.801612913608551, + "epoch": 1.0632683873750808, + "kl_loss": 0.03371920809149742, + "loss_ib": 0.0006837055552750826, + "step": 3698 + }, + { + "ce_ib": 2.0881106853485107, + "ce_orig": 0.21972738206386566, + "epoch": 1.0632683873750808, + "kl_loss": 0.031391073018312454, + "loss_ib": 0.000522721791639924, + "step": 3698 + }, + { + "ce_ib": 5.371395111083984, + "ce_orig": 1.2497667074203491, + "epoch": 1.0632683873750808, + "kl_loss": 0.05568080395460129, + "loss_ib": 0.0010939475614577532, + "step": 3698 + }, + { + "ce_ib": 3.842751979827881, + "ce_orig": 0.8918229937553406, + "epoch": 1.0635559709540585, + "kl_loss": 0.04696820676326752, + "loss_ib": 0.0008539572008885443, + "step": 3699 + }, + { + "ce_ib": 2.514669179916382, + "ce_orig": 0.518560528755188, + "epoch": 1.0635559709540585, + "kl_loss": 0.12768954038619995, + "loss_ib": 0.0015283622778952122, + "step": 3699 + }, + { + "ce_ib": 3.497962713241577, + "ce_orig": 0.7755013108253479, + "epoch": 1.0635559709540585, + "kl_loss": 0.040331803262233734, + "loss_ib": 0.0007531142910011113, + "step": 3699 + }, + { + "ce_ib": 3.235633134841919, + "ce_orig": 0.9238806366920471, + "epoch": 1.0635559709540585, + "kl_loss": 0.055021315813064575, + "loss_ib": 0.0008737764437682927, + "step": 3699 + }, + { + "epoch": 1.0638435545330363, + "grad_norm": 0.10213116556406021, + "learning_rate": 3.745516596619681e-05, + "loss": 0.8179, + "step": 3700 + }, + { + "ce_ib": 4.385622024536133, + "ce_orig": 0.9772007465362549, + "epoch": 1.0638435545330363, + "kl_loss": 0.06028849259018898, + "loss_ib": 0.0010414470452815294, + "step": 3700 + }, + { + "ce_ib": 2.6713109016418457, + "ce_orig": 0.6428437829017639, + "epoch": 1.0638435545330363, + "kl_loss": 0.03809274360537529, + "loss_ib": 0.0006480584852397442, + "step": 3700 + }, + { + "ce_ib": 3.4664878845214844, + "ce_orig": 0.8076411485671997, + "epoch": 1.0638435545330363, + "kl_loss": 0.044057007879018784, + "loss_ib": 0.0007872187998145819, + "step": 3700 + }, + { + "ce_ib": 3.050363063812256, + "ce_orig": 0.7719506621360779, + "epoch": 1.0638435545330363, + "kl_loss": 0.04423622041940689, + "loss_ib": 0.0007473984733223915, + "step": 3700 + }, + { + "ce_ib": 4.054849624633789, + "ce_orig": 1.056265950202942, + "epoch": 1.0641311381120138, + "kl_loss": 0.052677951753139496, + "loss_ib": 0.0009322644327767193, + "step": 3701 + }, + { + "ce_ib": 3.3915443420410156, + "ce_orig": 0.7258814573287964, + "epoch": 1.0641311381120138, + "kl_loss": 0.08242630958557129, + "loss_ib": 0.001163417473435402, + "step": 3701 + }, + { + "ce_ib": 2.1181929111480713, + "ce_orig": 0.45085370540618896, + "epoch": 1.0641311381120138, + "kl_loss": 0.11557428538799286, + "loss_ib": 0.0013675621012225747, + "step": 3701 + }, + { + "ce_ib": 3.9026660919189453, + "ce_orig": 0.8679541349411011, + "epoch": 1.0641311381120138, + "kl_loss": 0.04688296467065811, + "loss_ib": 0.0008590961806476116, + "step": 3701 + }, + { + "ce_ib": 4.814026355743408, + "ce_orig": 1.1400586366653442, + "epoch": 1.0644187216909915, + "kl_loss": 0.062181152403354645, + "loss_ib": 0.0011032141046598554, + "step": 3702 + }, + { + "ce_ib": 2.759089708328247, + "ce_orig": 0.5796658992767334, + "epoch": 1.0644187216909915, + "kl_loss": 0.05452614277601242, + "loss_ib": 0.0008211703388951719, + "step": 3702 + }, + { + "ce_ib": 3.9618823528289795, + "ce_orig": 0.9171618819236755, + "epoch": 1.0644187216909915, + "kl_loss": 0.038117777556180954, + "loss_ib": 0.0007773659890517592, + "step": 3702 + }, + { + "ce_ib": 2.788149356842041, + "ce_orig": 0.5875897407531738, + "epoch": 1.0644187216909915, + "kl_loss": 0.043804802000522614, + "loss_ib": 0.0007168629672378302, + "step": 3702 + }, + { + "ce_ib": 3.5143160820007324, + "ce_orig": 0.6564195156097412, + "epoch": 1.064706305269969, + "kl_loss": 0.04046626761555672, + "loss_ib": 0.0007560942904092371, + "step": 3703 + }, + { + "ce_ib": 4.0125532150268555, + "ce_orig": 0.522063672542572, + "epoch": 1.064706305269969, + "kl_loss": 0.06141049042344093, + "loss_ib": 0.0010153602343052626, + "step": 3703 + }, + { + "ce_ib": 3.930816173553467, + "ce_orig": 1.1576522588729858, + "epoch": 1.064706305269969, + "kl_loss": 0.0420658178627491, + "loss_ib": 0.0008137397235259414, + "step": 3703 + }, + { + "ce_ib": 4.115622043609619, + "ce_orig": 1.2345694303512573, + "epoch": 1.064706305269969, + "kl_loss": 0.028122736141085625, + "loss_ib": 0.0006927895592525601, + "step": 3703 + }, + { + "ce_ib": 2.248276472091675, + "ce_orig": 0.4879184663295746, + "epoch": 1.0649938888489467, + "kl_loss": 0.04668629169464111, + "loss_ib": 0.0006916905986145139, + "step": 3704 + }, + { + "ce_ib": 3.870959997177124, + "ce_orig": 0.9893530607223511, + "epoch": 1.0649938888489467, + "kl_loss": 0.05155714601278305, + "loss_ib": 0.0009026674670167267, + "step": 3704 + }, + { + "ce_ib": 2.3323729038238525, + "ce_orig": 0.4063306450843811, + "epoch": 1.0649938888489467, + "kl_loss": 0.07092093676328659, + "loss_ib": 0.0009424466406926513, + "step": 3704 + }, + { + "ce_ib": 2.5688843727111816, + "ce_orig": 0.6828180551528931, + "epoch": 1.0649938888489467, + "kl_loss": 0.0400637611746788, + "loss_ib": 0.0006575260194949806, + "step": 3704 + }, + { + "epoch": 1.0652814724279245, + "grad_norm": 0.09615305066108704, + "learning_rate": 3.7421505421723116e-05, + "loss": 0.8686, + "step": 3705 + }, + { + "ce_ib": 4.599855422973633, + "ce_orig": 1.4300897121429443, + "epoch": 1.0652814724279245, + "kl_loss": 0.046002984046936035, + "loss_ib": 0.000920015387237072, + "step": 3705 + }, + { + "ce_ib": 3.255092144012451, + "ce_orig": 0.8656002283096313, + "epoch": 1.0652814724279245, + "kl_loss": 0.08135408163070679, + "loss_ib": 0.001139050000347197, + "step": 3705 + }, + { + "ce_ib": 2.548391103744507, + "ce_orig": 0.5216647386550903, + "epoch": 1.0652814724279245, + "kl_loss": 0.029180876910686493, + "loss_ib": 0.0005466478760354221, + "step": 3705 + }, + { + "ce_ib": 3.0132617950439453, + "ce_orig": 0.8746702671051025, + "epoch": 1.0652814724279245, + "kl_loss": 0.045869432389736176, + "loss_ib": 0.0007600204553455114, + "step": 3705 + }, + { + "ce_ib": 2.450225830078125, + "ce_orig": 0.6126688122749329, + "epoch": 1.065569056006902, + "kl_loss": 0.03952305018901825, + "loss_ib": 0.000640253012534231, + "step": 3706 + }, + { + "ce_ib": 2.3920059204101562, + "ce_orig": 0.7171149253845215, + "epoch": 1.065569056006902, + "kl_loss": 0.03024386428296566, + "loss_ib": 0.00054163922322914, + "step": 3706 + }, + { + "ce_ib": 3.8915367126464844, + "ce_orig": 0.5577316880226135, + "epoch": 1.065569056006902, + "kl_loss": 0.0838368833065033, + "loss_ib": 0.0012275225017219782, + "step": 3706 + }, + { + "ce_ib": 5.160237789154053, + "ce_orig": 1.2029327154159546, + "epoch": 1.065569056006902, + "kl_loss": 0.03449711948633194, + "loss_ib": 0.0008609949727542698, + "step": 3706 + }, + { + "ce_ib": 3.7723984718322754, + "ce_orig": 0.7465708255767822, + "epoch": 1.0658566395858797, + "kl_loss": 0.05729403719305992, + "loss_ib": 0.0009501801687292755, + "step": 3707 + }, + { + "ce_ib": 3.8484718799591064, + "ce_orig": 1.1676583290100098, + "epoch": 1.0658566395858797, + "kl_loss": 0.03203035891056061, + "loss_ib": 0.0007051507709547877, + "step": 3707 + }, + { + "ce_ib": 2.1589834690093994, + "ce_orig": 0.5414144992828369, + "epoch": 1.0658566395858797, + "kl_loss": 0.04155128076672554, + "loss_ib": 0.0006314111524261534, + "step": 3707 + }, + { + "ce_ib": 2.0864200592041016, + "ce_orig": 0.4731903672218323, + "epoch": 1.0658566395858797, + "kl_loss": 0.03864756599068642, + "loss_ib": 0.00059511762810871, + "step": 3707 + }, + { + "ce_ib": 4.296876907348633, + "ce_orig": 1.2795157432556152, + "epoch": 1.0661442231648572, + "kl_loss": 0.046585563570261, + "loss_ib": 0.0008955433149822056, + "step": 3708 + }, + { + "ce_ib": 2.1676132678985596, + "ce_orig": 0.6510347723960876, + "epoch": 1.0661442231648572, + "kl_loss": 0.03081841766834259, + "loss_ib": 0.0005249454989098012, + "step": 3708 + }, + { + "ce_ib": 3.569732427597046, + "ce_orig": 1.019876480102539, + "epoch": 1.0661442231648572, + "kl_loss": 0.06017128378152847, + "loss_ib": 0.0009586859960108995, + "step": 3708 + }, + { + "ce_ib": 2.3137173652648926, + "ce_orig": 0.7062209844589233, + "epoch": 1.0661442231648572, + "kl_loss": 0.02969677932560444, + "loss_ib": 0.0005283395294100046, + "step": 3708 + }, + { + "ce_ib": 2.913337469100952, + "ce_orig": 0.924013614654541, + "epoch": 1.066431806743835, + "kl_loss": 0.0407131090760231, + "loss_ib": 0.0006984648061916232, + "step": 3709 + }, + { + "ce_ib": 3.5049476623535156, + "ce_orig": 0.8355153799057007, + "epoch": 1.066431806743835, + "kl_loss": 0.05460400879383087, + "loss_ib": 0.0008965348242782056, + "step": 3709 + }, + { + "ce_ib": 3.671797752380371, + "ce_orig": 0.8501824140548706, + "epoch": 1.066431806743835, + "kl_loss": 0.060824405401945114, + "loss_ib": 0.0009754237835295498, + "step": 3709 + }, + { + "ce_ib": 3.4822208881378174, + "ce_orig": 0.8690894246101379, + "epoch": 1.066431806743835, + "kl_loss": 0.04004519432783127, + "loss_ib": 0.0007486739777959883, + "step": 3709 + }, + { + "epoch": 1.0667193903228125, + "grad_norm": 0.10586987435817719, + "learning_rate": 3.738781495095975e-05, + "loss": 0.8168, + "step": 3710 + }, + { + "ce_ib": 4.489129066467285, + "ce_orig": 1.0418046712875366, + "epoch": 1.0667193903228125, + "kl_loss": 0.054424893110990524, + "loss_ib": 0.0009931618114933372, + "step": 3710 + }, + { + "ce_ib": 4.03631067276001, + "ce_orig": 0.982231616973877, + "epoch": 1.0667193903228125, + "kl_loss": 0.06968165934085846, + "loss_ib": 0.00110044761095196, + "step": 3710 + }, + { + "ce_ib": 3.7284674644470215, + "ce_orig": 0.8251299858093262, + "epoch": 1.0667193903228125, + "kl_loss": 0.02324589714407921, + "loss_ib": 0.0006053056567907333, + "step": 3710 + }, + { + "ce_ib": 4.837790012359619, + "ce_orig": 1.1676998138427734, + "epoch": 1.0667193903228125, + "kl_loss": 0.03671197220683098, + "loss_ib": 0.0008508986793458462, + "step": 3710 + }, + { + "ce_ib": 3.0384273529052734, + "ce_orig": 0.6570648550987244, + "epoch": 1.0670069739017902, + "kl_loss": 0.0385446771979332, + "loss_ib": 0.0006892894743941724, + "step": 3711 + }, + { + "ce_ib": 2.1689810752868652, + "ce_orig": 0.4032461941242218, + "epoch": 1.0670069739017902, + "kl_loss": 0.03285978361964226, + "loss_ib": 0.0005454959464259446, + "step": 3711 + }, + { + "ce_ib": 3.396413803100586, + "ce_orig": 0.959132194519043, + "epoch": 1.0670069739017902, + "kl_loss": 0.03321196138858795, + "loss_ib": 0.0006717609940096736, + "step": 3711 + }, + { + "ce_ib": 3.274298667907715, + "ce_orig": 0.7828475832939148, + "epoch": 1.0670069739017902, + "kl_loss": 0.05516919493675232, + "loss_ib": 0.0008791217696852982, + "step": 3711 + }, + { + "ce_ib": 3.3219077587127686, + "ce_orig": 0.9087718725204468, + "epoch": 1.0672945574807677, + "kl_loss": 0.043743204325437546, + "loss_ib": 0.0007696227403357625, + "step": 3712 + }, + { + "ce_ib": 3.630342721939087, + "ce_orig": 0.918617844581604, + "epoch": 1.0672945574807677, + "kl_loss": 0.04078122600913048, + "loss_ib": 0.0007708464981988072, + "step": 3712 + }, + { + "ce_ib": 4.107723236083984, + "ce_orig": 0.9441841244697571, + "epoch": 1.0672945574807677, + "kl_loss": 0.039565309882164, + "loss_ib": 0.0008064253488555551, + "step": 3712 + }, + { + "ce_ib": 2.6573894023895264, + "ce_orig": 0.8542035818099976, + "epoch": 1.0672945574807677, + "kl_loss": 0.03308672457933426, + "loss_ib": 0.0005966061726212502, + "step": 3712 + }, + { + "ce_ib": 3.545649528503418, + "ce_orig": 1.152747392654419, + "epoch": 1.0675821410597455, + "kl_loss": 0.04567503556609154, + "loss_ib": 0.0008113153162412345, + "step": 3713 + }, + { + "ce_ib": 3.7103374004364014, + "ce_orig": 1.0278754234313965, + "epoch": 1.0675821410597455, + "kl_loss": 0.04236917570233345, + "loss_ib": 0.0007947254925966263, + "step": 3713 + }, + { + "ce_ib": 2.8372673988342285, + "ce_orig": 0.486640065908432, + "epoch": 1.0675821410597455, + "kl_loss": 0.051253702491521835, + "loss_ib": 0.0007962637464515865, + "step": 3713 + }, + { + "ce_ib": 4.416391372680664, + "ce_orig": 1.3162368535995483, + "epoch": 1.0675821410597455, + "kl_loss": 0.09799365699291229, + "loss_ib": 0.0014215756673365831, + "step": 3713 + }, + { + "ce_ib": 3.0201833248138428, + "ce_orig": 0.6635204553604126, + "epoch": 1.0678697246387232, + "kl_loss": 0.06462661176919937, + "loss_ib": 0.0009482844034209847, + "step": 3714 + }, + { + "ce_ib": 4.373138427734375, + "ce_orig": 1.295785903930664, + "epoch": 1.0678697246387232, + "kl_loss": 0.04490745812654495, + "loss_ib": 0.0008863884140737355, + "step": 3714 + }, + { + "ce_ib": 4.334707736968994, + "ce_orig": 1.1992067098617554, + "epoch": 1.0678697246387232, + "kl_loss": 0.05340806394815445, + "loss_ib": 0.0009675513720139861, + "step": 3714 + }, + { + "ce_ib": 5.889805793762207, + "ce_orig": 1.3048275709152222, + "epoch": 1.0678697246387232, + "kl_loss": 0.05117582157254219, + "loss_ib": 0.0011007387656718493, + "step": 3714 + }, + { + "epoch": 1.0681573082177007, + "grad_norm": 0.10289550572633743, + "learning_rate": 3.73540946350749e-05, + "loss": 0.8254, + "step": 3715 + }, + { + "ce_ib": 3.1527650356292725, + "ce_orig": 0.5484619736671448, + "epoch": 1.0681573082177007, + "kl_loss": 0.03919094055891037, + "loss_ib": 0.0007071858271956444, + "step": 3715 + }, + { + "ce_ib": 2.236280679702759, + "ce_orig": 0.5248292088508606, + "epoch": 1.0681573082177007, + "kl_loss": 0.04069758579134941, + "loss_ib": 0.0006306038703769445, + "step": 3715 + }, + { + "ce_ib": 4.188019752502441, + "ce_orig": 0.9193821549415588, + "epoch": 1.0681573082177007, + "kl_loss": 0.05228213965892792, + "loss_ib": 0.0009416233515366912, + "step": 3715 + }, + { + "ce_ib": 1.9153844118118286, + "ce_orig": 0.5675480365753174, + "epoch": 1.0681573082177007, + "kl_loss": 0.022544460371136665, + "loss_ib": 0.0004169830644968897, + "step": 3715 + }, + { + "ce_ib": 2.686230421066284, + "ce_orig": 0.6071634292602539, + "epoch": 1.0684448917966785, + "kl_loss": 0.034764550626277924, + "loss_ib": 0.0006162684876471758, + "step": 3716 + }, + { + "ce_ib": 4.304211139678955, + "ce_orig": 0.6336016654968262, + "epoch": 1.0684448917966785, + "kl_loss": 0.053870752453804016, + "loss_ib": 0.0009691285667940974, + "step": 3716 + }, + { + "ce_ib": 2.968804359436035, + "ce_orig": 0.9149234890937805, + "epoch": 1.0684448917966785, + "kl_loss": 0.035784728825092316, + "loss_ib": 0.0006547277444042265, + "step": 3716 + }, + { + "ce_ib": 2.3781397342681885, + "ce_orig": 0.5995119214057922, + "epoch": 1.0684448917966785, + "kl_loss": 0.04706839472055435, + "loss_ib": 0.0007084978860802948, + "step": 3716 + }, + { + "ce_ib": 3.7293951511383057, + "ce_orig": 0.6947351098060608, + "epoch": 1.068732475375656, + "kl_loss": 0.07492461800575256, + "loss_ib": 0.0011221857275813818, + "step": 3717 + }, + { + "ce_ib": 3.155142307281494, + "ce_orig": 0.5712680816650391, + "epoch": 1.068732475375656, + "kl_loss": 0.047972358763217926, + "loss_ib": 0.0007952377782203257, + "step": 3717 + }, + { + "ce_ib": 3.341674327850342, + "ce_orig": 0.6052590012550354, + "epoch": 1.068732475375656, + "kl_loss": 0.05033385008573532, + "loss_ib": 0.0008375059114769101, + "step": 3717 + }, + { + "ce_ib": 6.911088466644287, + "ce_orig": 1.4601870775222778, + "epoch": 1.068732475375656, + "kl_loss": 0.05625183880329132, + "loss_ib": 0.0012536272406578064, + "step": 3717 + }, + { + "ce_ib": 5.325322151184082, + "ce_orig": 1.4803661108016968, + "epoch": 1.0690200589546337, + "kl_loss": 0.04731673374772072, + "loss_ib": 0.0010056995088234544, + "step": 3718 + }, + { + "ce_ib": 6.287995338439941, + "ce_orig": 1.1765108108520508, + "epoch": 1.0690200589546337, + "kl_loss": 0.047224268317222595, + "loss_ib": 0.0011010421440005302, + "step": 3718 + }, + { + "ce_ib": 4.158463001251221, + "ce_orig": 1.2924870252609253, + "epoch": 1.0690200589546337, + "kl_loss": 0.04422004520893097, + "loss_ib": 0.0008580466965213418, + "step": 3718 + }, + { + "ce_ib": 2.5410428047180176, + "ce_orig": 0.6940717697143555, + "epoch": 1.0690200589546337, + "kl_loss": 0.03939535468816757, + "loss_ib": 0.0006480578449554741, + "step": 3718 + }, + { + "ce_ib": 3.762648820877075, + "ce_orig": 1.0142217874526978, + "epoch": 1.0693076425336114, + "kl_loss": 0.03659554570913315, + "loss_ib": 0.0007422202615998685, + "step": 3719 + }, + { + "ce_ib": 2.518954277038574, + "ce_orig": 0.6327720880508423, + "epoch": 1.0693076425336114, + "kl_loss": 0.04780279099941254, + "loss_ib": 0.0007299233111552894, + "step": 3719 + }, + { + "ce_ib": 3.6128976345062256, + "ce_orig": 0.9126882553100586, + "epoch": 1.0693076425336114, + "kl_loss": 0.05703972652554512, + "loss_ib": 0.0009316870127804577, + "step": 3719 + }, + { + "ce_ib": 3.0529940128326416, + "ce_orig": 0.7830175757408142, + "epoch": 1.0693076425336114, + "kl_loss": 0.041609928011894226, + "loss_ib": 0.0007213986245915294, + "step": 3719 + }, + { + "epoch": 1.069595226112589, + "grad_norm": 0.09957291185855865, + "learning_rate": 3.732034455530863e-05, + "loss": 0.826, + "step": 3720 + }, + { + "ce_ib": 3.6923022270202637, + "ce_orig": 0.6440872550010681, + "epoch": 1.069595226112589, + "kl_loss": 0.0471377857029438, + "loss_ib": 0.0008406080305576324, + "step": 3720 + }, + { + "ce_ib": 4.362018585205078, + "ce_orig": 1.4008818864822388, + "epoch": 1.069595226112589, + "kl_loss": 0.030621755868196487, + "loss_ib": 0.0007424193318001926, + "step": 3720 + }, + { + "ce_ib": 3.676483154296875, + "ce_orig": 1.2497026920318604, + "epoch": 1.069595226112589, + "kl_loss": 0.08046165853738785, + "loss_ib": 0.0011722648050636053, + "step": 3720 + }, + { + "ce_ib": 3.0897419452667236, + "ce_orig": 0.6741242408752441, + "epoch": 1.069595226112589, + "kl_loss": 0.03475351259112358, + "loss_ib": 0.000656509306281805, + "step": 3720 + }, + { + "ce_ib": 4.51775598526001, + "ce_orig": 0.9719982743263245, + "epoch": 1.0698828096915667, + "kl_loss": 0.05110887065529823, + "loss_ib": 0.000962864316534251, + "step": 3721 + }, + { + "ce_ib": 4.492781639099121, + "ce_orig": 1.318568468093872, + "epoch": 1.0698828096915667, + "kl_loss": 0.04375626891851425, + "loss_ib": 0.0008868408040143549, + "step": 3721 + }, + { + "ce_ib": 3.283809185028076, + "ce_orig": 0.7646835446357727, + "epoch": 1.0698828096915667, + "kl_loss": 0.040442146360874176, + "loss_ib": 0.0007328023784793913, + "step": 3721 + }, + { + "ce_ib": 3.1001620292663574, + "ce_orig": 0.6097191572189331, + "epoch": 1.0698828096915667, + "kl_loss": 0.04432224482297897, + "loss_ib": 0.0007532386225648224, + "step": 3721 + }, + { + "ce_ib": 4.483544826507568, + "ce_orig": 1.3336526155471802, + "epoch": 1.0701703932705442, + "kl_loss": 0.028892068192362785, + "loss_ib": 0.000737275171559304, + "step": 3722 + }, + { + "ce_ib": 3.7492458820343018, + "ce_orig": 1.0867607593536377, + "epoch": 1.0701703932705442, + "kl_loss": 0.04184802249073982, + "loss_ib": 0.0007934047607704997, + "step": 3722 + }, + { + "ce_ib": 3.7446255683898926, + "ce_orig": 0.9393448829650879, + "epoch": 1.0701703932705442, + "kl_loss": 0.028989512473344803, + "loss_ib": 0.0006643576780334115, + "step": 3722 + }, + { + "ce_ib": 3.9427993297576904, + "ce_orig": 0.9567840099334717, + "epoch": 1.0701703932705442, + "kl_loss": 0.05631110072135925, + "loss_ib": 0.0009573909337632358, + "step": 3722 + }, + { + "ce_ib": 2.489934206008911, + "ce_orig": 0.5841420888900757, + "epoch": 1.070457976849522, + "kl_loss": 0.031724072992801666, + "loss_ib": 0.0005662341136485338, + "step": 3723 + }, + { + "ce_ib": 2.2135117053985596, + "ce_orig": 0.6011503338813782, + "epoch": 1.070457976849522, + "kl_loss": 0.04228500649333, + "loss_ib": 0.0006442011799663305, + "step": 3723 + }, + { + "ce_ib": 3.243891716003418, + "ce_orig": 0.7756053805351257, + "epoch": 1.070457976849522, + "kl_loss": 0.03210277110338211, + "loss_ib": 0.0006454167887568474, + "step": 3723 + }, + { + "ce_ib": 2.293092966079712, + "ce_orig": 0.608695387840271, + "epoch": 1.070457976849522, + "kl_loss": 0.030665773898363113, + "loss_ib": 0.0005359670612961054, + "step": 3723 + }, + { + "ce_ib": 3.2469146251678467, + "ce_orig": 0.6239011883735657, + "epoch": 1.0707455604284994, + "kl_loss": 0.04846879094839096, + "loss_ib": 0.0008093793294392526, + "step": 3724 + }, + { + "ce_ib": 4.15523099899292, + "ce_orig": 1.043041467666626, + "epoch": 1.0707455604284994, + "kl_loss": 0.05886118859052658, + "loss_ib": 0.0010041350033134222, + "step": 3724 + }, + { + "ce_ib": 3.1343936920166016, + "ce_orig": 0.6139746904373169, + "epoch": 1.0707455604284994, + "kl_loss": 0.07044880092144012, + "loss_ib": 0.0010179273085668683, + "step": 3724 + }, + { + "ce_ib": 2.699686288833618, + "ce_orig": 0.601594090461731, + "epoch": 1.0707455604284994, + "kl_loss": 0.04759587347507477, + "loss_ib": 0.0007459273911081254, + "step": 3724 + }, + { + "epoch": 1.0710331440074772, + "grad_norm": 0.09612316638231277, + "learning_rate": 3.7286564792972714e-05, + "loss": 0.8886, + "step": 3725 + }, + { + "ce_ib": 3.8042616844177246, + "ce_orig": 1.0283782482147217, + "epoch": 1.0710331440074772, + "kl_loss": 0.030841421335935593, + "loss_ib": 0.0006888403440825641, + "step": 3725 + }, + { + "ce_ib": 2.4589295387268066, + "ce_orig": 0.5300339460372925, + "epoch": 1.0710331440074772, + "kl_loss": 0.049380455166101456, + "loss_ib": 0.0007396974833682179, + "step": 3725 + }, + { + "ce_ib": 2.475780963897705, + "ce_orig": 0.7129705548286438, + "epoch": 1.0710331440074772, + "kl_loss": 0.04657047986984253, + "loss_ib": 0.0007132829050533473, + "step": 3725 + }, + { + "ce_ib": 2.664874315261841, + "ce_orig": 0.7232853174209595, + "epoch": 1.0710331440074772, + "kl_loss": 0.035655826330184937, + "loss_ib": 0.0006230457220226526, + "step": 3725 + }, + { + "ce_ib": 3.19386625289917, + "ce_orig": 0.9869571924209595, + "epoch": 1.071320727586455, + "kl_loss": 0.055440083146095276, + "loss_ib": 0.0008737873868085444, + "step": 3726 + }, + { + "ce_ib": 4.113348007202148, + "ce_orig": 1.0878068208694458, + "epoch": 1.071320727586455, + "kl_loss": 0.05299929529428482, + "loss_ib": 0.0009413277148269117, + "step": 3726 + }, + { + "ce_ib": 3.5738799571990967, + "ce_orig": 0.7590008974075317, + "epoch": 1.071320727586455, + "kl_loss": 0.050530675798654556, + "loss_ib": 0.0008626947528682649, + "step": 3726 + }, + { + "ce_ib": 2.934497117996216, + "ce_orig": 0.313686341047287, + "epoch": 1.071320727586455, + "kl_loss": 0.02024720050394535, + "loss_ib": 0.0004959217039868236, + "step": 3726 + }, + { + "ce_ib": 2.4496428966522217, + "ce_orig": 0.31392812728881836, + "epoch": 1.0716083111654324, + "kl_loss": 0.05052979663014412, + "loss_ib": 0.0007502622902393341, + "step": 3727 + }, + { + "ce_ib": 3.230501890182495, + "ce_orig": 0.7868572473526001, + "epoch": 1.0716083111654324, + "kl_loss": 0.04817824810743332, + "loss_ib": 0.0008048326126299798, + "step": 3727 + }, + { + "ce_ib": 3.744314432144165, + "ce_orig": 0.8774124383926392, + "epoch": 1.0716083111654324, + "kl_loss": 0.02735963463783264, + "loss_ib": 0.0006480278098024428, + "step": 3727 + }, + { + "ce_ib": 4.362041473388672, + "ce_orig": 1.3143998384475708, + "epoch": 1.0716083111654324, + "kl_loss": 0.04388037323951721, + "loss_ib": 0.000875007885042578, + "step": 3727 + }, + { + "ce_ib": 1.9417140483856201, + "ce_orig": 0.4456411302089691, + "epoch": 1.0718958947444102, + "kl_loss": 0.04980877414345741, + "loss_ib": 0.0006922591128386557, + "step": 3728 + }, + { + "ce_ib": 2.532369375228882, + "ce_orig": 0.8635539412498474, + "epoch": 1.0718958947444102, + "kl_loss": 0.03239889442920685, + "loss_ib": 0.0005772258737124503, + "step": 3728 + }, + { + "ce_ib": 2.5088109970092773, + "ce_orig": 0.6337196826934814, + "epoch": 1.0718958947444102, + "kl_loss": 0.05259857326745987, + "loss_ib": 0.0007768668001517653, + "step": 3728 + }, + { + "ce_ib": 1.3603931665420532, + "ce_orig": 0.27231162786483765, + "epoch": 1.0718958947444102, + "kl_loss": 0.03606680408120155, + "loss_ib": 0.0004967073327861726, + "step": 3728 + }, + { + "ce_ib": 4.871379375457764, + "ce_orig": 1.415907621383667, + "epoch": 1.0721834783233877, + "kl_loss": 0.05977868288755417, + "loss_ib": 0.0010849247919395566, + "step": 3729 + }, + { + "ce_ib": 3.381963014602661, + "ce_orig": 0.9984756708145142, + "epoch": 1.0721834783233877, + "kl_loss": 0.09274649620056152, + "loss_ib": 0.0012656612088903785, + "step": 3729 + }, + { + "ce_ib": 3.052769422531128, + "ce_orig": 0.6411299705505371, + "epoch": 1.0721834783233877, + "kl_loss": 0.028008120134472847, + "loss_ib": 0.0005853581242263317, + "step": 3729 + }, + { + "ce_ib": 4.091670036315918, + "ce_orig": 1.085942029953003, + "epoch": 1.0721834783233877, + "kl_loss": 0.04316779971122742, + "loss_ib": 0.0008408449357375503, + "step": 3729 + }, + { + "epoch": 1.0724710619023654, + "grad_norm": 0.09862092137336731, + "learning_rate": 3.7252755429450434e-05, + "loss": 0.7812, + "step": 3730 + }, + { + "ce_ib": 3.500448226928711, + "ce_orig": 1.0989092588424683, + "epoch": 1.0724710619023654, + "kl_loss": 0.0464499294757843, + "loss_ib": 0.0008145440951921046, + "step": 3730 + }, + { + "ce_ib": 3.418060064315796, + "ce_orig": 0.782710611820221, + "epoch": 1.0724710619023654, + "kl_loss": 0.053838588297367096, + "loss_ib": 0.0008801919175311923, + "step": 3730 + }, + { + "ce_ib": 2.0953898429870605, + "ce_orig": 0.38941478729248047, + "epoch": 1.0724710619023654, + "kl_loss": 0.04025029391050339, + "loss_ib": 0.0006120419129729271, + "step": 3730 + }, + { + "ce_ib": 1.7597601413726807, + "ce_orig": 0.45982012152671814, + "epoch": 1.0724710619023654, + "kl_loss": 0.025732148438692093, + "loss_ib": 0.0004332974785938859, + "step": 3730 + }, + { + "ce_ib": 5.211580753326416, + "ce_orig": 1.6717482805252075, + "epoch": 1.072758645481343, + "kl_loss": 0.04578171297907829, + "loss_ib": 0.0009789750911295414, + "step": 3731 + }, + { + "ce_ib": 4.659128665924072, + "ce_orig": 1.3590636253356934, + "epoch": 1.072758645481343, + "kl_loss": 0.046156056225299835, + "loss_ib": 0.0009274734184145927, + "step": 3731 + }, + { + "ce_ib": 4.026351451873779, + "ce_orig": 1.1529089212417603, + "epoch": 1.072758645481343, + "kl_loss": 0.04406839981675148, + "loss_ib": 0.0008433191105723381, + "step": 3731 + }, + { + "ce_ib": 4.790724754333496, + "ce_orig": 1.0609525442123413, + "epoch": 1.072758645481343, + "kl_loss": 0.05487080663442612, + "loss_ib": 0.0010277804685756564, + "step": 3731 + }, + { + "ce_ib": 5.427001476287842, + "ce_orig": 1.5494410991668701, + "epoch": 1.0730462290603207, + "kl_loss": 0.050097815692424774, + "loss_ib": 0.0010436782613396645, + "step": 3732 + }, + { + "ce_ib": 3.831482172012329, + "ce_orig": 1.010475516319275, + "epoch": 1.0730462290603207, + "kl_loss": 0.039849743247032166, + "loss_ib": 0.0007816455909051001, + "step": 3732 + }, + { + "ce_ib": 2.6988816261291504, + "ce_orig": 0.6278862953186035, + "epoch": 1.0730462290603207, + "kl_loss": 0.04843360185623169, + "loss_ib": 0.0007542241946794093, + "step": 3732 + }, + { + "ce_ib": 6.076293468475342, + "ce_orig": 1.783440113067627, + "epoch": 1.0730462290603207, + "kl_loss": 0.06399589031934738, + "loss_ib": 0.0012475881958380342, + "step": 3732 + }, + { + "ce_ib": 3.361490249633789, + "ce_orig": 0.8621534109115601, + "epoch": 1.0733338126392984, + "kl_loss": 0.032120682299137115, + "loss_ib": 0.0006573557620868087, + "step": 3733 + }, + { + "ce_ib": 4.780618667602539, + "ce_orig": 1.0696243047714233, + "epoch": 1.0733338126392984, + "kl_loss": 0.06219996511936188, + "loss_ib": 0.0011000614613294601, + "step": 3733 + }, + { + "ce_ib": 3.7682695388793945, + "ce_orig": 0.794006884098053, + "epoch": 1.0733338126392984, + "kl_loss": 0.06829790771007538, + "loss_ib": 0.0010598059743642807, + "step": 3733 + }, + { + "ce_ib": 4.253345966339111, + "ce_orig": 0.6626226902008057, + "epoch": 1.0733338126392984, + "kl_loss": 0.08044514060020447, + "loss_ib": 0.0012297859648242593, + "step": 3733 + }, + { + "ce_ib": 2.613436222076416, + "ce_orig": 0.9075227975845337, + "epoch": 1.073621396218276, + "kl_loss": 0.047092050313949585, + "loss_ib": 0.0007322641322389245, + "step": 3734 + }, + { + "ce_ib": 2.715514898300171, + "ce_orig": 1.0304385423660278, + "epoch": 1.073621396218276, + "kl_loss": 0.04003439098596573, + "loss_ib": 0.0006718953372910619, + "step": 3734 + }, + { + "ce_ib": 4.776790618896484, + "ce_orig": 1.046550989151001, + "epoch": 1.073621396218276, + "kl_loss": 0.05261962115764618, + "loss_ib": 0.0010038752807304263, + "step": 3734 + }, + { + "ce_ib": 2.133654832839966, + "ce_orig": 0.6346238851547241, + "epoch": 1.073621396218276, + "kl_loss": 0.02848782017827034, + "loss_ib": 0.0004982436657883227, + "step": 3734 + }, + { + "epoch": 1.0739089797972536, + "grad_norm": 0.10402923077344894, + "learning_rate": 3.72189165461964e-05, + "loss": 0.8701, + "step": 3735 + }, + { + "ce_ib": 4.052002906799316, + "ce_orig": 0.9400032758712769, + "epoch": 1.0739089797972536, + "kl_loss": 0.050405729562044144, + "loss_ib": 0.0009092575055547059, + "step": 3735 + }, + { + "ce_ib": 3.2456252574920654, + "ce_orig": 0.7809160947799683, + "epoch": 1.0739089797972536, + "kl_loss": 0.03670550510287285, + "loss_ib": 0.0006916175480000675, + "step": 3735 + }, + { + "ce_ib": 3.6168556213378906, + "ce_orig": 0.5979524254798889, + "epoch": 1.0739089797972536, + "kl_loss": 0.04447198286652565, + "loss_ib": 0.0008064053836278617, + "step": 3735 + }, + { + "ce_ib": 1.7260167598724365, + "ce_orig": 0.43302223086357117, + "epoch": 1.0739089797972536, + "kl_loss": 0.05154994875192642, + "loss_ib": 0.000688101164996624, + "step": 3735 + }, + { + "ce_ib": 2.258509397506714, + "ce_orig": 0.5033326148986816, + "epoch": 1.0741965633762312, + "kl_loss": 0.057241037487983704, + "loss_ib": 0.0007982613169588149, + "step": 3736 + }, + { + "ce_ib": 5.532011032104492, + "ce_orig": 1.198176622390747, + "epoch": 1.0741965633762312, + "kl_loss": 0.06717093288898468, + "loss_ib": 0.0012249104911461473, + "step": 3736 + }, + { + "ce_ib": 2.656714916229248, + "ce_orig": 0.7936945557594299, + "epoch": 1.0741965633762312, + "kl_loss": 0.0414620116353035, + "loss_ib": 0.0006802915595471859, + "step": 3736 + }, + { + "ce_ib": 2.5629751682281494, + "ce_orig": 0.42408230900764465, + "epoch": 1.0741965633762312, + "kl_loss": 0.061639126390218735, + "loss_ib": 0.0008726887172088027, + "step": 3736 + }, + { + "ce_ib": 3.4468555450439453, + "ce_orig": 0.8314971923828125, + "epoch": 1.0744841469552089, + "kl_loss": 0.03182309493422508, + "loss_ib": 0.000662916456349194, + "step": 3737 + }, + { + "ce_ib": 2.535679578781128, + "ce_orig": 0.49384376406669617, + "epoch": 1.0744841469552089, + "kl_loss": 0.039856866002082825, + "loss_ib": 0.0006521366303786635, + "step": 3737 + }, + { + "ce_ib": 3.5714874267578125, + "ce_orig": 0.5496352910995483, + "epoch": 1.0744841469552089, + "kl_loss": 0.06956633925437927, + "loss_ib": 0.001052812091074884, + "step": 3737 + }, + { + "ce_ib": 1.5590496063232422, + "ce_orig": 0.5023605823516846, + "epoch": 1.0744841469552089, + "kl_loss": 0.03446251153945923, + "loss_ib": 0.0005005300627090037, + "step": 3737 + }, + { + "ce_ib": 2.870776653289795, + "ce_orig": 0.554670512676239, + "epoch": 1.0747717305341864, + "kl_loss": 0.04753854125738144, + "loss_ib": 0.0007624630234204233, + "step": 3738 + }, + { + "ce_ib": 3.0372672080993652, + "ce_orig": 0.603461742401123, + "epoch": 1.0747717305341864, + "kl_loss": 0.027511583641171455, + "loss_ib": 0.0005788425332866609, + "step": 3738 + }, + { + "ce_ib": 4.497758865356445, + "ce_orig": 1.147395133972168, + "epoch": 1.0747717305341864, + "kl_loss": 0.043528713285923004, + "loss_ib": 0.0008850629674270749, + "step": 3738 + }, + { + "ce_ib": 4.8646745681762695, + "ce_orig": 1.397773027420044, + "epoch": 1.0747717305341864, + "kl_loss": 0.06930507719516754, + "loss_ib": 0.0011795181781053543, + "step": 3738 + }, + { + "ce_ib": 2.3075807094573975, + "ce_orig": 0.4390535354614258, + "epoch": 1.0750593141131641, + "kl_loss": 0.03044825792312622, + "loss_ib": 0.0005352406296879053, + "step": 3739 + }, + { + "ce_ib": 5.731404781341553, + "ce_orig": 1.2446249723434448, + "epoch": 1.0750593141131641, + "kl_loss": 0.0451880544424057, + "loss_ib": 0.0010250209597870708, + "step": 3739 + }, + { + "ce_ib": 3.0973010063171387, + "ce_orig": 0.5088921189308167, + "epoch": 1.0750593141131641, + "kl_loss": 0.05850057303905487, + "loss_ib": 0.0008947358001023531, + "step": 3739 + }, + { + "ce_ib": 3.525965929031372, + "ce_orig": 1.1036341190338135, + "epoch": 1.0750593141131641, + "kl_loss": 0.04492630437016487, + "loss_ib": 0.0008018596563488245, + "step": 3739 + }, + { + "epoch": 1.0753468976921419, + "grad_norm": 0.10275167226791382, + "learning_rate": 3.718504822473634e-05, + "loss": 0.7852, + "step": 3740 + }, + { + "ce_ib": 3.7843425273895264, + "ce_orig": 0.8523989319801331, + "epoch": 1.0753468976921419, + "kl_loss": 0.06214965879917145, + "loss_ib": 0.0009999307803809643, + "step": 3740 + }, + { + "ce_ib": 2.3877506256103516, + "ce_orig": 0.7027493715286255, + "epoch": 1.0753468976921419, + "kl_loss": 0.030168021097779274, + "loss_ib": 0.0005404552794061601, + "step": 3740 + }, + { + "ce_ib": 2.243166208267212, + "ce_orig": 0.5993360280990601, + "epoch": 1.0753468976921419, + "kl_loss": 0.02809070236980915, + "loss_ib": 0.0005052236374467611, + "step": 3740 + }, + { + "ce_ib": 2.382556438446045, + "ce_orig": 0.5878341197967529, + "epoch": 1.0753468976921419, + "kl_loss": 0.0424627810716629, + "loss_ib": 0.0006628834526054561, + "step": 3740 + }, + { + "ce_ib": 4.378889083862305, + "ce_orig": 1.2585715055465698, + "epoch": 1.0756344812711194, + "kl_loss": 0.032584670931100845, + "loss_ib": 0.0007637356175109744, + "step": 3741 + }, + { + "ce_ib": 3.6448309421539307, + "ce_orig": 1.0383021831512451, + "epoch": 1.0756344812711194, + "kl_loss": 0.04713035374879837, + "loss_ib": 0.0008357865735888481, + "step": 3741 + }, + { + "ce_ib": 4.488763809204102, + "ce_orig": 1.230396032333374, + "epoch": 1.0756344812711194, + "kl_loss": 0.0682128518819809, + "loss_ib": 0.0011310047702863812, + "step": 3741 + }, + { + "ce_ib": 2.8462014198303223, + "ce_orig": 0.564372181892395, + "epoch": 1.0756344812711194, + "kl_loss": 0.04554280266165733, + "loss_ib": 0.0007400481263175607, + "step": 3741 + }, + { + "ce_ib": 3.89701771736145, + "ce_orig": 0.5818473696708679, + "epoch": 1.0759220648500971, + "kl_loss": 0.06911394000053406, + "loss_ib": 0.0010808410588651896, + "step": 3742 + }, + { + "ce_ib": 3.344931125640869, + "ce_orig": 0.7413668632507324, + "epoch": 1.0759220648500971, + "kl_loss": 0.028693128377199173, + "loss_ib": 0.0006214244058355689, + "step": 3742 + }, + { + "ce_ib": 2.005584478378296, + "ce_orig": 0.5790125727653503, + "epoch": 1.0759220648500971, + "kl_loss": 0.030735984444618225, + "loss_ib": 0.0005079183029010892, + "step": 3742 + }, + { + "ce_ib": 1.6239616870880127, + "ce_orig": 0.33639398217201233, + "epoch": 1.0759220648500971, + "kl_loss": 0.0575551874935627, + "loss_ib": 0.0007379480521194637, + "step": 3742 + }, + { + "ce_ib": 2.477764368057251, + "ce_orig": 0.5761188864707947, + "epoch": 1.0762096484290746, + "kl_loss": 0.0913110002875328, + "loss_ib": 0.0011608863715082407, + "step": 3743 + }, + { + "ce_ib": 4.120965003967285, + "ce_orig": 0.8882102966308594, + "epoch": 1.0762096484290746, + "kl_loss": 0.03993844985961914, + "loss_ib": 0.0008114810334518552, + "step": 3743 + }, + { + "ce_ib": 4.284945964813232, + "ce_orig": 1.171046257019043, + "epoch": 1.0762096484290746, + "kl_loss": 0.03986494988203049, + "loss_ib": 0.0008271440747193992, + "step": 3743 + }, + { + "ce_ib": 3.03143048286438, + "ce_orig": 0.7703272700309753, + "epoch": 1.0762096484290746, + "kl_loss": 0.03709666430950165, + "loss_ib": 0.000674109673127532, + "step": 3743 + }, + { + "ce_ib": 3.7366631031036377, + "ce_orig": 0.8186501860618591, + "epoch": 1.0764972320080524, + "kl_loss": 0.03498067334294319, + "loss_ib": 0.0007234729710035026, + "step": 3744 + }, + { + "ce_ib": 4.606304168701172, + "ce_orig": 0.9461236596107483, + "epoch": 1.0764972320080524, + "kl_loss": 0.07422121614217758, + "loss_ib": 0.0012028425699099898, + "step": 3744 + }, + { + "ce_ib": 3.802522897720337, + "ce_orig": 0.5403833985328674, + "epoch": 1.0764972320080524, + "kl_loss": 0.06333082914352417, + "loss_ib": 0.00101356056984514, + "step": 3744 + }, + { + "ce_ib": 3.4345040321350098, + "ce_orig": 0.9256139993667603, + "epoch": 1.0764972320080524, + "kl_loss": 0.04704953730106354, + "loss_ib": 0.0008139457204379141, + "step": 3744 + }, + { + "epoch": 1.0767848155870299, + "grad_norm": 0.1286022663116455, + "learning_rate": 3.715115054666689e-05, + "loss": 0.8167, + "step": 3745 + }, + { + "ce_ib": 3.2653098106384277, + "ce_orig": 0.8250822424888611, + "epoch": 1.0767848155870299, + "kl_loss": 0.044718630611896515, + "loss_ib": 0.0007737173000350595, + "step": 3745 + }, + { + "ce_ib": 2.975114107131958, + "ce_orig": 0.6405885219573975, + "epoch": 1.0767848155870299, + "kl_loss": 0.04315364733338356, + "loss_ib": 0.0007290478679351509, + "step": 3745 + }, + { + "ce_ib": 2.510399341583252, + "ce_orig": 0.3832700252532959, + "epoch": 1.0767848155870299, + "kl_loss": 0.06811410933732986, + "loss_ib": 0.0009321810211986303, + "step": 3745 + }, + { + "ce_ib": 2.3407180309295654, + "ce_orig": 0.6423132419586182, + "epoch": 1.0767848155870299, + "kl_loss": 0.030599582940340042, + "loss_ib": 0.0005400676163844764, + "step": 3745 + }, + { + "ce_ib": 4.690037250518799, + "ce_orig": 1.1206181049346924, + "epoch": 1.0770723991660076, + "kl_loss": 0.1060413271188736, + "loss_ib": 0.0015294170007109642, + "step": 3746 + }, + { + "ce_ib": 3.8572256565093994, + "ce_orig": 0.9946104884147644, + "epoch": 1.0770723991660076, + "kl_loss": 0.06038079783320427, + "loss_ib": 0.0009895304683595896, + "step": 3746 + }, + { + "ce_ib": 3.4841718673706055, + "ce_orig": 1.0802329778671265, + "epoch": 1.0770723991660076, + "kl_loss": 0.0626678615808487, + "loss_ib": 0.0009750957833603024, + "step": 3746 + }, + { + "ce_ib": 3.672058343887329, + "ce_orig": 1.2084126472473145, + "epoch": 1.0770723991660076, + "kl_loss": 0.03863290697336197, + "loss_ib": 0.0007535348413512111, + "step": 3746 + }, + { + "ce_ib": 2.6009957790374756, + "ce_orig": 0.6069030165672302, + "epoch": 1.0773599827449853, + "kl_loss": 0.036413341760635376, + "loss_ib": 0.0006242329254746437, + "step": 3747 + }, + { + "ce_ib": 5.838372707366943, + "ce_orig": 1.584053635597229, + "epoch": 1.0773599827449853, + "kl_loss": 0.0672602653503418, + "loss_ib": 0.001256439951248467, + "step": 3747 + }, + { + "ce_ib": 2.1829962730407715, + "ce_orig": 0.47132113575935364, + "epoch": 1.0773599827449853, + "kl_loss": 0.03392815589904785, + "loss_ib": 0.0005575811956077814, + "step": 3747 + }, + { + "ce_ib": 4.289677143096924, + "ce_orig": 1.2042756080627441, + "epoch": 1.0773599827449853, + "kl_loss": 0.07078863680362701, + "loss_ib": 0.0011368540581315756, + "step": 3747 + }, + { + "ce_ib": 4.099966526031494, + "ce_orig": 0.9212923049926758, + "epoch": 1.0776475663239629, + "kl_loss": 0.0704575628042221, + "loss_ib": 0.001114572281949222, + "step": 3748 + }, + { + "ce_ib": 2.7454700469970703, + "ce_orig": 0.7544722557067871, + "epoch": 1.0776475663239629, + "kl_loss": 0.04260788485407829, + "loss_ib": 0.0007006258238106966, + "step": 3748 + }, + { + "ce_ib": 2.815194845199585, + "ce_orig": 0.8034286499023438, + "epoch": 1.0776475663239629, + "kl_loss": 0.03678389638662338, + "loss_ib": 0.0006493584951385856, + "step": 3748 + }, + { + "ce_ib": 2.5527918338775635, + "ce_orig": 0.27761101722717285, + "epoch": 1.0776475663239629, + "kl_loss": 0.04891074821352959, + "loss_ib": 0.000744386576116085, + "step": 3748 + }, + { + "ce_ib": 3.2915165424346924, + "ce_orig": 0.8375396132469177, + "epoch": 1.0779351499029406, + "kl_loss": 0.02858259156346321, + "loss_ib": 0.0006149775581434369, + "step": 3749 + }, + { + "ce_ib": 4.356281280517578, + "ce_orig": 1.307862639427185, + "epoch": 1.0779351499029406, + "kl_loss": 0.04107481986284256, + "loss_ib": 0.0008463762351311743, + "step": 3749 + }, + { + "ce_ib": 3.4398038387298584, + "ce_orig": 0.9315783977508545, + "epoch": 1.0779351499029406, + "kl_loss": 0.041514329612255096, + "loss_ib": 0.0007591237081214786, + "step": 3749 + }, + { + "ce_ib": 3.555527687072754, + "ce_orig": 0.8087897300720215, + "epoch": 1.0779351499029406, + "kl_loss": 0.03328404575586319, + "loss_ib": 0.0006883932510390878, + "step": 3749 + }, + { + "epoch": 1.078222733481918, + "grad_norm": 0.10317705571651459, + "learning_rate": 3.7117223593655435e-05, + "loss": 0.8376, + "step": 3750 + }, + { + "ce_ib": 2.6118807792663574, + "ce_orig": 0.764166533946991, + "epoch": 1.078222733481918, + "kl_loss": 0.03567945957183838, + "loss_ib": 0.0006179826450534165, + "step": 3750 + }, + { + "ce_ib": 4.434514045715332, + "ce_orig": 1.4044498205184937, + "epoch": 1.078222733481918, + "kl_loss": 0.030407868325710297, + "loss_ib": 0.0007475300808437169, + "step": 3750 + }, + { + "ce_ib": 2.4697227478027344, + "ce_orig": 0.8836096525192261, + "epoch": 1.078222733481918, + "kl_loss": 0.029306169599294662, + "loss_ib": 0.0005400339141488075, + "step": 3750 + }, + { + "ce_ib": 2.1138291358947754, + "ce_orig": 0.4813932776451111, + "epoch": 1.078222733481918, + "kl_loss": 0.04745061323046684, + "loss_ib": 0.0006858890410512686, + "step": 3750 + }, + { + "ce_ib": 6.369159698486328, + "ce_orig": 1.826398491859436, + "epoch": 1.0785103170608958, + "kl_loss": 0.04696196690201759, + "loss_ib": 0.0011065356666222215, + "step": 3751 + }, + { + "ce_ib": 2.9926693439483643, + "ce_orig": 0.24375484883785248, + "epoch": 1.0785103170608958, + "kl_loss": 0.08326564729213715, + "loss_ib": 0.0011319234035909176, + "step": 3751 + }, + { + "ce_ib": 2.5696816444396973, + "ce_orig": 0.5395083427429199, + "epoch": 1.0785103170608958, + "kl_loss": 0.029291575774550438, + "loss_ib": 0.0005498839309439063, + "step": 3751 + }, + { + "ce_ib": 2.005290985107422, + "ce_orig": 0.5346301198005676, + "epoch": 1.0785103170608958, + "kl_loss": 0.02562306448817253, + "loss_ib": 0.00045675973524339497, + "step": 3751 + }, + { + "ce_ib": 4.509181976318359, + "ce_orig": 1.4442332983016968, + "epoch": 1.0787979006398736, + "kl_loss": 0.12323868274688721, + "loss_ib": 0.0016833050176501274, + "step": 3752 + }, + { + "ce_ib": 3.006624698638916, + "ce_orig": 0.5759540796279907, + "epoch": 1.0787979006398736, + "kl_loss": 0.052328407764434814, + "loss_ib": 0.0008239464950747788, + "step": 3752 + }, + { + "ce_ib": 3.3180766105651855, + "ce_orig": 0.8151203393936157, + "epoch": 1.0787979006398736, + "kl_loss": 0.032863371074199677, + "loss_ib": 0.0006604413501918316, + "step": 3752 + }, + { + "ce_ib": 3.1668002605438232, + "ce_orig": 0.9334075450897217, + "epoch": 1.0787979006398736, + "kl_loss": 0.037761662155389786, + "loss_ib": 0.0006942966138012707, + "step": 3752 + }, + { + "ce_ib": 3.8823740482330322, + "ce_orig": 0.9241123199462891, + "epoch": 1.079085484218851, + "kl_loss": 0.05448257178068161, + "loss_ib": 0.000933063100092113, + "step": 3753 + }, + { + "ce_ib": 3.553040027618408, + "ce_orig": 1.0182349681854248, + "epoch": 1.079085484218851, + "kl_loss": 0.05299162119626999, + "loss_ib": 0.0008852201281115413, + "step": 3753 + }, + { + "ce_ib": 3.7495532035827637, + "ce_orig": 0.7404537200927734, + "epoch": 1.079085484218851, + "kl_loss": 0.04700678586959839, + "loss_ib": 0.0008450231398455799, + "step": 3753 + }, + { + "ce_ib": 3.1838743686676025, + "ce_orig": 0.7627681493759155, + "epoch": 1.079085484218851, + "kl_loss": 0.06599771976470947, + "loss_ib": 0.000978364609181881, + "step": 3753 + }, + { + "ce_ib": 4.025373458862305, + "ce_orig": 0.5472899079322815, + "epoch": 1.0793730677978288, + "kl_loss": 0.0578213706612587, + "loss_ib": 0.0009807510068640113, + "step": 3754 + }, + { + "ce_ib": 2.4092764854431152, + "ce_orig": 0.7434115409851074, + "epoch": 1.0793730677978288, + "kl_loss": 0.021842235699295998, + "loss_ib": 0.00045934997615404427, + "step": 3754 + }, + { + "ce_ib": 3.0570943355560303, + "ce_orig": 0.7202489972114563, + "epoch": 1.0793730677978288, + "kl_loss": 0.03682931140065193, + "loss_ib": 0.0006740025128237903, + "step": 3754 + }, + { + "ce_ib": 3.6441638469696045, + "ce_orig": 0.6363624930381775, + "epoch": 1.0793730677978288, + "kl_loss": 0.06025451049208641, + "loss_ib": 0.000966961495578289, + "step": 3754 + }, + { + "epoch": 1.0796606513768063, + "grad_norm": 0.10436985641717911, + "learning_rate": 3.7083267447439877e-05, + "loss": 0.8463, + "step": 3755 + }, + { + "ce_ib": 3.813490390777588, + "ce_orig": 0.9153491258621216, + "epoch": 1.0796606513768063, + "kl_loss": 0.05964796990156174, + "loss_ib": 0.0009778287494555116, + "step": 3755 + }, + { + "ce_ib": 2.1782543659210205, + "ce_orig": 0.4044701159000397, + "epoch": 1.0796606513768063, + "kl_loss": 0.041897252202034, + "loss_ib": 0.0006367979804053903, + "step": 3755 + }, + { + "ce_ib": 3.4983365535736084, + "ce_orig": 0.6539420485496521, + "epoch": 1.0796606513768063, + "kl_loss": 0.056819066405296326, + "loss_ib": 0.0009180243359878659, + "step": 3755 + }, + { + "ce_ib": 2.7157156467437744, + "ce_orig": 0.6192690134048462, + "epoch": 1.0796606513768063, + "kl_loss": 0.02619580179452896, + "loss_ib": 0.000533529557287693, + "step": 3755 + }, + { + "ce_ib": 5.328938961029053, + "ce_orig": 1.141323447227478, + "epoch": 1.079948234955784, + "kl_loss": 0.05473146587610245, + "loss_ib": 0.0010802085744217038, + "step": 3756 + }, + { + "ce_ib": 2.4230575561523438, + "ce_orig": 0.6574130058288574, + "epoch": 1.079948234955784, + "kl_loss": 0.0522671714425087, + "loss_ib": 0.0007649774779565632, + "step": 3756 + }, + { + "ce_ib": 4.169065952301025, + "ce_orig": 1.201768398284912, + "epoch": 1.079948234955784, + "kl_loss": 0.03542771190404892, + "loss_ib": 0.000771183636970818, + "step": 3756 + }, + { + "ce_ib": 3.6286418437957764, + "ce_orig": 0.8067978620529175, + "epoch": 1.079948234955784, + "kl_loss": 0.06216752529144287, + "loss_ib": 0.0009845393942669034, + "step": 3756 + }, + { + "ce_ib": 3.911325454711914, + "ce_orig": 0.9993942975997925, + "epoch": 1.0802358185347616, + "kl_loss": 0.050652820616960526, + "loss_ib": 0.0008976606768555939, + "step": 3757 + }, + { + "ce_ib": 2.3833656311035156, + "ce_orig": 0.600832462310791, + "epoch": 1.0802358185347616, + "kl_loss": 0.03230427950620651, + "loss_ib": 0.0005613793036900461, + "step": 3757 + }, + { + "ce_ib": 5.064018249511719, + "ce_orig": 1.5797252655029297, + "epoch": 1.0802358185347616, + "kl_loss": 0.09295065701007843, + "loss_ib": 0.0014359083725139499, + "step": 3757 + }, + { + "ce_ib": 3.998192548751831, + "ce_orig": 0.930168867111206, + "epoch": 1.0802358185347616, + "kl_loss": 0.04733089730143547, + "loss_ib": 0.0008731281850486994, + "step": 3757 + }, + { + "ce_ib": 2.1938109397888184, + "ce_orig": 0.49766799807548523, + "epoch": 1.0805234021137393, + "kl_loss": 0.03637111932039261, + "loss_ib": 0.0005830922746099532, + "step": 3758 + }, + { + "ce_ib": 2.4646058082580566, + "ce_orig": 0.6126578450202942, + "epoch": 1.0805234021137393, + "kl_loss": 0.050501611083745956, + "loss_ib": 0.0007514766184613109, + "step": 3758 + }, + { + "ce_ib": 2.4887983798980713, + "ce_orig": 0.5094150304794312, + "epoch": 1.0805234021137393, + "kl_loss": 0.042160458862781525, + "loss_ib": 0.0006704844417981803, + "step": 3758 + }, + { + "ce_ib": 3.156078338623047, + "ce_orig": 0.7501397132873535, + "epoch": 1.0805234021137393, + "kl_loss": 0.02545168064534664, + "loss_ib": 0.0005701245972886682, + "step": 3758 + }, + { + "ce_ib": 4.872820854187012, + "ce_orig": 1.0346581935882568, + "epoch": 1.0808109856927168, + "kl_loss": 0.050536371767520905, + "loss_ib": 0.0009926457423716784, + "step": 3759 + }, + { + "ce_ib": 4.551280498504639, + "ce_orig": 1.082086443901062, + "epoch": 1.0808109856927168, + "kl_loss": 0.04441799223423004, + "loss_ib": 0.000899307953659445, + "step": 3759 + }, + { + "ce_ib": 2.289339780807495, + "ce_orig": 0.689346432685852, + "epoch": 1.0808109856927168, + "kl_loss": 0.03202257305383682, + "loss_ib": 0.0005491597112268209, + "step": 3759 + }, + { + "ce_ib": 3.220427989959717, + "ce_orig": 0.8783671855926514, + "epoch": 1.0808109856927168, + "kl_loss": 0.029004260897636414, + "loss_ib": 0.0006120853940956295, + "step": 3759 + }, + { + "epoch": 1.0810985692716946, + "grad_norm": 0.10692085325717926, + "learning_rate": 3.704928218982845e-05, + "loss": 0.8065, + "step": 3760 + }, + { + "ce_ib": 2.425501585006714, + "ce_orig": 0.4398646354675293, + "epoch": 1.0810985692716946, + "kl_loss": 0.05694856122136116, + "loss_ib": 0.0008120357524603605, + "step": 3760 + }, + { + "ce_ib": 5.3480753898620605, + "ce_orig": 1.28248929977417, + "epoch": 1.0810985692716946, + "kl_loss": 0.05344183370471001, + "loss_ib": 0.001069225836545229, + "step": 3760 + }, + { + "ce_ib": 2.9369053840637207, + "ce_orig": 1.0506314039230347, + "epoch": 1.0810985692716946, + "kl_loss": 0.17611631751060486, + "loss_ib": 0.0020548535976558924, + "step": 3760 + }, + { + "ce_ib": 2.388378381729126, + "ce_orig": 0.5631166696548462, + "epoch": 1.0810985692716946, + "kl_loss": 0.028038863092660904, + "loss_ib": 0.0005192264798097312, + "step": 3760 + }, + { + "ce_ib": 4.74709939956665, + "ce_orig": 1.2152891159057617, + "epoch": 1.0813861528506723, + "kl_loss": 0.050155967473983765, + "loss_ib": 0.0009762695990502834, + "step": 3761 + }, + { + "ce_ib": 3.55320405960083, + "ce_orig": 1.20296049118042, + "epoch": 1.0813861528506723, + "kl_loss": 0.033941470086574554, + "loss_ib": 0.000694735033903271, + "step": 3761 + }, + { + "ce_ib": 4.432699680328369, + "ce_orig": 1.142429232597351, + "epoch": 1.0813861528506723, + "kl_loss": 0.05324855074286461, + "loss_ib": 0.0009757554507814348, + "step": 3761 + }, + { + "ce_ib": 5.541138648986816, + "ce_orig": 0.8247188329696655, + "epoch": 1.0813861528506723, + "kl_loss": 0.0465172678232193, + "loss_ib": 0.0010192864574491978, + "step": 3761 + }, + { + "ce_ib": 1.8713462352752686, + "ce_orig": 0.39659470319747925, + "epoch": 1.0816737364296498, + "kl_loss": 0.042745307087898254, + "loss_ib": 0.0006145876250229776, + "step": 3762 + }, + { + "ce_ib": 1.9059213399887085, + "ce_orig": 0.45044565200805664, + "epoch": 1.0816737364296498, + "kl_loss": 0.02704520709812641, + "loss_ib": 0.0004610441974364221, + "step": 3762 + }, + { + "ce_ib": 2.650522232055664, + "ce_orig": 0.7262804508209229, + "epoch": 1.0816737364296498, + "kl_loss": 0.026421116665005684, + "loss_ib": 0.0005292634014040232, + "step": 3762 + }, + { + "ce_ib": 5.536917686462402, + "ce_orig": 0.8877483606338501, + "epoch": 1.0816737364296498, + "kl_loss": 0.05480511859059334, + "loss_ib": 0.0011017429642379284, + "step": 3762 + }, + { + "ce_ib": 2.9823343753814697, + "ce_orig": 0.6723228096961975, + "epoch": 1.0819613200086275, + "kl_loss": 0.057538364082574844, + "loss_ib": 0.0008736170711927116, + "step": 3763 + }, + { + "ce_ib": 3.3333330154418945, + "ce_orig": 0.751447856426239, + "epoch": 1.0819613200086275, + "kl_loss": 0.03803253918886185, + "loss_ib": 0.0007136586355045438, + "step": 3763 + }, + { + "ce_ib": 2.6042673587799072, + "ce_orig": 0.7668327689170837, + "epoch": 1.0819613200086275, + "kl_loss": 0.04552421346306801, + "loss_ib": 0.0007156688370741904, + "step": 3763 + }, + { + "ce_ib": 3.8637301921844482, + "ce_orig": 1.0370079278945923, + "epoch": 1.0819613200086275, + "kl_loss": 0.038997482508420944, + "loss_ib": 0.0007763478206470609, + "step": 3763 + }, + { + "ce_ib": 5.546306133270264, + "ce_orig": 1.0803649425506592, + "epoch": 1.082248903587605, + "kl_loss": 0.04633988440036774, + "loss_ib": 0.0010180294048041105, + "step": 3764 + }, + { + "ce_ib": 3.7701797485351562, + "ce_orig": 1.097898006439209, + "epoch": 1.082248903587605, + "kl_loss": 0.03955215960741043, + "loss_ib": 0.0007725395262241364, + "step": 3764 + }, + { + "ce_ib": 3.504683017730713, + "ce_orig": 1.087441325187683, + "epoch": 1.082248903587605, + "kl_loss": 0.02939542941749096, + "loss_ib": 0.0006444226019084454, + "step": 3764 + }, + { + "ce_ib": 4.385987281799316, + "ce_orig": 1.0219099521636963, + "epoch": 1.082248903587605, + "kl_loss": 0.05717339366674423, + "loss_ib": 0.0010103327222168446, + "step": 3764 + }, + { + "epoch": 1.0825364871665828, + "grad_norm": 0.11427906900644302, + "learning_rate": 3.7015267902699527e-05, + "loss": 0.854, + "step": 3765 + }, + { + "ce_ib": 2.8914685249328613, + "ce_orig": 0.548315703868866, + "epoch": 1.0825364871665828, + "kl_loss": 0.0487377867102623, + "loss_ib": 0.000776524655520916, + "step": 3765 + }, + { + "ce_ib": 3.5595836639404297, + "ce_orig": 0.5703548192977905, + "epoch": 1.0825364871665828, + "kl_loss": 0.06590615212917328, + "loss_ib": 0.0010150198359042406, + "step": 3765 + }, + { + "ce_ib": 4.074282169342041, + "ce_orig": 1.123268961906433, + "epoch": 1.0825364871665828, + "kl_loss": 0.050407081842422485, + "loss_ib": 0.0009114990243688226, + "step": 3765 + }, + { + "ce_ib": 1.8811850547790527, + "ce_orig": 0.27908098697662354, + "epoch": 1.0825364871665828, + "kl_loss": 0.029911238700151443, + "loss_ib": 0.000487230863654986, + "step": 3765 + }, + { + "ce_ib": 5.329934120178223, + "ce_orig": 1.334395408630371, + "epoch": 1.0828240707455605, + "kl_loss": 0.05514572188258171, + "loss_ib": 0.0010844506323337555, + "step": 3766 + }, + { + "ce_ib": 4.036459922790527, + "ce_orig": 0.780797004699707, + "epoch": 1.0828240707455605, + "kl_loss": 0.05259648710489273, + "loss_ib": 0.0009296108619309962, + "step": 3766 + }, + { + "ce_ib": 3.6602044105529785, + "ce_orig": 0.7315841913223267, + "epoch": 1.0828240707455605, + "kl_loss": 0.05866226926445961, + "loss_ib": 0.000952643109485507, + "step": 3766 + }, + { + "ce_ib": 4.659977436065674, + "ce_orig": 1.5203582048416138, + "epoch": 1.0828240707455605, + "kl_loss": 0.04591492563486099, + "loss_ib": 0.0009251469164155424, + "step": 3766 + }, + { + "ce_ib": 1.2886180877685547, + "ce_orig": 0.19040724635124207, + "epoch": 1.083111654324538, + "kl_loss": 0.09632538259029388, + "loss_ib": 0.0010921156499534845, + "step": 3767 + }, + { + "ce_ib": 4.612765312194824, + "ce_orig": 1.402310848236084, + "epoch": 1.083111654324538, + "kl_loss": 0.04907528683543205, + "loss_ib": 0.0009520293679088354, + "step": 3767 + }, + { + "ce_ib": 2.759408950805664, + "ce_orig": 0.7304694056510925, + "epoch": 1.083111654324538, + "kl_loss": 0.038888540118932724, + "loss_ib": 0.0006648263079114258, + "step": 3767 + }, + { + "ce_ib": 2.7527952194213867, + "ce_orig": 0.4569067358970642, + "epoch": 1.083111654324538, + "kl_loss": 0.042986489832401276, + "loss_ib": 0.0007051443681120872, + "step": 3767 + }, + { + "ce_ib": 3.1275827884674072, + "ce_orig": 0.4713091552257538, + "epoch": 1.0833992379035158, + "kl_loss": 0.04774150997400284, + "loss_ib": 0.0007901734206825495, + "step": 3768 + }, + { + "ce_ib": 5.250776290893555, + "ce_orig": 1.4439347982406616, + "epoch": 1.0833992379035158, + "kl_loss": 0.05751238763332367, + "loss_ib": 0.0011002013925462961, + "step": 3768 + }, + { + "ce_ib": 3.0405921936035156, + "ce_orig": 0.6205072402954102, + "epoch": 1.0833992379035158, + "kl_loss": 0.0500638484954834, + "loss_ib": 0.0008046976872719824, + "step": 3768 + }, + { + "ce_ib": 3.2421133518218994, + "ce_orig": 0.7855659127235413, + "epoch": 1.0833992379035158, + "kl_loss": 0.07416941225528717, + "loss_ib": 0.0010659054387360811, + "step": 3768 + }, + { + "ce_ib": 2.276517152786255, + "ce_orig": 0.5768246054649353, + "epoch": 1.0836868214824933, + "kl_loss": 0.023363744840025902, + "loss_ib": 0.000461289135273546, + "step": 3769 + }, + { + "ce_ib": 4.326053619384766, + "ce_orig": 1.1458208560943604, + "epoch": 1.0836868214824933, + "kl_loss": 0.05339799448847771, + "loss_ib": 0.0009665852412581444, + "step": 3769 + }, + { + "ce_ib": 2.266923427581787, + "ce_orig": 0.5596392750740051, + "epoch": 1.0836868214824933, + "kl_loss": 0.032059118151664734, + "loss_ib": 0.0005472835036925972, + "step": 3769 + }, + { + "ce_ib": 3.5719947814941406, + "ce_orig": 1.1968952417373657, + "epoch": 1.0836868214824933, + "kl_loss": 0.03254822641611099, + "loss_ib": 0.0006826816825196147, + "step": 3769 + }, + { + "epoch": 1.083974405061471, + "grad_norm": 0.12322474271059036, + "learning_rate": 3.6981224668001424e-05, + "loss": 0.8701, + "step": 3770 + }, + { + "ce_ib": 3.2613525390625, + "ce_orig": 0.8325713276863098, + "epoch": 1.083974405061471, + "kl_loss": 0.06016213819384575, + "loss_ib": 0.0009277565404772758, + "step": 3770 + }, + { + "ce_ib": 2.778244972229004, + "ce_orig": 0.8265330791473389, + "epoch": 1.083974405061471, + "kl_loss": 0.054223328828811646, + "loss_ib": 0.0008200577576644719, + "step": 3770 + }, + { + "ce_ib": 2.535527229309082, + "ce_orig": 0.47277623414993286, + "epoch": 1.083974405061471, + "kl_loss": 0.04951927438378334, + "loss_ib": 0.0007487454568035901, + "step": 3770 + }, + { + "ce_ib": 4.9758687019348145, + "ce_orig": 1.3062772750854492, + "epoch": 1.083974405061471, + "kl_loss": 0.041040368378162384, + "loss_ib": 0.0009079905576072633, + "step": 3770 + }, + { + "ce_ib": 5.097690105438232, + "ce_orig": 0.9568507671356201, + "epoch": 1.0842619886404485, + "kl_loss": 0.055007047951221466, + "loss_ib": 0.001059839385561645, + "step": 3771 + }, + { + "ce_ib": 2.829000234603882, + "ce_orig": 0.6640375256538391, + "epoch": 1.0842619886404485, + "kl_loss": 0.04025903344154358, + "loss_ib": 0.0006854903767816722, + "step": 3771 + }, + { + "ce_ib": 3.841338634490967, + "ce_orig": 0.9670641422271729, + "epoch": 1.0842619886404485, + "kl_loss": 0.05073961615562439, + "loss_ib": 0.0008915300713852048, + "step": 3771 + }, + { + "ce_ib": 5.053704738616943, + "ce_orig": 1.4443395137786865, + "epoch": 1.0842619886404485, + "kl_loss": 0.046684086322784424, + "loss_ib": 0.0009722113609313965, + "step": 3771 + }, + { + "ce_ib": 4.126392364501953, + "ce_orig": 0.8872730135917664, + "epoch": 1.0845495722194263, + "kl_loss": 0.04405614361166954, + "loss_ib": 0.000853200675919652, + "step": 3772 + }, + { + "ce_ib": 2.401244878768921, + "ce_orig": 0.8108454346656799, + "epoch": 1.0845495722194263, + "kl_loss": 0.02368907257914543, + "loss_ib": 0.0004770152154378593, + "step": 3772 + }, + { + "ce_ib": 4.704859256744385, + "ce_orig": 1.4073400497436523, + "epoch": 1.0845495722194263, + "kl_loss": 0.048544492572546005, + "loss_ib": 0.0009559307945892215, + "step": 3772 + }, + { + "ce_ib": 6.748884677886963, + "ce_orig": 1.79713773727417, + "epoch": 1.0845495722194263, + "kl_loss": 0.06900444626808167, + "loss_ib": 0.0013649329775944352, + "step": 3772 + }, + { + "ce_ib": 3.028653621673584, + "ce_orig": 0.8969062566757202, + "epoch": 1.084837155798404, + "kl_loss": 0.047133058309555054, + "loss_ib": 0.0007741959416307509, + "step": 3773 + }, + { + "ce_ib": 3.7710893154144287, + "ce_orig": 0.9656566381454468, + "epoch": 1.084837155798404, + "kl_loss": 0.045984163880348206, + "loss_ib": 0.0008369504939764738, + "step": 3773 + }, + { + "ce_ib": 4.4315595626831055, + "ce_orig": 0.6925877332687378, + "epoch": 1.084837155798404, + "kl_loss": 0.06977462023496628, + "loss_ib": 0.0011409021681174636, + "step": 3773 + }, + { + "ce_ib": 5.464961528778076, + "ce_orig": 1.5135865211486816, + "epoch": 1.084837155798404, + "kl_loss": 0.059306614100933075, + "loss_ib": 0.0011395622277632356, + "step": 3773 + }, + { + "ce_ib": 2.0838799476623535, + "ce_orig": 0.5539624094963074, + "epoch": 1.0851247393773815, + "kl_loss": 0.03410487622022629, + "loss_ib": 0.0005494367214851081, + "step": 3774 + }, + { + "ce_ib": 3.5039823055267334, + "ce_orig": 1.07612943649292, + "epoch": 1.0851247393773815, + "kl_loss": 0.02692887745797634, + "loss_ib": 0.0006196869653649628, + "step": 3774 + }, + { + "ce_ib": 1.8942790031433105, + "ce_orig": 0.9624440670013428, + "epoch": 1.0851247393773815, + "kl_loss": 0.1545521765947342, + "loss_ib": 0.0017349496483802795, + "step": 3774 + }, + { + "ce_ib": 5.26025915145874, + "ce_orig": 1.0177181959152222, + "epoch": 1.0851247393773815, + "kl_loss": 0.05168760567903519, + "loss_ib": 0.0010429018875584006, + "step": 3774 + }, + { + "epoch": 1.0854123229563593, + "grad_norm": 0.15045073628425598, + "learning_rate": 3.694715256775219e-05, + "loss": 0.8797, + "step": 3775 + }, + { + "ce_ib": 2.663618803024292, + "ce_orig": 0.5977757573127747, + "epoch": 1.0854123229563593, + "kl_loss": 0.02605145052075386, + "loss_ib": 0.0005268763634376228, + "step": 3775 + }, + { + "ce_ib": 4.668773174285889, + "ce_orig": 0.9514923691749573, + "epoch": 1.0854123229563593, + "kl_loss": 0.04393050819635391, + "loss_ib": 0.0009061823366209865, + "step": 3775 + }, + { + "ce_ib": 2.891402006149292, + "ce_orig": 0.8438654541969299, + "epoch": 1.0854123229563593, + "kl_loss": 0.05926414579153061, + "loss_ib": 0.0008817816269584, + "step": 3775 + }, + { + "ce_ib": 2.4863600730895996, + "ce_orig": 0.6444892883300781, + "epoch": 1.0854123229563593, + "kl_loss": 0.04781169071793556, + "loss_ib": 0.0007267528562806547, + "step": 3775 + }, + { + "ce_ib": 5.093502044677734, + "ce_orig": 1.231221318244934, + "epoch": 1.0856999065353368, + "kl_loss": 0.05042010545730591, + "loss_ib": 0.001013551140204072, + "step": 3776 + }, + { + "ce_ib": 1.805862307548523, + "ce_orig": 0.4319096803665161, + "epoch": 1.0856999065353368, + "kl_loss": 0.07451550662517548, + "loss_ib": 0.0009257412748411298, + "step": 3776 + }, + { + "ce_ib": 3.9518837928771973, + "ce_orig": 1.3080097436904907, + "epoch": 1.0856999065353368, + "kl_loss": 0.06926802545785904, + "loss_ib": 0.0010878685861825943, + "step": 3776 + }, + { + "ce_ib": 3.932446002960205, + "ce_orig": 1.0774297714233398, + "epoch": 1.0856999065353368, + "kl_loss": 0.03976583480834961, + "loss_ib": 0.000790902937296778, + "step": 3776 + }, + { + "ce_ib": 3.6549339294433594, + "ce_orig": 1.007706642150879, + "epoch": 1.0859874901143145, + "kl_loss": 0.04045896977186203, + "loss_ib": 0.0007700830465182662, + "step": 3777 + }, + { + "ce_ib": 3.1080551147460938, + "ce_orig": 0.8219942450523376, + "epoch": 1.0859874901143145, + "kl_loss": 0.05528874695301056, + "loss_ib": 0.00086369295604527, + "step": 3777 + }, + { + "ce_ib": 2.9000649452209473, + "ce_orig": 0.6813189387321472, + "epoch": 1.0859874901143145, + "kl_loss": 0.05561129003763199, + "loss_ib": 0.0008461193647235632, + "step": 3777 + }, + { + "ce_ib": 2.9394891262054443, + "ce_orig": 0.43361273407936096, + "epoch": 1.0859874901143145, + "kl_loss": 0.06856590509414673, + "loss_ib": 0.0009796079248189926, + "step": 3777 + }, + { + "ce_ib": 6.315638542175293, + "ce_orig": 1.7882622480392456, + "epoch": 1.086275073693292, + "kl_loss": 0.06780138611793518, + "loss_ib": 0.0013095776084810495, + "step": 3778 + }, + { + "ce_ib": 3.869612216949463, + "ce_orig": 0.7879853248596191, + "epoch": 1.086275073693292, + "kl_loss": 0.050866708159446716, + "loss_ib": 0.0008956282981671393, + "step": 3778 + }, + { + "ce_ib": 1.9167124032974243, + "ce_orig": 0.4893943667411804, + "epoch": 1.086275073693292, + "kl_loss": 0.033454835414886475, + "loss_ib": 0.0005262196063995361, + "step": 3778 + }, + { + "ce_ib": 3.8257086277008057, + "ce_orig": 0.8264917135238647, + "epoch": 1.086275073693292, + "kl_loss": 0.03786326199769974, + "loss_ib": 0.0007612034678459167, + "step": 3778 + }, + { + "ce_ib": 2.0855255126953125, + "ce_orig": 0.4081277847290039, + "epoch": 1.0865626572722697, + "kl_loss": 0.08557935059070587, + "loss_ib": 0.0010643460555002093, + "step": 3779 + }, + { + "ce_ib": 5.0456037521362305, + "ce_orig": 1.4394147396087646, + "epoch": 1.0865626572722697, + "kl_loss": 0.05290914699435234, + "loss_ib": 0.001033651758916676, + "step": 3779 + }, + { + "ce_ib": 2.5117886066436768, + "ce_orig": 0.5719835162162781, + "epoch": 1.0865626572722697, + "kl_loss": 0.03532698005437851, + "loss_ib": 0.0006044486653991044, + "step": 3779 + }, + { + "ce_ib": 3.2159671783447266, + "ce_orig": 0.5821964740753174, + "epoch": 1.0865626572722697, + "kl_loss": 0.10110026597976685, + "loss_ib": 0.001332599320448935, + "step": 3779 + }, + { + "epoch": 1.0868502408512475, + "grad_norm": 0.10142336785793304, + "learning_rate": 3.691305168403944e-05, + "loss": 0.889, + "step": 3780 + }, + { + "ce_ib": 2.4631118774414062, + "ce_orig": 0.7436454892158508, + "epoch": 1.0868502408512475, + "kl_loss": 0.02668026089668274, + "loss_ib": 0.0005131138022989035, + "step": 3780 + }, + { + "ce_ib": 4.267763614654541, + "ce_orig": 0.7297667860984802, + "epoch": 1.0868502408512475, + "kl_loss": 0.14978864789009094, + "loss_ib": 0.0019246628507971764, + "step": 3780 + }, + { + "ce_ib": 5.089537620544434, + "ce_orig": 1.5868409872055054, + "epoch": 1.0868502408512475, + "kl_loss": 0.043368685990571976, + "loss_ib": 0.0009426405886188149, + "step": 3780 + }, + { + "ce_ib": 5.076990604400635, + "ce_orig": 1.179328203201294, + "epoch": 1.0868502408512475, + "kl_loss": 0.0693860724568367, + "loss_ib": 0.001201559673063457, + "step": 3780 + }, + { + "ce_ib": 4.012114524841309, + "ce_orig": 1.1487014293670654, + "epoch": 1.087137824430225, + "kl_loss": 0.05137169361114502, + "loss_ib": 0.0009149283869192004, + "step": 3781 + }, + { + "ce_ib": 2.599181890487671, + "ce_orig": 0.5776275992393494, + "epoch": 1.087137824430225, + "kl_loss": 0.05781078338623047, + "loss_ib": 0.0008380259969271719, + "step": 3781 + }, + { + "ce_ib": 3.291407823562622, + "ce_orig": 0.6403886675834656, + "epoch": 1.087137824430225, + "kl_loss": 0.049826107919216156, + "loss_ib": 0.000827401876449585, + "step": 3781 + }, + { + "ce_ib": 4.684005260467529, + "ce_orig": 1.208272933959961, + "epoch": 1.087137824430225, + "kl_loss": 0.04843121021986008, + "loss_ib": 0.0009527125512249768, + "step": 3781 + }, + { + "ce_ib": 4.225606918334961, + "ce_orig": 1.1352099180221558, + "epoch": 1.0874254080092027, + "kl_loss": 0.05365185812115669, + "loss_ib": 0.0009590792469680309, + "step": 3782 + }, + { + "ce_ib": 2.6834607124328613, + "ce_orig": 0.5806912183761597, + "epoch": 1.0874254080092027, + "kl_loss": 0.04437948763370514, + "loss_ib": 0.000712140928953886, + "step": 3782 + }, + { + "ce_ib": 2.465237617492676, + "ce_orig": 0.4401625096797943, + "epoch": 1.0874254080092027, + "kl_loss": 0.06507103145122528, + "loss_ib": 0.0008972340729087591, + "step": 3782 + }, + { + "ce_ib": 3.72483491897583, + "ce_orig": 0.48954111337661743, + "epoch": 1.0874254080092027, + "kl_loss": 0.05124920606613159, + "loss_ib": 0.000884975481312722, + "step": 3782 + }, + { + "ce_ib": 4.531475067138672, + "ce_orig": 1.1594218015670776, + "epoch": 1.0877129915881802, + "kl_loss": 0.04603033512830734, + "loss_ib": 0.000913450843654573, + "step": 3783 + }, + { + "ce_ib": 2.6304991245269775, + "ce_orig": 0.6246340274810791, + "epoch": 1.0877129915881802, + "kl_loss": 0.18122880160808563, + "loss_ib": 0.002075337804853916, + "step": 3783 + }, + { + "ce_ib": 3.8209688663482666, + "ce_orig": 0.7381058931350708, + "epoch": 1.0877129915881802, + "kl_loss": 0.0643555074930191, + "loss_ib": 0.0010256519308313727, + "step": 3783 + }, + { + "ce_ib": 4.8135986328125, + "ce_orig": 1.3725897073745728, + "epoch": 1.0877129915881802, + "kl_loss": 0.054388947784900665, + "loss_ib": 0.0010252493666484952, + "step": 3783 + }, + { + "ce_ib": 3.6222097873687744, + "ce_orig": 0.9313843846321106, + "epoch": 1.088000575167158, + "kl_loss": 0.047211673110723495, + "loss_ib": 0.0008343376684933901, + "step": 3784 + }, + { + "ce_ib": 3.731729745864868, + "ce_orig": 0.7530717253684998, + "epoch": 1.088000575167158, + "kl_loss": 0.05791187286376953, + "loss_ib": 0.0009522917098365724, + "step": 3784 + }, + { + "ce_ib": 4.504255771636963, + "ce_orig": 1.3027137517929077, + "epoch": 1.088000575167158, + "kl_loss": 0.029732178896665573, + "loss_ib": 0.0007477473118342459, + "step": 3784 + }, + { + "ce_ib": 4.887805461883545, + "ce_orig": 1.2833137512207031, + "epoch": 1.088000575167158, + "kl_loss": 0.060742758214473724, + "loss_ib": 0.0010962081141769886, + "step": 3784 + }, + { + "epoch": 1.0882881587461355, + "grad_norm": 0.10831300914287567, + "learning_rate": 3.687892209902009e-05, + "loss": 0.7995, + "step": 3785 + }, + { + "ce_ib": 2.6715686321258545, + "ce_orig": 0.658282458782196, + "epoch": 1.0882881587461355, + "kl_loss": 0.0448446199297905, + "loss_ib": 0.0007156030624173582, + "step": 3785 + }, + { + "ce_ib": 2.957251787185669, + "ce_orig": 0.8883606195449829, + "epoch": 1.0882881587461355, + "kl_loss": 0.03408513218164444, + "loss_ib": 0.0006365765002556145, + "step": 3785 + }, + { + "ce_ib": 2.794650077819824, + "ce_orig": 0.42942631244659424, + "epoch": 1.0882881587461355, + "kl_loss": 0.04911360144615173, + "loss_ib": 0.0007706010364927351, + "step": 3785 + }, + { + "ce_ib": 3.2794673442840576, + "ce_orig": 1.031406044960022, + "epoch": 1.0882881587461355, + "kl_loss": 0.029230894520878792, + "loss_ib": 0.0006202556542120874, + "step": 3785 + }, + { + "ce_ib": 5.5054426193237305, + "ce_orig": 1.783713698387146, + "epoch": 1.0885757423251132, + "kl_loss": 0.0509771965444088, + "loss_ib": 0.0010603162227198482, + "step": 3786 + }, + { + "ce_ib": 3.6920595169067383, + "ce_orig": 1.1582016944885254, + "epoch": 1.0885757423251132, + "kl_loss": 0.05408135801553726, + "loss_ib": 0.000910019560251385, + "step": 3786 + }, + { + "ce_ib": 3.7427825927734375, + "ce_orig": 1.2341275215148926, + "epoch": 1.0885757423251132, + "kl_loss": 0.0490441769361496, + "loss_ib": 0.0008647200302220881, + "step": 3786 + }, + { + "ce_ib": 5.241150379180908, + "ce_orig": 1.4108662605285645, + "epoch": 1.0885757423251132, + "kl_loss": 0.045364148914813995, + "loss_ib": 0.000977756455540657, + "step": 3786 + }, + { + "ce_ib": 3.765476942062378, + "ce_orig": 0.9993256330490112, + "epoch": 1.088863325904091, + "kl_loss": 0.04967087507247925, + "loss_ib": 0.0008732564747333527, + "step": 3787 + }, + { + "ce_ib": 3.1192433834075928, + "ce_orig": 0.7281283140182495, + "epoch": 1.088863325904091, + "kl_loss": 0.07489010691642761, + "loss_ib": 0.0010608254233375192, + "step": 3787 + }, + { + "ce_ib": 5.330849647521973, + "ce_orig": 0.9893892407417297, + "epoch": 1.088863325904091, + "kl_loss": 0.0741439089179039, + "loss_ib": 0.0012745240237563848, + "step": 3787 + }, + { + "ce_ib": 3.7643396854400635, + "ce_orig": 0.9346387982368469, + "epoch": 1.088863325904091, + "kl_loss": 0.031148286536335945, + "loss_ib": 0.0006879167631268501, + "step": 3787 + }, + { + "ce_ib": 2.8125855922698975, + "ce_orig": 0.6383163928985596, + "epoch": 1.0891509094830685, + "kl_loss": 0.05651380866765976, + "loss_ib": 0.000846396666020155, + "step": 3788 + }, + { + "ce_ib": 2.468400478363037, + "ce_orig": 0.4831456243991852, + "epoch": 1.0891509094830685, + "kl_loss": 0.04600873589515686, + "loss_ib": 0.0007069273851811886, + "step": 3788 + }, + { + "ce_ib": 4.427337169647217, + "ce_orig": 1.4298596382141113, + "epoch": 1.0891509094830685, + "kl_loss": 0.03773130476474762, + "loss_ib": 0.0008200466982088983, + "step": 3788 + }, + { + "ce_ib": 2.6741726398468018, + "ce_orig": 0.6387848854064941, + "epoch": 1.0891509094830685, + "kl_loss": 0.03778871148824692, + "loss_ib": 0.0006453043897636235, + "step": 3788 + }, + { + "ce_ib": 3.670269012451172, + "ce_orig": 1.0592286586761475, + "epoch": 1.0894384930620462, + "kl_loss": 0.044557154178619385, + "loss_ib": 0.000812598445918411, + "step": 3789 + }, + { + "ce_ib": 2.2335870265960693, + "ce_orig": 0.6437737345695496, + "epoch": 1.0894384930620462, + "kl_loss": 0.019807243719697, + "loss_ib": 0.00042143111932091415, + "step": 3789 + }, + { + "ce_ib": 4.756264686584473, + "ce_orig": 1.268750786781311, + "epoch": 1.0894384930620462, + "kl_loss": 0.038921721279621124, + "loss_ib": 0.0008648437215015292, + "step": 3789 + }, + { + "ce_ib": 3.1235454082489014, + "ce_orig": 0.7270510196685791, + "epoch": 1.0894384930620462, + "kl_loss": 0.06063762307167053, + "loss_ib": 0.0009187308023683727, + "step": 3789 + }, + { + "epoch": 1.0897260766410237, + "grad_norm": 0.1105489507317543, + "learning_rate": 3.684476389492026e-05, + "loss": 0.8382, + "step": 3790 + }, + { + "ce_ib": 3.4395782947540283, + "ce_orig": 0.9011881351470947, + "epoch": 1.0897260766410237, + "kl_loss": 0.04668135568499565, + "loss_ib": 0.0008107713656499982, + "step": 3790 + }, + { + "ce_ib": 1.8437777757644653, + "ce_orig": 0.48543623089790344, + "epoch": 1.0897260766410237, + "kl_loss": 0.027629228308796883, + "loss_ib": 0.0004606700676959008, + "step": 3790 + }, + { + "ce_ib": 4.829056739807129, + "ce_orig": 1.378390908241272, + "epoch": 1.0897260766410237, + "kl_loss": 0.046129800379276276, + "loss_ib": 0.0009442036389373243, + "step": 3790 + }, + { + "ce_ib": 5.69083309173584, + "ce_orig": 1.2018083333969116, + "epoch": 1.0897260766410237, + "kl_loss": 0.048748426139354706, + "loss_ib": 0.001056567532941699, + "step": 3790 + }, + { + "ce_ib": 2.569614887237549, + "ce_orig": 0.44771236181259155, + "epoch": 1.0900136602200015, + "kl_loss": 0.04390004649758339, + "loss_ib": 0.000695961934980005, + "step": 3791 + }, + { + "ce_ib": 2.5383129119873047, + "ce_orig": 0.570460855960846, + "epoch": 1.0900136602200015, + "kl_loss": 0.04303041473031044, + "loss_ib": 0.0006841354188509285, + "step": 3791 + }, + { + "ce_ib": 3.4098141193389893, + "ce_orig": 0.804739773273468, + "epoch": 1.0900136602200015, + "kl_loss": 0.054059166461229324, + "loss_ib": 0.0008815730689093471, + "step": 3791 + }, + { + "ce_ib": 2.71193265914917, + "ce_orig": 0.6060654520988464, + "epoch": 1.0900136602200015, + "kl_loss": 0.04550483822822571, + "loss_ib": 0.0007262416183948517, + "step": 3791 + }, + { + "ce_ib": 2.1782164573669434, + "ce_orig": 0.20810241997241974, + "epoch": 1.090301243798979, + "kl_loss": 0.0364656001329422, + "loss_ib": 0.000582477601710707, + "step": 3792 + }, + { + "ce_ib": 2.566534996032715, + "ce_orig": 0.7316904067993164, + "epoch": 1.090301243798979, + "kl_loss": 0.035072293132543564, + "loss_ib": 0.0006073763943277299, + "step": 3792 + }, + { + "ce_ib": 4.189472198486328, + "ce_orig": 1.1917442083358765, + "epoch": 1.090301243798979, + "kl_loss": 0.04811154305934906, + "loss_ib": 0.0009000626159831882, + "step": 3792 + }, + { + "ce_ib": 2.54303240776062, + "ce_orig": 0.8381369709968567, + "epoch": 1.090301243798979, + "kl_loss": 0.03430204093456268, + "loss_ib": 0.0005973236402496696, + "step": 3792 + }, + { + "ce_ib": 5.518448352813721, + "ce_orig": 1.5196641683578491, + "epoch": 1.0905888273779567, + "kl_loss": 0.03813008964061737, + "loss_ib": 0.0009331456967629492, + "step": 3793 + }, + { + "ce_ib": 5.685070037841797, + "ce_orig": 1.3265409469604492, + "epoch": 1.0905888273779567, + "kl_loss": 0.04125938564538956, + "loss_ib": 0.0009811007184907794, + "step": 3793 + }, + { + "ce_ib": 6.258975028991699, + "ce_orig": 1.6467819213867188, + "epoch": 1.0905888273779567, + "kl_loss": 0.0552842915058136, + "loss_ib": 0.0011787404073402286, + "step": 3793 + }, + { + "ce_ib": 4.116883277893066, + "ce_orig": 0.5670591592788696, + "epoch": 1.0905888273779567, + "kl_loss": 0.05802937597036362, + "loss_ib": 0.000991982058621943, + "step": 3793 + }, + { + "ce_ib": 3.3053414821624756, + "ce_orig": 0.6221223473548889, + "epoch": 1.0908764109569344, + "kl_loss": 0.04729857295751572, + "loss_ib": 0.0008035198552533984, + "step": 3794 + }, + { + "ce_ib": 3.5002939701080322, + "ce_orig": 0.8145958781242371, + "epoch": 1.0908764109569344, + "kl_loss": 0.06326808035373688, + "loss_ib": 0.0009827101603150368, + "step": 3794 + }, + { + "ce_ib": 4.24208402633667, + "ce_orig": 1.2747219800949097, + "epoch": 1.0908764109569344, + "kl_loss": 0.04894288629293442, + "loss_ib": 0.0009136372827924788, + "step": 3794 + }, + { + "ce_ib": 1.432674527168274, + "ce_orig": 0.33149346709251404, + "epoch": 1.0908764109569344, + "kl_loss": 0.024006279185414314, + "loss_ib": 0.0003833302471321076, + "step": 3794 + }, + { + "epoch": 1.091163994535912, + "grad_norm": 0.10366562008857727, + "learning_rate": 3.681057715403497e-05, + "loss": 0.8147, + "step": 3795 + }, + { + "ce_ib": 4.145890235900879, + "ce_orig": 0.9297261238098145, + "epoch": 1.091163994535912, + "kl_loss": 0.069814033806324, + "loss_ib": 0.0011127293109893799, + "step": 3795 + }, + { + "ce_ib": 3.8247976303100586, + "ce_orig": 1.0223376750946045, + "epoch": 1.091163994535912, + "kl_loss": 0.03487340360879898, + "loss_ib": 0.0007312138332054019, + "step": 3795 + }, + { + "ce_ib": 1.6947813034057617, + "ce_orig": 0.38781288266181946, + "epoch": 1.091163994535912, + "kl_loss": 0.025986306369304657, + "loss_ib": 0.0004293411911930889, + "step": 3795 + }, + { + "ce_ib": 3.7441585063934326, + "ce_orig": 0.562834620475769, + "epoch": 1.091163994535912, + "kl_loss": 0.06341907382011414, + "loss_ib": 0.0010086065158247948, + "step": 3795 + }, + { + "ce_ib": 3.0169906616210938, + "ce_orig": 0.7024352550506592, + "epoch": 1.0914515781148897, + "kl_loss": 0.05056238919496536, + "loss_ib": 0.0008073229109868407, + "step": 3796 + }, + { + "ce_ib": 3.0670435428619385, + "ce_orig": 0.7986621260643005, + "epoch": 1.0914515781148897, + "kl_loss": 0.0359838530421257, + "loss_ib": 0.000666542851831764, + "step": 3796 + }, + { + "ce_ib": 2.1728882789611816, + "ce_orig": 0.5726261734962463, + "epoch": 1.0914515781148897, + "kl_loss": 0.041916996240615845, + "loss_ib": 0.0006364587461575866, + "step": 3796 + }, + { + "ce_ib": 1.693393588066101, + "ce_orig": 0.4340982139110565, + "epoch": 1.0914515781148897, + "kl_loss": 0.02159157581627369, + "loss_ib": 0.0003852550871670246, + "step": 3796 + }, + { + "ce_ib": 1.7385239601135254, + "ce_orig": 0.4108216166496277, + "epoch": 1.0917391616938672, + "kl_loss": 0.03451699763536453, + "loss_ib": 0.0005190223455429077, + "step": 3797 + }, + { + "ce_ib": 4.641890525817871, + "ce_orig": 0.9718688726425171, + "epoch": 1.0917391616938672, + "kl_loss": 0.05587700009346008, + "loss_ib": 0.001022959011606872, + "step": 3797 + }, + { + "ce_ib": 5.493823528289795, + "ce_orig": 0.7883731722831726, + "epoch": 1.0917391616938672, + "kl_loss": 0.06294715404510498, + "loss_ib": 0.0011788539122790098, + "step": 3797 + }, + { + "ce_ib": 2.805549144744873, + "ce_orig": 0.5870041847229004, + "epoch": 1.0917391616938672, + "kl_loss": 0.053797051310539246, + "loss_ib": 0.0008185253827832639, + "step": 3797 + }, + { + "ce_ib": 1.9461517333984375, + "ce_orig": 0.6835494041442871, + "epoch": 1.092026745272845, + "kl_loss": 0.027173683047294617, + "loss_ib": 0.00046635197941213846, + "step": 3798 + }, + { + "ce_ib": 2.5970523357391357, + "ce_orig": 0.7451503872871399, + "epoch": 1.092026745272845, + "kl_loss": 0.028784677386283875, + "loss_ib": 0.0005475519574247301, + "step": 3798 + }, + { + "ce_ib": 3.6215667724609375, + "ce_orig": 0.7944318056106567, + "epoch": 1.092026745272845, + "kl_loss": 0.04862569272518158, + "loss_ib": 0.0008484135614708066, + "step": 3798 + }, + { + "ce_ib": 3.5613226890563965, + "ce_orig": 0.5715258121490479, + "epoch": 1.092026745272845, + "kl_loss": 0.08117076754570007, + "loss_ib": 0.0011678398586809635, + "step": 3798 + }, + { + "ce_ib": 2.3912692070007324, + "ce_orig": 0.7511123418807983, + "epoch": 1.0923143288518227, + "kl_loss": 0.029710400849580765, + "loss_ib": 0.0005362309166230261, + "step": 3799 + }, + { + "ce_ib": 3.7535760402679443, + "ce_orig": 0.8777173757553101, + "epoch": 1.0923143288518227, + "kl_loss": 0.05423664674162865, + "loss_ib": 0.0009177240426652133, + "step": 3799 + }, + { + "ce_ib": 3.227693796157837, + "ce_orig": 0.8435637950897217, + "epoch": 1.0923143288518227, + "kl_loss": 0.040356479585170746, + "loss_ib": 0.0007263341103680432, + "step": 3799 + }, + { + "ce_ib": 3.0151607990264893, + "ce_orig": 0.8701406121253967, + "epoch": 1.0923143288518227, + "kl_loss": 0.05738649144768715, + "loss_ib": 0.0008753809961490333, + "step": 3799 + }, + { + "epoch": 1.0926019124308002, + "grad_norm": 0.11760343611240387, + "learning_rate": 3.677636195872802e-05, + "loss": 0.7847, + "step": 3800 + }, + { + "ce_ib": 3.287048816680908, + "ce_orig": 1.002402424812317, + "epoch": 1.0926019124308002, + "kl_loss": 0.033536627888679504, + "loss_ib": 0.0006640711217187345, + "step": 3800 + }, + { + "ce_ib": 3.465463876724243, + "ce_orig": 1.1063590049743652, + "epoch": 1.0926019124308002, + "kl_loss": 0.031133972108364105, + "loss_ib": 0.0006578860920853913, + "step": 3800 + }, + { + "ce_ib": 3.167801856994629, + "ce_orig": 0.7188864350318909, + "epoch": 1.0926019124308002, + "kl_loss": 0.06214483454823494, + "loss_ib": 0.000938228506129235, + "step": 3800 + }, + { + "ce_ib": 4.026973247528076, + "ce_orig": 1.3939636945724487, + "epoch": 1.0926019124308002, + "kl_loss": 0.04487244784832001, + "loss_ib": 0.0008514217915944755, + "step": 3800 + }, + { + "ce_ib": 1.3678197860717773, + "ce_orig": 0.2475348562002182, + "epoch": 1.092889496009778, + "kl_loss": 0.06987619400024414, + "loss_ib": 0.0008355439058504999, + "step": 3801 + }, + { + "ce_ib": 4.050053596496582, + "ce_orig": 1.0561883449554443, + "epoch": 1.092889496009778, + "kl_loss": 0.057650644332170486, + "loss_ib": 0.0009815117809921503, + "step": 3801 + }, + { + "ce_ib": 4.071281909942627, + "ce_orig": 0.9262402653694153, + "epoch": 1.092889496009778, + "kl_loss": 0.03568152338266373, + "loss_ib": 0.0007639434188604355, + "step": 3801 + }, + { + "ce_ib": 3.2594552040100098, + "ce_orig": 0.7241121530532837, + "epoch": 1.092889496009778, + "kl_loss": 0.06773495674133301, + "loss_ib": 0.0010032950667664409, + "step": 3801 + }, + { + "ce_ib": 1.9351885318756104, + "ce_orig": 0.3647642135620117, + "epoch": 1.0931770795887554, + "kl_loss": 0.02337459847331047, + "loss_ib": 0.00042726483661681414, + "step": 3802 + }, + { + "ce_ib": 5.362715244293213, + "ce_orig": 1.3288027048110962, + "epoch": 1.0931770795887554, + "kl_loss": 0.06293658167123795, + "loss_ib": 0.0011656373972073197, + "step": 3802 + }, + { + "ce_ib": 3.7440226078033447, + "ce_orig": 0.8206219673156738, + "epoch": 1.0931770795887554, + "kl_loss": 0.04942480847239494, + "loss_ib": 0.0008686503861099482, + "step": 3802 + }, + { + "ce_ib": 3.076801300048828, + "ce_orig": 0.6908454895019531, + "epoch": 1.0931770795887554, + "kl_loss": 0.04125557839870453, + "loss_ib": 0.0007202359265647829, + "step": 3802 + }, + { + "ce_ib": 4.098621368408203, + "ce_orig": 0.8266748189926147, + "epoch": 1.0934646631677332, + "kl_loss": 0.05962052196264267, + "loss_ib": 0.0010060673812404275, + "step": 3803 + }, + { + "ce_ib": 3.301959753036499, + "ce_orig": 0.7909917831420898, + "epoch": 1.0934646631677332, + "kl_loss": 0.03990527242422104, + "loss_ib": 0.0007292486843653023, + "step": 3803 + }, + { + "ce_ib": 1.904659628868103, + "ce_orig": 0.7395251393318176, + "epoch": 1.0934646631677332, + "kl_loss": 0.034504763782024384, + "loss_ib": 0.0005355136236175895, + "step": 3803 + }, + { + "ce_ib": 1.223630428314209, + "ce_orig": 0.3177008032798767, + "epoch": 1.0934646631677332, + "kl_loss": 0.028191953897476196, + "loss_ib": 0.000404282589443028, + "step": 3803 + }, + { + "ce_ib": 4.228010654449463, + "ce_orig": 1.0439274311065674, + "epoch": 1.0937522467467107, + "kl_loss": 0.05202943831682205, + "loss_ib": 0.0009430953650735319, + "step": 3804 + }, + { + "ce_ib": 3.8460187911987305, + "ce_orig": 1.0173442363739014, + "epoch": 1.0937522467467107, + "kl_loss": 0.04700538143515587, + "loss_ib": 0.000854655634611845, + "step": 3804 + }, + { + "ce_ib": 4.54215669631958, + "ce_orig": 1.0171327590942383, + "epoch": 1.0937522467467107, + "kl_loss": 0.04179629683494568, + "loss_ib": 0.000872178643476218, + "step": 3804 + }, + { + "ce_ib": 2.5316357612609863, + "ce_orig": 0.4480207860469818, + "epoch": 1.0937522467467107, + "kl_loss": 0.06367506831884384, + "loss_ib": 0.0008899142267182469, + "step": 3804 + }, + { + "epoch": 1.0940398303256884, + "grad_norm": 0.10146203637123108, + "learning_rate": 3.674211839143177e-05, + "loss": 0.8222, + "step": 3805 + }, + { + "ce_ib": 2.5582079887390137, + "ce_orig": 0.6924826502799988, + "epoch": 1.0940398303256884, + "kl_loss": 0.05447329580783844, + "loss_ib": 0.00080055370926857, + "step": 3805 + }, + { + "ce_ib": 1.674580454826355, + "ce_orig": 0.22173826396465302, + "epoch": 1.0940398303256884, + "kl_loss": 0.049502432346343994, + "loss_ib": 0.0006624823436141014, + "step": 3805 + }, + { + "ce_ib": 2.595327854156494, + "ce_orig": 0.8868363499641418, + "epoch": 1.0940398303256884, + "kl_loss": 0.025523841381072998, + "loss_ib": 0.0005147712072357535, + "step": 3805 + }, + { + "ce_ib": 2.1328718662261963, + "ce_orig": 0.6126255989074707, + "epoch": 1.0940398303256884, + "kl_loss": 0.03752349317073822, + "loss_ib": 0.0005885221180506051, + "step": 3805 + }, + { + "ce_ib": 3.0452451705932617, + "ce_orig": 0.7088829874992371, + "epoch": 1.0943274139046661, + "kl_loss": 0.07930466532707214, + "loss_ib": 0.0010975711047649384, + "step": 3806 + }, + { + "ce_ib": 3.145778179168701, + "ce_orig": 0.7860994935035706, + "epoch": 1.0943274139046661, + "kl_loss": 0.06957325339317322, + "loss_ib": 0.0010103103704750538, + "step": 3806 + }, + { + "ce_ib": 3.0355777740478516, + "ce_orig": 0.7138336896896362, + "epoch": 1.0943274139046661, + "kl_loss": 0.03761717304587364, + "loss_ib": 0.0006797295063734055, + "step": 3806 + }, + { + "ce_ib": 3.3895411491394043, + "ce_orig": 0.43626952171325684, + "epoch": 1.0943274139046661, + "kl_loss": 0.05385753512382507, + "loss_ib": 0.0008775294409133494, + "step": 3806 + }, + { + "ce_ib": 4.6766862869262695, + "ce_orig": 1.1217395067214966, + "epoch": 1.0946149974836437, + "kl_loss": 0.06565612554550171, + "loss_ib": 0.001124229864217341, + "step": 3807 + }, + { + "ce_ib": 4.520284175872803, + "ce_orig": 1.227316975593567, + "epoch": 1.0946149974836437, + "kl_loss": 0.054194383323192596, + "loss_ib": 0.0009939722949638963, + "step": 3807 + }, + { + "ce_ib": 2.426487684249878, + "ce_orig": 0.5218177437782288, + "epoch": 1.0946149974836437, + "kl_loss": 0.026222368702292442, + "loss_ib": 0.0005048724124208093, + "step": 3807 + }, + { + "ce_ib": 2.112677574157715, + "ce_orig": 0.6516031622886658, + "epoch": 1.0946149974836437, + "kl_loss": 0.06202997639775276, + "loss_ib": 0.0008315674494951963, + "step": 3807 + }, + { + "ce_ib": 3.800549268722534, + "ce_orig": 1.2307660579681396, + "epoch": 1.0949025810626214, + "kl_loss": 0.05345279723405838, + "loss_ib": 0.0009145828662440181, + "step": 3808 + }, + { + "ce_ib": 2.358090877532959, + "ce_orig": 0.5429490804672241, + "epoch": 1.0949025810626214, + "kl_loss": 0.0441618487238884, + "loss_ib": 0.0006774275680072606, + "step": 3808 + }, + { + "ce_ib": 3.2068533897399902, + "ce_orig": 0.8878046870231628, + "epoch": 1.0949025810626214, + "kl_loss": 0.03499512001872063, + "loss_ib": 0.0006706364802084863, + "step": 3808 + }, + { + "ce_ib": 2.4484283924102783, + "ce_orig": 0.5631781220436096, + "epoch": 1.0949025810626214, + "kl_loss": 0.03554060310125351, + "loss_ib": 0.0006002488662488759, + "step": 3808 + }, + { + "ce_ib": 2.7818970680236816, + "ce_orig": 0.6863386034965515, + "epoch": 1.095190164641599, + "kl_loss": 0.04275062680244446, + "loss_ib": 0.0007056959439069033, + "step": 3809 + }, + { + "ce_ib": 2.050353527069092, + "ce_orig": 0.5488885641098022, + "epoch": 1.095190164641599, + "kl_loss": 0.0312662348151207, + "loss_ib": 0.0005176977138034999, + "step": 3809 + }, + { + "ce_ib": 3.0255722999572754, + "ce_orig": 0.7522415518760681, + "epoch": 1.095190164641599, + "kl_loss": 0.05426524579524994, + "loss_ib": 0.0008452096371911466, + "step": 3809 + }, + { + "ce_ib": 4.1211676597595215, + "ce_orig": 0.9370542168617249, + "epoch": 1.095190164641599, + "kl_loss": 0.04937777295708656, + "loss_ib": 0.0009058944415301085, + "step": 3809 + }, + { + "epoch": 1.0954777482205766, + "grad_norm": 0.10730481147766113, + "learning_rate": 3.6707846534646905e-05, + "loss": 0.7927, + "step": 3810 + }, + { + "ce_ib": 3.227883815765381, + "ce_orig": 0.890907347202301, + "epoch": 1.0954777482205766, + "kl_loss": 0.03083869442343712, + "loss_ib": 0.000631175353191793, + "step": 3810 + }, + { + "ce_ib": 5.6619954109191895, + "ce_orig": 1.140417218208313, + "epoch": 1.0954777482205766, + "kl_loss": 0.045828159898519516, + "loss_ib": 0.0010244811419397593, + "step": 3810 + }, + { + "ce_ib": 5.331697463989258, + "ce_orig": 1.6061488389968872, + "epoch": 1.0954777482205766, + "kl_loss": 0.04612287878990173, + "loss_ib": 0.0009943984914571047, + "step": 3810 + }, + { + "ce_ib": 2.144191265106201, + "ce_orig": 0.46574780344963074, + "epoch": 1.0954777482205766, + "kl_loss": 0.02600458636879921, + "loss_ib": 0.00047446496319025755, + "step": 3810 + }, + { + "ce_ib": 3.2003910541534424, + "ce_orig": 0.676112949848175, + "epoch": 1.0957653317995542, + "kl_loss": 0.043508388102054596, + "loss_ib": 0.0007551229791715741, + "step": 3811 + }, + { + "ce_ib": 3.1219546794891357, + "ce_orig": 0.3176693618297577, + "epoch": 1.0957653317995542, + "kl_loss": 0.09693959355354309, + "loss_ib": 0.0012815913651138544, + "step": 3811 + }, + { + "ce_ib": 1.9850927591323853, + "ce_orig": 0.3818403482437134, + "epoch": 1.0957653317995542, + "kl_loss": 0.03400453180074692, + "loss_ib": 0.0005385545664466918, + "step": 3811 + }, + { + "ce_ib": 4.002222537994385, + "ce_orig": 0.8788353800773621, + "epoch": 1.0957653317995542, + "kl_loss": 0.09572027623653412, + "loss_ib": 0.0013574250042438507, + "step": 3811 + }, + { + "ce_ib": 2.7263362407684326, + "ce_orig": 0.7980220317840576, + "epoch": 1.096052915378532, + "kl_loss": 0.027158405631780624, + "loss_ib": 0.0005442176479846239, + "step": 3812 + }, + { + "ce_ib": 3.403925895690918, + "ce_orig": 0.7721123099327087, + "epoch": 1.096052915378532, + "kl_loss": 0.05875672772526741, + "loss_ib": 0.0009279598016291857, + "step": 3812 + }, + { + "ce_ib": 2.0208182334899902, + "ce_orig": 0.5683479309082031, + "epoch": 1.096052915378532, + "kl_loss": 0.04319146275520325, + "loss_ib": 0.0006339963874779642, + "step": 3812 + }, + { + "ce_ib": 2.3833553791046143, + "ce_orig": 0.562724769115448, + "epoch": 1.096052915378532, + "kl_loss": 0.05461671203374863, + "loss_ib": 0.0007845025975257158, + "step": 3812 + }, + { + "ce_ib": 3.134868621826172, + "ce_orig": 0.8009907603263855, + "epoch": 1.0963404989575096, + "kl_loss": 0.07928229123353958, + "loss_ib": 0.0011063097044825554, + "step": 3813 + }, + { + "ce_ib": 2.541898488998413, + "ce_orig": 0.7164412140846252, + "epoch": 1.0963404989575096, + "kl_loss": 0.025509826838970184, + "loss_ib": 0.0005092881037853658, + "step": 3813 + }, + { + "ce_ib": 3.140312433242798, + "ce_orig": 0.7396801114082336, + "epoch": 1.0963404989575096, + "kl_loss": 0.04602250084280968, + "loss_ib": 0.0007742562447674572, + "step": 3813 + }, + { + "ce_ib": 4.122194766998291, + "ce_orig": 0.8745043873786926, + "epoch": 1.0963404989575096, + "kl_loss": 0.04702916741371155, + "loss_ib": 0.0008825111435726285, + "step": 3813 + }, + { + "ce_ib": 3.7105343341827393, + "ce_orig": 0.7401580214500427, + "epoch": 1.0966280825364871, + "kl_loss": 0.04008825868368149, + "loss_ib": 0.0007719360291957855, + "step": 3814 + }, + { + "ce_ib": 5.303881645202637, + "ce_orig": 1.4297077655792236, + "epoch": 1.0966280825364871, + "kl_loss": 0.05745798349380493, + "loss_ib": 0.0011049680178985, + "step": 3814 + }, + { + "ce_ib": 2.281193494796753, + "ce_orig": 0.557598352432251, + "epoch": 1.0966280825364871, + "kl_loss": 0.034043088555336, + "loss_ib": 0.0005685501964762807, + "step": 3814 + }, + { + "ce_ib": 1.6983784437179565, + "ce_orig": 0.4492989182472229, + "epoch": 1.0966280825364871, + "kl_loss": 0.023483000695705414, + "loss_ib": 0.00040466783684678376, + "step": 3814 + }, + { + "epoch": 1.0969156661154649, + "grad_norm": 0.1080814003944397, + "learning_rate": 3.667354647094229e-05, + "loss": 0.806, + "step": 3815 + }, + { + "ce_ib": 2.473552942276001, + "ce_orig": 0.6440848112106323, + "epoch": 1.0969156661154649, + "kl_loss": 0.04373014718294144, + "loss_ib": 0.0006846567848697305, + "step": 3815 + }, + { + "ce_ib": 4.090908050537109, + "ce_orig": 0.8036707639694214, + "epoch": 1.0969156661154649, + "kl_loss": 0.06182201951742172, + "loss_ib": 0.0010273109655827284, + "step": 3815 + }, + { + "ce_ib": 3.1619489192962646, + "ce_orig": 0.7487618327140808, + "epoch": 1.0969156661154649, + "kl_loss": 0.09705309569835663, + "loss_ib": 0.0012867258628830314, + "step": 3815 + }, + { + "ce_ib": 2.3310375213623047, + "ce_orig": 0.39884740114212036, + "epoch": 1.0969156661154649, + "kl_loss": 0.09511130303144455, + "loss_ib": 0.0011842167004942894, + "step": 3815 + }, + { + "ce_ib": 4.198123931884766, + "ce_orig": 1.1227229833602905, + "epoch": 1.0972032496944424, + "kl_loss": 0.045694589614868164, + "loss_ib": 0.0008767582476139069, + "step": 3816 + }, + { + "ce_ib": 2.094822406768799, + "ce_orig": 0.44156190752983093, + "epoch": 1.0972032496944424, + "kl_loss": 0.050003983080387115, + "loss_ib": 0.0007095220498740673, + "step": 3816 + }, + { + "ce_ib": 4.249857425689697, + "ce_orig": 0.8240419626235962, + "epoch": 1.0972032496944424, + "kl_loss": 0.05320723354816437, + "loss_ib": 0.0009570580441504717, + "step": 3816 + }, + { + "ce_ib": 3.7049849033355713, + "ce_orig": 0.6437998414039612, + "epoch": 1.0972032496944424, + "kl_loss": 0.06265504658222198, + "loss_ib": 0.0009970489190891385, + "step": 3816 + }, + { + "ce_ib": 3.8626465797424316, + "ce_orig": 1.1235220432281494, + "epoch": 1.0974908332734201, + "kl_loss": 0.046659596264362335, + "loss_ib": 0.0008528606267645955, + "step": 3817 + }, + { + "ce_ib": 4.353835582733154, + "ce_orig": 1.0788394212722778, + "epoch": 1.0974908332734201, + "kl_loss": 0.06541016697883606, + "loss_ib": 0.001089485245756805, + "step": 3817 + }, + { + "ce_ib": 4.27995491027832, + "ce_orig": 1.134982705116272, + "epoch": 1.0974908332734201, + "kl_loss": 0.06442723423242569, + "loss_ib": 0.0010722677689045668, + "step": 3817 + }, + { + "ce_ib": 2.0794315338134766, + "ce_orig": 0.28696784377098083, + "epoch": 1.0974908332734201, + "kl_loss": 0.0677345022559166, + "loss_ib": 0.0008852881728671491, + "step": 3817 + }, + { + "ce_ib": 2.328369617462158, + "ce_orig": 0.7063790559768677, + "epoch": 1.0977784168523976, + "kl_loss": 0.026968639343976974, + "loss_ib": 0.0005025233258493245, + "step": 3818 + }, + { + "ce_ib": 3.4902114868164062, + "ce_orig": 0.9673996567726135, + "epoch": 1.0977784168523976, + "kl_loss": 0.023669829592108727, + "loss_ib": 0.0005857194191776216, + "step": 3818 + }, + { + "ce_ib": 3.2359728813171387, + "ce_orig": 1.0749634504318237, + "epoch": 1.0977784168523976, + "kl_loss": 0.05581950023770332, + "loss_ib": 0.0008817922789603472, + "step": 3818 + }, + { + "ce_ib": 3.249863624572754, + "ce_orig": 0.7661311626434326, + "epoch": 1.0977784168523976, + "kl_loss": 0.0469067320227623, + "loss_ib": 0.000794053659774363, + "step": 3818 + }, + { + "ce_ib": 3.9508461952209473, + "ce_orig": 1.003402590751648, + "epoch": 1.0980660004313754, + "kl_loss": 0.03288896754384041, + "loss_ib": 0.0007239742553792894, + "step": 3819 + }, + { + "ce_ib": 2.720139741897583, + "ce_orig": 0.6222206354141235, + "epoch": 1.0980660004313754, + "kl_loss": 0.0444350428879261, + "loss_ib": 0.0007163643604144454, + "step": 3819 + }, + { + "ce_ib": 2.7616662979125977, + "ce_orig": 0.7558862566947937, + "epoch": 1.0980660004313754, + "kl_loss": 0.04185128211975098, + "loss_ib": 0.0006946794455870986, + "step": 3819 + }, + { + "ce_ib": 1.492310881614685, + "ce_orig": 0.322893887758255, + "epoch": 1.0980660004313754, + "kl_loss": 0.06984443217515945, + "loss_ib": 0.0008476754301227629, + "step": 3819 + }, + { + "epoch": 1.098353584010353, + "grad_norm": 0.10574547946453094, + "learning_rate": 3.663921828295474e-05, + "loss": 0.8138, + "step": 3820 + }, + { + "ce_ib": 2.9654464721679688, + "ce_orig": 0.6999993324279785, + "epoch": 1.098353584010353, + "kl_loss": 0.03329232335090637, + "loss_ib": 0.0006294678896665573, + "step": 3820 + }, + { + "ce_ib": 2.2371978759765625, + "ce_orig": 0.46226000785827637, + "epoch": 1.098353584010353, + "kl_loss": 0.08793975412845612, + "loss_ib": 0.0011031172471120954, + "step": 3820 + }, + { + "ce_ib": 3.9516372680664062, + "ce_orig": 1.2644197940826416, + "epoch": 1.098353584010353, + "kl_loss": 0.0477156788110733, + "loss_ib": 0.0008723204955458641, + "step": 3820 + }, + { + "ce_ib": 3.7749125957489014, + "ce_orig": 0.8690842986106873, + "epoch": 1.098353584010353, + "kl_loss": 0.058464065194129944, + "loss_ib": 0.0009621318895369768, + "step": 3820 + }, + { + "ce_ib": 1.741251826286316, + "ce_orig": 0.4003100097179413, + "epoch": 1.0986411675893306, + "kl_loss": 0.020511915907263756, + "loss_ib": 0.00037924430216662586, + "step": 3821 + }, + { + "ce_ib": 5.433696746826172, + "ce_orig": 1.309335470199585, + "epoch": 1.0986411675893306, + "kl_loss": 0.04640767350792885, + "loss_ib": 0.0010074463207274675, + "step": 3821 + }, + { + "ce_ib": 3.280801773071289, + "ce_orig": 0.671692967414856, + "epoch": 1.0986411675893306, + "kl_loss": 0.0731976330280304, + "loss_ib": 0.0010600565001368523, + "step": 3821 + }, + { + "ce_ib": 3.146010398864746, + "ce_orig": 0.89557945728302, + "epoch": 1.0986411675893306, + "kl_loss": 0.03444511070847511, + "loss_ib": 0.0006590521079488099, + "step": 3821 + }, + { + "ce_ib": 2.123598337173462, + "ce_orig": 0.7323542237281799, + "epoch": 1.0989287511683083, + "kl_loss": 0.03905850648880005, + "loss_ib": 0.0006029448704794049, + "step": 3822 + }, + { + "ce_ib": 3.489654064178467, + "ce_orig": 0.9678696393966675, + "epoch": 1.0989287511683083, + "kl_loss": 0.02264290489256382, + "loss_ib": 0.0005753944860771298, + "step": 3822 + }, + { + "ce_ib": 2.809582471847534, + "ce_orig": 0.7631255388259888, + "epoch": 1.0989287511683083, + "kl_loss": 0.04945661872625351, + "loss_ib": 0.000775524415075779, + "step": 3822 + }, + { + "ce_ib": 4.910249710083008, + "ce_orig": 1.3603599071502686, + "epoch": 1.0989287511683083, + "kl_loss": 0.05066176876425743, + "loss_ib": 0.0009976426372304559, + "step": 3822 + }, + { + "ce_ib": 3.21875262260437, + "ce_orig": 0.5030861496925354, + "epoch": 1.0992163347472859, + "kl_loss": 0.0763460248708725, + "loss_ib": 0.0010853353887796402, + "step": 3823 + }, + { + "ce_ib": 2.321354627609253, + "ce_orig": 0.45836079120635986, + "epoch": 1.0992163347472859, + "kl_loss": 0.045369140803813934, + "loss_ib": 0.0006858268170617521, + "step": 3823 + }, + { + "ce_ib": 3.591824769973755, + "ce_orig": 0.5539960861206055, + "epoch": 1.0992163347472859, + "kl_loss": 0.04943203553557396, + "loss_ib": 0.0008535028318874538, + "step": 3823 + }, + { + "ce_ib": 3.505234718322754, + "ce_orig": 0.8544834852218628, + "epoch": 1.0992163347472859, + "kl_loss": 0.04872620850801468, + "loss_ib": 0.0008377855992875993, + "step": 3823 + }, + { + "ce_ib": 3.5015270709991455, + "ce_orig": 0.8279674649238586, + "epoch": 1.0995039183262636, + "kl_loss": 0.05839039012789726, + "loss_ib": 0.000934056646656245, + "step": 3824 + }, + { + "ce_ib": 2.7295899391174316, + "ce_orig": 0.6666223406791687, + "epoch": 1.0995039183262636, + "kl_loss": 0.0543961375951767, + "loss_ib": 0.000816920364741236, + "step": 3824 + }, + { + "ce_ib": 2.658578395843506, + "ce_orig": 0.7657173871994019, + "epoch": 1.0995039183262636, + "kl_loss": 0.062308475375175476, + "loss_ib": 0.0008889426244422793, + "step": 3824 + }, + { + "ce_ib": 2.258929967880249, + "ce_orig": 0.6819618940353394, + "epoch": 1.0995039183262636, + "kl_loss": 0.03590603172779083, + "loss_ib": 0.0005849532899446785, + "step": 3824 + }, + { + "epoch": 1.099791501905241, + "grad_norm": 0.11068874597549438, + "learning_rate": 3.660486205338883e-05, + "loss": 0.8487, + "step": 3825 + }, + { + "ce_ib": 4.840597629547119, + "ce_orig": 1.4293467998504639, + "epoch": 1.099791501905241, + "kl_loss": 0.03673117235302925, + "loss_ib": 0.0008513714419677854, + "step": 3825 + }, + { + "ce_ib": 2.3991811275482178, + "ce_orig": 0.6883370280265808, + "epoch": 1.099791501905241, + "kl_loss": 0.041925396770238876, + "loss_ib": 0.0006591720739379525, + "step": 3825 + }, + { + "ce_ib": 4.453692436218262, + "ce_orig": 0.9034865498542786, + "epoch": 1.099791501905241, + "kl_loss": 0.0677705854177475, + "loss_ib": 0.0011230750242248178, + "step": 3825 + }, + { + "ce_ib": 3.2926042079925537, + "ce_orig": 0.8836919665336609, + "epoch": 1.099791501905241, + "kl_loss": 0.05282627046108246, + "loss_ib": 0.0008575230604037642, + "step": 3825 + }, + { + "ce_ib": 2.823582172393799, + "ce_orig": 0.5118498802185059, + "epoch": 1.1000790854842188, + "kl_loss": 0.052340708673000336, + "loss_ib": 0.0008057652739807963, + "step": 3826 + }, + { + "ce_ib": 2.901411294937134, + "ce_orig": 0.8852375745773315, + "epoch": 1.1000790854842188, + "kl_loss": 0.04174107313156128, + "loss_ib": 0.0007075518369674683, + "step": 3826 + }, + { + "ce_ib": 2.7760047912597656, + "ce_orig": 0.639947772026062, + "epoch": 1.1000790854842188, + "kl_loss": 0.029647838324308395, + "loss_ib": 0.0005740788183175027, + "step": 3826 + }, + { + "ce_ib": 2.1646976470947266, + "ce_orig": 0.5470616817474365, + "epoch": 1.1000790854842188, + "kl_loss": 0.06486204266548157, + "loss_ib": 0.0008650901727378368, + "step": 3826 + }, + { + "ce_ib": 2.3698575496673584, + "ce_orig": 0.6651419401168823, + "epoch": 1.1003666690631966, + "kl_loss": 0.045177336782217026, + "loss_ib": 0.0006887590861879289, + "step": 3827 + }, + { + "ce_ib": 5.370783805847168, + "ce_orig": 1.3830674886703491, + "epoch": 1.1003666690631966, + "kl_loss": 0.08364259451627731, + "loss_ib": 0.0013735044049099088, + "step": 3827 + }, + { + "ce_ib": 2.977634906768799, + "ce_orig": 0.8311402201652527, + "epoch": 1.1003666690631966, + "kl_loss": 0.05394564941525459, + "loss_ib": 0.0008372199372388422, + "step": 3827 + }, + { + "ce_ib": 2.856811761856079, + "ce_orig": 0.49802178144454956, + "epoch": 1.1003666690631966, + "kl_loss": 0.051307313144207, + "loss_ib": 0.000798754277639091, + "step": 3827 + }, + { + "ce_ib": 4.872696399688721, + "ce_orig": 1.2220277786254883, + "epoch": 1.100654252642174, + "kl_loss": 0.04321812093257904, + "loss_ib": 0.0009194507729262114, + "step": 3828 + }, + { + "ce_ib": 5.73999547958374, + "ce_orig": 0.8103057742118835, + "epoch": 1.100654252642174, + "kl_loss": 0.05524517223238945, + "loss_ib": 0.0011264513013884425, + "step": 3828 + }, + { + "ce_ib": 2.9674973487854004, + "ce_orig": 0.7476353049278259, + "epoch": 1.100654252642174, + "kl_loss": 0.0552954338490963, + "loss_ib": 0.0008497040253132582, + "step": 3828 + }, + { + "ce_ib": 4.530817985534668, + "ce_orig": 0.9474941492080688, + "epoch": 1.100654252642174, + "kl_loss": 0.045268233865499496, + "loss_ib": 0.0009057640563696623, + "step": 3828 + }, + { + "ce_ib": 4.363532543182373, + "ce_orig": 1.2317763566970825, + "epoch": 1.1009418362211518, + "kl_loss": 0.04805910587310791, + "loss_ib": 0.0009169442928396165, + "step": 3829 + }, + { + "ce_ib": 2.700233221054077, + "ce_orig": 0.39904090762138367, + "epoch": 1.1009418362211518, + "kl_loss": 0.047461602836847305, + "loss_ib": 0.0007446393137797713, + "step": 3829 + }, + { + "ce_ib": 3.229630947113037, + "ce_orig": 0.7137423157691956, + "epoch": 1.1009418362211518, + "kl_loss": 0.06842871010303497, + "loss_ib": 0.0010072501609101892, + "step": 3829 + }, + { + "ce_ib": 3.9464244842529297, + "ce_orig": 0.9358215928077698, + "epoch": 1.1009418362211518, + "kl_loss": 0.055991001427173615, + "loss_ib": 0.0009545524371787906, + "step": 3829 + }, + { + "epoch": 1.1012294198001293, + "grad_norm": 0.11375933140516281, + "learning_rate": 3.657047786501668e-05, + "loss": 0.8009, + "step": 3830 + }, + { + "ce_ib": 3.9944636821746826, + "ce_orig": 0.7307642698287964, + "epoch": 1.1012294198001293, + "kl_loss": 0.04872974008321762, + "loss_ib": 0.0008867437718436122, + "step": 3830 + }, + { + "ce_ib": 2.552595853805542, + "ce_orig": 0.6385477781295776, + "epoch": 1.1012294198001293, + "kl_loss": 0.03752988949418068, + "loss_ib": 0.0006305584101937711, + "step": 3830 + }, + { + "ce_ib": 3.455413579940796, + "ce_orig": 0.7350919246673584, + "epoch": 1.1012294198001293, + "kl_loss": 0.03597482293844223, + "loss_ib": 0.0007052895380184054, + "step": 3830 + }, + { + "ce_ib": 2.9181761741638184, + "ce_orig": 0.4759095013141632, + "epoch": 1.1012294198001293, + "kl_loss": 0.03349301218986511, + "loss_ib": 0.0006267477292567492, + "step": 3830 + }, + { + "ce_ib": 2.030355215072632, + "ce_orig": 0.5667294263839722, + "epoch": 1.101517003379107, + "kl_loss": 0.05193133279681206, + "loss_ib": 0.0007223488064482808, + "step": 3831 + }, + { + "ce_ib": 2.8332479000091553, + "ce_orig": 0.7743021249771118, + "epoch": 1.101517003379107, + "kl_loss": 0.047061771154403687, + "loss_ib": 0.0007539424696005881, + "step": 3831 + }, + { + "ce_ib": 2.912588357925415, + "ce_orig": 0.5660434365272522, + "epoch": 1.101517003379107, + "kl_loss": 0.0379185751080513, + "loss_ib": 0.0006704445695504546, + "step": 3831 + }, + { + "ce_ib": 2.9922890663146973, + "ce_orig": 0.8911653757095337, + "epoch": 1.101517003379107, + "kl_loss": 0.035539254546165466, + "loss_ib": 0.0006546213990077376, + "step": 3831 + }, + { + "ce_ib": 1.9697740077972412, + "ce_orig": 0.31964248418807983, + "epoch": 1.1018045869580848, + "kl_loss": 0.03783689811825752, + "loss_ib": 0.0005753463483415544, + "step": 3832 + }, + { + "ce_ib": 4.711253643035889, + "ce_orig": 1.0192314386367798, + "epoch": 1.1018045869580848, + "kl_loss": 0.06442011147737503, + "loss_ib": 0.001115326420404017, + "step": 3832 + }, + { + "ce_ib": 3.893400192260742, + "ce_orig": 0.7004523873329163, + "epoch": 1.1018045869580848, + "kl_loss": 0.04039538651704788, + "loss_ib": 0.0007932938169687986, + "step": 3832 + }, + { + "ce_ib": 1.9443209171295166, + "ce_orig": 0.6614882349967957, + "epoch": 1.1018045869580848, + "kl_loss": 0.025029689073562622, + "loss_ib": 0.0004447289393283427, + "step": 3832 + }, + { + "ce_ib": 3.420520067214966, + "ce_orig": 0.9186857342720032, + "epoch": 1.1020921705370623, + "kl_loss": 0.06171276792883873, + "loss_ib": 0.0009591796551831067, + "step": 3833 + }, + { + "ce_ib": 2.842515468597412, + "ce_orig": 0.3906824290752411, + "epoch": 1.1020921705370623, + "kl_loss": 0.07205262035131454, + "loss_ib": 0.0010047777323052287, + "step": 3833 + }, + { + "ce_ib": 3.2631707191467285, + "ce_orig": 0.8538708686828613, + "epoch": 1.1020921705370623, + "kl_loss": 0.03771248459815979, + "loss_ib": 0.0007034419104456902, + "step": 3833 + }, + { + "ce_ib": 3.858712911605835, + "ce_orig": 0.9837356209754944, + "epoch": 1.1020921705370623, + "kl_loss": 0.0463130846619606, + "loss_ib": 0.0008490021573379636, + "step": 3833 + }, + { + "ce_ib": 5.0987772941589355, + "ce_orig": 0.7687249779701233, + "epoch": 1.10237975411604, + "kl_loss": 0.05463729798793793, + "loss_ib": 0.0010562506504356861, + "step": 3834 + }, + { + "ce_ib": 2.5856900215148926, + "ce_orig": 0.4897224009037018, + "epoch": 1.10237975411604, + "kl_loss": 0.03807024657726288, + "loss_ib": 0.0006392713985405862, + "step": 3834 + }, + { + "ce_ib": 3.8137335777282715, + "ce_orig": 1.1139214038848877, + "epoch": 1.10237975411604, + "kl_loss": 0.042910508811473846, + "loss_ib": 0.0008104784647002816, + "step": 3834 + }, + { + "ce_ib": 2.879122734069824, + "ce_orig": 0.6457798480987549, + "epoch": 1.10237975411604, + "kl_loss": 0.04260121285915375, + "loss_ib": 0.0007139243534766138, + "step": 3834 + }, + { + "epoch": 1.1026673376950176, + "grad_norm": 0.0972626581788063, + "learning_rate": 3.653606580067779e-05, + "loss": 0.7655, + "step": 3835 + }, + { + "ce_ib": 2.7134146690368652, + "ce_orig": 0.4781295359134674, + "epoch": 1.1026673376950176, + "kl_loss": 0.06742371618747711, + "loss_ib": 0.0009455786203034222, + "step": 3835 + }, + { + "ce_ib": 2.3493244647979736, + "ce_orig": 0.6051815152168274, + "epoch": 1.1026673376950176, + "kl_loss": 0.05268208682537079, + "loss_ib": 0.0007617532974109054, + "step": 3835 + }, + { + "ce_ib": 2.823765993118286, + "ce_orig": 0.5208446979522705, + "epoch": 1.1026673376950176, + "kl_loss": 0.04749230295419693, + "loss_ib": 0.0007572996546514332, + "step": 3835 + }, + { + "ce_ib": 2.5067808628082275, + "ce_orig": 0.7204694151878357, + "epoch": 1.1026673376950176, + "kl_loss": 0.029943149536848068, + "loss_ib": 0.0005501096020452678, + "step": 3835 + }, + { + "ce_ib": 4.088206768035889, + "ce_orig": 0.6357128620147705, + "epoch": 1.1029549212739953, + "kl_loss": 0.06653685122728348, + "loss_ib": 0.0010741892037913203, + "step": 3836 + }, + { + "ce_ib": 3.9321675300598145, + "ce_orig": 0.7074470520019531, + "epoch": 1.1029549212739953, + "kl_loss": 0.026471365243196487, + "loss_ib": 0.0006579303881153464, + "step": 3836 + }, + { + "ce_ib": 2.8160383701324463, + "ce_orig": 0.7218390107154846, + "epoch": 1.1029549212739953, + "kl_loss": 0.04523658752441406, + "loss_ib": 0.0007339697331190109, + "step": 3836 + }, + { + "ce_ib": 4.392248153686523, + "ce_orig": 0.9713643193244934, + "epoch": 1.1029549212739953, + "kl_loss": 0.045748621225357056, + "loss_ib": 0.0008967110188677907, + "step": 3836 + }, + { + "ce_ib": 3.8657689094543457, + "ce_orig": 1.0166844129562378, + "epoch": 1.1032425048529728, + "kl_loss": 0.03868819773197174, + "loss_ib": 0.0007734588580206037, + "step": 3837 + }, + { + "ce_ib": 2.2224481105804443, + "ce_orig": 0.5802907943725586, + "epoch": 1.1032425048529728, + "kl_loss": 0.05032535642385483, + "loss_ib": 0.0007254983065649867, + "step": 3837 + }, + { + "ce_ib": 4.507564067840576, + "ce_orig": 0.7998011708259583, + "epoch": 1.1032425048529728, + "kl_loss": 0.057794779539108276, + "loss_ib": 0.0010287042241543531, + "step": 3837 + }, + { + "ce_ib": 4.83842658996582, + "ce_orig": 1.0624010562896729, + "epoch": 1.1032425048529728, + "kl_loss": 0.024372432380914688, + "loss_ib": 0.0007275668904185295, + "step": 3837 + }, + { + "ce_ib": 3.8005778789520264, + "ce_orig": 1.037305235862732, + "epoch": 1.1035300884319506, + "kl_loss": 0.02487851306796074, + "loss_ib": 0.0006288428558036685, + "step": 3838 + }, + { + "ce_ib": 4.064548492431641, + "ce_orig": 0.9640501141548157, + "epoch": 1.1035300884319506, + "kl_loss": 0.08853332698345184, + "loss_ib": 0.0012917880667373538, + "step": 3838 + }, + { + "ce_ib": 4.82650899887085, + "ce_orig": 0.9905003309249878, + "epoch": 1.1035300884319506, + "kl_loss": 0.06331103295087814, + "loss_ib": 0.0011157612316310406, + "step": 3838 + }, + { + "ce_ib": 4.408515453338623, + "ce_orig": 0.9252334237098694, + "epoch": 1.1035300884319506, + "kl_loss": 0.06623080372810364, + "loss_ib": 0.0011031595058739185, + "step": 3838 + }, + { + "ce_ib": 2.678386688232422, + "ce_orig": 0.9518355131149292, + "epoch": 1.103817672010928, + "kl_loss": 0.0404869019985199, + "loss_ib": 0.0006727076834067702, + "step": 3839 + }, + { + "ce_ib": 1.6583446264266968, + "ce_orig": 0.3312152922153473, + "epoch": 1.103817672010928, + "kl_loss": 0.05839679017663002, + "loss_ib": 0.0007498023333027959, + "step": 3839 + }, + { + "ce_ib": 2.1562206745147705, + "ce_orig": 0.4592929780483246, + "epoch": 1.103817672010928, + "kl_loss": 0.04110049456357956, + "loss_ib": 0.0006266269483603537, + "step": 3839 + }, + { + "ce_ib": 4.328341484069824, + "ce_orig": 0.6627485156059265, + "epoch": 1.103817672010928, + "kl_loss": 0.02985633723437786, + "loss_ib": 0.0007313974783755839, + "step": 3839 + }, + { + "epoch": 1.1041052555899058, + "grad_norm": 0.0889381468296051, + "learning_rate": 3.6501625943278805e-05, + "loss": 0.8414, + "step": 3840 + }, + { + "ce_ib": 2.198913097381592, + "ce_orig": 0.7176544070243835, + "epoch": 1.1041052555899058, + "kl_loss": 0.051019299775362015, + "loss_ib": 0.0007300843135453761, + "step": 3840 + }, + { + "ce_ib": 2.9375271797180176, + "ce_orig": 0.7337110638618469, + "epoch": 1.1041052555899058, + "kl_loss": 0.04931403324007988, + "loss_ib": 0.0007868930697441101, + "step": 3840 + }, + { + "ce_ib": 3.102407693862915, + "ce_orig": 0.7173675298690796, + "epoch": 1.1041052555899058, + "kl_loss": 0.054585106670856476, + "loss_ib": 0.000856091792229563, + "step": 3840 + }, + { + "ce_ib": 2.3947460651397705, + "ce_orig": 0.5630067586898804, + "epoch": 1.1041052555899058, + "kl_loss": 0.029347212985157967, + "loss_ib": 0.0005329467239789665, + "step": 3840 + }, + { + "ce_ib": 2.8512489795684814, + "ce_orig": 0.5732477307319641, + "epoch": 1.1043928391688835, + "kl_loss": 0.042556844651699066, + "loss_ib": 0.0007106933044269681, + "step": 3841 + }, + { + "ce_ib": 3.2531421184539795, + "ce_orig": 0.5271488428115845, + "epoch": 1.1043928391688835, + "kl_loss": 0.050467267632484436, + "loss_ib": 0.000829986936878413, + "step": 3841 + }, + { + "ce_ib": 2.8965823650360107, + "ce_orig": 0.4934127926826477, + "epoch": 1.1043928391688835, + "kl_loss": 0.04206157475709915, + "loss_ib": 0.0007102739764377475, + "step": 3841 + }, + { + "ce_ib": 4.800617218017578, + "ce_orig": 1.0348652601242065, + "epoch": 1.1043928391688835, + "kl_loss": 0.04997619614005089, + "loss_ib": 0.000979823642410338, + "step": 3841 + }, + { + "ce_ib": 1.977502465248108, + "ce_orig": 0.312715619802475, + "epoch": 1.104680422747861, + "kl_loss": 0.08061082661151886, + "loss_ib": 0.0010038585169240832, + "step": 3842 + }, + { + "ce_ib": 2.368786096572876, + "ce_orig": 0.8035383820533752, + "epoch": 1.104680422747861, + "kl_loss": 0.17154766619205475, + "loss_ib": 0.0019523551454767585, + "step": 3842 + }, + { + "ce_ib": 2.0508649349212646, + "ce_orig": 0.4762822687625885, + "epoch": 1.104680422747861, + "kl_loss": 0.034142062067985535, + "loss_ib": 0.0005465070717036724, + "step": 3842 + }, + { + "ce_ib": 2.93483829498291, + "ce_orig": 0.8429305553436279, + "epoch": 1.104680422747861, + "kl_loss": 0.041972942650318146, + "loss_ib": 0.0007132132304832339, + "step": 3842 + }, + { + "ce_ib": 3.9329633712768555, + "ce_orig": 0.8109311461448669, + "epoch": 1.1049680063268388, + "kl_loss": 0.09754611551761627, + "loss_ib": 0.0013687574537470937, + "step": 3843 + }, + { + "ce_ib": 4.070502758026123, + "ce_orig": 0.9792804718017578, + "epoch": 1.1049680063268388, + "kl_loss": 0.04661601781845093, + "loss_ib": 0.0008732104324735701, + "step": 3843 + }, + { + "ce_ib": 2.0313422679901123, + "ce_orig": 0.5361481308937073, + "epoch": 1.1049680063268388, + "kl_loss": 0.034243740141391754, + "loss_ib": 0.0005455716163851321, + "step": 3843 + }, + { + "ce_ib": 3.2321319580078125, + "ce_orig": 0.6481894850730896, + "epoch": 1.1049680063268388, + "kl_loss": 0.06253796070814133, + "loss_ib": 0.0009485927876085043, + "step": 3843 + }, + { + "ce_ib": 4.640552520751953, + "ce_orig": 0.7296956777572632, + "epoch": 1.1052555899058163, + "kl_loss": 0.13262921571731567, + "loss_ib": 0.0017903473926708102, + "step": 3844 + }, + { + "ce_ib": 2.505295991897583, + "ce_orig": 0.6038931012153625, + "epoch": 1.1052555899058163, + "kl_loss": 0.04253438860177994, + "loss_ib": 0.000675873423460871, + "step": 3844 + }, + { + "ce_ib": 5.015744686126709, + "ce_orig": 0.9920241832733154, + "epoch": 1.1052555899058163, + "kl_loss": 0.03936807066202164, + "loss_ib": 0.000895255187060684, + "step": 3844 + }, + { + "ce_ib": 2.687638521194458, + "ce_orig": 0.6331307888031006, + "epoch": 1.1052555899058163, + "kl_loss": 0.0613764189183712, + "loss_ib": 0.0008825280237942934, + "step": 3844 + }, + { + "epoch": 1.105543173484794, + "grad_norm": 0.11487738788127899, + "learning_rate": 3.6467158375793344e-05, + "loss": 0.7982, + "step": 3845 + }, + { + "ce_ib": 5.256848335266113, + "ce_orig": 1.0624990463256836, + "epoch": 1.105543173484794, + "kl_loss": 0.0491204708814621, + "loss_ib": 0.0010168894659727812, + "step": 3845 + }, + { + "ce_ib": 3.0742173194885254, + "ce_orig": 0.8627798557281494, + "epoch": 1.105543173484794, + "kl_loss": 0.03499702364206314, + "loss_ib": 0.0006573919672518969, + "step": 3845 + }, + { + "ce_ib": 1.9910790920257568, + "ce_orig": 0.5435546040534973, + "epoch": 1.105543173484794, + "kl_loss": 0.04709786921739578, + "loss_ib": 0.0006700865924358368, + "step": 3845 + }, + { + "ce_ib": 2.256030559539795, + "ce_orig": 0.6110987663269043, + "epoch": 1.105543173484794, + "kl_loss": 0.04752620682120323, + "loss_ib": 0.000700865057297051, + "step": 3845 + }, + { + "ce_ib": 3.051478862762451, + "ce_orig": 0.647960901260376, + "epoch": 1.1058307570637718, + "kl_loss": 0.04115290194749832, + "loss_ib": 0.0007166768191382289, + "step": 3846 + }, + { + "ce_ib": 3.671511650085449, + "ce_orig": 0.8651738166809082, + "epoch": 1.1058307570637718, + "kl_loss": 0.02897556871175766, + "loss_ib": 0.0006569068063981831, + "step": 3846 + }, + { + "ce_ib": 3.816725015640259, + "ce_orig": 1.1010342836380005, + "epoch": 1.1058307570637718, + "kl_loss": 0.04151561111211777, + "loss_ib": 0.0007968285353854299, + "step": 3846 + }, + { + "ce_ib": 5.4980878829956055, + "ce_orig": 1.5947667360305786, + "epoch": 1.1058307570637718, + "kl_loss": 0.09743601083755493, + "loss_ib": 0.0015241687651723623, + "step": 3846 + }, + { + "ce_ib": 2.0685274600982666, + "ce_orig": 0.487117737531662, + "epoch": 1.1061183406427493, + "kl_loss": 0.0593576654791832, + "loss_ib": 0.0008004294359125197, + "step": 3847 + }, + { + "ce_ib": 2.189898729324341, + "ce_orig": 0.3698190450668335, + "epoch": 1.1061183406427493, + "kl_loss": 0.051306821405887604, + "loss_ib": 0.0007320580189116299, + "step": 3847 + }, + { + "ce_ib": 2.818258047103882, + "ce_orig": 0.4741562008857727, + "epoch": 1.1061183406427493, + "kl_loss": 0.052800748497247696, + "loss_ib": 0.0008098332327790558, + "step": 3847 + }, + { + "ce_ib": 3.714601516723633, + "ce_orig": 0.9860591888427734, + "epoch": 1.1061183406427493, + "kl_loss": 0.05041637271642685, + "loss_ib": 0.0008756237803027034, + "step": 3847 + }, + { + "ce_ib": 2.3969719409942627, + "ce_orig": 0.5021514296531677, + "epoch": 1.106405924221727, + "kl_loss": 0.027578283101320267, + "loss_ib": 0.0005154800019226968, + "step": 3848 + }, + { + "ce_ib": 5.833457946777344, + "ce_orig": 1.2593744993209839, + "epoch": 1.106405924221727, + "kl_loss": 0.04404546692967415, + "loss_ib": 0.0010238004615530372, + "step": 3848 + }, + { + "ce_ib": 3.6851401329040527, + "ce_orig": 0.9712732434272766, + "epoch": 1.106405924221727, + "kl_loss": 0.03273633494973183, + "loss_ib": 0.0006958773592486978, + "step": 3848 + }, + { + "ce_ib": 2.823007106781006, + "ce_orig": 0.6876266598701477, + "epoch": 1.106405924221727, + "kl_loss": 0.039216749370098114, + "loss_ib": 0.000674468174111098, + "step": 3848 + }, + { + "ce_ib": 2.7541661262512207, + "ce_orig": 0.46670353412628174, + "epoch": 1.1066935078007045, + "kl_loss": 0.034172073006629944, + "loss_ib": 0.0006171372951939702, + "step": 3849 + }, + { + "ce_ib": 4.048920154571533, + "ce_orig": 1.1670331954956055, + "epoch": 1.1066935078007045, + "kl_loss": 0.08899885416030884, + "loss_ib": 0.0012948805233463645, + "step": 3849 + }, + { + "ce_ib": 3.091805934906006, + "ce_orig": 0.8262961506843567, + "epoch": 1.1066935078007045, + "kl_loss": 0.07869359850883484, + "loss_ib": 0.0010961166117340326, + "step": 3849 + }, + { + "ce_ib": 3.9143102169036865, + "ce_orig": 1.1284246444702148, + "epoch": 1.1066935078007045, + "kl_loss": 0.040429383516311646, + "loss_ib": 0.0007957248017191887, + "step": 3849 + }, + { + "epoch": 1.1069810913796823, + "grad_norm": 0.09594079107046127, + "learning_rate": 3.6432663181261764e-05, + "loss": 0.8127, + "step": 3850 + }, + { + "ce_ib": 5.798057556152344, + "ce_orig": 1.703369140625, + "epoch": 1.1069810913796823, + "kl_loss": 0.04824857413768768, + "loss_ib": 0.0010622914414852858, + "step": 3850 + }, + { + "ce_ib": 2.855696439743042, + "ce_orig": 0.4998306930065155, + "epoch": 1.1069810913796823, + "kl_loss": 0.03245248273015022, + "loss_ib": 0.0006100944010540843, + "step": 3850 + }, + { + "ce_ib": 2.1151394844055176, + "ce_orig": 0.41041967272758484, + "epoch": 1.1069810913796823, + "kl_loss": 0.04474932700395584, + "loss_ib": 0.0006590071716345847, + "step": 3850 + }, + { + "ce_ib": 3.599562168121338, + "ce_orig": 0.6907743215560913, + "epoch": 1.1069810913796823, + "kl_loss": 0.04703041911125183, + "loss_ib": 0.0008302603382617235, + "step": 3850 + }, + { + "ce_ib": 2.705535650253296, + "ce_orig": 0.35333743691444397, + "epoch": 1.1072686749586598, + "kl_loss": 0.07495532929897308, + "loss_ib": 0.0010201068362221122, + "step": 3851 + }, + { + "ce_ib": 2.8161356449127197, + "ce_orig": 0.5195083618164062, + "epoch": 1.1072686749586598, + "kl_loss": 0.03148350864648819, + "loss_ib": 0.0005964486044831574, + "step": 3851 + }, + { + "ce_ib": 2.1120479106903076, + "ce_orig": 0.5151882171630859, + "epoch": 1.1072686749586598, + "kl_loss": 0.03808164596557617, + "loss_ib": 0.0005920212715864182, + "step": 3851 + }, + { + "ce_ib": 3.0274500846862793, + "ce_orig": 0.5859204530715942, + "epoch": 1.1072686749586598, + "kl_loss": 0.039752379059791565, + "loss_ib": 0.0007002687780186534, + "step": 3851 + }, + { + "ce_ib": 3.8025412559509277, + "ce_orig": 1.0930181741714478, + "epoch": 1.1075562585376375, + "kl_loss": 0.03917575627565384, + "loss_ib": 0.000772011699154973, + "step": 3852 + }, + { + "ce_ib": 3.7303433418273926, + "ce_orig": 0.9649632573127747, + "epoch": 1.1075562585376375, + "kl_loss": 0.048007696866989136, + "loss_ib": 0.0008531112689524889, + "step": 3852 + }, + { + "ce_ib": 2.5160791873931885, + "ce_orig": 0.7088150978088379, + "epoch": 1.1075562585376375, + "kl_loss": 0.03412005677819252, + "loss_ib": 0.0005928084719926119, + "step": 3852 + }, + { + "ce_ib": 4.884856224060059, + "ce_orig": 1.2275387048721313, + "epoch": 1.1075562585376375, + "kl_loss": 0.058622732758522034, + "loss_ib": 0.0010747129563242197, + "step": 3852 + }, + { + "ce_ib": 4.1551995277404785, + "ce_orig": 1.0441977977752686, + "epoch": 1.1078438421166152, + "kl_loss": 0.0465482696890831, + "loss_ib": 0.000881002692040056, + "step": 3853 + }, + { + "ce_ib": 3.5796215534210205, + "ce_orig": 1.0915192365646362, + "epoch": 1.1078438421166152, + "kl_loss": 0.041388001292943954, + "loss_ib": 0.0007718421402387321, + "step": 3853 + }, + { + "ce_ib": 3.7632839679718018, + "ce_orig": 1.0199986696243286, + "epoch": 1.1078438421166152, + "kl_loss": 0.04970633238554001, + "loss_ib": 0.0008733916911296546, + "step": 3853 + }, + { + "ce_ib": 6.267026424407959, + "ce_orig": 1.593409776687622, + "epoch": 1.1078438421166152, + "kl_loss": 0.03360064700245857, + "loss_ib": 0.0009627090184949338, + "step": 3853 + }, + { + "ce_ib": 3.3954293727874756, + "ce_orig": 0.8371408581733704, + "epoch": 1.1081314256955928, + "kl_loss": 0.05625945329666138, + "loss_ib": 0.0009021374280564487, + "step": 3854 + }, + { + "ce_ib": 2.153508186340332, + "ce_orig": 0.3827504515647888, + "epoch": 1.1081314256955928, + "kl_loss": 0.09397577494382858, + "loss_ib": 0.001155108562670648, + "step": 3854 + }, + { + "ce_ib": 2.428509473800659, + "ce_orig": 0.655569851398468, + "epoch": 1.1081314256955928, + "kl_loss": 0.03143496811389923, + "loss_ib": 0.0005572005757130682, + "step": 3854 + }, + { + "ce_ib": 6.373288154602051, + "ce_orig": 1.913136601448059, + "epoch": 1.1081314256955928, + "kl_loss": 0.052737366408109665, + "loss_ib": 0.0011647024657577276, + "step": 3854 + }, + { + "epoch": 1.1084190092745705, + "grad_norm": 0.10054439306259155, + "learning_rate": 3.639814044279101e-05, + "loss": 0.8938, + "step": 3855 + }, + { + "ce_ib": 1.7447012662887573, + "ce_orig": 0.4761362671852112, + "epoch": 1.1084190092745705, + "kl_loss": 0.022633284330368042, + "loss_ib": 0.0004008029354736209, + "step": 3855 + }, + { + "ce_ib": 3.0050926208496094, + "ce_orig": 0.5119544863700867, + "epoch": 1.1084190092745705, + "kl_loss": 0.07679425925016403, + "loss_ib": 0.0010684517910704017, + "step": 3855 + }, + { + "ce_ib": 3.874648332595825, + "ce_orig": 1.1621567010879517, + "epoch": 1.1084190092745705, + "kl_loss": 0.09336425364017487, + "loss_ib": 0.0013211073819547892, + "step": 3855 + }, + { + "ce_ib": 2.186662197113037, + "ce_orig": 0.35797616839408875, + "epoch": 1.1084190092745705, + "kl_loss": 0.028838256374001503, + "loss_ib": 0.0005070487386547029, + "step": 3855 + }, + { + "ce_ib": 3.480687141418457, + "ce_orig": 0.8044514656066895, + "epoch": 1.108706592853548, + "kl_loss": 0.02311914786696434, + "loss_ib": 0.000579260173253715, + "step": 3856 + }, + { + "ce_ib": 4.889618873596191, + "ce_orig": 1.389039397239685, + "epoch": 1.108706592853548, + "kl_loss": 0.05335771292448044, + "loss_ib": 0.0010225389851257205, + "step": 3856 + }, + { + "ce_ib": 6.308032035827637, + "ce_orig": 1.4436290264129639, + "epoch": 1.108706592853548, + "kl_loss": 0.0397992767393589, + "loss_ib": 0.0010287959594279528, + "step": 3856 + }, + { + "ce_ib": 2.7573635578155518, + "ce_orig": 0.6597349047660828, + "epoch": 1.108706592853548, + "kl_loss": 0.044434379786252975, + "loss_ib": 0.0007200801046565175, + "step": 3856 + }, + { + "ce_ib": 3.4013538360595703, + "ce_orig": 0.83870929479599, + "epoch": 1.1089941764325257, + "kl_loss": 0.035989388823509216, + "loss_ib": 0.0007000292534939945, + "step": 3857 + }, + { + "ce_ib": 4.070394992828369, + "ce_orig": 0.9143645167350769, + "epoch": 1.1089941764325257, + "kl_loss": 0.04265585541725159, + "loss_ib": 0.0008335980819538236, + "step": 3857 + }, + { + "ce_ib": 3.079624891281128, + "ce_orig": 0.7073559761047363, + "epoch": 1.1089941764325257, + "kl_loss": 0.030289065092802048, + "loss_ib": 0.0006108531379140913, + "step": 3857 + }, + { + "ce_ib": 4.153883934020996, + "ce_orig": 1.1340696811676025, + "epoch": 1.1089941764325257, + "kl_loss": 0.062148772180080414, + "loss_ib": 0.0010368761140853167, + "step": 3857 + }, + { + "ce_ib": 2.4913296699523926, + "ce_orig": 0.49120625853538513, + "epoch": 1.1092817600115032, + "kl_loss": 0.029398465529084206, + "loss_ib": 0.0005431175814010203, + "step": 3858 + }, + { + "ce_ib": 3.8915724754333496, + "ce_orig": 0.3846481740474701, + "epoch": 1.1092817600115032, + "kl_loss": 0.0641409307718277, + "loss_ib": 0.0010305665200576186, + "step": 3858 + }, + { + "ce_ib": 2.7818968296051025, + "ce_orig": 0.6596146821975708, + "epoch": 1.1092817600115032, + "kl_loss": 0.01950271800160408, + "loss_ib": 0.0004732168454211205, + "step": 3858 + }, + { + "ce_ib": 4.360506057739258, + "ce_orig": 1.1977912187576294, + "epoch": 1.1092817600115032, + "kl_loss": 0.04946804791688919, + "loss_ib": 0.0009307310683652759, + "step": 3858 + }, + { + "ce_ib": 2.964751720428467, + "ce_orig": 0.7257793545722961, + "epoch": 1.109569343590481, + "kl_loss": 0.05593553185462952, + "loss_ib": 0.0008558304398320615, + "step": 3859 + }, + { + "ce_ib": 4.957271099090576, + "ce_orig": 1.3117862939834595, + "epoch": 1.109569343590481, + "kl_loss": 0.04014898091554642, + "loss_ib": 0.0008972169016487896, + "step": 3859 + }, + { + "ce_ib": 3.4715912342071533, + "ce_orig": 0.7833749055862427, + "epoch": 1.109569343590481, + "kl_loss": 0.03944779932498932, + "loss_ib": 0.0007416371372528374, + "step": 3859 + }, + { + "ce_ib": 2.435302495956421, + "ce_orig": 0.5999802947044373, + "epoch": 1.109569343590481, + "kl_loss": 0.09319322556257248, + "loss_ib": 0.0011754623847082257, + "step": 3859 + }, + { + "epoch": 1.1098569271694587, + "grad_norm": 0.09606360644102097, + "learning_rate": 3.636359024355436e-05, + "loss": 0.7761, + "step": 3860 + }, + { + "ce_ib": 2.6486032009124756, + "ce_orig": 0.7595046162605286, + "epoch": 1.1098569271694587, + "kl_loss": 0.03130129724740982, + "loss_ib": 0.0005778732593171299, + "step": 3860 + }, + { + "ce_ib": 3.3505032062530518, + "ce_orig": 0.8176609873771667, + "epoch": 1.1098569271694587, + "kl_loss": 0.044649556279182434, + "loss_ib": 0.0007815458229742944, + "step": 3860 + }, + { + "ce_ib": 3.3410403728485107, + "ce_orig": 0.7368934154510498, + "epoch": 1.1098569271694587, + "kl_loss": 0.04764897748827934, + "loss_ib": 0.0008105937740765512, + "step": 3860 + }, + { + "ce_ib": 1.648269534111023, + "ce_orig": 0.4013633131980896, + "epoch": 1.1098569271694587, + "kl_loss": 0.029702743515372276, + "loss_ib": 0.0004618543607648462, + "step": 3860 + }, + { + "ce_ib": 2.1362273693084717, + "ce_orig": 0.7099612355232239, + "epoch": 1.1101445107484362, + "kl_loss": 0.03544216975569725, + "loss_ib": 0.0005680443719029427, + "step": 3861 + }, + { + "ce_ib": 3.2645106315612793, + "ce_orig": 0.8581206202507019, + "epoch": 1.1101445107484362, + "kl_loss": 0.02927486039698124, + "loss_ib": 0.0006191996508277953, + "step": 3861 + }, + { + "ce_ib": 3.891680955886841, + "ce_orig": 0.8557149171829224, + "epoch": 1.1101445107484362, + "kl_loss": 0.02248266711831093, + "loss_ib": 0.000613994721788913, + "step": 3861 + }, + { + "ce_ib": 2.411515951156616, + "ce_orig": 0.5008821487426758, + "epoch": 1.1101445107484362, + "kl_loss": 0.05046025663614273, + "loss_ib": 0.000745754165109247, + "step": 3861 + }, + { + "ce_ib": 3.0269598960876465, + "ce_orig": 1.0693217515945435, + "epoch": 1.110432094327414, + "kl_loss": 0.03337544575333595, + "loss_ib": 0.000636450422462076, + "step": 3862 + }, + { + "ce_ib": 2.540114402770996, + "ce_orig": 0.49817395210266113, + "epoch": 1.110432094327414, + "kl_loss": 0.04390503466129303, + "loss_ib": 0.0006930617964826524, + "step": 3862 + }, + { + "ce_ib": 3.178797960281372, + "ce_orig": 0.7715371251106262, + "epoch": 1.110432094327414, + "kl_loss": 0.04478475823998451, + "loss_ib": 0.0007657273672521114, + "step": 3862 + }, + { + "ce_ib": 3.533409595489502, + "ce_orig": 0.9730919599533081, + "epoch": 1.110432094327414, + "kl_loss": 0.03896061331033707, + "loss_ib": 0.0007429470424540341, + "step": 3862 + }, + { + "ce_ib": 4.27614688873291, + "ce_orig": 0.9894270896911621, + "epoch": 1.1107196779063915, + "kl_loss": 0.05527768284082413, + "loss_ib": 0.0009803915163502097, + "step": 3863 + }, + { + "ce_ib": 3.2628934383392334, + "ce_orig": 1.1418771743774414, + "epoch": 1.1107196779063915, + "kl_loss": 0.03983055800199509, + "loss_ib": 0.0007245948654599488, + "step": 3863 + }, + { + "ce_ib": 2.6785285472869873, + "ce_orig": 0.6296389102935791, + "epoch": 1.1107196779063915, + "kl_loss": 0.020587310194969177, + "loss_ib": 0.0004737259296234697, + "step": 3863 + }, + { + "ce_ib": 4.364458084106445, + "ce_orig": 1.0720957517623901, + "epoch": 1.1107196779063915, + "kl_loss": 0.06708066910505295, + "loss_ib": 0.0011072525521740317, + "step": 3863 + }, + { + "ce_ib": 3.536761522293091, + "ce_orig": 0.888566792011261, + "epoch": 1.1110072614853692, + "kl_loss": 0.05332178622484207, + "loss_ib": 0.0008868940058164299, + "step": 3864 + }, + { + "ce_ib": 2.8322246074676514, + "ce_orig": 0.8979050517082214, + "epoch": 1.1110072614853692, + "kl_loss": 0.028530221432447433, + "loss_ib": 0.0005685246433131397, + "step": 3864 + }, + { + "ce_ib": 3.4912469387054443, + "ce_orig": 0.8684653043746948, + "epoch": 1.1110072614853692, + "kl_loss": 0.07119659334421158, + "loss_ib": 0.001061090617440641, + "step": 3864 + }, + { + "ce_ib": 3.264401912689209, + "ce_orig": 0.6336137056350708, + "epoch": 1.1110072614853692, + "kl_loss": 0.06462714076042175, + "loss_ib": 0.0009727115975692868, + "step": 3864 + }, + { + "epoch": 1.1112948450643467, + "grad_norm": 0.09225373715162277, + "learning_rate": 3.632901266679127e-05, + "loss": 0.7795, + "step": 3865 + }, + { + "ce_ib": 2.7677114009857178, + "ce_orig": 0.6370932459831238, + "epoch": 1.1112948450643467, + "kl_loss": 0.04371907189488411, + "loss_ib": 0.000713961839210242, + "step": 3865 + }, + { + "ce_ib": 2.807427167892456, + "ce_orig": 0.7470698356628418, + "epoch": 1.1112948450643467, + "kl_loss": 0.04912947118282318, + "loss_ib": 0.0007720374269410968, + "step": 3865 + }, + { + "ce_ib": 3.0790622234344482, + "ce_orig": 0.5984537601470947, + "epoch": 1.1112948450643467, + "kl_loss": 0.05858683958649635, + "loss_ib": 0.000893774616997689, + "step": 3865 + }, + { + "ce_ib": 2.642993450164795, + "ce_orig": 0.6979913115501404, + "epoch": 1.1112948450643467, + "kl_loss": 0.03969787433743477, + "loss_ib": 0.0006612780271098018, + "step": 3865 + }, + { + "ce_ib": 2.7219929695129395, + "ce_orig": 0.7931494116783142, + "epoch": 1.1115824286433245, + "kl_loss": 0.026366792619228363, + "loss_ib": 0.0005358671769499779, + "step": 3866 + }, + { + "ce_ib": 4.262517929077148, + "ce_orig": 1.2828398942947388, + "epoch": 1.1115824286433245, + "kl_loss": 0.04165147244930267, + "loss_ib": 0.0008427664288319647, + "step": 3866 + }, + { + "ce_ib": 2.907156467437744, + "ce_orig": 0.5371217131614685, + "epoch": 1.1115824286433245, + "kl_loss": 0.04516967386007309, + "loss_ib": 0.000742412346880883, + "step": 3866 + }, + { + "ce_ib": 4.2548747062683105, + "ce_orig": 0.5638944506645203, + "epoch": 1.1115824286433245, + "kl_loss": 0.037037450820207596, + "loss_ib": 0.0007958619389683008, + "step": 3866 + }, + { + "ce_ib": 2.1023313999176025, + "ce_orig": 0.597545862197876, + "epoch": 1.1118700122223022, + "kl_loss": 0.02281883731484413, + "loss_ib": 0.00043842149898409843, + "step": 3867 + }, + { + "ce_ib": 3.9413723945617676, + "ce_orig": 1.1187529563903809, + "epoch": 1.1118700122223022, + "kl_loss": 0.04637856036424637, + "loss_ib": 0.0008579228888265789, + "step": 3867 + }, + { + "ce_ib": 3.12247371673584, + "ce_orig": 0.7549583315849304, + "epoch": 1.1118700122223022, + "kl_loss": 0.053317226469516754, + "loss_ib": 0.0008454195922240615, + "step": 3867 + }, + { + "ce_ib": 2.829617500305176, + "ce_orig": 0.6996360421180725, + "epoch": 1.1118700122223022, + "kl_loss": 0.0450618639588356, + "loss_ib": 0.0007335803238674998, + "step": 3867 + }, + { + "ce_ib": 3.0687854290008545, + "ce_orig": 0.761000394821167, + "epoch": 1.1121575958012797, + "kl_loss": 0.03871835395693779, + "loss_ib": 0.0006940620951354504, + "step": 3868 + }, + { + "ce_ib": 1.7553797960281372, + "ce_orig": 0.4141480624675751, + "epoch": 1.1121575958012797, + "kl_loss": 0.0320589505136013, + "loss_ib": 0.0004961274680681527, + "step": 3868 + }, + { + "ce_ib": 2.080410957336426, + "ce_orig": 0.4366541802883148, + "epoch": 1.1121575958012797, + "kl_loss": 0.04989089071750641, + "loss_ib": 0.000706949969753623, + "step": 3868 + }, + { + "ce_ib": 4.8835368156433105, + "ce_orig": 1.4350630044937134, + "epoch": 1.1121575958012797, + "kl_loss": 0.0599052868783474, + "loss_ib": 0.0010874065337702632, + "step": 3868 + }, + { + "ce_ib": 2.056960105895996, + "ce_orig": 0.6050542593002319, + "epoch": 1.1124451793802574, + "kl_loss": 0.03423459082841873, + "loss_ib": 0.0005480418913066387, + "step": 3869 + }, + { + "ce_ib": 1.8623228073120117, + "ce_orig": 0.3928214907646179, + "epoch": 1.1124451793802574, + "kl_loss": 0.03222876042127609, + "loss_ib": 0.000508519820868969, + "step": 3869 + }, + { + "ce_ib": 2.1445462703704834, + "ce_orig": 0.431418776512146, + "epoch": 1.1124451793802574, + "kl_loss": 0.04726073145866394, + "loss_ib": 0.000687061925418675, + "step": 3869 + }, + { + "ce_ib": 5.007286548614502, + "ce_orig": 1.5805776119232178, + "epoch": 1.1124451793802574, + "kl_loss": 0.044814616441726685, + "loss_ib": 0.00094887480372563, + "step": 3869 + }, + { + "epoch": 1.112732762959235, + "grad_norm": 0.1041933074593544, + "learning_rate": 3.629440779580715e-05, + "loss": 0.8681, + "step": 3870 + }, + { + "ce_ib": 2.633333683013916, + "ce_orig": 0.7542537450790405, + "epoch": 1.112732762959235, + "kl_loss": 0.044877104461193085, + "loss_ib": 0.0007121044327504933, + "step": 3870 + }, + { + "ce_ib": 4.9219183921813965, + "ce_orig": 1.3510658740997314, + "epoch": 1.112732762959235, + "kl_loss": 0.06342262029647827, + "loss_ib": 0.0011264180066064, + "step": 3870 + }, + { + "ce_ib": 3.3148858547210693, + "ce_orig": 0.6053744554519653, + "epoch": 1.112732762959235, + "kl_loss": 0.04492006450891495, + "loss_ib": 0.0007806892390362918, + "step": 3870 + }, + { + "ce_ib": 3.2620060443878174, + "ce_orig": 0.6924232244491577, + "epoch": 1.112732762959235, + "kl_loss": 0.0418190062046051, + "loss_ib": 0.0007443905924446881, + "step": 3870 + }, + { + "ce_ib": 1.5601873397827148, + "ce_orig": 0.41792359948158264, + "epoch": 1.1130203465382127, + "kl_loss": 0.04706965386867523, + "loss_ib": 0.0006267152493819594, + "step": 3871 + }, + { + "ce_ib": 3.184317111968994, + "ce_orig": 0.6006103157997131, + "epoch": 1.1130203465382127, + "kl_loss": 0.05924304574728012, + "loss_ib": 0.0009108621161431074, + "step": 3871 + }, + { + "ce_ib": 3.8600246906280518, + "ce_orig": 1.303369164466858, + "epoch": 1.1130203465382127, + "kl_loss": 0.0415462888777256, + "loss_ib": 0.0008014653576537967, + "step": 3871 + }, + { + "ce_ib": 5.671487808227539, + "ce_orig": 1.2845674753189087, + "epoch": 1.1130203465382127, + "kl_loss": 0.06206531450152397, + "loss_ib": 0.0011878019431605935, + "step": 3871 + }, + { + "ce_ib": 2.7906124591827393, + "ce_orig": 0.5917994379997253, + "epoch": 1.1133079301171902, + "kl_loss": 0.03133557736873627, + "loss_ib": 0.0005924169672653079, + "step": 3872 + }, + { + "ce_ib": 3.560506820678711, + "ce_orig": 0.5162822008132935, + "epoch": 1.1133079301171902, + "kl_loss": 0.04560749977827072, + "loss_ib": 0.0008121256250888109, + "step": 3872 + }, + { + "ce_ib": 5.067206382751465, + "ce_orig": 0.8839568495750427, + "epoch": 1.1133079301171902, + "kl_loss": 0.06323418021202087, + "loss_ib": 0.0011390623403713107, + "step": 3872 + }, + { + "ce_ib": 3.7891979217529297, + "ce_orig": 1.0436104536056519, + "epoch": 1.1133079301171902, + "kl_loss": 0.04702971875667572, + "loss_ib": 0.0008492169436067343, + "step": 3872 + }, + { + "ce_ib": 5.959282875061035, + "ce_orig": 1.5464142560958862, + "epoch": 1.113595513696168, + "kl_loss": 0.04920826852321625, + "loss_ib": 0.0010880109621211886, + "step": 3873 + }, + { + "ce_ib": 2.2779669761657715, + "ce_orig": 0.527877926826477, + "epoch": 1.113595513696168, + "kl_loss": 0.04977334663271904, + "loss_ib": 0.0007255301461555064, + "step": 3873 + }, + { + "ce_ib": 2.0952024459838867, + "ce_orig": 0.4437862038612366, + "epoch": 1.113595513696168, + "kl_loss": 0.05567578226327896, + "loss_ib": 0.0007662780699320138, + "step": 3873 + }, + { + "ce_ib": 4.47938871383667, + "ce_orig": 1.1530393362045288, + "epoch": 1.113595513696168, + "kl_loss": 0.04461806267499924, + "loss_ib": 0.0008941194391809404, + "step": 3873 + }, + { + "ce_ib": 3.741028070449829, + "ce_orig": 1.2310945987701416, + "epoch": 1.1138830972751457, + "kl_loss": 0.037062253803014755, + "loss_ib": 0.0007447252864949405, + "step": 3874 + }, + { + "ce_ib": 4.676703929901123, + "ce_orig": 1.1108920574188232, + "epoch": 1.1138830972751457, + "kl_loss": 0.034692902117967606, + "loss_ib": 0.0008145993924699724, + "step": 3874 + }, + { + "ce_ib": 2.2004358768463135, + "ce_orig": 0.474051833152771, + "epoch": 1.1138830972751457, + "kl_loss": 0.03437422960996628, + "loss_ib": 0.0005637858412228525, + "step": 3874 + }, + { + "ce_ib": 4.6916046142578125, + "ce_orig": 1.4210591316223145, + "epoch": 1.1138830972751457, + "kl_loss": 0.04375958442687988, + "loss_ib": 0.0009067562641575933, + "step": 3874 + }, + { + "epoch": 1.1141706808541232, + "grad_norm": 0.11914706975221634, + "learning_rate": 3.625977571397315e-05, + "loss": 0.8415, + "step": 3875 + }, + { + "ce_ib": 1.6578775644302368, + "ce_orig": 0.3848751485347748, + "epoch": 1.1141706808541232, + "kl_loss": 0.11239616572856903, + "loss_ib": 0.0012897494016215205, + "step": 3875 + }, + { + "ce_ib": 3.325282573699951, + "ce_orig": 0.6060757637023926, + "epoch": 1.1141706808541232, + "kl_loss": 0.07307100296020508, + "loss_ib": 0.0010632382472977042, + "step": 3875 + }, + { + "ce_ib": 2.4017157554626465, + "ce_orig": 0.7049551606178284, + "epoch": 1.1141706808541232, + "kl_loss": 0.046707164496183395, + "loss_ib": 0.0007072432199493051, + "step": 3875 + }, + { + "ce_ib": 2.607145309448242, + "ce_orig": 0.6118733286857605, + "epoch": 1.1141706808541232, + "kl_loss": 0.06180363893508911, + "loss_ib": 0.0008787508704699576, + "step": 3875 + }, + { + "ce_ib": 3.6315817832946777, + "ce_orig": 1.1105594635009766, + "epoch": 1.114458264433101, + "kl_loss": 0.04725942760705948, + "loss_ib": 0.0008357524638995528, + "step": 3876 + }, + { + "ce_ib": 3.5771780014038086, + "ce_orig": 0.7643224000930786, + "epoch": 1.114458264433101, + "kl_loss": 0.0809958279132843, + "loss_ib": 0.001167676062323153, + "step": 3876 + }, + { + "ce_ib": 4.875547409057617, + "ce_orig": 1.020644187927246, + "epoch": 1.114458264433101, + "kl_loss": 0.06902328878641129, + "loss_ib": 0.001177787547931075, + "step": 3876 + }, + { + "ce_ib": 3.645996332168579, + "ce_orig": 1.079506278038025, + "epoch": 1.114458264433101, + "kl_loss": 0.047522783279418945, + "loss_ib": 0.0008398274658247828, + "step": 3876 + }, + { + "ce_ib": 2.8309168815612793, + "ce_orig": 0.6220803260803223, + "epoch": 1.1147458480120784, + "kl_loss": 0.046846434473991394, + "loss_ib": 0.0007515560137107968, + "step": 3877 + }, + { + "ce_ib": 3.4155218601226807, + "ce_orig": 1.0322874784469604, + "epoch": 1.1147458480120784, + "kl_loss": 0.044574424624443054, + "loss_ib": 0.0007872964488342404, + "step": 3877 + }, + { + "ce_ib": 4.099472522735596, + "ce_orig": 1.1314010620117188, + "epoch": 1.1147458480120784, + "kl_loss": 0.0445302277803421, + "loss_ib": 0.0008552495273761451, + "step": 3877 + }, + { + "ce_ib": 4.578045845031738, + "ce_orig": 1.0650391578674316, + "epoch": 1.1147458480120784, + "kl_loss": 0.05741759389638901, + "loss_ib": 0.0010319805005565286, + "step": 3877 + }, + { + "ce_ib": 2.335775136947632, + "ce_orig": 0.7545249462127686, + "epoch": 1.1150334315910562, + "kl_loss": 0.021260205656290054, + "loss_ib": 0.00044617956154979765, + "step": 3878 + }, + { + "ce_ib": 3.4920599460601807, + "ce_orig": 0.860213577747345, + "epoch": 1.1150334315910562, + "kl_loss": 0.05703645572066307, + "loss_ib": 0.0009195705060847104, + "step": 3878 + }, + { + "ce_ib": 2.63698410987854, + "ce_orig": 0.649169921875, + "epoch": 1.1150334315910562, + "kl_loss": 0.040790148079395294, + "loss_ib": 0.0006715998752042651, + "step": 3878 + }, + { + "ce_ib": 4.425360202789307, + "ce_orig": 0.9950451850891113, + "epoch": 1.1150334315910562, + "kl_loss": 0.0552913099527359, + "loss_ib": 0.0009954491397365928, + "step": 3878 + }, + { + "ce_ib": 2.2862298488616943, + "ce_orig": 0.7705575823783875, + "epoch": 1.115321015170034, + "kl_loss": 0.04090700298547745, + "loss_ib": 0.0006376930396072567, + "step": 3879 + }, + { + "ce_ib": 3.3285863399505615, + "ce_orig": 0.7966204285621643, + "epoch": 1.115321015170034, + "kl_loss": 0.07582001388072968, + "loss_ib": 0.0010910587152466178, + "step": 3879 + }, + { + "ce_ib": 1.886778473854065, + "ce_orig": 0.5394027829170227, + "epoch": 1.115321015170034, + "kl_loss": 0.021913820877671242, + "loss_ib": 0.00040781605639494956, + "step": 3879 + }, + { + "ce_ib": 3.3610615730285645, + "ce_orig": 0.7732166051864624, + "epoch": 1.115321015170034, + "kl_loss": 0.020022766664624214, + "loss_ib": 0.0005363338277675211, + "step": 3879 + }, + { + "epoch": 1.1156085987490114, + "grad_norm": 0.12469659745693207, + "learning_rate": 3.622511650472601e-05, + "loss": 0.7852, + "step": 3880 + }, + { + "ce_ib": 4.1779608726501465, + "ce_orig": 1.2245407104492188, + "epoch": 1.1156085987490114, + "kl_loss": 0.04838830977678299, + "loss_ib": 0.000901679159142077, + "step": 3880 + }, + { + "ce_ib": 2.5198471546173096, + "ce_orig": 0.601540744304657, + "epoch": 1.1156085987490114, + "kl_loss": 0.02875765971839428, + "loss_ib": 0.0005395612679421902, + "step": 3880 + }, + { + "ce_ib": 3.4219324588775635, + "ce_orig": 0.7010173201560974, + "epoch": 1.1156085987490114, + "kl_loss": 0.06271395087242126, + "loss_ib": 0.0009693327592685819, + "step": 3880 + }, + { + "ce_ib": 5.0824875831604, + "ce_orig": 1.295799970626831, + "epoch": 1.1156085987490114, + "kl_loss": 0.02788160927593708, + "loss_ib": 0.0007870647823438048, + "step": 3880 + }, + { + "ce_ib": 3.2362453937530518, + "ce_orig": 1.0144238471984863, + "epoch": 1.1158961823279892, + "kl_loss": 0.03195980191230774, + "loss_ib": 0.0006432225345633924, + "step": 3881 + }, + { + "ce_ib": 3.0565524101257324, + "ce_orig": 0.6681481599807739, + "epoch": 1.1158961823279892, + "kl_loss": 0.024168211966753006, + "loss_ib": 0.0005473373457789421, + "step": 3881 + }, + { + "ce_ib": 1.8170098066329956, + "ce_orig": 0.21636265516281128, + "epoch": 1.1158961823279892, + "kl_loss": 0.09552490711212158, + "loss_ib": 0.001136949984356761, + "step": 3881 + }, + { + "ce_ib": 2.299259901046753, + "ce_orig": 0.525316059589386, + "epoch": 1.1158961823279892, + "kl_loss": 0.03350132703781128, + "loss_ib": 0.0005649392260238528, + "step": 3881 + }, + { + "ce_ib": 4.196875095367432, + "ce_orig": 1.2575933933258057, + "epoch": 1.1161837659069667, + "kl_loss": 0.06655467301607132, + "loss_ib": 0.0010852342238649726, + "step": 3882 + }, + { + "ce_ib": 2.9673569202423096, + "ce_orig": 0.774421215057373, + "epoch": 1.1161837659069667, + "kl_loss": 0.03947583585977554, + "loss_ib": 0.0006914940313436091, + "step": 3882 + }, + { + "ce_ib": 2.6369521617889404, + "ce_orig": 0.7095384001731873, + "epoch": 1.1161837659069667, + "kl_loss": 0.05099405348300934, + "loss_ib": 0.0007736356928944588, + "step": 3882 + }, + { + "ce_ib": 4.482769966125488, + "ce_orig": 0.892069935798645, + "epoch": 1.1161837659069667, + "kl_loss": 0.0471700057387352, + "loss_ib": 0.00091997702838853, + "step": 3882 + }, + { + "ce_ib": 2.228851556777954, + "ce_orig": 0.7227908968925476, + "epoch": 1.1164713494859444, + "kl_loss": 0.03408268466591835, + "loss_ib": 0.0005637119757011533, + "step": 3883 + }, + { + "ce_ib": 3.0927298069000244, + "ce_orig": 0.6626830101013184, + "epoch": 1.1164713494859444, + "kl_loss": 0.05336541682481766, + "loss_ib": 0.0008429270819760859, + "step": 3883 + }, + { + "ce_ib": 4.338747501373291, + "ce_orig": 1.2575347423553467, + "epoch": 1.1164713494859444, + "kl_loss": 0.077149398624897, + "loss_ib": 0.0012053686659783125, + "step": 3883 + }, + { + "ce_ib": 2.5097246170043945, + "ce_orig": 0.4659877121448517, + "epoch": 1.1164713494859444, + "kl_loss": 0.02726786583662033, + "loss_ib": 0.0005236510769464076, + "step": 3883 + }, + { + "ce_ib": 2.7833733558654785, + "ce_orig": 0.8167527914047241, + "epoch": 1.116758933064922, + "kl_loss": 0.045223407447338104, + "loss_ib": 0.0007305714534595609, + "step": 3884 + }, + { + "ce_ib": 4.203033924102783, + "ce_orig": 1.0029618740081787, + "epoch": 1.116758933064922, + "kl_loss": 0.04916595295071602, + "loss_ib": 0.000911962881218642, + "step": 3884 + }, + { + "ce_ib": 4.4988908767700195, + "ce_orig": 1.148505687713623, + "epoch": 1.116758933064922, + "kl_loss": 0.05449467524886131, + "loss_ib": 0.0009948357474058867, + "step": 3884 + }, + { + "ce_ib": 3.362406015396118, + "ce_orig": 0.7156971096992493, + "epoch": 1.116758933064922, + "kl_loss": 0.05177677422761917, + "loss_ib": 0.0008540083072148263, + "step": 3884 + }, + { + "epoch": 1.1170465166438996, + "grad_norm": 0.1141824945807457, + "learning_rate": 3.619043025156782e-05, + "loss": 0.851, + "step": 3885 + }, + { + "ce_ib": 3.039111375808716, + "ce_orig": 0.4738331735134125, + "epoch": 1.1170465166438996, + "kl_loss": 0.08348670601844788, + "loss_ib": 0.0011387781705707312, + "step": 3885 + }, + { + "ce_ib": 2.6155357360839844, + "ce_orig": 0.46938225626945496, + "epoch": 1.1170465166438996, + "kl_loss": 0.016216887161135674, + "loss_ib": 0.0004237224056851119, + "step": 3885 + }, + { + "ce_ib": 2.8097755908966064, + "ce_orig": 0.8293483853340149, + "epoch": 1.1170465166438996, + "kl_loss": 0.04311025142669678, + "loss_ib": 0.0007120800437405705, + "step": 3885 + }, + { + "ce_ib": 3.3019039630889893, + "ce_orig": 0.8990929126739502, + "epoch": 1.1170465166438996, + "kl_loss": 0.07042951881885529, + "loss_ib": 0.0010344855254516006, + "step": 3885 + }, + { + "ce_ib": 4.028023719787598, + "ce_orig": 0.7940239310264587, + "epoch": 1.1173341002228772, + "kl_loss": 0.05347496271133423, + "loss_ib": 0.0009375519584864378, + "step": 3886 + }, + { + "ce_ib": 3.3473165035247803, + "ce_orig": 0.9624444842338562, + "epoch": 1.1173341002228772, + "kl_loss": 0.04039300978183746, + "loss_ib": 0.0007386617362499237, + "step": 3886 + }, + { + "ce_ib": 2.7342984676361084, + "ce_orig": 0.8975852131843567, + "epoch": 1.1173341002228772, + "kl_loss": 0.03587556257843971, + "loss_ib": 0.0006321854889392853, + "step": 3886 + }, + { + "ce_ib": 3.2556395530700684, + "ce_orig": 0.7380816340446472, + "epoch": 1.1173341002228772, + "kl_loss": 0.03761713206768036, + "loss_ib": 0.0007017353200353682, + "step": 3886 + }, + { + "ce_ib": 5.188493728637695, + "ce_orig": 1.7108467817306519, + "epoch": 1.117621683801855, + "kl_loss": 0.05435672774910927, + "loss_ib": 0.0010624165879562497, + "step": 3887 + }, + { + "ce_ib": 2.6762330532073975, + "ce_orig": 0.5619564652442932, + "epoch": 1.117621683801855, + "kl_loss": 0.04027627035975456, + "loss_ib": 0.0006703859544359148, + "step": 3887 + }, + { + "ce_ib": 2.4288394451141357, + "ce_orig": 0.47416776418685913, + "epoch": 1.117621683801855, + "kl_loss": 0.03192201256752014, + "loss_ib": 0.0005621040472760797, + "step": 3887 + }, + { + "ce_ib": 4.902431011199951, + "ce_orig": 1.3921144008636475, + "epoch": 1.117621683801855, + "kl_loss": 0.04850609228014946, + "loss_ib": 0.0009753039921633899, + "step": 3887 + }, + { + "ce_ib": 2.6748836040496826, + "ce_orig": 0.7926574349403381, + "epoch": 1.1179092673808326, + "kl_loss": 0.021958094090223312, + "loss_ib": 0.00048706927918829024, + "step": 3888 + }, + { + "ce_ib": 2.9969427585601807, + "ce_orig": 0.6587496995925903, + "epoch": 1.1179092673808326, + "kl_loss": 0.04619406908750534, + "loss_ib": 0.0007616349612362683, + "step": 3888 + }, + { + "ce_ib": 3.598574638366699, + "ce_orig": 1.0091817378997803, + "epoch": 1.1179092673808326, + "kl_loss": 0.045863617211580276, + "loss_ib": 0.0008184936596080661, + "step": 3888 + }, + { + "ce_ib": 3.181083917617798, + "ce_orig": 0.9402598738670349, + "epoch": 1.1179092673808326, + "kl_loss": 0.041896507143974304, + "loss_ib": 0.0007370734238065779, + "step": 3888 + }, + { + "ce_ib": 2.5848522186279297, + "ce_orig": 0.7570788860321045, + "epoch": 1.1181968509598101, + "kl_loss": 0.0398191437125206, + "loss_ib": 0.0006566766533069313, + "step": 3889 + }, + { + "ce_ib": 4.0110015869140625, + "ce_orig": 1.1110804080963135, + "epoch": 1.1181968509598101, + "kl_loss": 0.08983980119228363, + "loss_ib": 0.001299498020671308, + "step": 3889 + }, + { + "ce_ib": 4.618993759155273, + "ce_orig": 0.943111002445221, + "epoch": 1.1181968509598101, + "kl_loss": 0.047537628561258316, + "loss_ib": 0.0009372756467200816, + "step": 3889 + }, + { + "ce_ib": 2.358236312866211, + "ce_orig": 0.610733151435852, + "epoch": 1.1181968509598101, + "kl_loss": 0.030166640877723694, + "loss_ib": 0.0005374900065362453, + "step": 3889 + }, + { + "epoch": 1.1184844345387879, + "grad_norm": 0.0993463471531868, + "learning_rate": 3.6155717038065786e-05, + "loss": 0.818, + "step": 3890 + }, + { + "ce_ib": 3.087681770324707, + "ce_orig": 0.7542642951011658, + "epoch": 1.1184844345387879, + "kl_loss": 0.07109452784061432, + "loss_ib": 0.0010197133524343371, + "step": 3890 + }, + { + "ce_ib": 3.100576639175415, + "ce_orig": 0.88427734375, + "epoch": 1.1184844345387879, + "kl_loss": 0.01846560277044773, + "loss_ib": 0.0004947137203998864, + "step": 3890 + }, + { + "ce_ib": 0.91781085729599, + "ce_orig": 0.1155041828751564, + "epoch": 1.1184844345387879, + "kl_loss": 0.09327965974807739, + "loss_ib": 0.0010245776502415538, + "step": 3890 + }, + { + "ce_ib": 2.3664419651031494, + "ce_orig": 0.5954967141151428, + "epoch": 1.1184844345387879, + "kl_loss": 0.030229276046156883, + "loss_ib": 0.0005389369325712323, + "step": 3890 + }, + { + "ce_ib": 1.822415828704834, + "ce_orig": 0.43406862020492554, + "epoch": 1.1187720181177654, + "kl_loss": 0.04691252484917641, + "loss_ib": 0.0006513668340630829, + "step": 3891 + }, + { + "ce_ib": 3.5136349201202393, + "ce_orig": 0.9484729170799255, + "epoch": 1.1187720181177654, + "kl_loss": 0.08338244259357452, + "loss_ib": 0.0011851878371089697, + "step": 3891 + }, + { + "ce_ib": 2.4923369884490967, + "ce_orig": 0.600945770740509, + "epoch": 1.1187720181177654, + "kl_loss": 0.04809572547674179, + "loss_ib": 0.0007301909499801695, + "step": 3891 + }, + { + "ce_ib": 2.0191214084625244, + "ce_orig": 0.4660525619983673, + "epoch": 1.1187720181177654, + "kl_loss": 0.049924157559871674, + "loss_ib": 0.0007011537090875208, + "step": 3891 + }, + { + "ce_ib": 3.2705726623535156, + "ce_orig": 1.156556487083435, + "epoch": 1.1190596016967431, + "kl_loss": 0.03752831369638443, + "loss_ib": 0.0007023403886705637, + "step": 3892 + }, + { + "ce_ib": 3.81880259513855, + "ce_orig": 1.0757784843444824, + "epoch": 1.1190596016967431, + "kl_loss": 0.02851686254143715, + "loss_ib": 0.0006670488510280848, + "step": 3892 + }, + { + "ce_ib": 2.3208744525909424, + "ce_orig": 0.3445805311203003, + "epoch": 1.1190596016967431, + "kl_loss": 0.10279138386249542, + "loss_ib": 0.0012600013287737966, + "step": 3892 + }, + { + "ce_ib": 3.300553798675537, + "ce_orig": 0.740790069103241, + "epoch": 1.1190596016967431, + "kl_loss": 0.04948049038648605, + "loss_ib": 0.0008248602389357984, + "step": 3892 + }, + { + "ce_ib": 4.32871150970459, + "ce_orig": 1.242394208908081, + "epoch": 1.1193471852757209, + "kl_loss": 0.05536274611949921, + "loss_ib": 0.0009864985477179289, + "step": 3893 + }, + { + "ce_ib": 1.7803480625152588, + "ce_orig": 0.2709926664829254, + "epoch": 1.1193471852757209, + "kl_loss": 0.02588311955332756, + "loss_ib": 0.0004368659865576774, + "step": 3893 + }, + { + "ce_ib": 3.754816770553589, + "ce_orig": 1.063711404800415, + "epoch": 1.1193471852757209, + "kl_loss": 0.04894328489899635, + "loss_ib": 0.0008649145020172, + "step": 3893 + }, + { + "ce_ib": 3.100916862487793, + "ce_orig": 0.7879678606987, + "epoch": 1.1193471852757209, + "kl_loss": 0.020985882729291916, + "loss_ib": 0.000519950466696173, + "step": 3893 + }, + { + "ce_ib": 2.1521642208099365, + "ce_orig": 0.5064207315444946, + "epoch": 1.1196347688546984, + "kl_loss": 0.02658728137612343, + "loss_ib": 0.00048108919872902334, + "step": 3894 + }, + { + "ce_ib": 3.103238582611084, + "ce_orig": 0.7561085224151611, + "epoch": 1.1196347688546984, + "kl_loss": 0.05904041975736618, + "loss_ib": 0.00090072798775509, + "step": 3894 + }, + { + "ce_ib": 2.907012939453125, + "ce_orig": 0.4808667302131653, + "epoch": 1.1196347688546984, + "kl_loss": 0.08102752268314362, + "loss_ib": 0.0011009764857590199, + "step": 3894 + }, + { + "ce_ib": 3.0239369869232178, + "ce_orig": 0.598647952079773, + "epoch": 1.1196347688546984, + "kl_loss": 0.05636496841907501, + "loss_ib": 0.0008660433813929558, + "step": 3894 + }, + { + "epoch": 1.119922352433676, + "grad_norm": 0.12083286792039871, + "learning_rate": 3.612097694785211e-05, + "loss": 0.8163, + "step": 3895 + }, + { + "ce_ib": 3.9950902462005615, + "ce_orig": 0.5821787714958191, + "epoch": 1.119922352433676, + "kl_loss": 0.05108487606048584, + "loss_ib": 0.0009103577467612922, + "step": 3895 + }, + { + "ce_ib": 2.9304733276367188, + "ce_orig": 0.7267715334892273, + "epoch": 1.119922352433676, + "kl_loss": 0.032966457307338715, + "loss_ib": 0.0006227119010873139, + "step": 3895 + }, + { + "ce_ib": 3.822227954864502, + "ce_orig": 1.0731087923049927, + "epoch": 1.119922352433676, + "kl_loss": 0.06490328162908554, + "loss_ib": 0.0010312555823475122, + "step": 3895 + }, + { + "ce_ib": 3.2252745628356934, + "ce_orig": 0.20207585394382477, + "epoch": 1.119922352433676, + "kl_loss": 0.07435889542102814, + "loss_ib": 0.0010661163832992315, + "step": 3895 + }, + { + "ce_ib": 3.5263912677764893, + "ce_orig": 1.0869133472442627, + "epoch": 1.1202099360126536, + "kl_loss": 0.05057160556316376, + "loss_ib": 0.0008583551389165223, + "step": 3896 + }, + { + "ce_ib": 3.0426461696624756, + "ce_orig": 0.8679606914520264, + "epoch": 1.1202099360126536, + "kl_loss": 0.02988622710108757, + "loss_ib": 0.0006031268858350813, + "step": 3896 + }, + { + "ce_ib": 3.093231678009033, + "ce_orig": 0.8262658715248108, + "epoch": 1.1202099360126536, + "kl_loss": 0.05277371034026146, + "loss_ib": 0.0008370602736249566, + "step": 3896 + }, + { + "ce_ib": 4.56315803527832, + "ce_orig": 1.1880090236663818, + "epoch": 1.1202099360126536, + "kl_loss": 0.0560513511300087, + "loss_ib": 0.0010168292792513967, + "step": 3896 + }, + { + "ce_ib": 3.593987226486206, + "ce_orig": 1.0266735553741455, + "epoch": 1.1204975195916314, + "kl_loss": 0.04575728625059128, + "loss_ib": 0.0008169715874828398, + "step": 3897 + }, + { + "ce_ib": 2.4547359943389893, + "ce_orig": 0.6333020925521851, + "epoch": 1.1204975195916314, + "kl_loss": 0.0749380961060524, + "loss_ib": 0.0009948544902727008, + "step": 3897 + }, + { + "ce_ib": 3.7079262733459473, + "ce_orig": 1.1630505323410034, + "epoch": 1.1204975195916314, + "kl_loss": 0.05851145088672638, + "loss_ib": 0.0009559071040712297, + "step": 3897 + }, + { + "ce_ib": 3.2964353561401367, + "ce_orig": 0.9044885039329529, + "epoch": 1.1204975195916314, + "kl_loss": 0.0767405554652214, + "loss_ib": 0.0010970490984618664, + "step": 3897 + }, + { + "ce_ib": 2.080799102783203, + "ce_orig": 0.5049774050712585, + "epoch": 1.1207851031706089, + "kl_loss": 0.03168979287147522, + "loss_ib": 0.0005249778041616082, + "step": 3898 + }, + { + "ce_ib": 3.2556893825531006, + "ce_orig": 0.9484633803367615, + "epoch": 1.1207851031706089, + "kl_loss": 0.033982060849666595, + "loss_ib": 0.0006653895252384245, + "step": 3898 + }, + { + "ce_ib": 2.8934545516967773, + "ce_orig": 0.7335721850395203, + "epoch": 1.1207851031706089, + "kl_loss": 0.0448441356420517, + "loss_ib": 0.0007377868168987334, + "step": 3898 + }, + { + "ce_ib": 2.497739315032959, + "ce_orig": 0.2905248999595642, + "epoch": 1.1207851031706089, + "kl_loss": 0.05606408417224884, + "loss_ib": 0.0008104147855192423, + "step": 3898 + }, + { + "ce_ib": 2.248014211654663, + "ce_orig": 0.3831651210784912, + "epoch": 1.1210726867495866, + "kl_loss": 0.04348374903202057, + "loss_ib": 0.0006596388993784785, + "step": 3899 + }, + { + "ce_ib": 5.086856365203857, + "ce_orig": 1.1258009672164917, + "epoch": 1.1210726867495866, + "kl_loss": 0.05553198233246803, + "loss_ib": 0.0010640054242685437, + "step": 3899 + }, + { + "ce_ib": 3.852391242980957, + "ce_orig": 1.0167694091796875, + "epoch": 1.1210726867495866, + "kl_loss": 0.04618903249502182, + "loss_ib": 0.0008471294422633946, + "step": 3899 + }, + { + "ce_ib": 5.539231300354004, + "ce_orig": 1.6557220220565796, + "epoch": 1.1210726867495866, + "kl_loss": 0.04806381091475487, + "loss_ib": 0.001034561195410788, + "step": 3899 + }, + { + "epoch": 1.1213602703285643, + "grad_norm": 0.12298120558261871, + "learning_rate": 3.608621006462373e-05, + "loss": 0.8907, + "step": 3900 + }, + { + "ce_ib": 5.51707124710083, + "ce_orig": 1.6360708475112915, + "epoch": 1.1213602703285643, + "kl_loss": 0.046088993549346924, + "loss_ib": 0.0010125971166417003, + "step": 3900 + }, + { + "ce_ib": 4.075573444366455, + "ce_orig": 1.0925078392028809, + "epoch": 1.1213602703285643, + "kl_loss": 0.03336776793003082, + "loss_ib": 0.0007412349805235863, + "step": 3900 + }, + { + "ce_ib": 4.199433326721191, + "ce_orig": 1.129034399986267, + "epoch": 1.1213602703285643, + "kl_loss": 0.05011337250471115, + "loss_ib": 0.0009210770367644727, + "step": 3900 + }, + { + "ce_ib": 4.284681797027588, + "ce_orig": 0.9855512380599976, + "epoch": 1.1213602703285643, + "kl_loss": 0.03853348270058632, + "loss_ib": 0.0008138029952533543, + "step": 3900 + }, + { + "ce_ib": 5.072605133056641, + "ce_orig": 1.4931060075759888, + "epoch": 1.1216478539075418, + "kl_loss": 0.03939667344093323, + "loss_ib": 0.0009012271766550839, + "step": 3901 + }, + { + "ce_ib": 4.014947414398193, + "ce_orig": 0.5817469358444214, + "epoch": 1.1216478539075418, + "kl_loss": 0.04112282395362854, + "loss_ib": 0.0008127229521051049, + "step": 3901 + }, + { + "ce_ib": 3.0688681602478027, + "ce_orig": 0.807977557182312, + "epoch": 1.1216478539075418, + "kl_loss": 0.04483075439929962, + "loss_ib": 0.000755194341763854, + "step": 3901 + }, + { + "ce_ib": 2.925117015838623, + "ce_orig": 0.6799410581588745, + "epoch": 1.1216478539075418, + "kl_loss": 0.04830962419509888, + "loss_ib": 0.0007756079430691898, + "step": 3901 + }, + { + "ce_ib": 2.5358195304870605, + "ce_orig": 0.5499390959739685, + "epoch": 1.1219354374865196, + "kl_loss": 0.03426721692085266, + "loss_ib": 0.0005962540744803846, + "step": 3902 + }, + { + "ce_ib": 2.314016580581665, + "ce_orig": 0.5883809328079224, + "epoch": 1.1219354374865196, + "kl_loss": 0.034407518804073334, + "loss_ib": 0.0005754768499173224, + "step": 3902 + }, + { + "ce_ib": 3.157871723175049, + "ce_orig": 0.9416939616203308, + "epoch": 1.1219354374865196, + "kl_loss": 0.03862018138170242, + "loss_ib": 0.0007019889308139682, + "step": 3902 + }, + { + "ce_ib": 4.956875324249268, + "ce_orig": 0.929934561252594, + "epoch": 1.1219354374865196, + "kl_loss": 0.04519558697938919, + "loss_ib": 0.0009476433624513447, + "step": 3902 + }, + { + "ce_ib": 2.9565844535827637, + "ce_orig": 0.6698582172393799, + "epoch": 1.122223021065497, + "kl_loss": 0.028465451672673225, + "loss_ib": 0.0005803129752166569, + "step": 3903 + }, + { + "ce_ib": 2.593050718307495, + "ce_orig": 0.5179158449172974, + "epoch": 1.122223021065497, + "kl_loss": 0.04705337435007095, + "loss_ib": 0.0007298387936316431, + "step": 3903 + }, + { + "ce_ib": 1.9466274976730347, + "ce_orig": 0.3035014271736145, + "epoch": 1.122223021065497, + "kl_loss": 0.10591593384742737, + "loss_ib": 0.0012538221199065447, + "step": 3903 + }, + { + "ce_ib": 3.3443639278411865, + "ce_orig": 0.7700005769729614, + "epoch": 1.122223021065497, + "kl_loss": 0.033994004130363464, + "loss_ib": 0.0006743763806298375, + "step": 3903 + }, + { + "ce_ib": 2.425452709197998, + "ce_orig": 0.5343197584152222, + "epoch": 1.1225106046444748, + "kl_loss": 0.02547868900001049, + "loss_ib": 0.0004973321338184178, + "step": 3904 + }, + { + "ce_ib": 2.438875436782837, + "ce_orig": 0.7296759486198425, + "epoch": 1.1225106046444748, + "kl_loss": 0.050607673823833466, + "loss_ib": 0.0007499642670154572, + "step": 3904 + }, + { + "ce_ib": 2.146456003189087, + "ce_orig": 0.4949483573436737, + "epoch": 1.1225106046444748, + "kl_loss": 0.07183922827243805, + "loss_ib": 0.0009330378379672766, + "step": 3904 + }, + { + "ce_ib": 3.1729722023010254, + "ce_orig": 0.523650050163269, + "epoch": 1.1225106046444748, + "kl_loss": 0.05519072711467743, + "loss_ib": 0.0008692044648341835, + "step": 3904 + }, + { + "epoch": 1.1227981882234523, + "grad_norm": 0.11578710377216339, + "learning_rate": 3.6051416472142144e-05, + "loss": 0.8145, + "step": 3905 + }, + { + "ce_ib": 2.7078354358673096, + "ce_orig": 0.7970941662788391, + "epoch": 1.1227981882234523, + "kl_loss": 0.057102277874946594, + "loss_ib": 0.0008418062934651971, + "step": 3905 + }, + { + "ce_ib": 3.27152419090271, + "ce_orig": 0.678154468536377, + "epoch": 1.1227981882234523, + "kl_loss": 0.019311143085360527, + "loss_ib": 0.0005202637985348701, + "step": 3905 + }, + { + "ce_ib": 3.706531524658203, + "ce_orig": 0.9393754005432129, + "epoch": 1.1227981882234523, + "kl_loss": 0.07375529408454895, + "loss_ib": 0.0011082059936597943, + "step": 3905 + }, + { + "ce_ib": 3.171959161758423, + "ce_orig": 0.6224421262741089, + "epoch": 1.1227981882234523, + "kl_loss": 0.031741246581077576, + "loss_ib": 0.0006346083828248084, + "step": 3905 + }, + { + "ce_ib": 4.867212772369385, + "ce_orig": 1.1521371603012085, + "epoch": 1.12308577180243, + "kl_loss": 0.034093037247657776, + "loss_ib": 0.0008276515873149037, + "step": 3906 + }, + { + "ce_ib": 2.437019109725952, + "ce_orig": 0.6020281314849854, + "epoch": 1.12308577180243, + "kl_loss": 0.03848858177661896, + "loss_ib": 0.0006285877316258848, + "step": 3906 + }, + { + "ce_ib": 3.736751079559326, + "ce_orig": 0.810859203338623, + "epoch": 1.12308577180243, + "kl_loss": 0.0884941816329956, + "loss_ib": 0.0012586169177666306, + "step": 3906 + }, + { + "ce_ib": 3.64487886428833, + "ce_orig": 0.7621055245399475, + "epoch": 1.12308577180243, + "kl_loss": 0.04029077664017677, + "loss_ib": 0.0007673956570215523, + "step": 3906 + }, + { + "ce_ib": 2.272542953491211, + "ce_orig": 0.5297432541847229, + "epoch": 1.1233733553814078, + "kl_loss": 0.031007569283246994, + "loss_ib": 0.0005373299936763942, + "step": 3907 + }, + { + "ce_ib": 6.604776859283447, + "ce_orig": 1.9724184274673462, + "epoch": 1.1233733553814078, + "kl_loss": 0.06180887296795845, + "loss_ib": 0.0012785664293915033, + "step": 3907 + }, + { + "ce_ib": 4.146025657653809, + "ce_orig": 1.1962980031967163, + "epoch": 1.1233733553814078, + "kl_loss": 0.03940131142735481, + "loss_ib": 0.0008086156449280679, + "step": 3907 + }, + { + "ce_ib": 4.703481674194336, + "ce_orig": 1.4825422763824463, + "epoch": 1.1233733553814078, + "kl_loss": 0.05648696422576904, + "loss_ib": 0.001035217777825892, + "step": 3907 + }, + { + "ce_ib": 3.052492380142212, + "ce_orig": 0.7718418836593628, + "epoch": 1.1236609389603853, + "kl_loss": 0.027723362669348717, + "loss_ib": 0.0005824828404001892, + "step": 3908 + }, + { + "ce_ib": 3.771991014480591, + "ce_orig": 1.0699855089187622, + "epoch": 1.1236609389603853, + "kl_loss": 0.06257134675979614, + "loss_ib": 0.0010029125260189176, + "step": 3908 + }, + { + "ce_ib": 3.9889986515045166, + "ce_orig": 1.0720516443252563, + "epoch": 1.1236609389603853, + "kl_loss": 0.05784706771373749, + "loss_ib": 0.0009773705387488008, + "step": 3908 + }, + { + "ce_ib": 4.102288246154785, + "ce_orig": 0.8092141151428223, + "epoch": 1.1236609389603853, + "kl_loss": 0.06951499730348587, + "loss_ib": 0.0011053787311539054, + "step": 3908 + }, + { + "ce_ib": 2.3789477348327637, + "ce_orig": 0.5560245513916016, + "epoch": 1.123948522539363, + "kl_loss": 0.033553287386894226, + "loss_ib": 0.0005734276492148638, + "step": 3909 + }, + { + "ce_ib": 2.456418752670288, + "ce_orig": 0.6768503189086914, + "epoch": 1.123948522539363, + "kl_loss": 0.024105610325932503, + "loss_ib": 0.0004866979434154928, + "step": 3909 + }, + { + "ce_ib": 5.5863447189331055, + "ce_orig": 1.4584026336669922, + "epoch": 1.123948522539363, + "kl_loss": 0.0433078333735466, + "loss_ib": 0.0009917127899825573, + "step": 3909 + }, + { + "ce_ib": 3.1718642711639404, + "ce_orig": 0.7011775970458984, + "epoch": 1.123948522539363, + "kl_loss": 0.06319297850131989, + "loss_ib": 0.0009491161908954382, + "step": 3909 + }, + { + "epoch": 1.1242361061183406, + "grad_norm": 0.10182259231805801, + "learning_rate": 3.601659625423319e-05, + "loss": 0.8296, + "step": 3910 + }, + { + "ce_ib": 3.108647584915161, + "ce_orig": 0.7845110893249512, + "epoch": 1.1242361061183406, + "kl_loss": 0.053634531795978546, + "loss_ib": 0.0008472100598737597, + "step": 3910 + }, + { + "ce_ib": 2.128509998321533, + "ce_orig": 0.5801325440406799, + "epoch": 1.1242361061183406, + "kl_loss": 0.032575998455286026, + "loss_ib": 0.0005386109696701169, + "step": 3910 + }, + { + "ce_ib": 4.573448657989502, + "ce_orig": 1.146582841873169, + "epoch": 1.1242361061183406, + "kl_loss": 0.03954499214887619, + "loss_ib": 0.0008527947939001024, + "step": 3910 + }, + { + "ce_ib": 4.310737609863281, + "ce_orig": 1.0056837797164917, + "epoch": 1.1242361061183406, + "kl_loss": 0.07422694563865662, + "loss_ib": 0.0011733431601896882, + "step": 3910 + }, + { + "ce_ib": 2.3678781986236572, + "ce_orig": 0.6822802424430847, + "epoch": 1.1245236896973183, + "kl_loss": 0.02818926051259041, + "loss_ib": 0.000518680433742702, + "step": 3911 + }, + { + "ce_ib": 2.6903793811798096, + "ce_orig": 0.6737673878669739, + "epoch": 1.1245236896973183, + "kl_loss": 0.04670489579439163, + "loss_ib": 0.0007360868621617556, + "step": 3911 + }, + { + "ce_ib": 3.3378467559814453, + "ce_orig": 0.8647925853729248, + "epoch": 1.1245236896973183, + "kl_loss": 0.054292503744363785, + "loss_ib": 0.0008767096442170441, + "step": 3911 + }, + { + "ce_ib": 2.286067247390747, + "ce_orig": 0.6669188141822815, + "epoch": 1.1245236896973183, + "kl_loss": 0.04355242848396301, + "loss_ib": 0.0006641310174018145, + "step": 3911 + }, + { + "ce_ib": 2.2086620330810547, + "ce_orig": 0.5038930177688599, + "epoch": 1.1248112732762958, + "kl_loss": 0.04517720267176628, + "loss_ib": 0.0006726382416673005, + "step": 3912 + }, + { + "ce_ib": 5.844481468200684, + "ce_orig": 1.7024450302124023, + "epoch": 1.1248112732762958, + "kl_loss": 0.049291037023067474, + "loss_ib": 0.0010773584945127368, + "step": 3912 + }, + { + "ce_ib": 4.730347633361816, + "ce_orig": 0.9982941746711731, + "epoch": 1.1248112732762958, + "kl_loss": 0.0709729790687561, + "loss_ib": 0.0011827645357698202, + "step": 3912 + }, + { + "ce_ib": 3.1131792068481445, + "ce_orig": 0.6372631788253784, + "epoch": 1.1248112732762958, + "kl_loss": 0.04641114920377731, + "loss_ib": 0.0007754293619655073, + "step": 3912 + }, + { + "ce_ib": 3.6029393672943115, + "ce_orig": 0.7390192747116089, + "epoch": 1.1250988568552736, + "kl_loss": 0.05918201804161072, + "loss_ib": 0.0009521141182631254, + "step": 3913 + }, + { + "ce_ib": 3.5572009086608887, + "ce_orig": 0.9470426440238953, + "epoch": 1.1250988568552736, + "kl_loss": 0.049321260303258896, + "loss_ib": 0.0008489327155984938, + "step": 3913 + }, + { + "ce_ib": 3.8110673427581787, + "ce_orig": 0.5969442129135132, + "epoch": 1.1250988568552736, + "kl_loss": 0.08064364641904831, + "loss_ib": 0.0011875431519001722, + "step": 3913 + }, + { + "ce_ib": 4.84641695022583, + "ce_orig": 1.2922741174697876, + "epoch": 1.1250988568552736, + "kl_loss": 0.06774218380451202, + "loss_ib": 0.0011620634468272328, + "step": 3913 + }, + { + "ce_ib": 1.6471571922302246, + "ce_orig": 0.4660007059574127, + "epoch": 1.1253864404342513, + "kl_loss": 0.019995786249637604, + "loss_ib": 0.00036467358586378396, + "step": 3914 + }, + { + "ce_ib": 2.208864688873291, + "ce_orig": 0.41468530893325806, + "epoch": 1.1253864404342513, + "kl_loss": 0.06105700507760048, + "loss_ib": 0.0008314564474858344, + "step": 3914 + }, + { + "ce_ib": 3.7902798652648926, + "ce_orig": 1.2214984893798828, + "epoch": 1.1253864404342513, + "kl_loss": 0.05960908904671669, + "loss_ib": 0.0009751188335940242, + "step": 3914 + }, + { + "ce_ib": 6.385854721069336, + "ce_orig": 1.439581036567688, + "epoch": 1.1253864404342513, + "kl_loss": 0.03709625452756882, + "loss_ib": 0.0010095479665324092, + "step": 3914 + }, + { + "epoch": 1.1256740240132288, + "grad_norm": 0.10994719713926315, + "learning_rate": 3.598174949478685e-05, + "loss": 0.7979, + "step": 3915 + }, + { + "ce_ib": 4.257468223571777, + "ce_orig": 1.193256139755249, + "epoch": 1.1256740240132288, + "kl_loss": 0.044949937611818314, + "loss_ib": 0.0008752461872063577, + "step": 3915 + }, + { + "ce_ib": 2.3742122650146484, + "ce_orig": 0.48448365926742554, + "epoch": 1.1256740240132288, + "kl_loss": 0.03533347323536873, + "loss_ib": 0.0005907559534534812, + "step": 3915 + }, + { + "ce_ib": 2.954404354095459, + "ce_orig": 0.5392034649848938, + "epoch": 1.1256740240132288, + "kl_loss": 0.053730498999357224, + "loss_ib": 0.0008327453979291022, + "step": 3915 + }, + { + "ce_ib": 6.659515857696533, + "ce_orig": 1.9839352369308472, + "epoch": 1.1256740240132288, + "kl_loss": 0.0570964589715004, + "loss_ib": 0.0012369161704555154, + "step": 3915 + }, + { + "ce_ib": 3.7215347290039062, + "ce_orig": 0.9588900208473206, + "epoch": 1.1259616075922065, + "kl_loss": 0.030061762779951096, + "loss_ib": 0.0006727710133418441, + "step": 3916 + }, + { + "ce_ib": 2.3462133407592773, + "ce_orig": 0.5557299256324768, + "epoch": 1.1259616075922065, + "kl_loss": 0.049402087926864624, + "loss_ib": 0.000728642160538584, + "step": 3916 + }, + { + "ce_ib": 3.2335147857666016, + "ce_orig": 1.0058062076568604, + "epoch": 1.1259616075922065, + "kl_loss": 0.03941921889781952, + "loss_ib": 0.0007175436476245522, + "step": 3916 + }, + { + "ce_ib": 4.3467631340026855, + "ce_orig": 1.1677653789520264, + "epoch": 1.1259616075922065, + "kl_loss": 0.0449262298643589, + "loss_ib": 0.0008839385700412095, + "step": 3916 + }, + { + "ce_ib": 2.1759512424468994, + "ce_orig": 0.46404778957366943, + "epoch": 1.126249191171184, + "kl_loss": 0.04734642058610916, + "loss_ib": 0.0006910592783242464, + "step": 3917 + }, + { + "ce_ib": 3.1014626026153564, + "ce_orig": 0.8111289143562317, + "epoch": 1.126249191171184, + "kl_loss": 0.038481466472148895, + "loss_ib": 0.0006949609378352761, + "step": 3917 + }, + { + "ce_ib": 5.756839275360107, + "ce_orig": 1.6664022207260132, + "epoch": 1.126249191171184, + "kl_loss": 0.057044535875320435, + "loss_ib": 0.001146129216067493, + "step": 3917 + }, + { + "ce_ib": 2.4202146530151367, + "ce_orig": 0.6690629124641418, + "epoch": 1.126249191171184, + "kl_loss": 0.036384083330631256, + "loss_ib": 0.0006058622966520488, + "step": 3917 + }, + { + "ce_ib": 4.5955305099487305, + "ce_orig": 1.235826849937439, + "epoch": 1.1265367747501618, + "kl_loss": 0.05408162623643875, + "loss_ib": 0.0010003693168982863, + "step": 3918 + }, + { + "ce_ib": 3.607978105545044, + "ce_orig": 1.0463249683380127, + "epoch": 1.1265367747501618, + "kl_loss": 0.03643406182527542, + "loss_ib": 0.0007251384085975587, + "step": 3918 + }, + { + "ce_ib": 2.9678874015808105, + "ce_orig": 0.6911141276359558, + "epoch": 1.1265367747501618, + "kl_loss": 0.024791939184069633, + "loss_ib": 0.0005447081639431417, + "step": 3918 + }, + { + "ce_ib": 2.40838360786438, + "ce_orig": 0.5897656679153442, + "epoch": 1.1265367747501618, + "kl_loss": 0.051343221217393875, + "loss_ib": 0.0007542705279774964, + "step": 3918 + }, + { + "ce_ib": 2.4632811546325684, + "ce_orig": 0.5504851937294006, + "epoch": 1.1268243583291393, + "kl_loss": 0.05464811250567436, + "loss_ib": 0.000792809238191694, + "step": 3919 + }, + { + "ce_ib": 4.748188018798828, + "ce_orig": 1.3358879089355469, + "epoch": 1.1268243583291393, + "kl_loss": 0.04452337324619293, + "loss_ib": 0.0009200525237247348, + "step": 3919 + }, + { + "ce_ib": 3.4242148399353027, + "ce_orig": 0.6298737525939941, + "epoch": 1.1268243583291393, + "kl_loss": 0.03625325858592987, + "loss_ib": 0.0007049540872685611, + "step": 3919 + }, + { + "ce_ib": 3.0820698738098145, + "ce_orig": 0.933085560798645, + "epoch": 1.1268243583291393, + "kl_loss": 0.026019593700766563, + "loss_ib": 0.0005684029310941696, + "step": 3919 + }, + { + "epoch": 1.127111941908117, + "grad_norm": 0.11362908035516739, + "learning_rate": 3.5946876277757066e-05, + "loss": 0.8692, + "step": 3920 + }, + { + "ce_ib": 2.400259017944336, + "ce_orig": 0.7358158826828003, + "epoch": 1.127111941908117, + "kl_loss": 0.04257434606552124, + "loss_ib": 0.0006657693302258849, + "step": 3920 + }, + { + "ce_ib": 2.1893017292022705, + "ce_orig": 0.4586848020553589, + "epoch": 1.127111941908117, + "kl_loss": 0.034182414412498474, + "loss_ib": 0.0005607543280348182, + "step": 3920 + }, + { + "ce_ib": 3.200744152069092, + "ce_orig": 0.6876376271247864, + "epoch": 1.127111941908117, + "kl_loss": 0.04479125514626503, + "loss_ib": 0.0007679869304411113, + "step": 3920 + }, + { + "ce_ib": 5.774428367614746, + "ce_orig": 1.7886781692504883, + "epoch": 1.127111941908117, + "kl_loss": 0.042713530361652374, + "loss_ib": 0.0010045781964436173, + "step": 3920 + }, + { + "ce_ib": 5.061184406280518, + "ce_orig": 1.7095967531204224, + "epoch": 1.1273995254870948, + "kl_loss": 0.049835264682769775, + "loss_ib": 0.0010044709779322147, + "step": 3921 + }, + { + "ce_ib": 4.003653526306152, + "ce_orig": 0.648827075958252, + "epoch": 1.1273995254870948, + "kl_loss": 0.04637153819203377, + "loss_ib": 0.0008640806772746146, + "step": 3921 + }, + { + "ce_ib": 4.113147258758545, + "ce_orig": 0.9189127683639526, + "epoch": 1.1273995254870948, + "kl_loss": 0.04926484078168869, + "loss_ib": 0.0009039631113409996, + "step": 3921 + }, + { + "ce_ib": 4.868839263916016, + "ce_orig": 1.2575147151947021, + "epoch": 1.1273995254870948, + "kl_loss": 0.04345545172691345, + "loss_ib": 0.0009214384481310844, + "step": 3921 + }, + { + "ce_ib": 3.769718885421753, + "ce_orig": 0.990261971950531, + "epoch": 1.1276871090660723, + "kl_loss": 0.03437025099992752, + "loss_ib": 0.000720674404874444, + "step": 3922 + }, + { + "ce_ib": 3.784532070159912, + "ce_orig": 1.0624674558639526, + "epoch": 1.1276871090660723, + "kl_loss": 0.04651673138141632, + "loss_ib": 0.000843620509840548, + "step": 3922 + }, + { + "ce_ib": 3.705944061279297, + "ce_orig": 1.1466797590255737, + "epoch": 1.1276871090660723, + "kl_loss": 0.038877010345458984, + "loss_ib": 0.0007593645132146776, + "step": 3922 + }, + { + "ce_ib": 4.543535232543945, + "ce_orig": 0.7742500901222229, + "epoch": 1.1276871090660723, + "kl_loss": 0.06181401014328003, + "loss_ib": 0.001072493614628911, + "step": 3922 + }, + { + "ce_ib": 4.375583648681641, + "ce_orig": 0.8855536580085754, + "epoch": 1.12797469264505, + "kl_loss": 0.09158609807491302, + "loss_ib": 0.0013534193858504295, + "step": 3923 + }, + { + "ce_ib": 3.4412832260131836, + "ce_orig": 0.5785518288612366, + "epoch": 1.12797469264505, + "kl_loss": 0.05534330755472183, + "loss_ib": 0.0008975613745860755, + "step": 3923 + }, + { + "ce_ib": 1.9448089599609375, + "ce_orig": 0.47553861141204834, + "epoch": 1.12797469264505, + "kl_loss": 0.03974555432796478, + "loss_ib": 0.0005919364048168063, + "step": 3923 + }, + { + "ce_ib": 4.665425777435303, + "ce_orig": 0.9504251480102539, + "epoch": 1.12797469264505, + "kl_loss": 0.050506167113780975, + "loss_ib": 0.000971604255028069, + "step": 3923 + }, + { + "ce_ib": 2.777053117752075, + "ce_orig": 0.4505653381347656, + "epoch": 1.1282622762240275, + "kl_loss": 0.06417036056518555, + "loss_ib": 0.0009194089216180146, + "step": 3924 + }, + { + "ce_ib": 4.729559421539307, + "ce_orig": 1.3096978664398193, + "epoch": 1.1282622762240275, + "kl_loss": 0.03865718096494675, + "loss_ib": 0.0008595277322456241, + "step": 3924 + }, + { + "ce_ib": 3.934568166732788, + "ce_orig": 0.8565050363540649, + "epoch": 1.1282622762240275, + "kl_loss": 0.03279884532094002, + "loss_ib": 0.0007214451907202601, + "step": 3924 + }, + { + "ce_ib": 5.2736053466796875, + "ce_orig": 1.3885053396224976, + "epoch": 1.1282622762240275, + "kl_loss": 0.04827719181776047, + "loss_ib": 0.0010101323714479804, + "step": 3924 + }, + { + "epoch": 1.1285498598030053, + "grad_norm": 0.12003128230571747, + "learning_rate": 3.5911976687161495e-05, + "loss": 0.9235, + "step": 3925 + }, + { + "ce_ib": 3.352043867111206, + "ce_orig": 0.744049608707428, + "epoch": 1.1285498598030053, + "kl_loss": 0.03628704696893692, + "loss_ib": 0.000698074814863503, + "step": 3925 + }, + { + "ce_ib": 2.7620508670806885, + "ce_orig": 0.5722774863243103, + "epoch": 1.1285498598030053, + "kl_loss": 0.03468436747789383, + "loss_ib": 0.0006230487488210201, + "step": 3925 + }, + { + "ce_ib": 5.341773509979248, + "ce_orig": 1.415787935256958, + "epoch": 1.1285498598030053, + "kl_loss": 0.05469691380858421, + "loss_ib": 0.0010811464162543416, + "step": 3925 + }, + { + "ce_ib": 4.255481719970703, + "ce_orig": 0.9565266966819763, + "epoch": 1.1285498598030053, + "kl_loss": 0.06653673946857452, + "loss_ib": 0.0010909155244007707, + "step": 3925 + }, + { + "ce_ib": 2.2224137783050537, + "ce_orig": 0.6385489106178284, + "epoch": 1.128837443381983, + "kl_loss": 0.03841354325413704, + "loss_ib": 0.000606376794166863, + "step": 3926 + }, + { + "ce_ib": 4.39332389831543, + "ce_orig": 1.333547592163086, + "epoch": 1.128837443381983, + "kl_loss": 0.03925442323088646, + "loss_ib": 0.0008318765903823078, + "step": 3926 + }, + { + "ce_ib": 4.332454681396484, + "ce_orig": 1.0569846630096436, + "epoch": 1.128837443381983, + "kl_loss": 0.04486367851495743, + "loss_ib": 0.0008818822097964585, + "step": 3926 + }, + { + "ce_ib": 2.469822883605957, + "ce_orig": 0.40427303314208984, + "epoch": 1.128837443381983, + "kl_loss": 0.05937405303120613, + "loss_ib": 0.0008407227578572929, + "step": 3926 + }, + { + "ce_ib": 4.110169887542725, + "ce_orig": 1.367255687713623, + "epoch": 1.1291250269609605, + "kl_loss": 0.04773808270692825, + "loss_ib": 0.0008883978007361293, + "step": 3927 + }, + { + "ce_ib": 2.5441133975982666, + "ce_orig": 0.5477756857872009, + "epoch": 1.1291250269609605, + "kl_loss": 0.030215367674827576, + "loss_ib": 0.0005565650062635541, + "step": 3927 + }, + { + "ce_ib": 2.8729028701782227, + "ce_orig": 0.7758079171180725, + "epoch": 1.1291250269609605, + "kl_loss": 0.03630292788147926, + "loss_ib": 0.0006503195036202669, + "step": 3927 + }, + { + "ce_ib": 3.5307705402374268, + "ce_orig": 1.0175081491470337, + "epoch": 1.1291250269609605, + "kl_loss": 0.03729350492358208, + "loss_ib": 0.0007260121055878699, + "step": 3927 + }, + { + "ce_ib": 5.092642784118652, + "ce_orig": 1.1677064895629883, + "epoch": 1.1294126105399382, + "kl_loss": 0.07314274460077286, + "loss_ib": 0.0012406916357576847, + "step": 3928 + }, + { + "ce_ib": 2.1972177028656006, + "ce_orig": 0.585379958152771, + "epoch": 1.1294126105399382, + "kl_loss": 0.05970073118805885, + "loss_ib": 0.0008167290361598134, + "step": 3928 + }, + { + "ce_ib": 2.7344796657562256, + "ce_orig": 0.6941724419593811, + "epoch": 1.1294126105399382, + "kl_loss": 0.049725696444511414, + "loss_ib": 0.0007707048789598048, + "step": 3928 + }, + { + "ce_ib": 2.3845715522766113, + "ce_orig": 0.6627113819122314, + "epoch": 1.1294126105399382, + "kl_loss": 0.023452546447515488, + "loss_ib": 0.0004729825886897743, + "step": 3928 + }, + { + "ce_ib": 2.5317893028259277, + "ce_orig": 0.5925617218017578, + "epoch": 1.1297001941189158, + "kl_loss": 0.09623053669929504, + "loss_ib": 0.0012154842261224985, + "step": 3929 + }, + { + "ce_ib": 3.1984164714813232, + "ce_orig": 0.7482951879501343, + "epoch": 1.1297001941189158, + "kl_loss": 0.060067158192396164, + "loss_ib": 0.0009205132373608649, + "step": 3929 + }, + { + "ce_ib": 3.004702091217041, + "ce_orig": 0.5526098012924194, + "epoch": 1.1297001941189158, + "kl_loss": 0.06780004501342773, + "loss_ib": 0.0009784706635400653, + "step": 3929 + }, + { + "ce_ib": 3.003397226333618, + "ce_orig": 0.7945706844329834, + "epoch": 1.1297001941189158, + "kl_loss": 0.06295059621334076, + "loss_ib": 0.0009298456716351211, + "step": 3929 + }, + { + "epoch": 1.1299877776978935, + "grad_norm": 0.12351267039775848, + "learning_rate": 3.587705080708137e-05, + "loss": 0.8632, + "step": 3930 + }, + { + "ce_ib": 2.6467068195343018, + "ce_orig": 0.46804845333099365, + "epoch": 1.1299877776978935, + "kl_loss": 0.03691389411687851, + "loss_ib": 0.0006338095990940928, + "step": 3930 + }, + { + "ce_ib": 2.0240209102630615, + "ce_orig": 0.45666787028312683, + "epoch": 1.1299877776978935, + "kl_loss": 0.06473789364099503, + "loss_ib": 0.0008497810340486467, + "step": 3930 + }, + { + "ce_ib": 4.617701530456543, + "ce_orig": 1.3133023977279663, + "epoch": 1.1299877776978935, + "kl_loss": 0.05322745442390442, + "loss_ib": 0.0009940447052940726, + "step": 3930 + }, + { + "ce_ib": 3.5449137687683105, + "ce_orig": 0.6302653551101685, + "epoch": 1.1299877776978935, + "kl_loss": 0.06194372475147247, + "loss_ib": 0.0009739285451360047, + "step": 3930 + }, + { + "ce_ib": 3.102900505065918, + "ce_orig": 0.5450571775436401, + "epoch": 1.130275361276871, + "kl_loss": 0.054245926439762115, + "loss_ib": 0.000852749333716929, + "step": 3931 + }, + { + "ce_ib": 3.230605125427246, + "ce_orig": 0.9053205251693726, + "epoch": 1.130275361276871, + "kl_loss": 0.05898222699761391, + "loss_ib": 0.0009128827368840575, + "step": 3931 + }, + { + "ce_ib": 2.375202178955078, + "ce_orig": 0.349849671125412, + "epoch": 1.130275361276871, + "kl_loss": 0.039366353303194046, + "loss_ib": 0.0006311837350949645, + "step": 3931 + }, + { + "ce_ib": 5.575078010559082, + "ce_orig": 1.5147809982299805, + "epoch": 1.130275361276871, + "kl_loss": 0.05611380934715271, + "loss_ib": 0.0011186458868905902, + "step": 3931 + }, + { + "ce_ib": 3.5893547534942627, + "ce_orig": 1.1606100797653198, + "epoch": 1.1305629448558487, + "kl_loss": 0.03000757098197937, + "loss_ib": 0.0006590111297555268, + "step": 3932 + }, + { + "ce_ib": 2.432908535003662, + "ce_orig": 0.5636517405509949, + "epoch": 1.1305629448558487, + "kl_loss": 0.08031251281499863, + "loss_ib": 0.0010464160004630685, + "step": 3932 + }, + { + "ce_ib": 4.462815761566162, + "ce_orig": 0.7780344486236572, + "epoch": 1.1305629448558487, + "kl_loss": 0.07523845136165619, + "loss_ib": 0.0011986660538241267, + "step": 3932 + }, + { + "ce_ib": 4.612793445587158, + "ce_orig": 1.4357136487960815, + "epoch": 1.1305629448558487, + "kl_loss": 0.039521053433418274, + "loss_ib": 0.0008564898162148893, + "step": 3932 + }, + { + "ce_ib": 2.6555817127227783, + "ce_orig": 0.6463582515716553, + "epoch": 1.1308505284348263, + "kl_loss": 0.03751010447740555, + "loss_ib": 0.0006406591855920851, + "step": 3933 + }, + { + "ce_ib": 5.032270431518555, + "ce_orig": 0.9164180755615234, + "epoch": 1.1308505284348263, + "kl_loss": 0.04935064911842346, + "loss_ib": 0.0009967335499823093, + "step": 3933 + }, + { + "ce_ib": 3.5069868564605713, + "ce_orig": 0.8393415808677673, + "epoch": 1.1308505284348263, + "kl_loss": 0.03260817006230354, + "loss_ib": 0.0006767803570255637, + "step": 3933 + }, + { + "ce_ib": 4.14511775970459, + "ce_orig": 1.2601544857025146, + "epoch": 1.1308505284348263, + "kl_loss": 0.044937863945961, + "loss_ib": 0.0008638903964310884, + "step": 3933 + }, + { + "ce_ib": 2.59696626663208, + "ce_orig": 0.7479726076126099, + "epoch": 1.131138112013804, + "kl_loss": 0.03337136656045914, + "loss_ib": 0.0005934102809987962, + "step": 3934 + }, + { + "ce_ib": 3.9729158878326416, + "ce_orig": 0.9130390286445618, + "epoch": 1.131138112013804, + "kl_loss": 0.08934309333562851, + "loss_ib": 0.0012907225172966719, + "step": 3934 + }, + { + "ce_ib": 1.8775774240493774, + "ce_orig": 0.5411234498023987, + "epoch": 1.131138112013804, + "kl_loss": 0.041366301476955414, + "loss_ib": 0.0006014207610860467, + "step": 3934 + }, + { + "ce_ib": 3.074126958847046, + "ce_orig": 0.8603256940841675, + "epoch": 1.131138112013804, + "kl_loss": 0.04962087422609329, + "loss_ib": 0.0008036213694140315, + "step": 3934 + }, + { + "epoch": 1.1314256955927817, + "grad_norm": 0.11605213582515717, + "learning_rate": 3.5842098721661224e-05, + "loss": 0.8466, + "step": 3935 + }, + { + "ce_ib": 2.2801315784454346, + "ce_orig": 0.4879854917526245, + "epoch": 1.1314256955927817, + "kl_loss": 0.04665331542491913, + "loss_ib": 0.0006945463246665895, + "step": 3935 + }, + { + "ce_ib": 3.822896957397461, + "ce_orig": 0.9690412282943726, + "epoch": 1.1314256955927817, + "kl_loss": 0.041626155376434326, + "loss_ib": 0.0007985512493178248, + "step": 3935 + }, + { + "ce_ib": 1.9674417972564697, + "ce_orig": 0.3392238914966583, + "epoch": 1.1314256955927817, + "kl_loss": 0.08398974686861038, + "loss_ib": 0.0010366415372118354, + "step": 3935 + }, + { + "ce_ib": 3.6826553344726562, + "ce_orig": 1.063525915145874, + "epoch": 1.1314256955927817, + "kl_loss": 0.04229925572872162, + "loss_ib": 0.000791258062236011, + "step": 3935 + }, + { + "ce_ib": 3.2814879417419434, + "ce_orig": 1.0327463150024414, + "epoch": 1.1317132791717592, + "kl_loss": 0.034678567200899124, + "loss_ib": 0.0006749344756826758, + "step": 3936 + }, + { + "ce_ib": 3.514008045196533, + "ce_orig": 0.7746804356575012, + "epoch": 1.1317132791717592, + "kl_loss": 0.046891920268535614, + "loss_ib": 0.0008203199249692261, + "step": 3936 + }, + { + "ce_ib": 2.3911497592926025, + "ce_orig": 0.48117128014564514, + "epoch": 1.1317132791717592, + "kl_loss": 0.03587929531931877, + "loss_ib": 0.000597907870542258, + "step": 3936 + }, + { + "ce_ib": 2.6490697860717773, + "ce_orig": 0.5614483952522278, + "epoch": 1.1317132791717592, + "kl_loss": 0.04648932069540024, + "loss_ib": 0.0007298001437447965, + "step": 3936 + }, + { + "ce_ib": 3.7643442153930664, + "ce_orig": 1.3409905433654785, + "epoch": 1.132000862750737, + "kl_loss": 0.04243085905909538, + "loss_ib": 0.0008007429423741996, + "step": 3937 + }, + { + "ce_ib": 3.00685715675354, + "ce_orig": 0.9483428001403809, + "epoch": 1.132000862750737, + "kl_loss": 0.05318521335721016, + "loss_ib": 0.0008325378294102848, + "step": 3937 + }, + { + "ce_ib": 3.5362918376922607, + "ce_orig": 0.8355571627616882, + "epoch": 1.132000862750737, + "kl_loss": 0.03118298575282097, + "loss_ib": 0.0006654590251855552, + "step": 3937 + }, + { + "ce_ib": 4.085864067077637, + "ce_orig": 1.1963192224502563, + "epoch": 1.132000862750737, + "kl_loss": 0.05483805388212204, + "loss_ib": 0.0009569669491611421, + "step": 3937 + }, + { + "ce_ib": 3.8430211544036865, + "ce_orig": 0.9871225953102112, + "epoch": 1.1322884463297145, + "kl_loss": 0.05248077213764191, + "loss_ib": 0.0009091098327189684, + "step": 3938 + }, + { + "ce_ib": 3.963247299194336, + "ce_orig": 0.9020754098892212, + "epoch": 1.1322884463297145, + "kl_loss": 0.04542887210845947, + "loss_ib": 0.0008506134035997093, + "step": 3938 + }, + { + "ce_ib": 1.7023614645004272, + "ce_orig": 0.5307387709617615, + "epoch": 1.1322884463297145, + "kl_loss": 0.02593894675374031, + "loss_ib": 0.00042962556472048163, + "step": 3938 + }, + { + "ce_ib": 2.4968385696411133, + "ce_orig": 0.4975266456604004, + "epoch": 1.1322884463297145, + "kl_loss": 0.16642308235168457, + "loss_ib": 0.0019139146897941828, + "step": 3938 + }, + { + "ce_ib": 3.011087656021118, + "ce_orig": 0.7489370107650757, + "epoch": 1.1325760299086922, + "kl_loss": 0.0653407871723175, + "loss_ib": 0.0009545165812596679, + "step": 3939 + }, + { + "ce_ib": 4.505891799926758, + "ce_orig": 1.2548246383666992, + "epoch": 1.1325760299086922, + "kl_loss": 0.04695805907249451, + "loss_ib": 0.0009201697539538145, + "step": 3939 + }, + { + "ce_ib": 3.7370498180389404, + "ce_orig": 0.815804123878479, + "epoch": 1.1325760299086922, + "kl_loss": 0.05441242456436157, + "loss_ib": 0.0009178291657008231, + "step": 3939 + }, + { + "ce_ib": 4.422025680541992, + "ce_orig": 0.9071668982505798, + "epoch": 1.1325760299086922, + "kl_loss": 0.08082231879234314, + "loss_ib": 0.0012504257028922439, + "step": 3939 + }, + { + "epoch": 1.13286361348767, + "grad_norm": 0.10844876617193222, + "learning_rate": 3.580712051510876e-05, + "loss": 0.842, + "step": 3940 + }, + { + "ce_ib": 2.2601230144500732, + "ce_orig": 0.4937383830547333, + "epoch": 1.13286361348767, + "kl_loss": 0.02214636094868183, + "loss_ib": 0.0004474759043660015, + "step": 3940 + }, + { + "ce_ib": 2.846142053604126, + "ce_orig": 0.6881020665168762, + "epoch": 1.13286361348767, + "kl_loss": 0.0516003780066967, + "loss_ib": 0.0008006179123185575, + "step": 3940 + }, + { + "ce_ib": 4.520119667053223, + "ce_orig": 1.2392702102661133, + "epoch": 1.13286361348767, + "kl_loss": 0.04605845734477043, + "loss_ib": 0.0009125965880230069, + "step": 3940 + }, + { + "ce_ib": 3.4169363975524902, + "ce_orig": 0.8648455739021301, + "epoch": 1.13286361348767, + "kl_loss": 0.04271221533417702, + "loss_ib": 0.0007688158075325191, + "step": 3940 + }, + { + "ce_ib": 3.3114633560180664, + "ce_orig": 0.6007875204086304, + "epoch": 1.1331511970666475, + "kl_loss": 0.05511438101530075, + "loss_ib": 0.0008822901290841401, + "step": 3941 + }, + { + "ce_ib": 1.6494686603546143, + "ce_orig": 0.4033755958080292, + "epoch": 1.1331511970666475, + "kl_loss": 0.029883891344070435, + "loss_ib": 0.00046378574916161597, + "step": 3941 + }, + { + "ce_ib": 3.9698307514190674, + "ce_orig": 1.3194129467010498, + "epoch": 1.1331511970666475, + "kl_loss": 0.08005492389202118, + "loss_ib": 0.0011975322850048542, + "step": 3941 + }, + { + "ce_ib": 4.029055595397949, + "ce_orig": 1.0619357824325562, + "epoch": 1.1331511970666475, + "kl_loss": 0.03990040719509125, + "loss_ib": 0.0008019095985218883, + "step": 3941 + }, + { + "ce_ib": 3.6533334255218506, + "ce_orig": 1.0484994649887085, + "epoch": 1.1334387806456252, + "kl_loss": 0.060047492384910583, + "loss_ib": 0.0009658082271926105, + "step": 3942 + }, + { + "ce_ib": 2.9914603233337402, + "ce_orig": 0.7944708466529846, + "epoch": 1.1334387806456252, + "kl_loss": 0.03931581228971481, + "loss_ib": 0.0006923041073605418, + "step": 3942 + }, + { + "ce_ib": 4.6577348709106445, + "ce_orig": 1.1865304708480835, + "epoch": 1.1334387806456252, + "kl_loss": 0.05984976887702942, + "loss_ib": 0.0010642712004482746, + "step": 3942 + }, + { + "ce_ib": 2.174647569656372, + "ce_orig": 0.374666690826416, + "epoch": 1.1334387806456252, + "kl_loss": 0.06313677132129669, + "loss_ib": 0.0008488324237987399, + "step": 3942 + }, + { + "ce_ib": 7.629187107086182, + "ce_orig": 1.6670275926589966, + "epoch": 1.1337263642246027, + "kl_loss": 0.05910329148173332, + "loss_ib": 0.0013539515202865005, + "step": 3943 + }, + { + "ce_ib": 2.617807149887085, + "ce_orig": 0.31802502274513245, + "epoch": 1.1337263642246027, + "kl_loss": 0.046170469373464584, + "loss_ib": 0.0007234854274429381, + "step": 3943 + }, + { + "ce_ib": 4.590149879455566, + "ce_orig": 0.9741613268852234, + "epoch": 1.1337263642246027, + "kl_loss": 0.030149564146995544, + "loss_ib": 0.0007605105638504028, + "step": 3943 + }, + { + "ce_ib": 1.4811899662017822, + "ce_orig": 0.32260194420814514, + "epoch": 1.1337263642246027, + "kl_loss": 0.10804155468940735, + "loss_ib": 0.0012285345001146197, + "step": 3943 + }, + { + "ce_ib": 4.723200798034668, + "ce_orig": 1.1688318252563477, + "epoch": 1.1340139478035804, + "kl_loss": 0.03570309281349182, + "loss_ib": 0.0008293509017676115, + "step": 3944 + }, + { + "ce_ib": 2.56620192527771, + "ce_orig": 0.4114103317260742, + "epoch": 1.1340139478035804, + "kl_loss": 0.05078960210084915, + "loss_ib": 0.000764516182243824, + "step": 3944 + }, + { + "ce_ib": 3.5711233615875244, + "ce_orig": 0.9450796246528625, + "epoch": 1.1340139478035804, + "kl_loss": 0.05120236426591873, + "loss_ib": 0.0008691360126249492, + "step": 3944 + }, + { + "ce_ib": 3.0729639530181885, + "ce_orig": 0.7366697192192078, + "epoch": 1.1340139478035804, + "kl_loss": 0.035194557160139084, + "loss_ib": 0.0006592419813387096, + "step": 3944 + }, + { + "epoch": 1.134301531382558, + "grad_norm": 0.114106185734272, + "learning_rate": 3.5772116271694586e-05, + "loss": 0.8591, + "step": 3945 + }, + { + "ce_ib": 2.5361688137054443, + "ce_orig": 0.594955563545227, + "epoch": 1.134301531382558, + "kl_loss": 0.026831910014152527, + "loss_ib": 0.0005219359300099313, + "step": 3945 + }, + { + "ce_ib": 2.002927541732788, + "ce_orig": 0.43679171800613403, + "epoch": 1.134301531382558, + "kl_loss": 0.03406849876046181, + "loss_ib": 0.0005409776931628585, + "step": 3945 + }, + { + "ce_ib": 3.3076937198638916, + "ce_orig": 0.7372007966041565, + "epoch": 1.134301531382558, + "kl_loss": 0.032577402889728546, + "loss_ib": 0.0006565433577634394, + "step": 3945 + }, + { + "ce_ib": 2.6134493350982666, + "ce_orig": 0.4185085594654083, + "epoch": 1.134301531382558, + "kl_loss": 0.022600647062063217, + "loss_ib": 0.0004873514117207378, + "step": 3945 + }, + { + "ce_ib": 5.292726516723633, + "ce_orig": 1.3310822248458862, + "epoch": 1.1345891149615357, + "kl_loss": 0.03184828907251358, + "loss_ib": 0.0008477555238641798, + "step": 3946 + }, + { + "ce_ib": 3.285141706466675, + "ce_orig": 0.605815589427948, + "epoch": 1.1345891149615357, + "kl_loss": 0.02652062103152275, + "loss_ib": 0.0005937203532084823, + "step": 3946 + }, + { + "ce_ib": 3.1746902465820312, + "ce_orig": 0.7806396484375, + "epoch": 1.1345891149615357, + "kl_loss": 0.06578423827886581, + "loss_ib": 0.0009753113845363259, + "step": 3946 + }, + { + "ce_ib": 2.808044195175171, + "ce_orig": 0.622989296913147, + "epoch": 1.1345891149615357, + "kl_loss": 0.03770667687058449, + "loss_ib": 0.0006578711909241974, + "step": 3946 + }, + { + "ce_ib": 3.335880756378174, + "ce_orig": 0.5970519781112671, + "epoch": 1.1348766985405134, + "kl_loss": 0.04481920227408409, + "loss_ib": 0.0007817801088094711, + "step": 3947 + }, + { + "ce_ib": 1.827393889427185, + "ce_orig": 0.2943595349788666, + "epoch": 1.1348766985405134, + "kl_loss": 0.03771749883890152, + "loss_ib": 0.0005599143332801759, + "step": 3947 + }, + { + "ce_ib": 3.8530173301696777, + "ce_orig": 0.8198567628860474, + "epoch": 1.1348766985405134, + "kl_loss": 0.045401930809020996, + "loss_ib": 0.0008393210009671748, + "step": 3947 + }, + { + "ce_ib": 5.387101173400879, + "ce_orig": 1.5738625526428223, + "epoch": 1.1348766985405134, + "kl_loss": 0.06823069602251053, + "loss_ib": 0.0012210170971229672, + "step": 3947 + }, + { + "ce_ib": 2.151365280151367, + "ce_orig": 0.5402926802635193, + "epoch": 1.135164282119491, + "kl_loss": 0.03988245502114296, + "loss_ib": 0.000613961077760905, + "step": 3948 + }, + { + "ce_ib": 3.166796922683716, + "ce_orig": 0.6171669363975525, + "epoch": 1.135164282119491, + "kl_loss": 0.05127011984586716, + "loss_ib": 0.0008293808787129819, + "step": 3948 + }, + { + "ce_ib": 2.4542155265808105, + "ce_orig": 0.7502285838127136, + "epoch": 1.135164282119491, + "kl_loss": 0.026500122621655464, + "loss_ib": 0.000510422745719552, + "step": 3948 + }, + { + "ce_ib": 2.594956874847412, + "ce_orig": 0.59825599193573, + "epoch": 1.135164282119491, + "kl_loss": 0.04471740871667862, + "loss_ib": 0.0007066697580739856, + "step": 3948 + }, + { + "ce_ib": 4.683850288391113, + "ce_orig": 1.2608189582824707, + "epoch": 1.1354518656984687, + "kl_loss": 0.06732061505317688, + "loss_ib": 0.0011415912304073572, + "step": 3949 + }, + { + "ce_ib": 4.448608875274658, + "ce_orig": 1.0839301347732544, + "epoch": 1.1354518656984687, + "kl_loss": 0.04563573747873306, + "loss_ib": 0.0009012182126753032, + "step": 3949 + }, + { + "ce_ib": 3.7429420948028564, + "ce_orig": 0.8820019364356995, + "epoch": 1.1354518656984687, + "kl_loss": 0.066710464656353, + "loss_ib": 0.0010413987329229712, + "step": 3949 + }, + { + "ce_ib": 2.5337350368499756, + "ce_orig": 0.4613261818885803, + "epoch": 1.1354518656984687, + "kl_loss": 0.05864349380135536, + "loss_ib": 0.0008398084319196641, + "step": 3949 + }, + { + "epoch": 1.1357394492774462, + "grad_norm": 0.10442899167537689, + "learning_rate": 3.573708607575205e-05, + "loss": 0.7566, + "step": 3950 + }, + { + "ce_ib": 2.5711605548858643, + "ce_orig": 0.7035712599754333, + "epoch": 1.1357394492774462, + "kl_loss": 0.03926976025104523, + "loss_ib": 0.000649813620839268, + "step": 3950 + }, + { + "ce_ib": 2.5125932693481445, + "ce_orig": 0.6366184949874878, + "epoch": 1.1357394492774462, + "kl_loss": 0.05449884384870529, + "loss_ib": 0.0007962476811371744, + "step": 3950 + }, + { + "ce_ib": 3.8713650703430176, + "ce_orig": 0.8955896496772766, + "epoch": 1.1357394492774462, + "kl_loss": 0.050892531871795654, + "loss_ib": 0.0008960617706179619, + "step": 3950 + }, + { + "ce_ib": 2.6716301441192627, + "ce_orig": 0.6747956275939941, + "epoch": 1.1357394492774462, + "kl_loss": 0.02375379577279091, + "loss_ib": 0.0005047009908594191, + "step": 3950 + }, + { + "ce_ib": 2.621201276779175, + "ce_orig": 0.5375458002090454, + "epoch": 1.136027032856424, + "kl_loss": 0.04892443120479584, + "loss_ib": 0.0007513644522987306, + "step": 3951 + }, + { + "ce_ib": 2.336761474609375, + "ce_orig": 0.6677812337875366, + "epoch": 1.136027032856424, + "kl_loss": 0.05045640096068382, + "loss_ib": 0.0007382401381619275, + "step": 3951 + }, + { + "ce_ib": 3.3002920150756836, + "ce_orig": 0.7834150791168213, + "epoch": 1.136027032856424, + "kl_loss": 0.03706764802336693, + "loss_ib": 0.000700705626513809, + "step": 3951 + }, + { + "ce_ib": 2.178554058074951, + "ce_orig": 0.3575994670391083, + "epoch": 1.136027032856424, + "kl_loss": 0.033846545964479446, + "loss_ib": 0.0005563208251260221, + "step": 3951 + }, + { + "ce_ib": 4.8069539070129395, + "ce_orig": 1.138944387435913, + "epoch": 1.1363146164354014, + "kl_loss": 0.049804434180259705, + "loss_ib": 0.0009787396993488073, + "step": 3952 + }, + { + "ce_ib": 2.2995853424072266, + "ce_orig": 0.5408632755279541, + "epoch": 1.1363146164354014, + "kl_loss": 0.02771814353764057, + "loss_ib": 0.0005071399500593543, + "step": 3952 + }, + { + "ce_ib": 1.5967520475387573, + "ce_orig": 0.4164586663246155, + "epoch": 1.1363146164354014, + "kl_loss": 0.10206004977226257, + "loss_ib": 0.0011802756926044822, + "step": 3952 + }, + { + "ce_ib": 2.4359304904937744, + "ce_orig": 0.646787166595459, + "epoch": 1.1363146164354014, + "kl_loss": 0.04948461055755615, + "loss_ib": 0.0007384390919469297, + "step": 3952 + }, + { + "ce_ib": 2.261625289916992, + "ce_orig": 0.6481450796127319, + "epoch": 1.1366022000143792, + "kl_loss": 0.02831868827342987, + "loss_ib": 0.0005093493964523077, + "step": 3953 + }, + { + "ce_ib": 2.4951465129852295, + "ce_orig": 0.595640242099762, + "epoch": 1.1366022000143792, + "kl_loss": 0.042987942695617676, + "loss_ib": 0.0006793940556235611, + "step": 3953 + }, + { + "ce_ib": 4.272885799407959, + "ce_orig": 0.7582027912139893, + "epoch": 1.1366022000143792, + "kl_loss": 0.06510318070650101, + "loss_ib": 0.001078320317901671, + "step": 3953 + }, + { + "ce_ib": 2.460529088973999, + "ce_orig": 0.4831947982311249, + "epoch": 1.1366022000143792, + "kl_loss": 0.024736937135457993, + "loss_ib": 0.0004934222670271993, + "step": 3953 + }, + { + "ce_ib": 3.0843193531036377, + "ce_orig": 0.8281334638595581, + "epoch": 1.136889783593357, + "kl_loss": 0.06525523960590363, + "loss_ib": 0.0009609842672944069, + "step": 3954 + }, + { + "ce_ib": 3.852426052093506, + "ce_orig": 0.8621560335159302, + "epoch": 1.136889783593357, + "kl_loss": 0.04764038696885109, + "loss_ib": 0.0008616464328952134, + "step": 3954 + }, + { + "ce_ib": 1.5542279481887817, + "ce_orig": 0.4239961504936218, + "epoch": 1.136889783593357, + "kl_loss": 0.031518932431936264, + "loss_ib": 0.00047061211080290377, + "step": 3954 + }, + { + "ce_ib": 2.568125009536743, + "ce_orig": 0.33351677656173706, + "epoch": 1.136889783593357, + "kl_loss": 0.020958419889211655, + "loss_ib": 0.00046639668289572, + "step": 3954 + }, + { + "epoch": 1.1371773671723344, + "grad_norm": 0.105554960668087, + "learning_rate": 3.570203001167703e-05, + "loss": 0.7646, + "step": 3955 + }, + { + "ce_ib": 4.301840782165527, + "ce_orig": 1.2059763669967651, + "epoch": 1.1371773671723344, + "kl_loss": 0.04969654604792595, + "loss_ib": 0.0009271494927816093, + "step": 3955 + }, + { + "ce_ib": 2.6555964946746826, + "ce_orig": 0.6079716086387634, + "epoch": 1.1371773671723344, + "kl_loss": 0.03617874160408974, + "loss_ib": 0.0006273470935411751, + "step": 3955 + }, + { + "ce_ib": 2.705061197280884, + "ce_orig": 0.8122026324272156, + "epoch": 1.1371773671723344, + "kl_loss": 0.026767734438180923, + "loss_ib": 0.0005381834344007075, + "step": 3955 + }, + { + "ce_ib": 7.5124993324279785, + "ce_orig": 2.0648181438446045, + "epoch": 1.1371773671723344, + "kl_loss": 0.06997150182723999, + "loss_ib": 0.0014509649481624365, + "step": 3955 + }, + { + "ce_ib": 2.320404052734375, + "ce_orig": 0.7172127962112427, + "epoch": 1.1374649507513122, + "kl_loss": 0.03170783072710037, + "loss_ib": 0.000549118674825877, + "step": 3956 + }, + { + "ce_ib": 5.425004959106445, + "ce_orig": 0.22367721796035767, + "epoch": 1.1374649507513122, + "kl_loss": 0.09723533689975739, + "loss_ib": 0.0015148537931963801, + "step": 3956 + }, + { + "ce_ib": 3.064018487930298, + "ce_orig": 0.6875466108322144, + "epoch": 1.1374649507513122, + "kl_loss": 0.03318041190505028, + "loss_ib": 0.0006382059655152261, + "step": 3956 + }, + { + "ce_ib": 3.089853048324585, + "ce_orig": 0.7248472571372986, + "epoch": 1.1374649507513122, + "kl_loss": 0.04334475100040436, + "loss_ib": 0.0007424327777698636, + "step": 3956 + }, + { + "ce_ib": 3.874885320663452, + "ce_orig": 1.1141220331192017, + "epoch": 1.1377525343302897, + "kl_loss": 0.059750668704509735, + "loss_ib": 0.0009849951602518559, + "step": 3957 + }, + { + "ce_ib": 2.073636770248413, + "ce_orig": 0.3706561326980591, + "epoch": 1.1377525343302897, + "kl_loss": 0.06317630410194397, + "loss_ib": 0.0008391267037950456, + "step": 3957 + }, + { + "ce_ib": 2.123661994934082, + "ce_orig": 0.5467737913131714, + "epoch": 1.1377525343302897, + "kl_loss": 0.024894339963793755, + "loss_ib": 0.0004613095661625266, + "step": 3957 + }, + { + "ce_ib": 3.0907816886901855, + "ce_orig": 0.7742004990577698, + "epoch": 1.1377525343302897, + "kl_loss": 0.05340982601046562, + "loss_ib": 0.0008431763853877783, + "step": 3957 + }, + { + "ce_ib": 4.686032772064209, + "ce_orig": 1.3346874713897705, + "epoch": 1.1380401179092674, + "kl_loss": 0.04941758140921593, + "loss_ib": 0.0009627790423110127, + "step": 3958 + }, + { + "ce_ib": 3.2715725898742676, + "ce_orig": 0.7597560286521912, + "epoch": 1.1380401179092674, + "kl_loss": 0.022105887532234192, + "loss_ib": 0.0005482161068357527, + "step": 3958 + }, + { + "ce_ib": 5.327739715576172, + "ce_orig": 1.5429339408874512, + "epoch": 1.1380401179092674, + "kl_loss": 0.04747170954942703, + "loss_ib": 0.001007491024211049, + "step": 3958 + }, + { + "ce_ib": 3.0865769386291504, + "ce_orig": 0.9233364462852478, + "epoch": 1.1380401179092674, + "kl_loss": 0.03630727156996727, + "loss_ib": 0.0006717303767800331, + "step": 3958 + }, + { + "ce_ib": 2.7710163593292236, + "ce_orig": 0.614412248134613, + "epoch": 1.1383277014882451, + "kl_loss": 0.05735953524708748, + "loss_ib": 0.000850696989800781, + "step": 3959 + }, + { + "ce_ib": 2.641664981842041, + "ce_orig": 0.7406402826309204, + "epoch": 1.1383277014882451, + "kl_loss": 0.020757950842380524, + "loss_ib": 0.0004717460251413286, + "step": 3959 + }, + { + "ce_ib": 4.716709613800049, + "ce_orig": 1.3514842987060547, + "epoch": 1.1383277014882451, + "kl_loss": 0.049562472850084305, + "loss_ib": 0.0009672956657595932, + "step": 3959 + }, + { + "ce_ib": 3.0761661529541016, + "ce_orig": 0.5878722667694092, + "epoch": 1.1383277014882451, + "kl_loss": 0.05079405754804611, + "loss_ib": 0.0008155571413226426, + "step": 3959 + }, + { + "epoch": 1.1386152850672226, + "grad_norm": 0.09960247576236725, + "learning_rate": 3.5666948163927716e-05, + "loss": 0.8303, + "step": 3960 + }, + { + "ce_ib": 3.417031764984131, + "ce_orig": 0.4359208345413208, + "epoch": 1.1386152850672226, + "kl_loss": 0.06019732356071472, + "loss_ib": 0.0009436763357371092, + "step": 3960 + }, + { + "ce_ib": 2.1920530796051025, + "ce_orig": 0.6315271258354187, + "epoch": 1.1386152850672226, + "kl_loss": 0.03311799466609955, + "loss_ib": 0.000550385273527354, + "step": 3960 + }, + { + "ce_ib": 3.879110097885132, + "ce_orig": 1.060995101928711, + "epoch": 1.1386152850672226, + "kl_loss": 0.04414132237434387, + "loss_ib": 0.0008293242426589131, + "step": 3960 + }, + { + "ce_ib": 2.786065101623535, + "ce_orig": 0.4627625048160553, + "epoch": 1.1386152850672226, + "kl_loss": 0.047765906900167465, + "loss_ib": 0.0007562655955553055, + "step": 3960 + }, + { + "ce_ib": 3.6057305335998535, + "ce_orig": 0.7306743860244751, + "epoch": 1.1389028686462004, + "kl_loss": 0.05706029757857323, + "loss_ib": 0.0009311760077252984, + "step": 3961 + }, + { + "ce_ib": 2.418595552444458, + "ce_orig": 0.604764997959137, + "epoch": 1.1389028686462004, + "kl_loss": 0.16302475333213806, + "loss_ib": 0.0018721070373430848, + "step": 3961 + }, + { + "ce_ib": 3.5047433376312256, + "ce_orig": 0.9145015478134155, + "epoch": 1.1389028686462004, + "kl_loss": 0.025060899555683136, + "loss_ib": 0.0006010833312757313, + "step": 3961 + }, + { + "ce_ib": 2.6422603130340576, + "ce_orig": 0.6454520225524902, + "epoch": 1.1389028686462004, + "kl_loss": 0.028686242178082466, + "loss_ib": 0.0005510884220711887, + "step": 3961 + }, + { + "ce_ib": 2.361912727355957, + "ce_orig": 0.7353960275650024, + "epoch": 1.139190452225178, + "kl_loss": 0.032368674874305725, + "loss_ib": 0.0005598780116997659, + "step": 3962 + }, + { + "ce_ib": 3.6512224674224854, + "ce_orig": 0.8669365644454956, + "epoch": 1.139190452225178, + "kl_loss": 0.033822767436504364, + "loss_ib": 0.000703349825926125, + "step": 3962 + }, + { + "ce_ib": 2.502969980239868, + "ce_orig": 0.6013566851615906, + "epoch": 1.139190452225178, + "kl_loss": 0.023656878620386124, + "loss_ib": 0.00048686578520573676, + "step": 3962 + }, + { + "ce_ib": 3.9830446243286133, + "ce_orig": 1.045287013053894, + "epoch": 1.139190452225178, + "kl_loss": 0.037446580827236176, + "loss_ib": 0.0007727702613919973, + "step": 3962 + }, + { + "ce_ib": 3.7766647338867188, + "ce_orig": 0.7178951501846313, + "epoch": 1.1394780358041556, + "kl_loss": 0.05116207152605057, + "loss_ib": 0.0008892870973795652, + "step": 3963 + }, + { + "ce_ib": 3.642815351486206, + "ce_orig": 0.6029084920883179, + "epoch": 1.1394780358041556, + "kl_loss": 0.05569426715373993, + "loss_ib": 0.000921224185731262, + "step": 3963 + }, + { + "ce_ib": 3.558384656906128, + "ce_orig": 0.954025149345398, + "epoch": 1.1394780358041556, + "kl_loss": 0.05572031810879707, + "loss_ib": 0.0009130416437983513, + "step": 3963 + }, + { + "ce_ib": 2.9754819869995117, + "ce_orig": 0.4794960916042328, + "epoch": 1.1394780358041556, + "kl_loss": 0.10456506907939911, + "loss_ib": 0.0013431988190859556, + "step": 3963 + }, + { + "ce_ib": 2.924952983856201, + "ce_orig": 0.5285451412200928, + "epoch": 1.1397656193831331, + "kl_loss": 0.03822421282529831, + "loss_ib": 0.0006747373845428228, + "step": 3964 + }, + { + "ce_ib": 4.832385540008545, + "ce_orig": 1.2708604335784912, + "epoch": 1.1397656193831331, + "kl_loss": 0.04362677037715912, + "loss_ib": 0.0009195062448270619, + "step": 3964 + }, + { + "ce_ib": 2.317169427871704, + "ce_orig": 0.4627331793308258, + "epoch": 1.1397656193831331, + "kl_loss": 0.035289548337459564, + "loss_ib": 0.0005846124258823693, + "step": 3964 + }, + { + "ce_ib": 4.665520191192627, + "ce_orig": 1.1530852317810059, + "epoch": 1.1397656193831331, + "kl_loss": 0.03726090490818024, + "loss_ib": 0.0008391610463149846, + "step": 3964 + }, + { + "epoch": 1.1400532029621109, + "grad_norm": 0.09738995879888535, + "learning_rate": 3.5631840617024426e-05, + "loss": 0.8279, + "step": 3965 + }, + { + "ce_ib": 3.63889479637146, + "ce_orig": 0.8774006366729736, + "epoch": 1.1400532029621109, + "kl_loss": 0.04986806958913803, + "loss_ib": 0.0008625701884739101, + "step": 3965 + }, + { + "ce_ib": 3.0830328464508057, + "ce_orig": 0.6158949136734009, + "epoch": 1.1400532029621109, + "kl_loss": 0.037611450999975204, + "loss_ib": 0.0006844177842140198, + "step": 3965 + }, + { + "ce_ib": 3.1302528381347656, + "ce_orig": 0.6674473881721497, + "epoch": 1.1400532029621109, + "kl_loss": 0.04687425494194031, + "loss_ib": 0.0007817677687853575, + "step": 3965 + }, + { + "ce_ib": 2.94876766204834, + "ce_orig": 0.8195706605911255, + "epoch": 1.1400532029621109, + "kl_loss": 0.043468933552503586, + "loss_ib": 0.0007295660325326025, + "step": 3965 + }, + { + "ce_ib": 2.2965242862701416, + "ce_orig": 0.7191186547279358, + "epoch": 1.1403407865410884, + "kl_loss": 0.035343073308467865, + "loss_ib": 0.0005830831942148507, + "step": 3966 + }, + { + "ce_ib": 4.345073699951172, + "ce_orig": 0.9824148416519165, + "epoch": 1.1403407865410884, + "kl_loss": 0.0638875961303711, + "loss_ib": 0.0010733832605183125, + "step": 3966 + }, + { + "ce_ib": 2.4757027626037598, + "ce_orig": 0.6354948282241821, + "epoch": 1.1403407865410884, + "kl_loss": 0.02666490525007248, + "loss_ib": 0.000514219340402633, + "step": 3966 + }, + { + "ce_ib": 2.918998956680298, + "ce_orig": 0.6926636099815369, + "epoch": 1.1403407865410884, + "kl_loss": 0.03007548302412033, + "loss_ib": 0.0005926546873524785, + "step": 3966 + }, + { + "ce_ib": 2.195725679397583, + "ce_orig": 0.6418673992156982, + "epoch": 1.1406283701200661, + "kl_loss": 0.025707166641950607, + "loss_ib": 0.0004766442289110273, + "step": 3967 + }, + { + "ce_ib": 3.921227216720581, + "ce_orig": 0.8824716210365295, + "epoch": 1.1406283701200661, + "kl_loss": 0.042443759739398956, + "loss_ib": 0.0008165602921508253, + "step": 3967 + }, + { + "ce_ib": 2.1516010761260986, + "ce_orig": 0.5603939890861511, + "epoch": 1.1406283701200661, + "kl_loss": 0.026479119434952736, + "loss_ib": 0.0004799512680619955, + "step": 3967 + }, + { + "ce_ib": 4.221168041229248, + "ce_orig": 0.9359564781188965, + "epoch": 1.1406283701200661, + "kl_loss": 0.040482744574546814, + "loss_ib": 0.0008269442478194833, + "step": 3967 + }, + { + "ce_ib": 3.242159605026245, + "ce_orig": 0.6973503828048706, + "epoch": 1.1409159536990439, + "kl_loss": 0.055757101625204086, + "loss_ib": 0.0008817869238555431, + "step": 3968 + }, + { + "ce_ib": 3.818408489227295, + "ce_orig": 1.0754213333129883, + "epoch": 1.1409159536990439, + "kl_loss": 0.04536646977066994, + "loss_ib": 0.0008355055470019579, + "step": 3968 + }, + { + "ce_ib": 3.816375970840454, + "ce_orig": 1.0974563360214233, + "epoch": 1.1409159536990439, + "kl_loss": 0.051562175154685974, + "loss_ib": 0.0008972593350335956, + "step": 3968 + }, + { + "ce_ib": 4.2736968994140625, + "ce_orig": 0.8255736827850342, + "epoch": 1.1409159536990439, + "kl_loss": 0.0691622942686081, + "loss_ib": 0.0011189925717189908, + "step": 3968 + }, + { + "ce_ib": 3.2699692249298096, + "ce_orig": 0.8920905590057373, + "epoch": 1.1412035372780214, + "kl_loss": 0.04724736511707306, + "loss_ib": 0.0007994705229066312, + "step": 3969 + }, + { + "ce_ib": 2.3973963260650635, + "ce_orig": 0.6152431964874268, + "epoch": 1.1412035372780214, + "kl_loss": 0.03081408515572548, + "loss_ib": 0.0005478804232552648, + "step": 3969 + }, + { + "ce_ib": 2.262826919555664, + "ce_orig": 0.4464540481567383, + "epoch": 1.1412035372780214, + "kl_loss": 0.04340177774429321, + "loss_ib": 0.000660300487652421, + "step": 3969 + }, + { + "ce_ib": 2.424860715866089, + "ce_orig": 0.6904832124710083, + "epoch": 1.1412035372780214, + "kl_loss": 0.030610471963882446, + "loss_ib": 0.0005485907895490527, + "step": 3969 + }, + { + "epoch": 1.141491120856999, + "grad_norm": 0.10774527490139008, + "learning_rate": 3.5596707455549386e-05, + "loss": 0.7641, + "step": 3970 + }, + { + "ce_ib": 3.173295021057129, + "ce_orig": 0.6625747680664062, + "epoch": 1.141491120856999, + "kl_loss": 0.06890840828418732, + "loss_ib": 0.001006413483992219, + "step": 3970 + }, + { + "ce_ib": 5.534042835235596, + "ce_orig": 1.5674775838851929, + "epoch": 1.141491120856999, + "kl_loss": 0.050245095044374466, + "loss_ib": 0.00105585518758744, + "step": 3970 + }, + { + "ce_ib": 3.029127359390259, + "ce_orig": 0.9006035327911377, + "epoch": 1.141491120856999, + "kl_loss": 0.034693650901317596, + "loss_ib": 0.000649849243927747, + "step": 3970 + }, + { + "ce_ib": 2.057051658630371, + "ce_orig": 0.6635637283325195, + "epoch": 1.141491120856999, + "kl_loss": 0.028066303580999374, + "loss_ib": 0.000486368197016418, + "step": 3970 + }, + { + "ce_ib": 4.809971809387207, + "ce_orig": 1.3074982166290283, + "epoch": 1.1417787044359766, + "kl_loss": 0.04568733274936676, + "loss_ib": 0.0009378704125992954, + "step": 3971 + }, + { + "ce_ib": 3.7492997646331787, + "ce_orig": 0.8574011921882629, + "epoch": 1.1417787044359766, + "kl_loss": 0.039618946611881256, + "loss_ib": 0.0007711194339208305, + "step": 3971 + }, + { + "ce_ib": 2.673205852508545, + "ce_orig": 0.3626721501350403, + "epoch": 1.1417787044359766, + "kl_loss": 0.07299941778182983, + "loss_ib": 0.0009973146952688694, + "step": 3971 + }, + { + "ce_ib": 2.651540756225586, + "ce_orig": 0.7327148914337158, + "epoch": 1.1417787044359766, + "kl_loss": 0.028600577265024185, + "loss_ib": 0.0005511598428711295, + "step": 3971 + }, + { + "ce_ib": 3.8164782524108887, + "ce_orig": 0.7545835375785828, + "epoch": 1.1420662880149544, + "kl_loss": 0.03837088868021965, + "loss_ib": 0.0007653567008674145, + "step": 3972 + }, + { + "ce_ib": 1.96595299243927, + "ce_orig": 0.5252442359924316, + "epoch": 1.1420662880149544, + "kl_loss": 0.04681726172566414, + "loss_ib": 0.0006647679256275296, + "step": 3972 + }, + { + "ce_ib": 4.088979721069336, + "ce_orig": 1.2825098037719727, + "epoch": 1.1420662880149544, + "kl_loss": 0.06582159548997879, + "loss_ib": 0.0010671138297766447, + "step": 3972 + }, + { + "ce_ib": 4.994932651519775, + "ce_orig": 1.1432162523269653, + "epoch": 1.1420662880149544, + "kl_loss": 0.035065922886133194, + "loss_ib": 0.0008501525153405964, + "step": 3972 + }, + { + "ce_ib": 3.938666582107544, + "ce_orig": 1.0752408504486084, + "epoch": 1.142353871593932, + "kl_loss": 0.05020863562822342, + "loss_ib": 0.0008959529804997146, + "step": 3973 + }, + { + "ce_ib": 3.668292760848999, + "ce_orig": 1.0241155624389648, + "epoch": 1.142353871593932, + "kl_loss": 0.05141017213463783, + "loss_ib": 0.0008809309802018106, + "step": 3973 + }, + { + "ce_ib": 2.5567080974578857, + "ce_orig": 0.5775535702705383, + "epoch": 1.142353871593932, + "kl_loss": 0.030244998633861542, + "loss_ib": 0.0005581207806244493, + "step": 3973 + }, + { + "ce_ib": 3.8122429847717285, + "ce_orig": 0.4451135993003845, + "epoch": 1.142353871593932, + "kl_loss": 0.038469113409519196, + "loss_ib": 0.0007659154362045228, + "step": 3973 + }, + { + "ce_ib": 1.6494098901748657, + "ce_orig": 0.4452982246875763, + "epoch": 1.1426414551729096, + "kl_loss": 0.10155956447124481, + "loss_ib": 0.0011805365793406963, + "step": 3974 + }, + { + "ce_ib": 1.8628631830215454, + "ce_orig": 0.5274722576141357, + "epoch": 1.1426414551729096, + "kl_loss": 0.026136767119169235, + "loss_ib": 0.0004476539616007358, + "step": 3974 + }, + { + "ce_ib": 4.409182071685791, + "ce_orig": 1.1572290658950806, + "epoch": 1.1426414551729096, + "kl_loss": 0.05453775078058243, + "loss_ib": 0.0009862956358119845, + "step": 3974 + }, + { + "ce_ib": 3.0244507789611816, + "ce_orig": 0.7259711623191833, + "epoch": 1.1426414551729096, + "kl_loss": 0.05533235892653465, + "loss_ib": 0.0008557686232961714, + "step": 3974 + }, + { + "epoch": 1.1429290387518873, + "grad_norm": 0.1091889813542366, + "learning_rate": 3.5561548764146524e-05, + "loss": 0.8762, + "step": 3975 + }, + { + "ce_ib": 3.379016637802124, + "ce_orig": 0.6841682195663452, + "epoch": 1.1429290387518873, + "kl_loss": 0.040045030415058136, + "loss_ib": 0.0007383518968708813, + "step": 3975 + }, + { + "ce_ib": 2.8582656383514404, + "ce_orig": 0.5755123496055603, + "epoch": 1.1429290387518873, + "kl_loss": 0.05268629640340805, + "loss_ib": 0.0008126894827000797, + "step": 3975 + }, + { + "ce_ib": 3.6731114387512207, + "ce_orig": 0.8536228537559509, + "epoch": 1.1429290387518873, + "kl_loss": 0.06069863587617874, + "loss_ib": 0.0009742975234985352, + "step": 3975 + }, + { + "ce_ib": 2.9643361568450928, + "ce_orig": 0.7052881717681885, + "epoch": 1.1429290387518873, + "kl_loss": 0.04974380135536194, + "loss_ib": 0.0007938715862110257, + "step": 3975 + }, + { + "ce_ib": 3.234368085861206, + "ce_orig": 0.7432865500450134, + "epoch": 1.1432166223308649, + "kl_loss": 0.06189199537038803, + "loss_ib": 0.00094235670985654, + "step": 3976 + }, + { + "ce_ib": 2.919515609741211, + "ce_orig": 0.5305343866348267, + "epoch": 1.1432166223308649, + "kl_loss": 0.04674055427312851, + "loss_ib": 0.0007593570626340806, + "step": 3976 + }, + { + "ce_ib": 3.3451013565063477, + "ce_orig": 0.9627108573913574, + "epoch": 1.1432166223308649, + "kl_loss": 0.09997676312923431, + "loss_ib": 0.001334277680143714, + "step": 3976 + }, + { + "ce_ib": 3.0854642391204834, + "ce_orig": 0.7447674870491028, + "epoch": 1.1432166223308649, + "kl_loss": 0.046219345182180405, + "loss_ib": 0.0007707399199716747, + "step": 3976 + }, + { + "ce_ib": 3.75925350189209, + "ce_orig": 1.100075364112854, + "epoch": 1.1435042059098426, + "kl_loss": 0.048255275934934616, + "loss_ib": 0.0008584780734963715, + "step": 3977 + }, + { + "ce_ib": 2.75050950050354, + "ce_orig": 0.49259984493255615, + "epoch": 1.1435042059098426, + "kl_loss": 0.05512889474630356, + "loss_ib": 0.0008263398776762187, + "step": 3977 + }, + { + "ce_ib": 2.7891037464141846, + "ce_orig": 0.7440882921218872, + "epoch": 1.1435042059098426, + "kl_loss": 0.04897342994809151, + "loss_ib": 0.0007686446188017726, + "step": 3977 + }, + { + "ce_ib": 2.8981757164001465, + "ce_orig": 0.8611764907836914, + "epoch": 1.1435042059098426, + "kl_loss": 0.04707389324903488, + "loss_ib": 0.0007605564314872026, + "step": 3977 + }, + { + "ce_ib": 4.391083717346191, + "ce_orig": 0.9808478355407715, + "epoch": 1.14379178948882, + "kl_loss": 0.057337239384651184, + "loss_ib": 0.0010124807013198733, + "step": 3978 + }, + { + "ce_ib": 2.1433122158050537, + "ce_orig": 0.7766147255897522, + "epoch": 1.14379178948882, + "kl_loss": 0.02811869978904724, + "loss_ib": 0.0004955182084813714, + "step": 3978 + }, + { + "ce_ib": 4.559109687805176, + "ce_orig": 1.037916898727417, + "epoch": 1.14379178948882, + "kl_loss": 0.05647678300738335, + "loss_ib": 0.001020678784698248, + "step": 3978 + }, + { + "ce_ib": 2.3066012859344482, + "ce_orig": 0.7030373215675354, + "epoch": 1.14379178948882, + "kl_loss": 0.030096177011728287, + "loss_ib": 0.000531621859408915, + "step": 3978 + }, + { + "ce_ib": 3.403892993927002, + "ce_orig": 0.7486399412155151, + "epoch": 1.1440793730677978, + "kl_loss": 0.03913135081529617, + "loss_ib": 0.0007317027193494141, + "step": 3979 + }, + { + "ce_ib": 2.606321334838867, + "ce_orig": 0.714551568031311, + "epoch": 1.1440793730677978, + "kl_loss": 0.031250905245542526, + "loss_ib": 0.0005731412093155086, + "step": 3979 + }, + { + "ce_ib": 2.567603588104248, + "ce_orig": 0.6199600100517273, + "epoch": 1.1440793730677978, + "kl_loss": 0.023989243432879448, + "loss_ib": 0.0004966527922078967, + "step": 3979 + }, + { + "ce_ib": 3.9308528900146484, + "ce_orig": 1.1144204139709473, + "epoch": 1.1440793730677978, + "kl_loss": 0.04712661728262901, + "loss_ib": 0.000864351459313184, + "step": 3979 + }, + { + "epoch": 1.1443669566467753, + "grad_norm": 0.1168527752161026, + "learning_rate": 3.552636462752132e-05, + "loss": 0.8251, + "step": 3980 + }, + { + "ce_ib": 3.370286226272583, + "ce_orig": 0.8438466191291809, + "epoch": 1.1443669566467753, + "kl_loss": 0.03628354147076607, + "loss_ib": 0.0006998640019446611, + "step": 3980 + }, + { + "ce_ib": 4.920966148376465, + "ce_orig": 1.3883593082427979, + "epoch": 1.1443669566467753, + "kl_loss": 0.03979814052581787, + "loss_ib": 0.0008900780230760574, + "step": 3980 + }, + { + "ce_ib": 2.839338779449463, + "ce_orig": 0.7242956757545471, + "epoch": 1.1443669566467753, + "kl_loss": 0.03570795804262161, + "loss_ib": 0.0006410134374164045, + "step": 3980 + }, + { + "ce_ib": 3.7033352851867676, + "ce_orig": 1.1482443809509277, + "epoch": 1.1443669566467753, + "kl_loss": 0.044999200850725174, + "loss_ib": 0.0008203255129046738, + "step": 3980 + }, + { + "ce_ib": 2.1081736087799072, + "ce_orig": 0.6231125593185425, + "epoch": 1.144654540225753, + "kl_loss": 0.04560597240924835, + "loss_ib": 0.0006668770802207291, + "step": 3981 + }, + { + "ce_ib": 4.416264533996582, + "ce_orig": 1.2386906147003174, + "epoch": 1.144654540225753, + "kl_loss": 0.055071186274290085, + "loss_ib": 0.0009923382895067334, + "step": 3981 + }, + { + "ce_ib": 3.272763967514038, + "ce_orig": 0.787896990776062, + "epoch": 1.144654540225753, + "kl_loss": 0.02039000391960144, + "loss_ib": 0.0005311764543876052, + "step": 3981 + }, + { + "ce_ib": 2.904022216796875, + "ce_orig": 0.7139590978622437, + "epoch": 1.144654540225753, + "kl_loss": 0.04137157276272774, + "loss_ib": 0.0007041179342195392, + "step": 3981 + }, + { + "ce_ib": 3.9786221981048584, + "ce_orig": 0.6560775637626648, + "epoch": 1.1449421238047308, + "kl_loss": 0.0636756420135498, + "loss_ib": 0.0010346185881644487, + "step": 3982 + }, + { + "ce_ib": 3.2891955375671387, + "ce_orig": 0.6082417964935303, + "epoch": 1.1449421238047308, + "kl_loss": 0.044151730835437775, + "loss_ib": 0.0007704368326812983, + "step": 3982 + }, + { + "ce_ib": 2.8383424282073975, + "ce_orig": 0.650090217590332, + "epoch": 1.1449421238047308, + "kl_loss": 0.0315457284450531, + "loss_ib": 0.0005992915248498321, + "step": 3982 + }, + { + "ce_ib": 3.810906410217285, + "ce_orig": 0.5950309634208679, + "epoch": 1.1449421238047308, + "kl_loss": 0.05416754633188248, + "loss_ib": 0.0009227661066688597, + "step": 3982 + }, + { + "ce_ib": 3.913048267364502, + "ce_orig": 1.0405292510986328, + "epoch": 1.1452297073837083, + "kl_loss": 0.04376773536205292, + "loss_ib": 0.0008289820980280638, + "step": 3983 + }, + { + "ce_ib": 2.5278284549713135, + "ce_orig": 0.45562493801116943, + "epoch": 1.1452297073837083, + "kl_loss": 0.0534517839550972, + "loss_ib": 0.0007873006979934871, + "step": 3983 + }, + { + "ce_ib": 4.563870906829834, + "ce_orig": 1.3763465881347656, + "epoch": 1.1452297073837083, + "kl_loss": 0.07556610554456711, + "loss_ib": 0.0012120481114834547, + "step": 3983 + }, + { + "ce_ib": 3.0808751583099365, + "ce_orig": 0.9320799112319946, + "epoch": 1.1452297073837083, + "kl_loss": 0.041879523545503616, + "loss_ib": 0.0007268827175721526, + "step": 3983 + }, + { + "ce_ib": 5.48430061340332, + "ce_orig": 1.4964412450790405, + "epoch": 1.145517290962686, + "kl_loss": 0.05152415856719017, + "loss_ib": 0.001063671661540866, + "step": 3984 + }, + { + "ce_ib": 2.476388931274414, + "ce_orig": 0.6455918550491333, + "epoch": 1.145517290962686, + "kl_loss": 0.023316524922847748, + "loss_ib": 0.00048080412670969963, + "step": 3984 + }, + { + "ce_ib": 4.7926859855651855, + "ce_orig": 1.1824994087219238, + "epoch": 1.145517290962686, + "kl_loss": 0.07134056836366653, + "loss_ib": 0.0011926742736250162, + "step": 3984 + }, + { + "ce_ib": 2.0732457637786865, + "ce_orig": 0.4516094923019409, + "epoch": 1.145517290962686, + "kl_loss": 0.05614997446537018, + "loss_ib": 0.0007688243058510125, + "step": 3984 + }, + { + "epoch": 1.1458048745416636, + "grad_norm": 0.11047742515802383, + "learning_rate": 3.549115513044049e-05, + "loss": 0.7921, + "step": 3985 + }, + { + "ce_ib": 2.775651693344116, + "ce_orig": 0.6710633635520935, + "epoch": 1.1458048745416636, + "kl_loss": 0.03249884769320488, + "loss_ib": 0.0006025535985827446, + "step": 3985 + }, + { + "ce_ib": 4.29301643371582, + "ce_orig": 0.9250194430351257, + "epoch": 1.1458048745416636, + "kl_loss": 0.0703975260257721, + "loss_ib": 0.0011332768481224775, + "step": 3985 + }, + { + "ce_ib": 2.671915292739868, + "ce_orig": 0.8584989309310913, + "epoch": 1.1458048745416636, + "kl_loss": 0.034718699753284454, + "loss_ib": 0.0006143784848973155, + "step": 3985 + }, + { + "ce_ib": 2.3902320861816406, + "ce_orig": 0.476657897233963, + "epoch": 1.1458048745416636, + "kl_loss": 0.03251270577311516, + "loss_ib": 0.0005641502793878317, + "step": 3985 + }, + { + "ce_ib": 4.927796840667725, + "ce_orig": 1.3550273180007935, + "epoch": 1.1460924581206413, + "kl_loss": 0.046598881483078, + "loss_ib": 0.0009587684762664139, + "step": 3986 + }, + { + "ce_ib": 4.132077693939209, + "ce_orig": 1.1776390075683594, + "epoch": 1.1460924581206413, + "kl_loss": 0.05506886541843414, + "loss_ib": 0.0009638963383622468, + "step": 3986 + }, + { + "ce_ib": 3.880206823348999, + "ce_orig": 0.9746888279914856, + "epoch": 1.1460924581206413, + "kl_loss": 0.05379309505224228, + "loss_ib": 0.0009259515791200101, + "step": 3986 + }, + { + "ce_ib": 4.320608139038086, + "ce_orig": 1.118600845336914, + "epoch": 1.1460924581206413, + "kl_loss": 0.04596209153532982, + "loss_ib": 0.0008916817605495453, + "step": 3986 + }, + { + "ce_ib": 2.4296162128448486, + "ce_orig": 0.6844934225082397, + "epoch": 1.146380041699619, + "kl_loss": 0.030529966577887535, + "loss_ib": 0.0005482612177729607, + "step": 3987 + }, + { + "ce_ib": 2.411827564239502, + "ce_orig": 0.9210015535354614, + "epoch": 1.146380041699619, + "kl_loss": 0.01948552578687668, + "loss_ib": 0.0004360379825811833, + "step": 3987 + }, + { + "ce_ib": 3.724984884262085, + "ce_orig": 1.055718183517456, + "epoch": 1.146380041699619, + "kl_loss": 0.04183006286621094, + "loss_ib": 0.0007907990948297083, + "step": 3987 + }, + { + "ce_ib": 4.907166481018066, + "ce_orig": 1.2615448236465454, + "epoch": 1.146380041699619, + "kl_loss": 0.05139300227165222, + "loss_ib": 0.0010046466486528516, + "step": 3987 + }, + { + "ce_ib": 2.3224570751190186, + "ce_orig": 0.557630717754364, + "epoch": 1.1466676252785966, + "kl_loss": 0.025268230587244034, + "loss_ib": 0.0004849279939662665, + "step": 3988 + }, + { + "ce_ib": 3.355074405670166, + "ce_orig": 0.46218881011009216, + "epoch": 1.1466676252785966, + "kl_loss": 0.07734501361846924, + "loss_ib": 0.0011089574545621872, + "step": 3988 + }, + { + "ce_ib": 2.8122549057006836, + "ce_orig": 0.73360675573349, + "epoch": 1.1466676252785966, + "kl_loss": 0.040200814604759216, + "loss_ib": 0.0006832335493527353, + "step": 3988 + }, + { + "ce_ib": 4.714356422424316, + "ce_orig": 0.8784962296485901, + "epoch": 1.1466676252785966, + "kl_loss": 0.03614981472492218, + "loss_ib": 0.0008329337579198182, + "step": 3988 + }, + { + "ce_ib": 6.755397796630859, + "ce_orig": 1.7618476152420044, + "epoch": 1.1469552088575743, + "kl_loss": 0.05350513756275177, + "loss_ib": 0.0012105910573154688, + "step": 3989 + }, + { + "ce_ib": 3.5081355571746826, + "ce_orig": 0.6727059483528137, + "epoch": 1.1469552088575743, + "kl_loss": 0.05282013863325119, + "loss_ib": 0.0008790148422122002, + "step": 3989 + }, + { + "ce_ib": 2.5572874546051025, + "ce_orig": 0.6094183325767517, + "epoch": 1.1469552088575743, + "kl_loss": 0.04408217966556549, + "loss_ib": 0.000696550530847162, + "step": 3989 + }, + { + "ce_ib": 3.9992897510528564, + "ce_orig": 1.1700600385665894, + "epoch": 1.1469552088575743, + "kl_loss": 0.04917306452989578, + "loss_ib": 0.0008916595252230763, + "step": 3989 + }, + { + "epoch": 1.1472427924365518, + "grad_norm": 0.12553584575653076, + "learning_rate": 3.545592035773192e-05, + "loss": 0.8539, + "step": 3990 + }, + { + "ce_ib": 5.241089820861816, + "ce_orig": 1.3460499048233032, + "epoch": 1.1472427924365518, + "kl_loss": 0.056112512946128845, + "loss_ib": 0.0010852341074496508, + "step": 3990 + }, + { + "ce_ib": 2.9317195415496826, + "ce_orig": 0.7939624786376953, + "epoch": 1.1472427924365518, + "kl_loss": 0.05323665589094162, + "loss_ib": 0.0008255384746007621, + "step": 3990 + }, + { + "ce_ib": 2.621426820755005, + "ce_orig": 0.6973094344139099, + "epoch": 1.1472427924365518, + "kl_loss": 0.07449403405189514, + "loss_ib": 0.0010070829885080457, + "step": 3990 + }, + { + "ce_ib": 2.2003226280212402, + "ce_orig": 0.5212267637252808, + "epoch": 1.1472427924365518, + "kl_loss": 0.05175488814711571, + "loss_ib": 0.0007375811110250652, + "step": 3990 + }, + { + "ce_ib": 2.8468003273010254, + "ce_orig": 0.6212342381477356, + "epoch": 1.1475303760155295, + "kl_loss": 0.04014105349779129, + "loss_ib": 0.000686090555973351, + "step": 3991 + }, + { + "ce_ib": 5.397668361663818, + "ce_orig": 1.5960735082626343, + "epoch": 1.1475303760155295, + "kl_loss": 0.04331841319799423, + "loss_ib": 0.0009729508892633021, + "step": 3991 + }, + { + "ce_ib": 2.951786518096924, + "ce_orig": 0.5207709670066833, + "epoch": 1.1475303760155295, + "kl_loss": 0.05864841863512993, + "loss_ib": 0.0008816628251224756, + "step": 3991 + }, + { + "ce_ib": 2.113353729248047, + "ce_orig": 0.5311703085899353, + "epoch": 1.1475303760155295, + "kl_loss": 0.05498894304037094, + "loss_ib": 0.0007612247718498111, + "step": 3991 + }, + { + "ce_ib": 3.715850830078125, + "ce_orig": 0.7261084914207458, + "epoch": 1.1478179595945073, + "kl_loss": 0.04419225826859474, + "loss_ib": 0.0008135076495818794, + "step": 3992 + }, + { + "ce_ib": 2.0093696117401123, + "ce_orig": 0.45276108384132385, + "epoch": 1.1478179595945073, + "kl_loss": 0.03774812072515488, + "loss_ib": 0.0005784181412309408, + "step": 3992 + }, + { + "ce_ib": 1.9068468809127808, + "ce_orig": 0.5566633939743042, + "epoch": 1.1478179595945073, + "kl_loss": 0.03321072831749916, + "loss_ib": 0.0005227919318713248, + "step": 3992 + }, + { + "ce_ib": 3.2785065174102783, + "ce_orig": 0.6490910649299622, + "epoch": 1.1478179595945073, + "kl_loss": 0.06813523918390274, + "loss_ib": 0.001009203027933836, + "step": 3992 + }, + { + "ce_ib": 4.444438934326172, + "ce_orig": 0.9232819676399231, + "epoch": 1.1481055431734848, + "kl_loss": 0.04025701433420181, + "loss_ib": 0.0008470140164718032, + "step": 3993 + }, + { + "ce_ib": 2.0477454662323, + "ce_orig": 0.4842819571495056, + "epoch": 1.1481055431734848, + "kl_loss": 0.032908350229263306, + "loss_ib": 0.0005338580231182277, + "step": 3993 + }, + { + "ce_ib": 3.944338083267212, + "ce_orig": 1.2753641605377197, + "epoch": 1.1481055431734848, + "kl_loss": 0.046591125428676605, + "loss_ib": 0.0008603450260125101, + "step": 3993 + }, + { + "ce_ib": 2.4550647735595703, + "ce_orig": 0.5135481357574463, + "epoch": 1.1481055431734848, + "kl_loss": 0.05386991798877716, + "loss_ib": 0.0007842056220397353, + "step": 3993 + }, + { + "ce_ib": 2.9140071868896484, + "ce_orig": 0.7372917532920837, + "epoch": 1.1483931267524625, + "kl_loss": 0.0269874706864357, + "loss_ib": 0.0005612754030153155, + "step": 3994 + }, + { + "ce_ib": 3.743680953979492, + "ce_orig": 0.6529519557952881, + "epoch": 1.1483931267524625, + "kl_loss": 0.05631276220083237, + "loss_ib": 0.0009374956716783345, + "step": 3994 + }, + { + "ce_ib": 5.510990619659424, + "ce_orig": 1.6109461784362793, + "epoch": 1.1483931267524625, + "kl_loss": 0.05013822764158249, + "loss_ib": 0.0010524812387302518, + "step": 3994 + }, + { + "ce_ib": 2.0468766689300537, + "ce_orig": 0.38536524772644043, + "epoch": 1.1483931267524625, + "kl_loss": 0.03885405510663986, + "loss_ib": 0.000593228149227798, + "step": 3994 + }, + { + "epoch": 1.14868071033144, + "grad_norm": 0.11047635972499847, + "learning_rate": 3.5420660394284325e-05, + "loss": 0.8249, + "step": 3995 + }, + { + "ce_ib": 5.974514961242676, + "ce_orig": 1.278207778930664, + "epoch": 1.14868071033144, + "kl_loss": 0.05679243430495262, + "loss_ib": 0.0011653758119791746, + "step": 3995 + }, + { + "ce_ib": 2.6877174377441406, + "ce_orig": 0.6954204440116882, + "epoch": 1.14868071033144, + "kl_loss": 0.04948300123214722, + "loss_ib": 0.0007636017398908734, + "step": 3995 + }, + { + "ce_ib": 3.2821547985076904, + "ce_orig": 0.5641030669212341, + "epoch": 1.14868071033144, + "kl_loss": 0.04381052404642105, + "loss_ib": 0.0007663206779398024, + "step": 3995 + }, + { + "ce_ib": 3.1073708534240723, + "ce_orig": 0.7424187064170837, + "epoch": 1.14868071033144, + "kl_loss": 0.0463671013712883, + "loss_ib": 0.0007744081085547805, + "step": 3995 + }, + { + "ce_ib": 3.2540552616119385, + "ce_orig": 0.6897731423377991, + "epoch": 1.1489682939104178, + "kl_loss": 0.03733167052268982, + "loss_ib": 0.0006987222004681826, + "step": 3996 + }, + { + "ce_ib": 3.145958423614502, + "ce_orig": 0.7534764409065247, + "epoch": 1.1489682939104178, + "kl_loss": 0.04384021833539009, + "loss_ib": 0.0007529979920946062, + "step": 3996 + }, + { + "ce_ib": 2.563598871231079, + "ce_orig": 0.7862297892570496, + "epoch": 1.1489682939104178, + "kl_loss": 0.03918109089136124, + "loss_ib": 0.0006481707678176463, + "step": 3996 + }, + { + "ce_ib": 2.068082332611084, + "ce_orig": 0.48578161001205444, + "epoch": 1.1489682939104178, + "kl_loss": 0.024970557540655136, + "loss_ib": 0.0004565137787722051, + "step": 3996 + }, + { + "ce_ib": 3.616788864135742, + "ce_orig": 0.6187289357185364, + "epoch": 1.1492558774893953, + "kl_loss": 0.016490736976265907, + "loss_ib": 0.0005265862564556301, + "step": 3997 + }, + { + "ce_ib": 2.4585275650024414, + "ce_orig": 0.6364685893058777, + "epoch": 1.1492558774893953, + "kl_loss": 0.033873334527015686, + "loss_ib": 0.0005845861160196364, + "step": 3997 + }, + { + "ce_ib": 2.475154161453247, + "ce_orig": 0.6713868975639343, + "epoch": 1.1492558774893953, + "kl_loss": 0.037081167101860046, + "loss_ib": 0.0006183270597830415, + "step": 3997 + }, + { + "ce_ib": 2.5546817779541016, + "ce_orig": 0.5453158020973206, + "epoch": 1.1492558774893953, + "kl_loss": 0.03516549617052078, + "loss_ib": 0.0006071231327950954, + "step": 3997 + }, + { + "ce_ib": 2.4989511966705322, + "ce_orig": 0.5962426066398621, + "epoch": 1.149543461068373, + "kl_loss": 0.03389296680688858, + "loss_ib": 0.0005888247978873551, + "step": 3998 + }, + { + "ce_ib": 3.7016725540161133, + "ce_orig": 1.1987842321395874, + "epoch": 1.149543461068373, + "kl_loss": 0.03264719247817993, + "loss_ib": 0.0006966391229070723, + "step": 3998 + }, + { + "ce_ib": 1.1463786363601685, + "ce_orig": 0.1724180281162262, + "epoch": 1.149543461068373, + "kl_loss": 0.1001715213060379, + "loss_ib": 0.0011163529707118869, + "step": 3998 + }, + { + "ce_ib": 4.877892971038818, + "ce_orig": 0.8172743320465088, + "epoch": 1.149543461068373, + "kl_loss": 0.04952909052371979, + "loss_ib": 0.0009830801282078028, + "step": 3998 + }, + { + "ce_ib": 3.762120008468628, + "ce_orig": 0.7854161858558655, + "epoch": 1.1498310446473505, + "kl_loss": 0.04721011966466904, + "loss_ib": 0.0008483131532557309, + "step": 3999 + }, + { + "ce_ib": 3.9529168605804443, + "ce_orig": 0.7472771406173706, + "epoch": 1.1498310446473505, + "kl_loss": 0.04641661047935486, + "loss_ib": 0.0008594577666372061, + "step": 3999 + }, + { + "ce_ib": 2.675691843032837, + "ce_orig": 0.782515823841095, + "epoch": 1.1498310446473505, + "kl_loss": 0.032094717025756836, + "loss_ib": 0.0005885163554921746, + "step": 3999 + }, + { + "ce_ib": 6.283645153045654, + "ce_orig": 1.5055235624313354, + "epoch": 1.1498310446473505, + "kl_loss": 0.04332561045885086, + "loss_ib": 0.001061620656400919, + "step": 3999 + }, + { + "epoch": 1.1501186282263283, + "grad_norm": 0.09907598048448563, + "learning_rate": 3.5385375325047166e-05, + "loss": 0.7749, + "step": 4000 + }, + { + "ce_ib": 2.2949485778808594, + "ce_orig": 0.5213364958763123, + "epoch": 1.1501186282263283, + "kl_loss": 0.030410002917051315, + "loss_ib": 0.0005335948662832379, + "step": 4000 + }, + { + "ce_ib": 4.119268894195557, + "ce_orig": 1.0872535705566406, + "epoch": 1.1501186282263283, + "kl_loss": 0.03452321141958237, + "loss_ib": 0.0007571589667350054, + "step": 4000 + }, + { + "ce_ib": 4.447226524353027, + "ce_orig": 0.6273318529129028, + "epoch": 1.1501186282263283, + "kl_loss": 0.07821568101644516, + "loss_ib": 0.0012268794234842062, + "step": 4000 + }, + { + "ce_ib": 1.9010454416275024, + "ce_orig": 0.38526275753974915, + "epoch": 1.1501186282263283, + "kl_loss": 0.03489460423588753, + "loss_ib": 0.0005390505539253354, + "step": 4000 + }, + { + "ce_ib": 5.180746078491211, + "ce_orig": 1.6032670736312866, + "epoch": 1.150406211805306, + "kl_loss": 0.05387070029973984, + "loss_ib": 0.001056781504303217, + "step": 4001 + }, + { + "ce_ib": 4.274350166320801, + "ce_orig": 0.9834691882133484, + "epoch": 1.150406211805306, + "kl_loss": 0.04729102551937103, + "loss_ib": 0.000900345272384584, + "step": 4001 + }, + { + "ce_ib": 3.443840980529785, + "ce_orig": 0.7899782061576843, + "epoch": 1.150406211805306, + "kl_loss": 0.04397984594106674, + "loss_ib": 0.0007841825135983527, + "step": 4001 + }, + { + "ce_ib": 4.197214126586914, + "ce_orig": 0.9653199911117554, + "epoch": 1.150406211805306, + "kl_loss": 0.05697706341743469, + "loss_ib": 0.0009894920513033867, + "step": 4001 + }, + { + "ce_ib": 2.974428415298462, + "ce_orig": 0.7981731295585632, + "epoch": 1.1506937953842835, + "kl_loss": 0.034069307148456573, + "loss_ib": 0.0006381358834914863, + "step": 4002 + }, + { + "ce_ib": 4.0193963050842285, + "ce_orig": 1.0763298273086548, + "epoch": 1.1506937953842835, + "kl_loss": 0.046006400138139725, + "loss_ib": 0.0008620035951025784, + "step": 4002 + }, + { + "ce_ib": 3.077587842941284, + "ce_orig": 0.6169232130050659, + "epoch": 1.1506937953842835, + "kl_loss": 0.0439843088388443, + "loss_ib": 0.0007476017926819623, + "step": 4002 + }, + { + "ce_ib": 2.84848952293396, + "ce_orig": 0.6825082302093506, + "epoch": 1.1506937953842835, + "kl_loss": 0.04412931203842163, + "loss_ib": 0.0007261420832946897, + "step": 4002 + }, + { + "ce_ib": 3.6538381576538086, + "ce_orig": 0.9173955321311951, + "epoch": 1.1509813789632612, + "kl_loss": 0.04864135757088661, + "loss_ib": 0.0008517973474226892, + "step": 4003 + }, + { + "ce_ib": 5.152512073516846, + "ce_orig": 1.5128684043884277, + "epoch": 1.1509813789632612, + "kl_loss": 0.04017804190516472, + "loss_ib": 0.0009170316043309867, + "step": 4003 + }, + { + "ce_ib": 3.6260366439819336, + "ce_orig": 0.6701598763465881, + "epoch": 1.1509813789632612, + "kl_loss": 0.059591326862573624, + "loss_ib": 0.0009585169027559459, + "step": 4003 + }, + { + "ce_ib": 3.629930019378662, + "ce_orig": 1.0183757543563843, + "epoch": 1.1509813789632612, + "kl_loss": 0.04454769939184189, + "loss_ib": 0.0008084699511528015, + "step": 4003 + }, + { + "ce_ib": 2.8858916759490967, + "ce_orig": 0.9460700750350952, + "epoch": 1.1512689625422388, + "kl_loss": 0.03723495081067085, + "loss_ib": 0.0006609386764466763, + "step": 4004 + }, + { + "ce_ib": 2.8810739517211914, + "ce_orig": 0.47086793184280396, + "epoch": 1.1512689625422388, + "kl_loss": 0.020001383498311043, + "loss_ib": 0.0004881212080363184, + "step": 4004 + }, + { + "ce_ib": 2.6673176288604736, + "ce_orig": 0.41885924339294434, + "epoch": 1.1512689625422388, + "kl_loss": 0.04593101143836975, + "loss_ib": 0.0007260418497025967, + "step": 4004 + }, + { + "ce_ib": 3.518915891647339, + "ce_orig": 1.1670854091644287, + "epoch": 1.1512689625422388, + "kl_loss": 0.04702746868133545, + "loss_ib": 0.0008221662719734013, + "step": 4004 + }, + { + "epoch": 1.1515565461212165, + "grad_norm": 0.12952256202697754, + "learning_rate": 3.535006523503034e-05, + "loss": 0.7508, + "step": 4005 + }, + { + "ce_ib": 2.616443157196045, + "ce_orig": 0.54509437084198, + "epoch": 1.1515565461212165, + "kl_loss": 0.03185660019516945, + "loss_ib": 0.0005802102969028056, + "step": 4005 + }, + { + "ce_ib": 4.189174652099609, + "ce_orig": 0.7005299925804138, + "epoch": 1.1515565461212165, + "kl_loss": 0.048738934099674225, + "loss_ib": 0.0009063067846000195, + "step": 4005 + }, + { + "ce_ib": 1.8235617876052856, + "ce_orig": 0.47140511870384216, + "epoch": 1.1515565461212165, + "kl_loss": 0.04627024009823799, + "loss_ib": 0.0006450585788115859, + "step": 4005 + }, + { + "ce_ib": 3.967681884765625, + "ce_orig": 1.1250059604644775, + "epoch": 1.1515565461212165, + "kl_loss": 0.0287554282695055, + "loss_ib": 0.0006843224982731044, + "step": 4005 + }, + { + "ce_ib": 3.935680627822876, + "ce_orig": 0.8409740328788757, + "epoch": 1.1518441297001942, + "kl_loss": 0.05737445130944252, + "loss_ib": 0.0009673126041889191, + "step": 4006 + }, + { + "ce_ib": 3.676067590713501, + "ce_orig": 0.8885965347290039, + "epoch": 1.1518441297001942, + "kl_loss": 0.04696505889296532, + "loss_ib": 0.0008372573647648096, + "step": 4006 + }, + { + "ce_ib": 3.211794376373291, + "ce_orig": 0.7339122891426086, + "epoch": 1.1518441297001942, + "kl_loss": 0.0683111697435379, + "loss_ib": 0.0010042911162599921, + "step": 4006 + }, + { + "ce_ib": 4.82465124130249, + "ce_orig": 1.2950117588043213, + "epoch": 1.1518441297001942, + "kl_loss": 0.07612299919128418, + "loss_ib": 0.0012436950346454978, + "step": 4006 + }, + { + "ce_ib": 3.218715190887451, + "ce_orig": 0.5627292990684509, + "epoch": 1.1521317132791717, + "kl_loss": 0.0520017072558403, + "loss_ib": 0.0008418885990977287, + "step": 4007 + }, + { + "ce_ib": 6.167364597320557, + "ce_orig": 1.2605695724487305, + "epoch": 1.1521317132791717, + "kl_loss": 0.04549955576658249, + "loss_ib": 0.0010717320255935192, + "step": 4007 + }, + { + "ce_ib": 2.8431150913238525, + "ce_orig": 0.8264856934547424, + "epoch": 1.1521317132791717, + "kl_loss": 0.043574947863817215, + "loss_ib": 0.000720060954336077, + "step": 4007 + }, + { + "ce_ib": 2.0037426948547363, + "ce_orig": 0.45825862884521484, + "epoch": 1.1521317132791717, + "kl_loss": 0.052881933748722076, + "loss_ib": 0.0007291936199180782, + "step": 4007 + }, + { + "ce_ib": 2.6787469387054443, + "ce_orig": 0.5569115281105042, + "epoch": 1.1524192968581495, + "kl_loss": 0.060201019048690796, + "loss_ib": 0.000869884854182601, + "step": 4008 + }, + { + "ce_ib": 4.977940082550049, + "ce_orig": 1.4971716403961182, + "epoch": 1.1524192968581495, + "kl_loss": 0.03615269809961319, + "loss_ib": 0.0008593209786340594, + "step": 4008 + }, + { + "ce_ib": 3.869220495223999, + "ce_orig": 0.9275022149085999, + "epoch": 1.1524192968581495, + "kl_loss": 0.05022440850734711, + "loss_ib": 0.000889166141860187, + "step": 4008 + }, + { + "ce_ib": 4.009045124053955, + "ce_orig": 1.0316381454467773, + "epoch": 1.1524192968581495, + "kl_loss": 0.06900586932897568, + "loss_ib": 0.0010909631382673979, + "step": 4008 + }, + { + "ce_ib": 2.8618884086608887, + "ce_orig": 0.6914111971855164, + "epoch": 1.152706880437127, + "kl_loss": 0.04311812296509743, + "loss_ib": 0.0007173700723797083, + "step": 4009 + }, + { + "ce_ib": 4.064422607421875, + "ce_orig": 1.1356898546218872, + "epoch": 1.152706880437127, + "kl_loss": 0.05051116645336151, + "loss_ib": 0.000911553914193064, + "step": 4009 + }, + { + "ce_ib": 3.507629632949829, + "ce_orig": 0.7514046430587769, + "epoch": 1.152706880437127, + "kl_loss": 0.06221737340092659, + "loss_ib": 0.0009729366283863783, + "step": 4009 + }, + { + "ce_ib": 1.657945156097412, + "ce_orig": 0.27209386229515076, + "epoch": 1.152706880437127, + "kl_loss": 0.027201851829886436, + "loss_ib": 0.0004378130252007395, + "step": 4009 + }, + { + "epoch": 1.1529944640161047, + "grad_norm": 0.11322087794542313, + "learning_rate": 3.531473020930406e-05, + "loss": 0.788, + "step": 4010 + }, + { + "ce_ib": 4.66653299331665, + "ce_orig": 1.2134966850280762, + "epoch": 1.1529944640161047, + "kl_loss": 0.05599729344248772, + "loss_ib": 0.0010266262106597424, + "step": 4010 + }, + { + "ce_ib": 4.062137603759766, + "ce_orig": 1.0172638893127441, + "epoch": 1.1529944640161047, + "kl_loss": 0.04299195110797882, + "loss_ib": 0.0008361332002095878, + "step": 4010 + }, + { + "ce_ib": 5.528640270233154, + "ce_orig": 0.770465075969696, + "epoch": 1.1529944640161047, + "kl_loss": 0.03217417746782303, + "loss_ib": 0.0008746057283133268, + "step": 4010 + }, + { + "ce_ib": 2.6390841007232666, + "ce_orig": 0.5779123902320862, + "epoch": 1.1529944640161047, + "kl_loss": 0.049682606011629105, + "loss_ib": 0.000760734430514276, + "step": 4010 + }, + { + "ce_ib": 2.969198226928711, + "ce_orig": 0.77579665184021, + "epoch": 1.1532820475950822, + "kl_loss": 0.025761371478438377, + "loss_ib": 0.0005545335006900132, + "step": 4011 + }, + { + "ce_ib": 5.3092942237854, + "ce_orig": 1.630521297454834, + "epoch": 1.1532820475950822, + "kl_loss": 0.048160117119550705, + "loss_ib": 0.001012530643492937, + "step": 4011 + }, + { + "ce_ib": 4.136592864990234, + "ce_orig": 1.2552436590194702, + "epoch": 1.1532820475950822, + "kl_loss": 0.03709261491894722, + "loss_ib": 0.0007845854852348566, + "step": 4011 + }, + { + "ce_ib": 3.8049371242523193, + "ce_orig": 0.8740779757499695, + "epoch": 1.1532820475950822, + "kl_loss": 0.043567389249801636, + "loss_ib": 0.0008161675650626421, + "step": 4011 + }, + { + "ce_ib": 2.1752684116363525, + "ce_orig": 0.5522670149803162, + "epoch": 1.15356963117406, + "kl_loss": 0.04818004369735718, + "loss_ib": 0.0006993272691033781, + "step": 4012 + }, + { + "ce_ib": 2.480550765991211, + "ce_orig": 0.6635025143623352, + "epoch": 1.15356963117406, + "kl_loss": 0.06825828552246094, + "loss_ib": 0.0009306379361078143, + "step": 4012 + }, + { + "ce_ib": 2.1220858097076416, + "ce_orig": 0.4048159420490265, + "epoch": 1.15356963117406, + "kl_loss": 0.03681094944477081, + "loss_ib": 0.0005803180392831564, + "step": 4012 + }, + { + "ce_ib": 2.9263086318969727, + "ce_orig": 0.9132031798362732, + "epoch": 1.15356963117406, + "kl_loss": 0.028889846056699753, + "loss_ib": 0.0005815292824991047, + "step": 4012 + }, + { + "ce_ib": 3.4150710105895996, + "ce_orig": 1.208825707435608, + "epoch": 1.1538572147530375, + "kl_loss": 0.027591094374656677, + "loss_ib": 0.0006174180307425559, + "step": 4013 + }, + { + "ce_ib": 2.3504397869110107, + "ce_orig": 0.5414032936096191, + "epoch": 1.1538572147530375, + "kl_loss": 0.028479330241680145, + "loss_ib": 0.0005198372527956963, + "step": 4013 + }, + { + "ce_ib": 4.406181812286377, + "ce_orig": 1.4287179708480835, + "epoch": 1.1538572147530375, + "kl_loss": 0.039443254470825195, + "loss_ib": 0.0008350507123395801, + "step": 4013 + }, + { + "ce_ib": 3.5396339893341064, + "ce_orig": 0.8011346459388733, + "epoch": 1.1538572147530375, + "kl_loss": 0.03033214807510376, + "loss_ib": 0.0006572848651558161, + "step": 4013 + }, + { + "ce_ib": 3.5116219520568848, + "ce_orig": 0.9996297359466553, + "epoch": 1.1541447983320152, + "kl_loss": 0.03580613434314728, + "loss_ib": 0.0007092235027812421, + "step": 4014 + }, + { + "ce_ib": 4.257142066955566, + "ce_orig": 1.1087695360183716, + "epoch": 1.1541447983320152, + "kl_loss": 0.0484350360929966, + "loss_ib": 0.0009100645547732711, + "step": 4014 + }, + { + "ce_ib": 3.139979362487793, + "ce_orig": 0.893013596534729, + "epoch": 1.1541447983320152, + "kl_loss": 0.03717508539557457, + "loss_ib": 0.0006857487605884671, + "step": 4014 + }, + { + "ce_ib": 2.6418535709381104, + "ce_orig": 0.47329822182655334, + "epoch": 1.1541447983320152, + "kl_loss": 0.020990800112485886, + "loss_ib": 0.0004740933363791555, + "step": 4014 + }, + { + "epoch": 1.154432381910993, + "grad_norm": 0.11079756915569305, + "learning_rate": 3.5279370332998614e-05, + "loss": 0.8723, + "step": 4015 + }, + { + "ce_ib": 2.5510802268981934, + "ce_orig": 0.7842560410499573, + "epoch": 1.154432381910993, + "kl_loss": 0.03581923991441727, + "loss_ib": 0.0006133004208095372, + "step": 4015 + }, + { + "ce_ib": 3.249312400817871, + "ce_orig": 0.61381596326828, + "epoch": 1.154432381910993, + "kl_loss": 0.05344343185424805, + "loss_ib": 0.0008593655074946582, + "step": 4015 + }, + { + "ce_ib": 3.236973285675049, + "ce_orig": 0.8183207511901855, + "epoch": 1.154432381910993, + "kl_loss": 0.056148357689380646, + "loss_ib": 0.0008851808961480856, + "step": 4015 + }, + { + "ce_ib": 3.2573671340942383, + "ce_orig": 0.685647189617157, + "epoch": 1.154432381910993, + "kl_loss": 0.04438965767621994, + "loss_ib": 0.0007696332759223878, + "step": 4015 + }, + { + "ce_ib": 3.2511239051818848, + "ce_orig": 1.0182240009307861, + "epoch": 1.1547199654899705, + "kl_loss": 0.029841843992471695, + "loss_ib": 0.0006235308246687055, + "step": 4016 + }, + { + "ce_ib": 4.605830192565918, + "ce_orig": 1.4160791635513306, + "epoch": 1.1547199654899705, + "kl_loss": 0.04750405624508858, + "loss_ib": 0.0009356235386803746, + "step": 4016 + }, + { + "ce_ib": 1.926575779914856, + "ce_orig": 0.46800529956817627, + "epoch": 1.1547199654899705, + "kl_loss": 0.09571446478366852, + "loss_ib": 0.0011498022358864546, + "step": 4016 + }, + { + "ce_ib": 3.8945226669311523, + "ce_orig": 0.8751407861709595, + "epoch": 1.1547199654899705, + "kl_loss": 0.018739474937319756, + "loss_ib": 0.0005768469418399036, + "step": 4016 + }, + { + "ce_ib": 2.3691365718841553, + "ce_orig": 0.41639986634254456, + "epoch": 1.1550075490689482, + "kl_loss": 0.05012400075793266, + "loss_ib": 0.00073815364157781, + "step": 4017 + }, + { + "ce_ib": 4.3131208419799805, + "ce_orig": 1.1538739204406738, + "epoch": 1.1550075490689482, + "kl_loss": 0.03432334214448929, + "loss_ib": 0.0007745454786345363, + "step": 4017 + }, + { + "ce_ib": 2.483464241027832, + "ce_orig": 0.7863860726356506, + "epoch": 1.1550075490689482, + "kl_loss": 0.02449922077357769, + "loss_ib": 0.0004933386226184666, + "step": 4017 + }, + { + "ce_ib": 4.7105207443237305, + "ce_orig": 0.9849264025688171, + "epoch": 1.1550075490689482, + "kl_loss": 0.04064887762069702, + "loss_ib": 0.0008775408496148884, + "step": 4017 + }, + { + "ce_ib": 2.6499855518341064, + "ce_orig": 0.8170518279075623, + "epoch": 1.1552951326479257, + "kl_loss": 0.05257553607225418, + "loss_ib": 0.0007907538674771786, + "step": 4018 + }, + { + "ce_ib": 4.078948974609375, + "ce_orig": 1.1741811037063599, + "epoch": 1.1552951326479257, + "kl_loss": 0.051503077149391174, + "loss_ib": 0.0009229255956597626, + "step": 4018 + }, + { + "ce_ib": 3.82639479637146, + "ce_orig": 0.6336854100227356, + "epoch": 1.1552951326479257, + "kl_loss": 0.0729006975889206, + "loss_ib": 0.0011116464156657457, + "step": 4018 + }, + { + "ce_ib": 2.965428352355957, + "ce_orig": 0.4460776746273041, + "epoch": 1.1552951326479257, + "kl_loss": 0.06145478039979935, + "loss_ib": 0.0009110906394198537, + "step": 4018 + }, + { + "ce_ib": 3.278414487838745, + "ce_orig": 0.8433628678321838, + "epoch": 1.1555827162269035, + "kl_loss": 0.05833327770233154, + "loss_ib": 0.0009111742256209254, + "step": 4019 + }, + { + "ce_ib": 3.2953057289123535, + "ce_orig": 0.8230745196342468, + "epoch": 1.1555827162269035, + "kl_loss": 0.057299837470054626, + "loss_ib": 0.0009025288745760918, + "step": 4019 + }, + { + "ce_ib": 3.176886796951294, + "ce_orig": 0.9179885387420654, + "epoch": 1.1555827162269035, + "kl_loss": 0.04229883849620819, + "loss_ib": 0.0007406770018860698, + "step": 4019 + }, + { + "ce_ib": 4.819404602050781, + "ce_orig": 1.2441352605819702, + "epoch": 1.1555827162269035, + "kl_loss": 0.049616534262895584, + "loss_ib": 0.0009781058179214597, + "step": 4019 + }, + { + "epoch": 1.1558702998058812, + "grad_norm": 0.1139313206076622, + "learning_rate": 3.5243985691304147e-05, + "loss": 0.8226, + "step": 4020 + }, + { + "ce_ib": 2.7511119842529297, + "ce_orig": 0.6718195080757141, + "epoch": 1.1558702998058812, + "kl_loss": 0.06361295282840729, + "loss_ib": 0.0009112406987696886, + "step": 4020 + }, + { + "ce_ib": 2.889084577560425, + "ce_orig": 0.7667460441589355, + "epoch": 1.1558702998058812, + "kl_loss": 0.030300233513116837, + "loss_ib": 0.0005919107934460044, + "step": 4020 + }, + { + "ce_ib": 3.314699411392212, + "ce_orig": 0.8291280269622803, + "epoch": 1.1558702998058812, + "kl_loss": 0.054194431751966476, + "loss_ib": 0.0008734142757020891, + "step": 4020 + }, + { + "ce_ib": 6.353790283203125, + "ce_orig": 2.04085373878479, + "epoch": 1.1558702998058812, + "kl_loss": 0.03959634527564049, + "loss_ib": 0.0010313424281775951, + "step": 4020 + }, + { + "ce_ib": 4.134117126464844, + "ce_orig": 0.713947594165802, + "epoch": 1.1561578833848587, + "kl_loss": 0.0526311993598938, + "loss_ib": 0.0009397236863151193, + "step": 4021 + }, + { + "ce_ib": 2.168928623199463, + "ce_orig": 0.6450079083442688, + "epoch": 1.1561578833848587, + "kl_loss": 0.03212229907512665, + "loss_ib": 0.000538115855306387, + "step": 4021 + }, + { + "ce_ib": 2.219546318054199, + "ce_orig": 0.45096027851104736, + "epoch": 1.1561578833848587, + "kl_loss": 0.04848484322428703, + "loss_ib": 0.0007068030536174774, + "step": 4021 + }, + { + "ce_ib": 3.9224448204040527, + "ce_orig": 0.8269857168197632, + "epoch": 1.1561578833848587, + "kl_loss": 0.04696211963891983, + "loss_ib": 0.0008618656429462135, + "step": 4021 + }, + { + "ce_ib": 2.9463229179382324, + "ce_orig": 0.6122345924377441, + "epoch": 1.1564454669638364, + "kl_loss": 0.0855017751455307, + "loss_ib": 0.0011496500810608268, + "step": 4022 + }, + { + "ce_ib": 4.52903413772583, + "ce_orig": 1.3513200283050537, + "epoch": 1.1564454669638364, + "kl_loss": 0.05760578811168671, + "loss_ib": 0.001028961269184947, + "step": 4022 + }, + { + "ce_ib": 2.671970844268799, + "ce_orig": 0.6855776906013489, + "epoch": 1.1564454669638364, + "kl_loss": 0.03640631213784218, + "loss_ib": 0.0006312601617537439, + "step": 4022 + }, + { + "ce_ib": 4.121387004852295, + "ce_orig": 1.080114722251892, + "epoch": 1.1564454669638364, + "kl_loss": 0.04239718243479729, + "loss_ib": 0.0008361105574294925, + "step": 4022 + }, + { + "ce_ib": 3.337810516357422, + "ce_orig": 0.9750275611877441, + "epoch": 1.156733050542814, + "kl_loss": 0.03390197455883026, + "loss_ib": 0.0006728008156642318, + "step": 4023 + }, + { + "ce_ib": 3.4557108879089355, + "ce_orig": 0.6387427449226379, + "epoch": 1.156733050542814, + "kl_loss": 0.0537324883043766, + "loss_ib": 0.0008828959544189274, + "step": 4023 + }, + { + "ce_ib": 2.3361825942993164, + "ce_orig": 0.514985978603363, + "epoch": 1.156733050542814, + "kl_loss": 0.03606419637799263, + "loss_ib": 0.0005942602292634547, + "step": 4023 + }, + { + "ce_ib": 3.1881842613220215, + "ce_orig": 0.8992137908935547, + "epoch": 1.156733050542814, + "kl_loss": 0.055064328014850616, + "loss_ib": 0.000869461742695421, + "step": 4023 + }, + { + "ce_ib": 4.649409770965576, + "ce_orig": 1.2390127182006836, + "epoch": 1.1570206341217917, + "kl_loss": 0.07329435646533966, + "loss_ib": 0.0011978845577687025, + "step": 4024 + }, + { + "ce_ib": 3.052550792694092, + "ce_orig": 0.7434772253036499, + "epoch": 1.1570206341217917, + "kl_loss": 0.04835429787635803, + "loss_ib": 0.0007887980318628252, + "step": 4024 + }, + { + "ce_ib": 2.236698865890503, + "ce_orig": 0.5916994214057922, + "epoch": 1.1570206341217917, + "kl_loss": 0.038405172526836395, + "loss_ib": 0.0006077215657569468, + "step": 4024 + }, + { + "ce_ib": 4.140371799468994, + "ce_orig": 1.1109952926635742, + "epoch": 1.1570206341217917, + "kl_loss": 0.03561468422412872, + "loss_ib": 0.0007701840368099511, + "step": 4024 + }, + { + "epoch": 1.1573082177007692, + "grad_norm": 0.11579321324825287, + "learning_rate": 3.520857636947046e-05, + "loss": 0.8775, + "step": 4025 + }, + { + "ce_ib": 4.493718147277832, + "ce_orig": 1.0727115869522095, + "epoch": 1.1573082177007692, + "kl_loss": 0.044775523245334625, + "loss_ib": 0.0008971270872280002, + "step": 4025 + }, + { + "ce_ib": 3.5907206535339355, + "ce_orig": 0.8518352508544922, + "epoch": 1.1573082177007692, + "kl_loss": 0.048233047127723694, + "loss_ib": 0.0008414025651291013, + "step": 4025 + }, + { + "ce_ib": 4.973185062408447, + "ce_orig": 0.9611297845840454, + "epoch": 1.1573082177007692, + "kl_loss": 0.041541051119565964, + "loss_ib": 0.0009127290104515851, + "step": 4025 + }, + { + "ce_ib": 5.239221096038818, + "ce_orig": 1.4280668497085571, + "epoch": 1.1573082177007692, + "kl_loss": 0.07712472975254059, + "loss_ib": 0.001295169466175139, + "step": 4025 + }, + { + "ce_ib": 3.9995226860046387, + "ce_orig": 0.9175568222999573, + "epoch": 1.157595801279747, + "kl_loss": 0.07885697484016418, + "loss_ib": 0.001188521971926093, + "step": 4026 + }, + { + "ce_ib": 4.614779472351074, + "ce_orig": 1.0883052349090576, + "epoch": 1.157595801279747, + "kl_loss": 0.05950620397925377, + "loss_ib": 0.001056539942510426, + "step": 4026 + }, + { + "ce_ib": 2.7696990966796875, + "ce_orig": 0.7389417290687561, + "epoch": 1.157595801279747, + "kl_loss": 0.027518117800354958, + "loss_ib": 0.000552151061128825, + "step": 4026 + }, + { + "ce_ib": 2.818652629852295, + "ce_orig": 0.6855888366699219, + "epoch": 1.157595801279747, + "kl_loss": 0.03536015748977661, + "loss_ib": 0.0006354668876156211, + "step": 4026 + }, + { + "ce_ib": 3.8813602924346924, + "ce_orig": 0.6642559766769409, + "epoch": 1.1578833848587247, + "kl_loss": 0.06809131801128387, + "loss_ib": 0.0010690492345020175, + "step": 4027 + }, + { + "ce_ib": 2.0333070755004883, + "ce_orig": 0.5022704601287842, + "epoch": 1.1578833848587247, + "kl_loss": 0.0411800816655159, + "loss_ib": 0.000615131517406553, + "step": 4027 + }, + { + "ce_ib": 3.1189162731170654, + "ce_orig": 0.7667632102966309, + "epoch": 1.1578833848587247, + "kl_loss": 0.05485868453979492, + "loss_ib": 0.0008604784379713237, + "step": 4027 + }, + { + "ce_ib": 2.08298921585083, + "ce_orig": 0.38922685384750366, + "epoch": 1.1578833848587247, + "kl_loss": 0.04383233189582825, + "loss_ib": 0.0006466222694143653, + "step": 4027 + }, + { + "ce_ib": 2.8726162910461426, + "ce_orig": 0.6902590394020081, + "epoch": 1.1581709684377022, + "kl_loss": 0.04152488708496094, + "loss_ib": 0.0007025104714557528, + "step": 4028 + }, + { + "ce_ib": 2.3893465995788574, + "ce_orig": 0.504281759262085, + "epoch": 1.1581709684377022, + "kl_loss": 0.032603830099105835, + "loss_ib": 0.0005649729864671826, + "step": 4028 + }, + { + "ce_ib": 1.6219874620437622, + "ce_orig": 0.30287450551986694, + "epoch": 1.1581709684377022, + "kl_loss": 0.044171642512083054, + "loss_ib": 0.0006039151339791715, + "step": 4028 + }, + { + "ce_ib": 3.3700385093688965, + "ce_orig": 1.0237433910369873, + "epoch": 1.1581709684377022, + "kl_loss": 0.05540694668889046, + "loss_ib": 0.0008910733158700168, + "step": 4028 + }, + { + "ce_ib": 3.076585531234741, + "ce_orig": 0.7261185050010681, + "epoch": 1.15845855201668, + "kl_loss": 0.07024423778057098, + "loss_ib": 0.0010101009393110871, + "step": 4029 + }, + { + "ce_ib": 2.036841869354248, + "ce_orig": 0.6249517798423767, + "epoch": 1.15845855201668, + "kl_loss": 0.035820335149765015, + "loss_ib": 0.0005618875147774816, + "step": 4029 + }, + { + "ce_ib": 2.8520493507385254, + "ce_orig": 0.5918117165565491, + "epoch": 1.15845855201668, + "kl_loss": 0.029716622084379196, + "loss_ib": 0.0005823711398988962, + "step": 4029 + }, + { + "ce_ib": 4.344344139099121, + "ce_orig": 1.1713719367980957, + "epoch": 1.15845855201668, + "kl_loss": 0.03852587193250656, + "loss_ib": 0.000819693086668849, + "step": 4029 + }, + { + "epoch": 1.1587461355956574, + "grad_norm": 0.1049450933933258, + "learning_rate": 3.517314245280684e-05, + "loss": 0.8276, + "step": 4030 + }, + { + "ce_ib": 4.959714889526367, + "ce_orig": 1.419992208480835, + "epoch": 1.1587461355956574, + "kl_loss": 0.047838158905506134, + "loss_ib": 0.0009743531118147075, + "step": 4030 + }, + { + "ce_ib": 2.109138250350952, + "ce_orig": 0.45957598090171814, + "epoch": 1.1587461355956574, + "kl_loss": 0.04329891502857208, + "loss_ib": 0.0006439029821194708, + "step": 4030 + }, + { + "ce_ib": 2.884993314743042, + "ce_orig": 0.5822508931159973, + "epoch": 1.1587461355956574, + "kl_loss": 0.06496277451515198, + "loss_ib": 0.0009381269919686019, + "step": 4030 + }, + { + "ce_ib": 2.594658136367798, + "ce_orig": 0.7197889089584351, + "epoch": 1.1587461355956574, + "kl_loss": 0.025495244190096855, + "loss_ib": 0.0005144182359799743, + "step": 4030 + }, + { + "ce_ib": 3.6935367584228516, + "ce_orig": 0.8142971992492676, + "epoch": 1.1590337191746352, + "kl_loss": 0.056016892194747925, + "loss_ib": 0.0009295225609093904, + "step": 4031 + }, + { + "ce_ib": 2.754855155944824, + "ce_orig": 0.5840640068054199, + "epoch": 1.1590337191746352, + "kl_loss": 0.059772200882434845, + "loss_ib": 0.0008732075220905244, + "step": 4031 + }, + { + "ce_ib": 3.9647555351257324, + "ce_orig": 0.6530203223228455, + "epoch": 1.1590337191746352, + "kl_loss": 0.07297176867723465, + "loss_ib": 0.0011261932086199522, + "step": 4031 + }, + { + "ce_ib": 2.0846920013427734, + "ce_orig": 0.6335982084274292, + "epoch": 1.1590337191746352, + "kl_loss": 0.03604842349886894, + "loss_ib": 0.0005689534591510892, + "step": 4031 + }, + { + "ce_ib": 2.844386577606201, + "ce_orig": 0.7821143865585327, + "epoch": 1.1593213027536127, + "kl_loss": 0.06450650095939636, + "loss_ib": 0.0009295036434195936, + "step": 4032 + }, + { + "ce_ib": 3.7437524795532227, + "ce_orig": 1.3058373928070068, + "epoch": 1.1593213027536127, + "kl_loss": 0.035933420062065125, + "loss_ib": 0.0007337094866670668, + "step": 4032 + }, + { + "ce_ib": 2.909090042114258, + "ce_orig": 0.6879762411117554, + "epoch": 1.1593213027536127, + "kl_loss": 0.045472532510757446, + "loss_ib": 0.0007456342573277652, + "step": 4032 + }, + { + "ce_ib": 3.07857084274292, + "ce_orig": 0.7910183072090149, + "epoch": 1.1593213027536127, + "kl_loss": 0.04755621403455734, + "loss_ib": 0.0007834192365407944, + "step": 4032 + }, + { + "ce_ib": 2.7121880054473877, + "ce_orig": 0.7900862693786621, + "epoch": 1.1596088863325904, + "kl_loss": 0.03679333254694939, + "loss_ib": 0.0006391520728357136, + "step": 4033 + }, + { + "ce_ib": 4.950155735015869, + "ce_orig": 0.9711329340934753, + "epoch": 1.1596088863325904, + "kl_loss": 0.03775236755609512, + "loss_ib": 0.0008725392399355769, + "step": 4033 + }, + { + "ce_ib": 2.388178586959839, + "ce_orig": 0.5257648229598999, + "epoch": 1.1596088863325904, + "kl_loss": 0.04048829525709152, + "loss_ib": 0.0006437007687054574, + "step": 4033 + }, + { + "ce_ib": 4.254854202270508, + "ce_orig": 0.8677660822868347, + "epoch": 1.1596088863325904, + "kl_loss": 0.058199554681777954, + "loss_ib": 0.0010074808960780501, + "step": 4033 + }, + { + "ce_ib": 4.3150715827941895, + "ce_orig": 1.2151402235031128, + "epoch": 1.1598964699115681, + "kl_loss": 0.03746016323566437, + "loss_ib": 0.0008061087573878467, + "step": 4034 + }, + { + "ce_ib": 2.5974788665771484, + "ce_orig": 0.7522977590560913, + "epoch": 1.1598964699115681, + "kl_loss": 0.03109104000031948, + "loss_ib": 0.0005706583033315837, + "step": 4034 + }, + { + "ce_ib": 5.049442291259766, + "ce_orig": 1.41557776927948, + "epoch": 1.1598964699115681, + "kl_loss": 0.045884229242801666, + "loss_ib": 0.000963786500506103, + "step": 4034 + }, + { + "ce_ib": 3.706049919128418, + "ce_orig": 1.1393886804580688, + "epoch": 1.1598964699115681, + "kl_loss": 0.037076596170663834, + "loss_ib": 0.0007413708954118192, + "step": 4034 + }, + { + "epoch": 1.1601840534905457, + "grad_norm": 0.113786980509758, + "learning_rate": 3.513768402668182e-05, + "loss": 0.8345, + "step": 4035 + }, + { + "ce_ib": 2.577603578567505, + "ce_orig": 0.7082864046096802, + "epoch": 1.1601840534905457, + "kl_loss": 0.0327826589345932, + "loss_ib": 0.0005855869385413826, + "step": 4035 + }, + { + "ce_ib": 4.464132785797119, + "ce_orig": 1.117449402809143, + "epoch": 1.1601840534905457, + "kl_loss": 0.0408693328499794, + "loss_ib": 0.0008551065693609416, + "step": 4035 + }, + { + "ce_ib": 3.7020392417907715, + "ce_orig": 0.6631783843040466, + "epoch": 1.1601840534905457, + "kl_loss": 0.02018464356660843, + "loss_ib": 0.0005720503395423293, + "step": 4035 + }, + { + "ce_ib": 2.6256566047668457, + "ce_orig": 0.6323530673980713, + "epoch": 1.1601840534905457, + "kl_loss": 0.02516910620033741, + "loss_ib": 0.0005142567097209394, + "step": 4035 + }, + { + "ce_ib": 3.9978437423706055, + "ce_orig": 0.5704741477966309, + "epoch": 1.1604716370695234, + "kl_loss": 0.13704122602939606, + "loss_ib": 0.001770196482539177, + "step": 4036 + }, + { + "ce_ib": 2.643766164779663, + "ce_orig": 0.7303608059883118, + "epoch": 1.1604716370695234, + "kl_loss": 0.04495236277580261, + "loss_ib": 0.0007139002555049956, + "step": 4036 + }, + { + "ce_ib": 2.7780721187591553, + "ce_orig": 0.9892863035202026, + "epoch": 1.1604716370695234, + "kl_loss": 0.024647511541843414, + "loss_ib": 0.0005242822808213532, + "step": 4036 + }, + { + "ce_ib": 2.9176747798919678, + "ce_orig": 0.7056872248649597, + "epoch": 1.1604716370695234, + "kl_loss": 0.03807409107685089, + "loss_ib": 0.0006725083803758025, + "step": 4036 + }, + { + "ce_ib": 4.647562503814697, + "ce_orig": 1.4821820259094238, + "epoch": 1.160759220648501, + "kl_loss": 0.04486750066280365, + "loss_ib": 0.0009134311694651842, + "step": 4037 + }, + { + "ce_ib": 3.109532356262207, + "ce_orig": 0.3794897496700287, + "epoch": 1.160759220648501, + "kl_loss": 0.03372500091791153, + "loss_ib": 0.0006482031894847751, + "step": 4037 + }, + { + "ce_ib": 2.663159132003784, + "ce_orig": 0.7393332123756409, + "epoch": 1.160759220648501, + "kl_loss": 0.0355466827750206, + "loss_ib": 0.0006217827321961522, + "step": 4037 + }, + { + "ce_ib": 2.694304943084717, + "ce_orig": 0.6361145973205566, + "epoch": 1.160759220648501, + "kl_loss": 0.0313301645219326, + "loss_ib": 0.0005827320856042206, + "step": 4037 + }, + { + "ce_ib": 3.6586689949035645, + "ce_orig": 0.9259918332099915, + "epoch": 1.1610468042274786, + "kl_loss": 0.024827448651194572, + "loss_ib": 0.000614141346886754, + "step": 4038 + }, + { + "ce_ib": 4.145098686218262, + "ce_orig": 1.0425997972488403, + "epoch": 1.1610468042274786, + "kl_loss": 0.05022052675485611, + "loss_ib": 0.0009167150710709393, + "step": 4038 + }, + { + "ce_ib": 2.057124137878418, + "ce_orig": 0.3195764422416687, + "epoch": 1.1610468042274786, + "kl_loss": 0.03562542051076889, + "loss_ib": 0.000561966560781002, + "step": 4038 + }, + { + "ce_ib": 2.618757486343384, + "ce_orig": 0.7442713975906372, + "epoch": 1.1610468042274786, + "kl_loss": 0.04257766157388687, + "loss_ib": 0.0006876523839309812, + "step": 4038 + }, + { + "ce_ib": 4.169754981994629, + "ce_orig": 0.8059644103050232, + "epoch": 1.1613343878064564, + "kl_loss": 0.16592766344547272, + "loss_ib": 0.002076252130791545, + "step": 4039 + }, + { + "ce_ib": 3.9759016036987305, + "ce_orig": 1.184097170829773, + "epoch": 1.1613343878064564, + "kl_loss": 0.04411336034536362, + "loss_ib": 0.0008387237321585417, + "step": 4039 + }, + { + "ce_ib": 3.964780569076538, + "ce_orig": 0.8562270998954773, + "epoch": 1.1613343878064564, + "kl_loss": 0.05677253380417824, + "loss_ib": 0.0009642033837735653, + "step": 4039 + }, + { + "ce_ib": 4.451704502105713, + "ce_orig": 1.0894567966461182, + "epoch": 1.1613343878064564, + "kl_loss": 0.05257919430732727, + "loss_ib": 0.0009709623409435153, + "step": 4039 + }, + { + "epoch": 1.1616219713854339, + "grad_norm": 0.10893480479717255, + "learning_rate": 3.510220117652297e-05, + "loss": 0.8204, + "step": 4040 + }, + { + "ce_ib": 3.4356045722961426, + "ce_orig": 1.0739333629608154, + "epoch": 1.1616219713854339, + "kl_loss": 0.04669499024748802, + "loss_ib": 0.0008105103624984622, + "step": 4040 + }, + { + "ce_ib": 3.0137276649475098, + "ce_orig": 0.47163841128349304, + "epoch": 1.1616219713854339, + "kl_loss": 0.03692590445280075, + "loss_ib": 0.0006706318235956132, + "step": 4040 + }, + { + "ce_ib": 5.316560745239258, + "ce_orig": 1.5117143392562866, + "epoch": 1.1616219713854339, + "kl_loss": 0.059212468564510345, + "loss_ib": 0.0011237807339057326, + "step": 4040 + }, + { + "ce_ib": 1.4104584455490112, + "ce_orig": 0.3291364312171936, + "epoch": 1.1616219713854339, + "kl_loss": 0.07759392261505127, + "loss_ib": 0.000916985038202256, + "step": 4040 + }, + { + "ce_ib": 4.469212532043457, + "ce_orig": 1.250985860824585, + "epoch": 1.1619095549644116, + "kl_loss": 0.05118683725595474, + "loss_ib": 0.0009587895474396646, + "step": 4041 + }, + { + "ce_ib": 4.029460430145264, + "ce_orig": 0.5173509120941162, + "epoch": 1.1619095549644116, + "kl_loss": 0.11570829153060913, + "loss_ib": 0.001560028875246644, + "step": 4041 + }, + { + "ce_ib": 2.442401170730591, + "ce_orig": 0.44627517461776733, + "epoch": 1.1619095549644116, + "kl_loss": 0.04627104103565216, + "loss_ib": 0.0007069504936225712, + "step": 4041 + }, + { + "ce_ib": 3.298811912536621, + "ce_orig": 0.8803318738937378, + "epoch": 1.1619095549644116, + "kl_loss": 0.054609544575214386, + "loss_ib": 0.0008759766351431608, + "step": 4041 + }, + { + "ce_ib": 4.462754726409912, + "ce_orig": 1.0008065700531006, + "epoch": 1.1621971385433891, + "kl_loss": 0.03496851399540901, + "loss_ib": 0.0007959605427458882, + "step": 4042 + }, + { + "ce_ib": 3.751896381378174, + "ce_orig": 1.201964020729065, + "epoch": 1.1621971385433891, + "kl_loss": 0.07111578434705734, + "loss_ib": 0.0010863475035876036, + "step": 4042 + }, + { + "ce_ib": 2.9222657680511475, + "ce_orig": 0.7353483438491821, + "epoch": 1.1621971385433891, + "kl_loss": 0.044849298894405365, + "loss_ib": 0.0007407195516861975, + "step": 4042 + }, + { + "ce_ib": 4.068714618682861, + "ce_orig": 1.0355116128921509, + "epoch": 1.1621971385433891, + "kl_loss": 0.058364853262901306, + "loss_ib": 0.0009905199985951185, + "step": 4042 + }, + { + "ce_ib": 3.4876019954681396, + "ce_orig": 1.0236544609069824, + "epoch": 1.1624847221223669, + "kl_loss": 0.06433963775634766, + "loss_ib": 0.0009921565651893616, + "step": 4043 + }, + { + "ce_ib": 2.3623263835906982, + "ce_orig": 0.5882461667060852, + "epoch": 1.1624847221223669, + "kl_loss": 0.02850610762834549, + "loss_ib": 0.000521293724887073, + "step": 4043 + }, + { + "ce_ib": 3.9461705684661865, + "ce_orig": 1.4295326471328735, + "epoch": 1.1624847221223669, + "kl_loss": 0.048520080745220184, + "loss_ib": 0.0008798178751021624, + "step": 4043 + }, + { + "ce_ib": 3.207139492034912, + "ce_orig": 0.8626168370246887, + "epoch": 1.1624847221223669, + "kl_loss": 0.06015486270189285, + "loss_ib": 0.0009222624939866364, + "step": 4043 + }, + { + "ce_ib": 3.0544021129608154, + "ce_orig": 0.7288475632667542, + "epoch": 1.1627723057013444, + "kl_loss": 0.031109042465686798, + "loss_ib": 0.0006165305967442691, + "step": 4044 + }, + { + "ce_ib": 4.5237860679626465, + "ce_orig": 1.4270788431167603, + "epoch": 1.1627723057013444, + "kl_loss": 0.03508526831865311, + "loss_ib": 0.0008032312616705894, + "step": 4044 + }, + { + "ce_ib": 2.6749534606933594, + "ce_orig": 0.7402317523956299, + "epoch": 1.1627723057013444, + "kl_loss": 0.027757544070482254, + "loss_ib": 0.0005450707394629717, + "step": 4044 + }, + { + "ce_ib": 7.324645042419434, + "ce_orig": 2.446401596069336, + "epoch": 1.1627723057013444, + "kl_loss": 0.0511571541428566, + "loss_ib": 0.0012440360151231289, + "step": 4044 + }, + { + "epoch": 1.1630598892803221, + "grad_norm": 0.11455990374088287, + "learning_rate": 3.506669398781671e-05, + "loss": 0.8768, + "step": 4045 + }, + { + "ce_ib": 4.985386848449707, + "ce_orig": 1.30997633934021, + "epoch": 1.1630598892803221, + "kl_loss": 0.15447445213794708, + "loss_ib": 0.002043283311650157, + "step": 4045 + }, + { + "ce_ib": 2.3206710815429688, + "ce_orig": 0.6276873350143433, + "epoch": 1.1630598892803221, + "kl_loss": 0.031131336465477943, + "loss_ib": 0.0005433804471977055, + "step": 4045 + }, + { + "ce_ib": 4.723538398742676, + "ce_orig": 1.291208028793335, + "epoch": 1.1630598892803221, + "kl_loss": 0.05791282281279564, + "loss_ib": 0.0010514820460230112, + "step": 4045 + }, + { + "ce_ib": 1.6522088050842285, + "ce_orig": 0.4245874881744385, + "epoch": 1.1630598892803221, + "kl_loss": 0.025864887982606888, + "loss_ib": 0.00042386972927488387, + "step": 4045 + }, + { + "ce_ib": 3.0458123683929443, + "ce_orig": 0.7803192734718323, + "epoch": 1.1633474728592996, + "kl_loss": 0.03641805052757263, + "loss_ib": 0.0006687617278657854, + "step": 4046 + }, + { + "ce_ib": 2.9134457111358643, + "ce_orig": 0.5071167945861816, + "epoch": 1.1633474728592996, + "kl_loss": 0.05339156091213226, + "loss_ib": 0.0008252601255662739, + "step": 4046 + }, + { + "ce_ib": 5.220381259918213, + "ce_orig": 1.4963266849517822, + "epoch": 1.1633474728592996, + "kl_loss": 0.04156707227230072, + "loss_ib": 0.0009377088281325996, + "step": 4046 + }, + { + "ce_ib": 2.066455364227295, + "ce_orig": 0.48219427466392517, + "epoch": 1.1633474728592996, + "kl_loss": 0.030755288898944855, + "loss_ib": 0.000514198443852365, + "step": 4046 + }, + { + "ce_ib": 2.7081246376037598, + "ce_orig": 0.6403168439865112, + "epoch": 1.1636350564382774, + "kl_loss": 0.06734728813171387, + "loss_ib": 0.0009442853042855859, + "step": 4047 + }, + { + "ce_ib": 3.6635208129882812, + "ce_orig": 1.2263753414154053, + "epoch": 1.1636350564382774, + "kl_loss": 0.04638352245092392, + "loss_ib": 0.000830187345854938, + "step": 4047 + }, + { + "ce_ib": 4.4883646965026855, + "ce_orig": 0.9883413314819336, + "epoch": 1.1636350564382774, + "kl_loss": 0.04842612147331238, + "loss_ib": 0.0009330976754426956, + "step": 4047 + }, + { + "ce_ib": 2.292341470718384, + "ce_orig": 0.46396175026893616, + "epoch": 1.1636350564382774, + "kl_loss": 0.03860094025731087, + "loss_ib": 0.0006152435089461505, + "step": 4047 + }, + { + "ce_ib": 4.102289199829102, + "ce_orig": 0.9422762989997864, + "epoch": 1.163922640017255, + "kl_loss": 0.07397480309009552, + "loss_ib": 0.001149976858869195, + "step": 4048 + }, + { + "ce_ib": 2.8704171180725098, + "ce_orig": 0.5179280042648315, + "epoch": 1.163922640017255, + "kl_loss": 0.06617522239685059, + "loss_ib": 0.0009487938950769603, + "step": 4048 + }, + { + "ce_ib": 5.5304036140441895, + "ce_orig": 1.5830683708190918, + "epoch": 1.163922640017255, + "kl_loss": 0.05032861605286598, + "loss_ib": 0.0010563264368101954, + "step": 4048 + }, + { + "ce_ib": 2.645291328430176, + "ce_orig": 0.6821208000183105, + "epoch": 1.163922640017255, + "kl_loss": 0.03398430347442627, + "loss_ib": 0.0006043721805326641, + "step": 4048 + }, + { + "ce_ib": 3.880979299545288, + "ce_orig": 0.8284997344017029, + "epoch": 1.1642102235962326, + "kl_loss": 0.031779952347278595, + "loss_ib": 0.0007058974006213248, + "step": 4049 + }, + { + "ce_ib": 2.745021343231201, + "ce_orig": 0.5092727541923523, + "epoch": 1.1642102235962326, + "kl_loss": 0.04849826544523239, + "loss_ib": 0.0007594847702421248, + "step": 4049 + }, + { + "ce_ib": 2.8538520336151123, + "ce_orig": 0.6210089921951294, + "epoch": 1.1642102235962326, + "kl_loss": 0.04309634119272232, + "loss_ib": 0.0007163485861383379, + "step": 4049 + }, + { + "ce_ib": 2.0092742443084717, + "ce_orig": 0.670125424861908, + "epoch": 1.1642102235962326, + "kl_loss": 0.024591824039816856, + "loss_ib": 0.0004468456609174609, + "step": 4049 + }, + { + "epoch": 1.1644978071752103, + "grad_norm": 0.10616492480039597, + "learning_rate": 3.50311625461081e-05, + "loss": 0.8543, + "step": 4050 + }, + { + "ce_ib": 2.605832815170288, + "ce_orig": 0.6222933530807495, + "epoch": 1.1644978071752103, + "kl_loss": 0.04111053794622421, + "loss_ib": 0.0006716885836794972, + "step": 4050 + }, + { + "ce_ib": 5.1445465087890625, + "ce_orig": 1.1735836267471313, + "epoch": 1.1644978071752103, + "kl_loss": 0.09121158719062805, + "loss_ib": 0.0014265705831348896, + "step": 4050 + }, + { + "ce_ib": 4.20490837097168, + "ce_orig": 0.8194015026092529, + "epoch": 1.1644978071752103, + "kl_loss": 0.06771643459796906, + "loss_ib": 0.0010976551566272974, + "step": 4050 + }, + { + "ce_ib": 2.3200361728668213, + "ce_orig": 0.5822427272796631, + "epoch": 1.1644978071752103, + "kl_loss": 0.041237086057662964, + "loss_ib": 0.0006443744641728699, + "step": 4050 + }, + { + "ce_ib": 2.822741746902466, + "ce_orig": 0.7577524781227112, + "epoch": 1.1647853907541879, + "kl_loss": 0.04204846918582916, + "loss_ib": 0.0007027588435448706, + "step": 4051 + }, + { + "ce_ib": 3.1639256477355957, + "ce_orig": 0.8956714272499084, + "epoch": 1.1647853907541879, + "kl_loss": 0.05987988039851189, + "loss_ib": 0.0009151913109235466, + "step": 4051 + }, + { + "ce_ib": 4.408069133758545, + "ce_orig": 1.0484721660614014, + "epoch": 1.1647853907541879, + "kl_loss": 0.03204209357500076, + "loss_ib": 0.0007612277986481786, + "step": 4051 + }, + { + "ce_ib": 2.323366641998291, + "ce_orig": 0.5341171026229858, + "epoch": 1.1647853907541879, + "kl_loss": 0.04043489322066307, + "loss_ib": 0.0006366855232045054, + "step": 4051 + }, + { + "ce_ib": 3.558195114135742, + "ce_orig": 0.7665886878967285, + "epoch": 1.1650729743331656, + "kl_loss": 0.038447536528110504, + "loss_ib": 0.0007402948685921729, + "step": 4052 + }, + { + "ce_ib": 2.4580442905426025, + "ce_orig": 0.566079318523407, + "epoch": 1.1650729743331656, + "kl_loss": 0.03390956670045853, + "loss_ib": 0.0005849000881426036, + "step": 4052 + }, + { + "ce_ib": 2.033724069595337, + "ce_orig": 0.5580686330795288, + "epoch": 1.1650729743331656, + "kl_loss": 0.031225070357322693, + "loss_ib": 0.0005156230763532221, + "step": 4052 + }, + { + "ce_ib": 3.133094310760498, + "ce_orig": 0.9837997555732727, + "epoch": 1.1650729743331656, + "kl_loss": 0.04303106665611267, + "loss_ib": 0.0007436200976371765, + "step": 4052 + }, + { + "ce_ib": 2.8375749588012695, + "ce_orig": 0.6071165204048157, + "epoch": 1.1653605579121433, + "kl_loss": 0.058555252850055695, + "loss_ib": 0.0008693100535310805, + "step": 4053 + }, + { + "ce_ib": 5.265157699584961, + "ce_orig": 1.5911054611206055, + "epoch": 1.1653605579121433, + "kl_loss": 0.047031573951244354, + "loss_ib": 0.0009968314552679658, + "step": 4053 + }, + { + "ce_ib": 2.7454161643981934, + "ce_orig": 0.7391772866249084, + "epoch": 1.1653605579121433, + "kl_loss": 0.023389674723148346, + "loss_ib": 0.0005084383301436901, + "step": 4053 + }, + { + "ce_ib": 1.8647037744522095, + "ce_orig": 0.33054202795028687, + "epoch": 1.1653605579121433, + "kl_loss": 0.046082548797130585, + "loss_ib": 0.0006472958484664559, + "step": 4053 + }, + { + "ce_ib": 4.833590984344482, + "ce_orig": 1.0041897296905518, + "epoch": 1.1656481414911208, + "kl_loss": 0.07680582255125046, + "loss_ib": 0.0012514173286035657, + "step": 4054 + }, + { + "ce_ib": 2.104947328567505, + "ce_orig": 0.46420034766197205, + "epoch": 1.1656481414911208, + "kl_loss": 0.03025611862540245, + "loss_ib": 0.0005130558856762946, + "step": 4054 + }, + { + "ce_ib": 3.955225706100464, + "ce_orig": 0.8505429625511169, + "epoch": 1.1656481414911208, + "kl_loss": 0.0500507727265358, + "loss_ib": 0.0008960302802734077, + "step": 4054 + }, + { + "ce_ib": 3.5047707557678223, + "ce_orig": 0.5195265412330627, + "epoch": 1.1656481414911208, + "kl_loss": 0.05160006508231163, + "loss_ib": 0.0008664776687510312, + "step": 4054 + }, + { + "epoch": 1.1659357250700986, + "grad_norm": 0.09610315412282944, + "learning_rate": 3.4995606937000644e-05, + "loss": 0.8673, + "step": 4055 + }, + { + "ce_ib": 3.400763750076294, + "ce_orig": 0.8699588775634766, + "epoch": 1.1659357250700986, + "kl_loss": 0.04678850620985031, + "loss_ib": 0.0008079613326117396, + "step": 4055 + }, + { + "ce_ib": 3.8669283390045166, + "ce_orig": 0.9250701069831848, + "epoch": 1.1659357250700986, + "kl_loss": 0.05212882161140442, + "loss_ib": 0.0009079810115508735, + "step": 4055 + }, + { + "ce_ib": 2.6365342140197754, + "ce_orig": 0.675567626953125, + "epoch": 1.1659357250700986, + "kl_loss": 0.04288187250494957, + "loss_ib": 0.000692472152877599, + "step": 4055 + }, + { + "ce_ib": 4.128648281097412, + "ce_orig": 1.058768391609192, + "epoch": 1.1659357250700986, + "kl_loss": 0.04581312835216522, + "loss_ib": 0.0008709960966371, + "step": 4055 + }, + { + "ce_ib": 3.151785373687744, + "ce_orig": 0.7851739525794983, + "epoch": 1.166223308649076, + "kl_loss": 0.037903934717178345, + "loss_ib": 0.0006942178006283939, + "step": 4056 + }, + { + "ce_ib": 3.42557430267334, + "ce_orig": 0.9377487897872925, + "epoch": 1.166223308649076, + "kl_loss": 0.03299473971128464, + "loss_ib": 0.0006725048297084868, + "step": 4056 + }, + { + "ce_ib": 3.2379674911499023, + "ce_orig": 0.7932174801826477, + "epoch": 1.166223308649076, + "kl_loss": 0.03768693283200264, + "loss_ib": 0.0007006660453043878, + "step": 4056 + }, + { + "ce_ib": 5.599350452423096, + "ce_orig": 1.5223034620285034, + "epoch": 1.166223308649076, + "kl_loss": 0.04452358931303024, + "loss_ib": 0.0010051708668470383, + "step": 4056 + }, + { + "ce_ib": 3.7180721759796143, + "ce_orig": 0.9114311933517456, + "epoch": 1.1665108922280538, + "kl_loss": 0.06035696715116501, + "loss_ib": 0.0009753768681548536, + "step": 4057 + }, + { + "ce_ib": 3.3712611198425293, + "ce_orig": 0.8997890949249268, + "epoch": 1.1665108922280538, + "kl_loss": 0.04288886860013008, + "loss_ib": 0.0007660147384740412, + "step": 4057 + }, + { + "ce_ib": 2.231168270111084, + "ce_orig": 0.6435089111328125, + "epoch": 1.1665108922280538, + "kl_loss": 0.035665132105350494, + "loss_ib": 0.000579768093302846, + "step": 4057 + }, + { + "ce_ib": 4.1045613288879395, + "ce_orig": 0.8278507590293884, + "epoch": 1.1665108922280538, + "kl_loss": 0.043898969888687134, + "loss_ib": 0.000849445816129446, + "step": 4057 + }, + { + "ce_ib": 2.559800148010254, + "ce_orig": 0.40503549575805664, + "epoch": 1.1667984758070313, + "kl_loss": 0.0411166213452816, + "loss_ib": 0.000667146232444793, + "step": 4058 + }, + { + "ce_ib": 2.8501439094543457, + "ce_orig": 0.6608766317367554, + "epoch": 1.1667984758070313, + "kl_loss": 0.06344451755285263, + "loss_ib": 0.0009194595040753484, + "step": 4058 + }, + { + "ce_ib": 3.2025723457336426, + "ce_orig": 0.9278931021690369, + "epoch": 1.1667984758070313, + "kl_loss": 0.03074437752366066, + "loss_ib": 0.000627700996119529, + "step": 4058 + }, + { + "ce_ib": 1.628861427307129, + "ce_orig": 0.35916781425476074, + "epoch": 1.1667984758070313, + "kl_loss": 0.02308756113052368, + "loss_ib": 0.00039376175845973194, + "step": 4058 + }, + { + "ce_ib": 3.553788661956787, + "ce_orig": 0.571487307548523, + "epoch": 1.167086059386009, + "kl_loss": 0.04537519812583923, + "loss_ib": 0.0008091307827271521, + "step": 4059 + }, + { + "ce_ib": 2.8253333568573, + "ce_orig": 0.8206667900085449, + "epoch": 1.167086059386009, + "kl_loss": 0.04689133167266846, + "loss_ib": 0.0007514466415159404, + "step": 4059 + }, + { + "ce_ib": 3.2065212726593018, + "ce_orig": 0.9238781929016113, + "epoch": 1.167086059386009, + "kl_loss": 0.07477691024541855, + "loss_ib": 0.0010684211738407612, + "step": 4059 + }, + { + "ce_ib": 2.6566708087921143, + "ce_orig": 0.752912163734436, + "epoch": 1.167086059386009, + "kl_loss": 0.03464968502521515, + "loss_ib": 0.0006121639162302017, + "step": 4059 + }, + { + "epoch": 1.1673736429649866, + "grad_norm": 0.09872525930404663, + "learning_rate": 3.496002724615604e-05, + "loss": 0.7527, + "step": 4060 + }, + { + "ce_ib": 3.9102060794830322, + "ce_orig": 0.5668700337409973, + "epoch": 1.1673736429649866, + "kl_loss": 0.0424942784011364, + "loss_ib": 0.0008159633725881577, + "step": 4060 + }, + { + "ce_ib": 2.4913887977600098, + "ce_orig": 0.5761697292327881, + "epoch": 1.1673736429649866, + "kl_loss": 0.037889331579208374, + "loss_ib": 0.0006280321395024657, + "step": 4060 + }, + { + "ce_ib": 3.0290687084198, + "ce_orig": 0.5823022127151489, + "epoch": 1.1673736429649866, + "kl_loss": 0.04530239850282669, + "loss_ib": 0.0007559308432973921, + "step": 4060 + }, + { + "ce_ib": 3.6225826740264893, + "ce_orig": 1.0092939138412476, + "epoch": 1.1673736429649866, + "kl_loss": 0.054158613085746765, + "loss_ib": 0.0009038443677127361, + "step": 4060 + }, + { + "ce_ib": 3.268702507019043, + "ce_orig": 0.7589768171310425, + "epoch": 1.1676612265439643, + "kl_loss": 0.05030781775712967, + "loss_ib": 0.0008299484034068882, + "step": 4061 + }, + { + "ce_ib": 2.9411401748657227, + "ce_orig": 0.7850885987281799, + "epoch": 1.1676612265439643, + "kl_loss": 0.037656933069229126, + "loss_ib": 0.0006706833955831826, + "step": 4061 + }, + { + "ce_ib": 4.931870460510254, + "ce_orig": 1.2870469093322754, + "epoch": 1.1676612265439643, + "kl_loss": 0.050168171525001526, + "loss_ib": 0.0009948686929419637, + "step": 4061 + }, + { + "ce_ib": 1.2978776693344116, + "ce_orig": 0.36829835176467896, + "epoch": 1.1676612265439643, + "kl_loss": 0.019984988495707512, + "loss_ib": 0.00032963763806037605, + "step": 4061 + }, + { + "ce_ib": 2.786343812942505, + "ce_orig": 0.7378695607185364, + "epoch": 1.167948810122942, + "kl_loss": 0.03285918012261391, + "loss_ib": 0.0006072261603549123, + "step": 4062 + }, + { + "ce_ib": 4.252902984619141, + "ce_orig": 0.7641015648841858, + "epoch": 1.167948810122942, + "kl_loss": 0.08740092813968658, + "loss_ib": 0.0012992995325475931, + "step": 4062 + }, + { + "ce_ib": 3.847883462905884, + "ce_orig": 1.081510305404663, + "epoch": 1.167948810122942, + "kl_loss": 0.058077581226825714, + "loss_ib": 0.0009655641042627394, + "step": 4062 + }, + { + "ce_ib": 3.6666247844696045, + "ce_orig": 0.961589515209198, + "epoch": 1.167948810122942, + "kl_loss": 0.04566904902458191, + "loss_ib": 0.0008233528933487833, + "step": 4062 + }, + { + "ce_ib": 2.68292498588562, + "ce_orig": 0.5992985367774963, + "epoch": 1.1682363937019196, + "kl_loss": 0.044127337634563446, + "loss_ib": 0.0007095658802427351, + "step": 4063 + }, + { + "ce_ib": 2.053018569946289, + "ce_orig": 0.5505955219268799, + "epoch": 1.1682363937019196, + "kl_loss": 0.03565334528684616, + "loss_ib": 0.0005618353025056422, + "step": 4063 + }, + { + "ce_ib": 4.817410469055176, + "ce_orig": 1.3095455169677734, + "epoch": 1.1682363937019196, + "kl_loss": 0.061068467795848846, + "loss_ib": 0.0010924256639555097, + "step": 4063 + }, + { + "ce_ib": 5.103312969207764, + "ce_orig": 1.3026551008224487, + "epoch": 1.1682363937019196, + "kl_loss": 0.06228150427341461, + "loss_ib": 0.0011331462301313877, + "step": 4063 + }, + { + "ce_ib": 2.5988917350769043, + "ce_orig": 0.46157151460647583, + "epoch": 1.1685239772808973, + "kl_loss": 0.03676232695579529, + "loss_ib": 0.0006275124032981694, + "step": 4064 + }, + { + "ce_ib": 1.6223269701004028, + "ce_orig": 0.3666318655014038, + "epoch": 1.1685239772808973, + "kl_loss": 0.021423712372779846, + "loss_ib": 0.0003764697758015245, + "step": 4064 + }, + { + "ce_ib": 5.234652042388916, + "ce_orig": 1.2889529466629028, + "epoch": 1.1685239772808973, + "kl_loss": 0.0515214204788208, + "loss_ib": 0.0010386793874204159, + "step": 4064 + }, + { + "ce_ib": 3.6575634479522705, + "ce_orig": 1.0779242515563965, + "epoch": 1.1685239772808973, + "kl_loss": 0.06400740146636963, + "loss_ib": 0.001005830243229866, + "step": 4064 + }, + { + "epoch": 1.1688115608598748, + "grad_norm": 0.10350034385919571, + "learning_rate": 3.4924423559294036e-05, + "loss": 0.842, + "step": 4065 + }, + { + "ce_ib": 2.8377909660339355, + "ce_orig": 0.4854942262172699, + "epoch": 1.1688115608598748, + "kl_loss": 0.03983641415834427, + "loss_ib": 0.0006821432616561651, + "step": 4065 + }, + { + "ce_ib": 5.057666301727295, + "ce_orig": 1.3497129678726196, + "epoch": 1.1688115608598748, + "kl_loss": 0.04934851452708244, + "loss_ib": 0.0009992517298087478, + "step": 4065 + }, + { + "ce_ib": 2.997882843017578, + "ce_orig": 0.6519203782081604, + "epoch": 1.1688115608598748, + "kl_loss": 0.07341272383928299, + "loss_ib": 0.001033915439620614, + "step": 4065 + }, + { + "ce_ib": 5.586431503295898, + "ce_orig": 1.7082481384277344, + "epoch": 1.1688115608598748, + "kl_loss": 0.049155429005622864, + "loss_ib": 0.001050197402946651, + "step": 4065 + }, + { + "ce_ib": 3.0636746883392334, + "ce_orig": 0.8149264454841614, + "epoch": 1.1690991444388525, + "kl_loss": 0.0500720739364624, + "loss_ib": 0.0008070881594903767, + "step": 4066 + }, + { + "ce_ib": 1.736357569694519, + "ce_orig": 0.4427812993526459, + "epoch": 1.1690991444388525, + "kl_loss": 0.09760592877864838, + "loss_ib": 0.001149695017375052, + "step": 4066 + }, + { + "ce_ib": 2.26033353805542, + "ce_orig": 0.5331912040710449, + "epoch": 1.1690991444388525, + "kl_loss": 0.021162014454603195, + "loss_ib": 0.00043765350710600615, + "step": 4066 + }, + { + "ce_ib": 3.6184964179992676, + "ce_orig": 1.1612712144851685, + "epoch": 1.1690991444388525, + "kl_loss": 0.05607621371746063, + "loss_ib": 0.0009226117981597781, + "step": 4066 + }, + { + "ce_ib": 1.9027600288391113, + "ce_orig": 0.377095103263855, + "epoch": 1.1693867280178303, + "kl_loss": 0.031361497938632965, + "loss_ib": 0.0005038909730501473, + "step": 4067 + }, + { + "ce_ib": 4.12993860244751, + "ce_orig": 0.7423149347305298, + "epoch": 1.1693867280178303, + "kl_loss": 0.05467706918716431, + "loss_ib": 0.0009597645257599652, + "step": 4067 + }, + { + "ce_ib": 1.8754316568374634, + "ce_orig": 0.38290122151374817, + "epoch": 1.1693867280178303, + "kl_loss": 0.024127138778567314, + "loss_ib": 0.0004288145573809743, + "step": 4067 + }, + { + "ce_ib": 3.544107437133789, + "ce_orig": 1.1354224681854248, + "epoch": 1.1693867280178303, + "kl_loss": 0.03558792173862457, + "loss_ib": 0.0007102899253368378, + "step": 4067 + }, + { + "ce_ib": 3.9950361251831055, + "ce_orig": 1.23890221118927, + "epoch": 1.1696743115968078, + "kl_loss": 0.0294375978410244, + "loss_ib": 0.0006938795559108257, + "step": 4068 + }, + { + "ce_ib": 2.6996676921844482, + "ce_orig": 0.5552813410758972, + "epoch": 1.1696743115968078, + "kl_loss": 0.058985013514757156, + "loss_ib": 0.0008598168496973813, + "step": 4068 + }, + { + "ce_ib": 2.8289029598236084, + "ce_orig": 0.6823354959487915, + "epoch": 1.1696743115968078, + "kl_loss": 0.04871935397386551, + "loss_ib": 0.0007700838032178581, + "step": 4068 + }, + { + "ce_ib": 2.647233486175537, + "ce_orig": 0.6352203488349915, + "epoch": 1.1696743115968078, + "kl_loss": 0.03957917541265488, + "loss_ib": 0.000660515099298209, + "step": 4068 + }, + { + "ce_ib": 2.222639322280884, + "ce_orig": 0.4653036296367645, + "epoch": 1.1699618951757855, + "kl_loss": 0.030514758080244064, + "loss_ib": 0.0005274115246720612, + "step": 4069 + }, + { + "ce_ib": 2.722980260848999, + "ce_orig": 0.8253926634788513, + "epoch": 1.1699618951757855, + "kl_loss": 0.043120816349983215, + "loss_ib": 0.0007035061717033386, + "step": 4069 + }, + { + "ce_ib": 3.879711151123047, + "ce_orig": 0.6639466881752014, + "epoch": 1.1699618951757855, + "kl_loss": 0.04803430289030075, + "loss_ib": 0.0008683141204528511, + "step": 4069 + }, + { + "ce_ib": 3.0783565044403076, + "ce_orig": 0.9273906350135803, + "epoch": 1.1699618951757855, + "kl_loss": 0.028941744938492775, + "loss_ib": 0.0005972530343569815, + "step": 4069 + }, + { + "epoch": 1.170249478754763, + "grad_norm": 0.10524775087833405, + "learning_rate": 3.4888795962192155e-05, + "loss": 0.8281, + "step": 4070 + }, + { + "ce_ib": 3.398386240005493, + "ce_orig": 0.8323834538459778, + "epoch": 1.170249478754763, + "kl_loss": 0.04928077012300491, + "loss_ib": 0.0008326463284902275, + "step": 4070 + }, + { + "ce_ib": 3.835973024368286, + "ce_orig": 0.9296879172325134, + "epoch": 1.170249478754763, + "kl_loss": 0.05229274183511734, + "loss_ib": 0.0009065246558748186, + "step": 4070 + }, + { + "ce_ib": 3.6220204830169678, + "ce_orig": 0.9106321930885315, + "epoch": 1.170249478754763, + "kl_loss": 0.03716099262237549, + "loss_ib": 0.0007338119903579354, + "step": 4070 + }, + { + "ce_ib": 2.299471139907837, + "ce_orig": 0.6100324988365173, + "epoch": 1.170249478754763, + "kl_loss": 0.025870507583022118, + "loss_ib": 0.0004886521492153406, + "step": 4070 + }, + { + "ce_ib": 0.7533550262451172, + "ce_orig": 0.030538931488990784, + "epoch": 1.1705370623337408, + "kl_loss": 0.09668419510126114, + "loss_ib": 0.0010421774350106716, + "step": 4071 + }, + { + "ce_ib": 4.141875267028809, + "ce_orig": 0.9549717903137207, + "epoch": 1.1705370623337408, + "kl_loss": 0.1376059204339981, + "loss_ib": 0.001790246577002108, + "step": 4071 + }, + { + "ce_ib": 3.6255548000335693, + "ce_orig": 0.8394689559936523, + "epoch": 1.1705370623337408, + "kl_loss": 0.0446990467607975, + "loss_ib": 0.000809545919764787, + "step": 4071 + }, + { + "ce_ib": 5.328487873077393, + "ce_orig": 1.3145289421081543, + "epoch": 1.1705370623337408, + "kl_loss": 0.04820544272661209, + "loss_ib": 0.00101490318775177, + "step": 4071 + }, + { + "ce_ib": 4.6509881019592285, + "ce_orig": 1.136006474494934, + "epoch": 1.1708246459127183, + "kl_loss": 0.045678310096263885, + "loss_ib": 0.0009218819322995842, + "step": 4072 + }, + { + "ce_ib": 4.902939319610596, + "ce_orig": 1.6274610757827759, + "epoch": 1.1708246459127183, + "kl_loss": 0.03422142565250397, + "loss_ib": 0.0008325082017108798, + "step": 4072 + }, + { + "ce_ib": 1.5947140455245972, + "ce_orig": 0.32179200649261475, + "epoch": 1.1708246459127183, + "kl_loss": 0.04796458035707474, + "loss_ib": 0.0006391172064468265, + "step": 4072 + }, + { + "ce_ib": 2.0145320892333984, + "ce_orig": 0.4682922959327698, + "epoch": 1.1708246459127183, + "kl_loss": 0.028791222721338272, + "loss_ib": 0.0004893654258921742, + "step": 4072 + }, + { + "ce_ib": 3.1896066665649414, + "ce_orig": 0.927838921546936, + "epoch": 1.171112229491696, + "kl_loss": 0.03327640891075134, + "loss_ib": 0.0006517247529700398, + "step": 4073 + }, + { + "ce_ib": 3.2386362552642822, + "ce_orig": 0.8281010389328003, + "epoch": 1.171112229491696, + "kl_loss": 0.05452140420675278, + "loss_ib": 0.000869077630341053, + "step": 4073 + }, + { + "ce_ib": 4.099564552307129, + "ce_orig": 1.0425961017608643, + "epoch": 1.171112229491696, + "kl_loss": 0.0620039626955986, + "loss_ib": 0.0010299960849806666, + "step": 4073 + }, + { + "ce_ib": 2.965021848678589, + "ce_orig": 0.7314980030059814, + "epoch": 1.171112229491696, + "kl_loss": 0.0365925170481205, + "loss_ib": 0.0006624272791668773, + "step": 4073 + }, + { + "ce_ib": 3.6163880825042725, + "ce_orig": 0.6324554681777954, + "epoch": 1.1713998130706738, + "kl_loss": 0.05591830238699913, + "loss_ib": 0.0009208218543790281, + "step": 4074 + }, + { + "ce_ib": 2.4370973110198975, + "ce_orig": 0.5521202683448792, + "epoch": 1.1713998130706738, + "kl_loss": 0.053697336465120316, + "loss_ib": 0.000780683069024235, + "step": 4074 + }, + { + "ce_ib": 2.746570110321045, + "ce_orig": 0.622891366481781, + "epoch": 1.1713998130706738, + "kl_loss": 0.046694472432136536, + "loss_ib": 0.000741601746995002, + "step": 4074 + }, + { + "ce_ib": 3.7185921669006348, + "ce_orig": 0.9203096628189087, + "epoch": 1.1713998130706738, + "kl_loss": 0.05725063756108284, + "loss_ib": 0.0009443655144423246, + "step": 4074 + }, + { + "epoch": 1.1716873966496513, + "grad_norm": 0.10544202476739883, + "learning_rate": 3.485314454068558e-05, + "loss": 0.8212, + "step": 4075 + }, + { + "ce_ib": 3.6253325939178467, + "ce_orig": 0.7486560344696045, + "epoch": 1.1716873966496513, + "kl_loss": 0.05820288881659508, + "loss_ib": 0.0009445620817132294, + "step": 4075 + }, + { + "ce_ib": 3.6637825965881348, + "ce_orig": 0.4669122099876404, + "epoch": 1.1716873966496513, + "kl_loss": 0.029283307492733, + "loss_ib": 0.0006592112476937473, + "step": 4075 + }, + { + "ce_ib": 3.772803544998169, + "ce_orig": 1.121263861656189, + "epoch": 1.1716873966496513, + "kl_loss": 0.04306325316429138, + "loss_ib": 0.0008079128456301987, + "step": 4075 + }, + { + "ce_ib": 4.317783355712891, + "ce_orig": 1.0599689483642578, + "epoch": 1.1716873966496513, + "kl_loss": 0.07396396994590759, + "loss_ib": 0.0011714180000126362, + "step": 4075 + }, + { + "ce_ib": 4.898653030395508, + "ce_orig": 1.3807965517044067, + "epoch": 1.171974980228629, + "kl_loss": 0.11653190106153488, + "loss_ib": 0.0016551843145862222, + "step": 4076 + }, + { + "ce_ib": 4.484192848205566, + "ce_orig": 1.1732375621795654, + "epoch": 1.171974980228629, + "kl_loss": 0.05913281440734863, + "loss_ib": 0.0010397473815828562, + "step": 4076 + }, + { + "ce_ib": 3.92900013923645, + "ce_orig": 1.2076663970947266, + "epoch": 1.171974980228629, + "kl_loss": 0.03755408525466919, + "loss_ib": 0.0007684407755732536, + "step": 4076 + }, + { + "ce_ib": 2.6351757049560547, + "ce_orig": 0.5846911072731018, + "epoch": 1.171974980228629, + "kl_loss": 0.02901219204068184, + "loss_ib": 0.0005536394892260432, + "step": 4076 + }, + { + "ce_ib": 3.0684573650360107, + "ce_orig": 0.45550766587257385, + "epoch": 1.1722625638076065, + "kl_loss": 0.031723253428936005, + "loss_ib": 0.0006240782677195966, + "step": 4077 + }, + { + "ce_ib": 4.423678874969482, + "ce_orig": 0.840323269367218, + "epoch": 1.1722625638076065, + "kl_loss": 0.14055076241493225, + "loss_ib": 0.001847875420935452, + "step": 4077 + }, + { + "ce_ib": 4.4762773513793945, + "ce_orig": 1.1601848602294922, + "epoch": 1.1722625638076065, + "kl_loss": 0.0261690691113472, + "loss_ib": 0.0007093183812685311, + "step": 4077 + }, + { + "ce_ib": 2.3888003826141357, + "ce_orig": 0.49905532598495483, + "epoch": 1.1722625638076065, + "kl_loss": 0.052962079644203186, + "loss_ib": 0.0007685008458793163, + "step": 4077 + }, + { + "ce_ib": 2.9201672077178955, + "ce_orig": 0.6927530169487, + "epoch": 1.1725501473865843, + "kl_loss": 0.031822673976421356, + "loss_ib": 0.0006102434126660228, + "step": 4078 + }, + { + "ce_ib": 4.832096099853516, + "ce_orig": 1.4701164960861206, + "epoch": 1.1725501473865843, + "kl_loss": 0.05134724825620651, + "loss_ib": 0.00099668197799474, + "step": 4078 + }, + { + "ce_ib": 3.1296839714050293, + "ce_orig": 0.9800900816917419, + "epoch": 1.1725501473865843, + "kl_loss": 0.04423609748482704, + "loss_ib": 0.0007553293253295124, + "step": 4078 + }, + { + "ce_ib": 3.997786045074463, + "ce_orig": 1.1086337566375732, + "epoch": 1.1725501473865843, + "kl_loss": 0.0796542689204216, + "loss_ib": 0.0011963212164118886, + "step": 4078 + }, + { + "ce_ib": 3.2037789821624756, + "ce_orig": 0.9349008798599243, + "epoch": 1.1728377309655618, + "kl_loss": 0.16748711466789246, + "loss_ib": 0.0019952489528805017, + "step": 4079 + }, + { + "ce_ib": 4.985045909881592, + "ce_orig": 1.2662875652313232, + "epoch": 1.1728377309655618, + "kl_loss": 0.04192208871245384, + "loss_ib": 0.0009177253814414144, + "step": 4079 + }, + { + "ce_ib": 3.0519533157348633, + "ce_orig": 0.8274635672569275, + "epoch": 1.1728377309655618, + "kl_loss": 0.04132882505655289, + "loss_ib": 0.000718483526725322, + "step": 4079 + }, + { + "ce_ib": 4.024394512176514, + "ce_orig": 0.9390108585357666, + "epoch": 1.1728377309655618, + "kl_loss": 0.06149311736226082, + "loss_ib": 0.001017370610497892, + "step": 4079 + }, + { + "epoch": 1.1731253145445395, + "grad_norm": 0.10858830064535141, + "learning_rate": 3.481746938066684e-05, + "loss": 0.8572, + "step": 4080 + }, + { + "ce_ib": 3.3888490200042725, + "ce_orig": 0.794526219367981, + "epoch": 1.1731253145445395, + "kl_loss": 0.05966857075691223, + "loss_ib": 0.0009355705697089434, + "step": 4080 + }, + { + "ce_ib": 4.337804317474365, + "ce_orig": 1.2282518148422241, + "epoch": 1.1731253145445395, + "kl_loss": 0.06490458548069, + "loss_ib": 0.0010828262893483043, + "step": 4080 + }, + { + "ce_ib": 1.4365785121917725, + "ce_orig": 0.35023465752601624, + "epoch": 1.1731253145445395, + "kl_loss": 0.018813468515872955, + "loss_ib": 0.00033179251477122307, + "step": 4080 + }, + { + "ce_ib": 2.769723415374756, + "ce_orig": 0.7542724609375, + "epoch": 1.1731253145445395, + "kl_loss": 0.060660794377326965, + "loss_ib": 0.0008835803018882871, + "step": 4080 + }, + { + "ce_ib": 2.104435920715332, + "ce_orig": 0.6438078284263611, + "epoch": 1.1734128981235172, + "kl_loss": 0.14924734830856323, + "loss_ib": 0.0017029170412570238, + "step": 4081 + }, + { + "ce_ib": 3.8430914878845215, + "ce_orig": 1.2286161184310913, + "epoch": 1.1734128981235172, + "kl_loss": 0.04860653728246689, + "loss_ib": 0.0008703744970262051, + "step": 4081 + }, + { + "ce_ib": 4.675858974456787, + "ce_orig": 1.1745373010635376, + "epoch": 1.1734128981235172, + "kl_loss": 0.03678382933139801, + "loss_ib": 0.0008354241726920009, + "step": 4081 + }, + { + "ce_ib": 2.957329750061035, + "ce_orig": 0.5393466353416443, + "epoch": 1.1734128981235172, + "kl_loss": 0.054182276129722595, + "loss_ib": 0.000837555737234652, + "step": 4081 + }, + { + "ce_ib": 4.113556861877441, + "ce_orig": 1.433794379234314, + "epoch": 1.1737004817024947, + "kl_loss": 0.03778562322258949, + "loss_ib": 0.0007892118883319199, + "step": 4082 + }, + { + "ce_ib": 4.341063022613525, + "ce_orig": 1.0653448104858398, + "epoch": 1.1737004817024947, + "kl_loss": 0.04262026399374008, + "loss_ib": 0.0008603089372627437, + "step": 4082 + }, + { + "ce_ib": 5.399065971374512, + "ce_orig": 0.8188450336456299, + "epoch": 1.1737004817024947, + "kl_loss": 0.04945480078458786, + "loss_ib": 0.0010344545589759946, + "step": 4082 + }, + { + "ce_ib": 3.880985975265503, + "ce_orig": 0.8497952818870544, + "epoch": 1.1737004817024947, + "kl_loss": 0.026362251490354538, + "loss_ib": 0.0006517210276797414, + "step": 4082 + }, + { + "ce_ib": 3.778005599975586, + "ce_orig": 0.9335747361183167, + "epoch": 1.1739880652814725, + "kl_loss": 0.05280935764312744, + "loss_ib": 0.000905894092284143, + "step": 4083 + }, + { + "ce_ib": 2.556504964828491, + "ce_orig": 0.7719770669937134, + "epoch": 1.1739880652814725, + "kl_loss": 0.03750016540288925, + "loss_ib": 0.0006306521245278418, + "step": 4083 + }, + { + "ce_ib": 2.481630325317383, + "ce_orig": 0.4800114035606384, + "epoch": 1.1739880652814725, + "kl_loss": 0.053603749722242355, + "loss_ib": 0.0007842004997655749, + "step": 4083 + }, + { + "ce_ib": 3.163784980773926, + "ce_orig": 0.5287951231002808, + "epoch": 1.1739880652814725, + "kl_loss": 0.04427625983953476, + "loss_ib": 0.0007591410540044308, + "step": 4083 + }, + { + "ce_ib": 3.4596457481384277, + "ce_orig": 0.8085588216781616, + "epoch": 1.17427564886045, + "kl_loss": 0.06773950159549713, + "loss_ib": 0.0010233595967292786, + "step": 4084 + }, + { + "ce_ib": 5.608945369720459, + "ce_orig": 1.7262425422668457, + "epoch": 1.17427564886045, + "kl_loss": 0.048893555998802185, + "loss_ib": 0.001049830112606287, + "step": 4084 + }, + { + "ce_ib": 4.799240589141846, + "ce_orig": 0.9314437508583069, + "epoch": 1.17427564886045, + "kl_loss": 0.07800642400979996, + "loss_ib": 0.0012599881738424301, + "step": 4084 + }, + { + "ce_ib": 3.3558976650238037, + "ce_orig": 1.016404151916504, + "epoch": 1.17427564886045, + "kl_loss": 0.04058028757572174, + "loss_ib": 0.0007413926650770009, + "step": 4084 + }, + { + "epoch": 1.1745632324394277, + "grad_norm": 0.10617344081401825, + "learning_rate": 3.478177056808567e-05, + "loss": 0.8473, + "step": 4085 + }, + { + "ce_ib": 3.6633660793304443, + "ce_orig": 1.0925794839859009, + "epoch": 1.1745632324394277, + "kl_loss": 0.037831954658031464, + "loss_ib": 0.0007446561357937753, + "step": 4085 + }, + { + "ce_ib": 2.7335283756256104, + "ce_orig": 0.6142001748085022, + "epoch": 1.1745632324394277, + "kl_loss": 0.05927637219429016, + "loss_ib": 0.000866116548422724, + "step": 4085 + }, + { + "ce_ib": 1.8363975286483765, + "ce_orig": 0.5200205445289612, + "epoch": 1.1745632324394277, + "kl_loss": 0.03341217711567879, + "loss_ib": 0.0005177615093998611, + "step": 4085 + }, + { + "ce_ib": 4.200697422027588, + "ce_orig": 1.04977548122406, + "epoch": 1.1745632324394277, + "kl_loss": 0.058865051716566086, + "loss_ib": 0.0010087202535942197, + "step": 4085 + }, + { + "ce_ib": 2.6220991611480713, + "ce_orig": 0.6114948987960815, + "epoch": 1.1748508160184055, + "kl_loss": 0.04566420614719391, + "loss_ib": 0.0007188519812189043, + "step": 4086 + }, + { + "ce_ib": 4.160725116729736, + "ce_orig": 0.8715831637382507, + "epoch": 1.1748508160184055, + "kl_loss": 0.09531934559345245, + "loss_ib": 0.001369265839457512, + "step": 4086 + }, + { + "ce_ib": 3.1439082622528076, + "ce_orig": 0.7993518114089966, + "epoch": 1.1748508160184055, + "kl_loss": 0.038689590990543365, + "loss_ib": 0.0007012867135927081, + "step": 4086 + }, + { + "ce_ib": 2.887664556503296, + "ce_orig": 0.4073776304721832, + "epoch": 1.1748508160184055, + "kl_loss": 0.04905752092599869, + "loss_ib": 0.0007793416734784842, + "step": 4086 + }, + { + "ce_ib": 4.0757317543029785, + "ce_orig": 1.0045369863510132, + "epoch": 1.175138399597383, + "kl_loss": 0.039289288222789764, + "loss_ib": 0.0008004660485312343, + "step": 4087 + }, + { + "ce_ib": 1.982803463935852, + "ce_orig": 0.2946225702762604, + "epoch": 1.175138399597383, + "kl_loss": 0.05745472013950348, + "loss_ib": 0.0007728275377303362, + "step": 4087 + }, + { + "ce_ib": 2.9275031089782715, + "ce_orig": 0.5476044416427612, + "epoch": 1.175138399597383, + "kl_loss": 0.03400924801826477, + "loss_ib": 0.0006328427698463202, + "step": 4087 + }, + { + "ce_ib": 4.749717712402344, + "ce_orig": 1.127967357635498, + "epoch": 1.175138399597383, + "kl_loss": 0.03135591745376587, + "loss_ib": 0.0007885309169068933, + "step": 4087 + }, + { + "ce_ib": 3.6045584678649902, + "ce_orig": 0.6205173134803772, + "epoch": 1.1754259831763607, + "kl_loss": 0.02539929375052452, + "loss_ib": 0.000614448799751699, + "step": 4088 + }, + { + "ce_ib": 4.117014408111572, + "ce_orig": 1.2948213815689087, + "epoch": 1.1754259831763607, + "kl_loss": 0.04194267839193344, + "loss_ib": 0.0008311282144859433, + "step": 4088 + }, + { + "ce_ib": 2.1626040935516357, + "ce_orig": 0.7051073312759399, + "epoch": 1.1754259831763607, + "kl_loss": 0.02900460548698902, + "loss_ib": 0.0005063064163550735, + "step": 4088 + }, + { + "ce_ib": 3.7640836238861084, + "ce_orig": 1.1549547910690308, + "epoch": 1.1754259831763607, + "kl_loss": 0.03698757290840149, + "loss_ib": 0.0007462840876542032, + "step": 4088 + }, + { + "ce_ib": 4.662383079528809, + "ce_orig": 1.1959589719772339, + "epoch": 1.1757135667553382, + "kl_loss": 0.0407695434987545, + "loss_ib": 0.0008739337208680809, + "step": 4089 + }, + { + "ce_ib": 3.1567018032073975, + "ce_orig": 0.8344554901123047, + "epoch": 1.1757135667553382, + "kl_loss": 0.0437551811337471, + "loss_ib": 0.0007532219169661403, + "step": 4089 + }, + { + "ce_ib": 2.2125253677368164, + "ce_orig": 0.565579354763031, + "epoch": 1.1757135667553382, + "kl_loss": 0.035599540919065475, + "loss_ib": 0.0005772479344159365, + "step": 4089 + }, + { + "ce_ib": 3.5368144512176514, + "ce_orig": 0.9624037146568298, + "epoch": 1.1757135667553382, + "kl_loss": 0.05931694060564041, + "loss_ib": 0.000946850806940347, + "step": 4089 + }, + { + "epoch": 1.176001150334316, + "grad_norm": 0.10090769082307816, + "learning_rate": 3.4746048188948805e-05, + "loss": 0.8455, + "step": 4090 + }, + { + "ce_ib": 3.8643462657928467, + "ce_orig": 1.0811874866485596, + "epoch": 1.176001150334316, + "kl_loss": 0.06840025633573532, + "loss_ib": 0.0010704371379688382, + "step": 4090 + }, + { + "ce_ib": 3.089115619659424, + "ce_orig": 0.9570595026016235, + "epoch": 1.176001150334316, + "kl_loss": 0.02945854887366295, + "loss_ib": 0.0006034970283508301, + "step": 4090 + }, + { + "ce_ib": 5.183467864990234, + "ce_orig": 1.4724388122558594, + "epoch": 1.176001150334316, + "kl_loss": 0.053548797965049744, + "loss_ib": 0.0010538346832618117, + "step": 4090 + }, + { + "ce_ib": 3.990940809249878, + "ce_orig": 1.0505563020706177, + "epoch": 1.176001150334316, + "kl_loss": 0.06942702829837799, + "loss_ib": 0.0010933643206954002, + "step": 4090 + }, + { + "ce_ib": 4.21773624420166, + "ce_orig": 0.9819501042366028, + "epoch": 1.1762887339132935, + "kl_loss": 0.052820123732089996, + "loss_ib": 0.0009499748121015728, + "step": 4091 + }, + { + "ce_ib": 1.9573994874954224, + "ce_orig": 0.49501022696495056, + "epoch": 1.1762887339132935, + "kl_loss": 0.025559578090906143, + "loss_ib": 0.0004513357125688344, + "step": 4091 + }, + { + "ce_ib": 3.2579119205474854, + "ce_orig": 0.802492618560791, + "epoch": 1.1762887339132935, + "kl_loss": 0.05006483197212219, + "loss_ib": 0.0008264395291917026, + "step": 4091 + }, + { + "ce_ib": 1.9511853456497192, + "ce_orig": 0.4709097445011139, + "epoch": 1.1762887339132935, + "kl_loss": 0.037240393459796906, + "loss_ib": 0.0005675224238075316, + "step": 4091 + }, + { + "ce_ib": 3.5160696506500244, + "ce_orig": 0.8788793087005615, + "epoch": 1.1765763174922712, + "kl_loss": 0.03261923789978027, + "loss_ib": 0.000677799282129854, + "step": 4092 + }, + { + "ce_ib": 5.099472522735596, + "ce_orig": 1.354433298110962, + "epoch": 1.1765763174922712, + "kl_loss": 0.033358655869960785, + "loss_ib": 0.000843533780425787, + "step": 4092 + }, + { + "ce_ib": 7.47324800491333, + "ce_orig": 2.1325783729553223, + "epoch": 1.1765763174922712, + "kl_loss": 0.03002026490867138, + "loss_ib": 0.0010475274175405502, + "step": 4092 + }, + { + "ce_ib": 3.3339905738830566, + "ce_orig": 0.7370797395706177, + "epoch": 1.1765763174922712, + "kl_loss": 0.03074461780488491, + "loss_ib": 0.0006408452172763646, + "step": 4092 + }, + { + "ce_ib": 3.7695906162261963, + "ce_orig": 0.80293869972229, + "epoch": 1.1768639010712487, + "kl_loss": 0.06360404193401337, + "loss_ib": 0.0010129994479939342, + "step": 4093 + }, + { + "ce_ib": 3.47586727142334, + "ce_orig": 0.868964672088623, + "epoch": 1.1768639010712487, + "kl_loss": 0.04715689644217491, + "loss_ib": 0.0008191557135432959, + "step": 4093 + }, + { + "ce_ib": 2.212841272354126, + "ce_orig": 0.6796269416809082, + "epoch": 1.1768639010712487, + "kl_loss": 0.03961137682199478, + "loss_ib": 0.0006173978326842189, + "step": 4093 + }, + { + "ce_ib": 2.4548399448394775, + "ce_orig": 0.8673037886619568, + "epoch": 1.1768639010712487, + "kl_loss": 0.01835959032177925, + "loss_ib": 0.00042907989700324833, + "step": 4093 + }, + { + "ce_ib": 2.9265589714050293, + "ce_orig": 0.8055236339569092, + "epoch": 1.1771514846502265, + "kl_loss": 0.03978993371129036, + "loss_ib": 0.0006905551999807358, + "step": 4094 + }, + { + "ce_ib": 3.75889253616333, + "ce_orig": 1.004042387008667, + "epoch": 1.1771514846502265, + "kl_loss": 0.04860831797122955, + "loss_ib": 0.0008619723957963288, + "step": 4094 + }, + { + "ce_ib": 3.946061611175537, + "ce_orig": 1.0689488649368286, + "epoch": 1.1771514846502265, + "kl_loss": 0.06540164351463318, + "loss_ib": 0.0010486225364729762, + "step": 4094 + }, + { + "ce_ib": 3.200946569442749, + "ce_orig": 0.7716652154922485, + "epoch": 1.1771514846502265, + "kl_loss": 0.0669221505522728, + "loss_ib": 0.0009893160313367844, + "step": 4094 + }, + { + "epoch": 1.1774390682292042, + "grad_norm": 0.10269176214933395, + "learning_rate": 3.471030232931975e-05, + "loss": 0.8262, + "step": 4095 + }, + { + "ce_ib": 2.3462793827056885, + "ce_orig": 0.5735611319541931, + "epoch": 1.1774390682292042, + "kl_loss": 0.04643160104751587, + "loss_ib": 0.000698943913448602, + "step": 4095 + }, + { + "ce_ib": 3.256420612335205, + "ce_orig": 0.7720333933830261, + "epoch": 1.1774390682292042, + "kl_loss": 0.050093088299036026, + "loss_ib": 0.0008265728829428554, + "step": 4095 + }, + { + "ce_ib": 2.7989113330841064, + "ce_orig": 0.8084468245506287, + "epoch": 1.1774390682292042, + "kl_loss": 0.03365886211395264, + "loss_ib": 0.0006164797232486308, + "step": 4095 + }, + { + "ce_ib": 3.2840051651000977, + "ce_orig": 0.7052784562110901, + "epoch": 1.1774390682292042, + "kl_loss": 0.03459359332919121, + "loss_ib": 0.00067433639196679, + "step": 4095 + }, + { + "ce_ib": 2.788020372390747, + "ce_orig": 0.5542169809341431, + "epoch": 1.1777266518081817, + "kl_loss": 0.027974678203463554, + "loss_ib": 0.000558548781555146, + "step": 4096 + }, + { + "ce_ib": 4.215413570404053, + "ce_orig": 1.2573494911193848, + "epoch": 1.1777266518081817, + "kl_loss": 0.03570351004600525, + "loss_ib": 0.0007785764173604548, + "step": 4096 + }, + { + "ce_ib": 4.208841800689697, + "ce_orig": 1.5282254219055176, + "epoch": 1.1777266518081817, + "kl_loss": 0.03400009870529175, + "loss_ib": 0.0007608851883560419, + "step": 4096 + }, + { + "ce_ib": 4.906008243560791, + "ce_orig": 1.207848072052002, + "epoch": 1.1777266518081817, + "kl_loss": 0.058770276606082916, + "loss_ib": 0.0010783035540953279, + "step": 4096 + }, + { + "ce_ib": 1.615176796913147, + "ce_orig": 0.4278407096862793, + "epoch": 1.1780142353871594, + "kl_loss": 0.026982128620147705, + "loss_ib": 0.0004313389363233, + "step": 4097 + }, + { + "ce_ib": 3.0805768966674805, + "ce_orig": 0.7540904879570007, + "epoch": 1.1780142353871594, + "kl_loss": 0.047914035618305206, + "loss_ib": 0.0007871980196796358, + "step": 4097 + }, + { + "ce_ib": 1.9432787895202637, + "ce_orig": 0.48191651701927185, + "epoch": 1.1780142353871594, + "kl_loss": 0.09305145591497421, + "loss_ib": 0.0011248424416407943, + "step": 4097 + }, + { + "ce_ib": 3.264585256576538, + "ce_orig": 0.8077659606933594, + "epoch": 1.1780142353871594, + "kl_loss": 0.04927460476756096, + "loss_ib": 0.0008192046079784632, + "step": 4097 + }, + { + "ce_ib": 6.87401008605957, + "ce_orig": 1.1046199798583984, + "epoch": 1.178301818966137, + "kl_loss": 0.03759792447090149, + "loss_ib": 0.001063380273990333, + "step": 4098 + }, + { + "ce_ib": 1.0795478820800781, + "ce_orig": 0.1965106576681137, + "epoch": 1.178301818966137, + "kl_loss": 0.03970737010240555, + "loss_ib": 0.0005050284671597183, + "step": 4098 + }, + { + "ce_ib": 2.86240553855896, + "ce_orig": 0.6864200234413147, + "epoch": 1.178301818966137, + "kl_loss": 0.030681269243359566, + "loss_ib": 0.000593053235206753, + "step": 4098 + }, + { + "ce_ib": 3.9076366424560547, + "ce_orig": 0.9478845000267029, + "epoch": 1.178301818966137, + "kl_loss": 0.04955653101205826, + "loss_ib": 0.0008863289258442819, + "step": 4098 + }, + { + "ce_ib": 3.7144174575805664, + "ce_orig": 0.9718445539474487, + "epoch": 1.1785894025451147, + "kl_loss": 0.03398981690406799, + "loss_ib": 0.000711339816916734, + "step": 4099 + }, + { + "ce_ib": 3.182572841644287, + "ce_orig": 0.6814892888069153, + "epoch": 1.1785894025451147, + "kl_loss": 0.04721170663833618, + "loss_ib": 0.0007903743535280228, + "step": 4099 + }, + { + "ce_ib": 2.6996376514434814, + "ce_orig": 0.694029688835144, + "epoch": 1.1785894025451147, + "kl_loss": 0.03834633529186249, + "loss_ib": 0.0006534270942211151, + "step": 4099 + }, + { + "ce_ib": 3.25199818611145, + "ce_orig": 0.6533325910568237, + "epoch": 1.1785894025451147, + "kl_loss": 0.052359797060489655, + "loss_ib": 0.0008487977902404964, + "step": 4099 + }, + { + "epoch": 1.1788769861240924, + "grad_norm": 0.11244124174118042, + "learning_rate": 3.4674533075318575e-05, + "loss": 0.855, + "step": 4100 + }, + { + "ce_ib": 3.512049913406372, + "ce_orig": 0.856780469417572, + "epoch": 1.1788769861240924, + "kl_loss": 0.04918307065963745, + "loss_ib": 0.0008430356974713504, + "step": 4100 + }, + { + "ce_ib": 1.930924654006958, + "ce_orig": 0.6327962279319763, + "epoch": 1.1788769861240924, + "kl_loss": 0.03436974808573723, + "loss_ib": 0.0005367899429984391, + "step": 4100 + }, + { + "ce_ib": 3.5834739208221436, + "ce_orig": 1.0136778354644775, + "epoch": 1.1788769861240924, + "kl_loss": 0.044055014848709106, + "loss_ib": 0.0007988975266925991, + "step": 4100 + }, + { + "ce_ib": 1.5417922735214233, + "ce_orig": 0.2620598375797272, + "epoch": 1.1788769861240924, + "kl_loss": 0.07107705622911453, + "loss_ib": 0.0008649497758597136, + "step": 4100 + }, + { + "ce_ib": 4.8665995597839355, + "ce_orig": 1.3445930480957031, + "epoch": 1.17916456970307, + "kl_loss": 0.03804903104901314, + "loss_ib": 0.0008671502582728863, + "step": 4101 + }, + { + "ce_ib": 4.306032180786133, + "ce_orig": 1.0466358661651611, + "epoch": 1.17916456970307, + "kl_loss": 0.0388450026512146, + "loss_ib": 0.0008190532098524272, + "step": 4101 + }, + { + "ce_ib": 3.9451615810394287, + "ce_orig": 1.1379040479660034, + "epoch": 1.17916456970307, + "kl_loss": 0.04264376312494278, + "loss_ib": 0.0008209537481889129, + "step": 4101 + }, + { + "ce_ib": 2.4097378253936768, + "ce_orig": 0.5963806509971619, + "epoch": 1.17916456970307, + "kl_loss": 0.03603529930114746, + "loss_ib": 0.0006013267557136714, + "step": 4101 + }, + { + "ce_ib": 4.231184482574463, + "ce_orig": 1.0067731142044067, + "epoch": 1.1794521532820477, + "kl_loss": 0.03290722146630287, + "loss_ib": 0.0007521906518377364, + "step": 4102 + }, + { + "ce_ib": 1.3492039442062378, + "ce_orig": 0.2520616352558136, + "epoch": 1.1794521532820477, + "kl_loss": 0.058759719133377075, + "loss_ib": 0.000722517550457269, + "step": 4102 + }, + { + "ce_ib": 3.189283609390259, + "ce_orig": 0.8607039451599121, + "epoch": 1.1794521532820477, + "kl_loss": 0.047782253473997116, + "loss_ib": 0.0007967508863657713, + "step": 4102 + }, + { + "ce_ib": 1.757505178451538, + "ce_orig": 0.4477183222770691, + "epoch": 1.1794521532820477, + "kl_loss": 0.038632526993751526, + "loss_ib": 0.0005620757583528757, + "step": 4102 + }, + { + "ce_ib": 2.723435163497925, + "ce_orig": 0.5117030739784241, + "epoch": 1.1797397368610252, + "kl_loss": 0.048233762383461, + "loss_ib": 0.00075468112481758, + "step": 4103 + }, + { + "ce_ib": 3.7353057861328125, + "ce_orig": 1.2154629230499268, + "epoch": 1.1797397368610252, + "kl_loss": 0.043749578297138214, + "loss_ib": 0.0008110263734124601, + "step": 4103 + }, + { + "ce_ib": 3.935136556625366, + "ce_orig": 1.0420209169387817, + "epoch": 1.1797397368610252, + "kl_loss": 0.04051772877573967, + "loss_ib": 0.0007986908894963562, + "step": 4103 + }, + { + "ce_ib": 2.647617816925049, + "ce_orig": 0.882086992263794, + "epoch": 1.1797397368610252, + "kl_loss": 0.03787729889154434, + "loss_ib": 0.0006435348186641932, + "step": 4103 + }, + { + "ce_ib": 3.850994825363159, + "ce_orig": 1.0246312618255615, + "epoch": 1.180027320440003, + "kl_loss": 0.04843863099813461, + "loss_ib": 0.0008694857242517173, + "step": 4104 + }, + { + "ce_ib": 4.007253646850586, + "ce_orig": 0.8393428325653076, + "epoch": 1.180027320440003, + "kl_loss": 0.05691166967153549, + "loss_ib": 0.0009698420763015747, + "step": 4104 + }, + { + "ce_ib": 4.730422019958496, + "ce_orig": 1.2312390804290771, + "epoch": 1.180027320440003, + "kl_loss": 0.05076725035905838, + "loss_ib": 0.0009807146852836013, + "step": 4104 + }, + { + "ce_ib": 2.0152974128723145, + "ce_orig": 0.6057228446006775, + "epoch": 1.180027320440003, + "kl_loss": 0.033083055168390274, + "loss_ib": 0.0005323602817952633, + "step": 4104 + }, + { + "epoch": 1.1803149040189804, + "grad_norm": 0.11137829720973969, + "learning_rate": 3.463874051312172e-05, + "loss": 0.8544, + "step": 4105 + }, + { + "ce_ib": 4.03087854385376, + "ce_orig": 0.9384725689888, + "epoch": 1.1803149040189804, + "kl_loss": 0.06059998646378517, + "loss_ib": 0.0010090876603499055, + "step": 4105 + }, + { + "ce_ib": 2.847588300704956, + "ce_orig": 0.5274084806442261, + "epoch": 1.1803149040189804, + "kl_loss": 0.02192714810371399, + "loss_ib": 0.0005040302639827132, + "step": 4105 + }, + { + "ce_ib": 4.130232810974121, + "ce_orig": 1.0593607425689697, + "epoch": 1.1803149040189804, + "kl_loss": 0.050473880022764206, + "loss_ib": 0.0009177620522677898, + "step": 4105 + }, + { + "ce_ib": 2.2491610050201416, + "ce_orig": 0.7374749779701233, + "epoch": 1.1803149040189804, + "kl_loss": 0.019327398389577866, + "loss_ib": 0.00041819005855359137, + "step": 4105 + }, + { + "ce_ib": 3.616654872894287, + "ce_orig": 0.7310463190078735, + "epoch": 1.1806024875979582, + "kl_loss": 0.041505858302116394, + "loss_ib": 0.0007767240749672055, + "step": 4106 + }, + { + "ce_ib": 4.037013053894043, + "ce_orig": 1.058584451675415, + "epoch": 1.1806024875979582, + "kl_loss": 0.042909666895866394, + "loss_ib": 0.000832797959446907, + "step": 4106 + }, + { + "ce_ib": 3.0185914039611816, + "ce_orig": 0.9077674150466919, + "epoch": 1.1806024875979582, + "kl_loss": 0.07406196743249893, + "loss_ib": 0.0010424788342788815, + "step": 4106 + }, + { + "ce_ib": 2.7820122241973877, + "ce_orig": 0.8172048330307007, + "epoch": 1.1806024875979582, + "kl_loss": 0.054494671523571014, + "loss_ib": 0.000823147885967046, + "step": 4106 + }, + { + "ce_ib": 4.483510971069336, + "ce_orig": 0.9649346470832825, + "epoch": 1.1808900711769357, + "kl_loss": 0.04747399687767029, + "loss_ib": 0.0009230910218320787, + "step": 4107 + }, + { + "ce_ib": 3.621595859527588, + "ce_orig": 0.8689681887626648, + "epoch": 1.1808900711769357, + "kl_loss": 0.05343274027109146, + "loss_ib": 0.0008964869193732738, + "step": 4107 + }, + { + "ce_ib": 5.645698070526123, + "ce_orig": 1.419403076171875, + "epoch": 1.1808900711769357, + "kl_loss": 0.059960901737213135, + "loss_ib": 0.00116417882964015, + "step": 4107 + }, + { + "ce_ib": 4.293817043304443, + "ce_orig": 1.1257299184799194, + "epoch": 1.1808900711769357, + "kl_loss": 0.04649469628930092, + "loss_ib": 0.0008943286375142634, + "step": 4107 + }, + { + "ce_ib": 3.901054620742798, + "ce_orig": 1.2291839122772217, + "epoch": 1.1811776547559134, + "kl_loss": 0.04261721670627594, + "loss_ib": 0.0008162776357494295, + "step": 4108 + }, + { + "ce_ib": 3.63789701461792, + "ce_orig": 0.7562597990036011, + "epoch": 1.1811776547559134, + "kl_loss": 0.06391295045614243, + "loss_ib": 0.0010029191616922617, + "step": 4108 + }, + { + "ce_ib": 1.5982718467712402, + "ce_orig": 0.3079203963279724, + "epoch": 1.1811776547559134, + "kl_loss": 0.03254257142543793, + "loss_ib": 0.00048525288002565503, + "step": 4108 + }, + { + "ce_ib": 2.3288230895996094, + "ce_orig": 0.48744094371795654, + "epoch": 1.1811776547559134, + "kl_loss": 0.045809514820575714, + "loss_ib": 0.0006909773801453412, + "step": 4108 + }, + { + "ce_ib": 4.641037940979004, + "ce_orig": 1.2759085893630981, + "epoch": 1.1814652383348911, + "kl_loss": 0.051327310502529144, + "loss_ib": 0.0009773768251761794, + "step": 4109 + }, + { + "ce_ib": 2.904545783996582, + "ce_orig": 0.5093358755111694, + "epoch": 1.1814652383348911, + "kl_loss": 0.07222701609134674, + "loss_ib": 0.0010127247078344226, + "step": 4109 + }, + { + "ce_ib": 2.193906307220459, + "ce_orig": 0.3861885666847229, + "epoch": 1.1814652383348911, + "kl_loss": 0.039252378046512604, + "loss_ib": 0.0006119143799878657, + "step": 4109 + }, + { + "ce_ib": 4.290389537811279, + "ce_orig": 1.2565556764602661, + "epoch": 1.1814652383348911, + "kl_loss": 0.02991582825779915, + "loss_ib": 0.0007281972211785614, + "step": 4109 + }, + { + "epoch": 1.1817528219138687, + "grad_norm": 0.09701438248157501, + "learning_rate": 3.460292472896176e-05, + "loss": 0.795, + "step": 4110 + }, + { + "ce_ib": 4.35580587387085, + "ce_orig": 0.7314831018447876, + "epoch": 1.1817528219138687, + "kl_loss": 0.04587741941213608, + "loss_ib": 0.0008943547145463526, + "step": 4110 + }, + { + "ce_ib": 3.020577907562256, + "ce_orig": 0.8951601982116699, + "epoch": 1.1817528219138687, + "kl_loss": 0.037657175213098526, + "loss_ib": 0.0006786295562051237, + "step": 4110 + }, + { + "ce_ib": 2.136690378189087, + "ce_orig": 0.6910571455955505, + "epoch": 1.1817528219138687, + "kl_loss": 0.03360600769519806, + "loss_ib": 0.0005497290985658765, + "step": 4110 + }, + { + "ce_ib": 5.149462699890137, + "ce_orig": 1.5845998525619507, + "epoch": 1.1817528219138687, + "kl_loss": 0.047800492495298386, + "loss_ib": 0.0009929510997608304, + "step": 4110 + }, + { + "ce_ib": 2.504129648208618, + "ce_orig": 0.5642690658569336, + "epoch": 1.1820404054928464, + "kl_loss": 0.06611359119415283, + "loss_ib": 0.0009115488501265645, + "step": 4111 + }, + { + "ce_ib": 2.420827865600586, + "ce_orig": 0.6804417967796326, + "epoch": 1.1820404054928464, + "kl_loss": 0.03973441198468208, + "loss_ib": 0.0006394268712028861, + "step": 4111 + }, + { + "ce_ib": 3.9728646278381348, + "ce_orig": 1.2776161432266235, + "epoch": 1.1820404054928464, + "kl_loss": 0.045508574694395065, + "loss_ib": 0.0008523722062818706, + "step": 4111 + }, + { + "ce_ib": 3.057152509689331, + "ce_orig": 0.892647385597229, + "epoch": 1.1820404054928464, + "kl_loss": 0.026429682970046997, + "loss_ib": 0.0005700120818801224, + "step": 4111 + }, + { + "ce_ib": 2.1643662452697754, + "ce_orig": 0.49316632747650146, + "epoch": 1.182327989071824, + "kl_loss": 0.04637383669614792, + "loss_ib": 0.0006801749696023762, + "step": 4112 + }, + { + "ce_ib": 3.8099489212036133, + "ce_orig": 1.1766287088394165, + "epoch": 1.182327989071824, + "kl_loss": 0.03707131743431091, + "loss_ib": 0.0007517080521211028, + "step": 4112 + }, + { + "ce_ib": 3.2484591007232666, + "ce_orig": 0.7002201676368713, + "epoch": 1.182327989071824, + "kl_loss": 0.07167743891477585, + "loss_ib": 0.001041620271280408, + "step": 4112 + }, + { + "ce_ib": 2.2388432025909424, + "ce_orig": 0.6891441941261292, + "epoch": 1.182327989071824, + "kl_loss": 0.04456893727183342, + "loss_ib": 0.0006695736665278673, + "step": 4112 + }, + { + "ce_ib": 1.4523022174835205, + "ce_orig": 0.3567383289337158, + "epoch": 1.1826155726508016, + "kl_loss": 0.11064593493938446, + "loss_ib": 0.001251689507625997, + "step": 4113 + }, + { + "ce_ib": 2.571147918701172, + "ce_orig": 0.5453408360481262, + "epoch": 1.1826155726508016, + "kl_loss": 0.062447406351566315, + "loss_ib": 0.0008815887849777937, + "step": 4113 + }, + { + "ce_ib": 4.196478843688965, + "ce_orig": 0.7673947811126709, + "epoch": 1.1826155726508016, + "kl_loss": 0.04961930960416794, + "loss_ib": 0.0009158409666270018, + "step": 4113 + }, + { + "ce_ib": 3.466614246368408, + "ce_orig": 0.7640759348869324, + "epoch": 1.1826155726508016, + "kl_loss": 0.04433758556842804, + "loss_ib": 0.0007900372729636729, + "step": 4113 + }, + { + "ce_ib": 2.633575677871704, + "ce_orig": 0.7548553943634033, + "epoch": 1.1829031562297794, + "kl_loss": 0.046628449112176895, + "loss_ib": 0.0007296420517377555, + "step": 4114 + }, + { + "ce_ib": 2.7739388942718506, + "ce_orig": 0.8052148818969727, + "epoch": 1.1829031562297794, + "kl_loss": 0.028577394783496857, + "loss_ib": 0.0005631678504869342, + "step": 4114 + }, + { + "ce_ib": 3.257443904876709, + "ce_orig": 0.8099477291107178, + "epoch": 1.1829031562297794, + "kl_loss": 0.041780099272727966, + "loss_ib": 0.0007435453590005636, + "step": 4114 + }, + { + "ce_ib": 3.16601300239563, + "ce_orig": 1.1386668682098389, + "epoch": 1.1829031562297794, + "kl_loss": 0.037310875952243805, + "loss_ib": 0.0006897100829519331, + "step": 4114 + }, + { + "epoch": 1.1831907398087569, + "grad_norm": 0.11267747730016708, + "learning_rate": 3.456708580912725e-05, + "loss": 0.8424, + "step": 4115 + }, + { + "ce_ib": 5.016397476196289, + "ce_orig": 1.2948570251464844, + "epoch": 1.1831907398087569, + "kl_loss": 0.06048107147216797, + "loss_ib": 0.0011064503341913223, + "step": 4115 + }, + { + "ce_ib": 2.10347580909729, + "ce_orig": 0.5747244954109192, + "epoch": 1.1831907398087569, + "kl_loss": 0.02318824827671051, + "loss_ib": 0.00044223005534149706, + "step": 4115 + }, + { + "ce_ib": 2.4943466186523438, + "ce_orig": 0.6606947183609009, + "epoch": 1.1831907398087569, + "kl_loss": 0.046432748436927795, + "loss_ib": 0.0007137621287256479, + "step": 4115 + }, + { + "ce_ib": 5.251664638519287, + "ce_orig": 0.9774067401885986, + "epoch": 1.1831907398087569, + "kl_loss": 0.037423618137836456, + "loss_ib": 0.0008994025993160903, + "step": 4115 + }, + { + "ce_ib": 3.2131457328796387, + "ce_orig": 0.7004299163818359, + "epoch": 1.1834783233877346, + "kl_loss": 0.03487040475010872, + "loss_ib": 0.0006700186058878899, + "step": 4116 + }, + { + "ce_ib": 3.2231924533843994, + "ce_orig": 1.0597739219665527, + "epoch": 1.1834783233877346, + "kl_loss": 0.03948308154940605, + "loss_ib": 0.0007171500474214554, + "step": 4116 + }, + { + "ce_ib": 3.1380021572113037, + "ce_orig": 0.6756719946861267, + "epoch": 1.1834783233877346, + "kl_loss": 0.0379507914185524, + "loss_ib": 0.0006933080730959773, + "step": 4116 + }, + { + "ce_ib": 2.0442185401916504, + "ce_orig": 0.5986024737358093, + "epoch": 1.1834783233877346, + "kl_loss": 0.031509529799222946, + "loss_ib": 0.0005195171106606722, + "step": 4116 + }, + { + "ce_ib": 5.463915824890137, + "ce_orig": 1.5825501680374146, + "epoch": 1.1837659069667121, + "kl_loss": 0.05509118363261223, + "loss_ib": 0.0010973033495247364, + "step": 4117 + }, + { + "ce_ib": 4.081575870513916, + "ce_orig": 0.9125543236732483, + "epoch": 1.1837659069667121, + "kl_loss": 0.043122656643390656, + "loss_ib": 0.0008393841562792659, + "step": 4117 + }, + { + "ce_ib": 4.2717156410217285, + "ce_orig": 1.36388099193573, + "epoch": 1.1837659069667121, + "kl_loss": 0.02876734733581543, + "loss_ib": 0.0007148450240492821, + "step": 4117 + }, + { + "ce_ib": 3.8505311012268066, + "ce_orig": 1.030965805053711, + "epoch": 1.1837659069667121, + "kl_loss": 0.048872653394937515, + "loss_ib": 0.0008737796451896429, + "step": 4117 + }, + { + "ce_ib": 2.978689432144165, + "ce_orig": 0.8931180238723755, + "epoch": 1.1840534905456899, + "kl_loss": 0.03296312689781189, + "loss_ib": 0.0006275001796893775, + "step": 4118 + }, + { + "ce_ib": 4.414439678192139, + "ce_orig": 1.1897997856140137, + "epoch": 1.1840534905456899, + "kl_loss": 0.04331401735544205, + "loss_ib": 0.0008745841332711279, + "step": 4118 + }, + { + "ce_ib": 5.242098808288574, + "ce_orig": 1.3908759355545044, + "epoch": 1.1840534905456899, + "kl_loss": 0.04052725434303284, + "loss_ib": 0.0009294823976233602, + "step": 4118 + }, + { + "ce_ib": 4.255011081695557, + "ce_orig": 0.6050722599029541, + "epoch": 1.1840534905456899, + "kl_loss": 0.06700211763381958, + "loss_ib": 0.0010955221951007843, + "step": 4118 + }, + { + "ce_ib": 3.881540536880493, + "ce_orig": 0.4770354628562927, + "epoch": 1.1843410741246676, + "kl_loss": 0.037290751934051514, + "loss_ib": 0.0007610616157762706, + "step": 4119 + }, + { + "ce_ib": 2.8689794540405273, + "ce_orig": 0.7341291904449463, + "epoch": 1.1843410741246676, + "kl_loss": 0.03946757689118385, + "loss_ib": 0.0006815737579017878, + "step": 4119 + }, + { + "ce_ib": 5.010638236999512, + "ce_orig": 1.3153318166732788, + "epoch": 1.1843410741246676, + "kl_loss": 0.049925547093153, + "loss_ib": 0.0010003192583099008, + "step": 4119 + }, + { + "ce_ib": 3.5250349044799805, + "ce_orig": 1.0898810625076294, + "epoch": 1.1843410741246676, + "kl_loss": 0.039512209594249725, + "loss_ib": 0.000747625541407615, + "step": 4119 + }, + { + "epoch": 1.1846286577036451, + "grad_norm": 0.11444760113954544, + "learning_rate": 3.4531223839962453e-05, + "loss": 0.8485, + "step": 4120 + }, + { + "ce_ib": 3.4020862579345703, + "ce_orig": 0.6965646147727966, + "epoch": 1.1846286577036451, + "kl_loss": 0.07042412459850311, + "loss_ib": 0.0010444498620927334, + "step": 4120 + }, + { + "ce_ib": 4.072257995605469, + "ce_orig": 0.7672595977783203, + "epoch": 1.1846286577036451, + "kl_loss": 0.05561373382806778, + "loss_ib": 0.0009633630979806185, + "step": 4120 + }, + { + "ce_ib": 2.3595046997070312, + "ce_orig": 0.597150444984436, + "epoch": 1.1846286577036451, + "kl_loss": 0.04404594749212265, + "loss_ib": 0.0006764098652638495, + "step": 4120 + }, + { + "ce_ib": 3.0067625045776367, + "ce_orig": 0.39774420857429504, + "epoch": 1.1846286577036451, + "kl_loss": 0.05459222570061684, + "loss_ib": 0.0008465985301882029, + "step": 4120 + }, + { + "ce_ib": 1.706845998764038, + "ce_orig": 0.41621044278144836, + "epoch": 1.1849162412826229, + "kl_loss": 0.08031831681728363, + "loss_ib": 0.000973867776338011, + "step": 4121 + }, + { + "ce_ib": 2.3590247631073, + "ce_orig": 0.4527750611305237, + "epoch": 1.1849162412826229, + "kl_loss": 0.05337991192936897, + "loss_ib": 0.0007697016117163002, + "step": 4121 + }, + { + "ce_ib": 1.84221613407135, + "ce_orig": 0.531937837600708, + "epoch": 1.1849162412826229, + "kl_loss": 0.023526456207036972, + "loss_ib": 0.00041948616853915155, + "step": 4121 + }, + { + "ce_ib": 2.535256862640381, + "ce_orig": 0.7898885011672974, + "epoch": 1.1849162412826229, + "kl_loss": 0.026834269985556602, + "loss_ib": 0.0005218684091232717, + "step": 4121 + }, + { + "ce_ib": 3.452545404434204, + "ce_orig": 0.8024022579193115, + "epoch": 1.1852038248616004, + "kl_loss": 0.06451360881328583, + "loss_ib": 0.0009903906611725688, + "step": 4122 + }, + { + "ce_ib": 3.4435646533966064, + "ce_orig": 0.9175349473953247, + "epoch": 1.1852038248616004, + "kl_loss": 0.05436237156391144, + "loss_ib": 0.0008879801607690752, + "step": 4122 + }, + { + "ce_ib": 3.816559076309204, + "ce_orig": 0.6937893629074097, + "epoch": 1.1852038248616004, + "kl_loss": 0.04108655825257301, + "loss_ib": 0.0007925215177237988, + "step": 4122 + }, + { + "ce_ib": 1.8327714204788208, + "ce_orig": 0.577699601650238, + "epoch": 1.1852038248616004, + "kl_loss": 0.022296933457255363, + "loss_ib": 0.0004062464868184179, + "step": 4122 + }, + { + "ce_ib": 2.4527392387390137, + "ce_orig": 0.6443850994110107, + "epoch": 1.185491408440578, + "kl_loss": 0.03593745082616806, + "loss_ib": 0.0006046484340913594, + "step": 4123 + }, + { + "ce_ib": 5.5243377685546875, + "ce_orig": 1.3548508882522583, + "epoch": 1.185491408440578, + "kl_loss": 0.061861202120780945, + "loss_ib": 0.0011710457038134336, + "step": 4123 + }, + { + "ce_ib": 2.8849005699157715, + "ce_orig": 0.7359893918037415, + "epoch": 1.185491408440578, + "kl_loss": 0.04530318081378937, + "loss_ib": 0.0007415218278765678, + "step": 4123 + }, + { + "ce_ib": 2.5974414348602295, + "ce_orig": 0.688400149345398, + "epoch": 1.185491408440578, + "kl_loss": 0.03344060108065605, + "loss_ib": 0.0005941501003690064, + "step": 4123 + }, + { + "ce_ib": 1.7578495740890503, + "ce_orig": 0.37354815006256104, + "epoch": 1.1857789920195556, + "kl_loss": 0.09433000534772873, + "loss_ib": 0.0011190850054845214, + "step": 4124 + }, + { + "ce_ib": 3.301607847213745, + "ce_orig": 1.0792378187179565, + "epoch": 1.1857789920195556, + "kl_loss": 0.02920498326420784, + "loss_ib": 0.0006222106167115271, + "step": 4124 + }, + { + "ce_ib": 2.777808904647827, + "ce_orig": 0.8417788743972778, + "epoch": 1.1857789920195556, + "kl_loss": 0.02182532474398613, + "loss_ib": 0.0004960341029800475, + "step": 4124 + }, + { + "ce_ib": 2.5702483654022217, + "ce_orig": 0.7974628210067749, + "epoch": 1.1857789920195556, + "kl_loss": 0.016391413286328316, + "loss_ib": 0.0004209389444440603, + "step": 4124 + }, + { + "epoch": 1.1860665755985333, + "grad_norm": 0.12707746028900146, + "learning_rate": 3.449533890786718e-05, + "loss": 0.8467, + "step": 4125 + }, + { + "ce_ib": 2.3577582836151123, + "ce_orig": 0.7836565971374512, + "epoch": 1.1860665755985333, + "kl_loss": 0.03921603411436081, + "loss_ib": 0.0006279360968619585, + "step": 4125 + }, + { + "ce_ib": 6.382932662963867, + "ce_orig": 1.863720178604126, + "epoch": 1.1860665755985333, + "kl_loss": 0.06206509470939636, + "loss_ib": 0.001258944277651608, + "step": 4125 + }, + { + "ce_ib": 2.4215989112854004, + "ce_orig": 0.41738027334213257, + "epoch": 1.1860665755985333, + "kl_loss": 0.022421734407544136, + "loss_ib": 0.00046637721243314445, + "step": 4125 + }, + { + "ce_ib": 4.179843425750732, + "ce_orig": 1.280474066734314, + "epoch": 1.1860665755985333, + "kl_loss": 0.05252546817064285, + "loss_ib": 0.0009432390215806663, + "step": 4125 + }, + { + "ce_ib": 2.9095020294189453, + "ce_orig": 0.9957078099250793, + "epoch": 1.1863541591775109, + "kl_loss": 0.026845108717679977, + "loss_ib": 0.0005594012909568846, + "step": 4126 + }, + { + "ce_ib": 4.344351768493652, + "ce_orig": 0.7826040387153625, + "epoch": 1.1863541591775109, + "kl_loss": 0.04985486716032028, + "loss_ib": 0.0009329838212579489, + "step": 4126 + }, + { + "ce_ib": 3.0243120193481445, + "ce_orig": 0.6711755394935608, + "epoch": 1.1863541591775109, + "kl_loss": 0.0877290889620781, + "loss_ib": 0.0011797220213338733, + "step": 4126 + }, + { + "ce_ib": 4.478969097137451, + "ce_orig": 0.7944027185440063, + "epoch": 1.1863541591775109, + "kl_loss": 0.05823855102062225, + "loss_ib": 0.001030282350257039, + "step": 4126 + }, + { + "ce_ib": 5.169232368469238, + "ce_orig": 1.310314655303955, + "epoch": 1.1866417427564886, + "kl_loss": 0.03358330577611923, + "loss_ib": 0.0008527562604285777, + "step": 4127 + }, + { + "ce_ib": 1.94269859790802, + "ce_orig": 0.31790581345558167, + "epoch": 1.1866417427564886, + "kl_loss": 0.058707479387521744, + "loss_ib": 0.0007813445990905166, + "step": 4127 + }, + { + "ce_ib": 2.9359941482543945, + "ce_orig": 0.708942174911499, + "epoch": 1.1866417427564886, + "kl_loss": 0.0425400584936142, + "loss_ib": 0.0007189999450929463, + "step": 4127 + }, + { + "ce_ib": 2.324506998062134, + "ce_orig": 0.3873315751552582, + "epoch": 1.1866417427564886, + "kl_loss": 0.08827231824398041, + "loss_ib": 0.0011151737999171019, + "step": 4127 + }, + { + "ce_ib": 2.981813669204712, + "ce_orig": 0.7610673308372498, + "epoch": 1.1869293263354663, + "kl_loss": 0.02289702370762825, + "loss_ib": 0.0005271515692584217, + "step": 4128 + }, + { + "ce_ib": 2.8658087253570557, + "ce_orig": 0.5590887665748596, + "epoch": 1.1869293263354663, + "kl_loss": 0.05672168359160423, + "loss_ib": 0.0008537977118976414, + "step": 4128 + }, + { + "ce_ib": 3.497929811477661, + "ce_orig": 0.8085107803344727, + "epoch": 1.1869293263354663, + "kl_loss": 0.05127087980508804, + "loss_ib": 0.0008625017944723368, + "step": 4128 + }, + { + "ce_ib": 2.8351316452026367, + "ce_orig": 0.7345764636993408, + "epoch": 1.1869293263354663, + "kl_loss": 0.04526571184396744, + "loss_ib": 0.0007361702737398446, + "step": 4128 + }, + { + "ce_ib": 3.2440483570098877, + "ce_orig": 0.5571584701538086, + "epoch": 1.1872169099144438, + "kl_loss": 0.051451630890369415, + "loss_ib": 0.0008389211143366992, + "step": 4129 + }, + { + "ce_ib": 3.582444429397583, + "ce_orig": 0.7454531192779541, + "epoch": 1.1872169099144438, + "kl_loss": 0.04291212558746338, + "loss_ib": 0.0007873657159507275, + "step": 4129 + }, + { + "ce_ib": 1.1093556880950928, + "ce_orig": 0.1914501041173935, + "epoch": 1.1872169099144438, + "kl_loss": 0.08225026726722717, + "loss_ib": 0.0009334382484667003, + "step": 4129 + }, + { + "ce_ib": 4.749001979827881, + "ce_orig": 1.3809125423431396, + "epoch": 1.1872169099144438, + "kl_loss": 0.04319838061928749, + "loss_ib": 0.0009068839717656374, + "step": 4129 + }, + { + "epoch": 1.1875044934934216, + "grad_norm": 0.10661740601062775, + "learning_rate": 3.445943109929657e-05, + "loss": 0.7861, + "step": 4130 + }, + { + "ce_ib": 1.8640952110290527, + "ce_orig": 0.5164588689804077, + "epoch": 1.1875044934934216, + "kl_loss": 0.04163585603237152, + "loss_ib": 0.0006027680356055498, + "step": 4130 + }, + { + "ce_ib": 3.58040452003479, + "ce_orig": 0.6841862201690674, + "epoch": 1.1875044934934216, + "kl_loss": 0.05742833390831947, + "loss_ib": 0.0009323237463831902, + "step": 4130 + }, + { + "ce_ib": 4.3905744552612305, + "ce_orig": 0.6291157603263855, + "epoch": 1.1875044934934216, + "kl_loss": 0.0609881617128849, + "loss_ib": 0.0010489390697330236, + "step": 4130 + }, + { + "ce_ib": 3.5796380043029785, + "ce_orig": 0.8332534432411194, + "epoch": 1.1875044934934216, + "kl_loss": 0.06617820262908936, + "loss_ib": 0.001019745715893805, + "step": 4130 + }, + { + "ce_ib": 2.5754659175872803, + "ce_orig": 0.5761885046958923, + "epoch": 1.187792077072399, + "kl_loss": 0.032440248876810074, + "loss_ib": 0.0005819490179419518, + "step": 4131 + }, + { + "ce_ib": 2.2985873222351074, + "ce_orig": 0.5835036635398865, + "epoch": 1.187792077072399, + "kl_loss": 0.027438916265964508, + "loss_ib": 0.0005042478442192078, + "step": 4131 + }, + { + "ce_ib": 4.118464469909668, + "ce_orig": 0.9073284268379211, + "epoch": 1.187792077072399, + "kl_loss": 0.04210824519395828, + "loss_ib": 0.0008329288684763014, + "step": 4131 + }, + { + "ce_ib": 3.1423630714416504, + "ce_orig": 0.5779902935028076, + "epoch": 1.187792077072399, + "kl_loss": 0.05429002642631531, + "loss_ib": 0.0008571365033276379, + "step": 4131 + }, + { + "ce_ib": 3.837203025817871, + "ce_orig": 0.926260769367218, + "epoch": 1.1880796606513768, + "kl_loss": 0.03179824724793434, + "loss_ib": 0.0007017027237452567, + "step": 4132 + }, + { + "ce_ib": 5.820156097412109, + "ce_orig": 1.8491336107254028, + "epoch": 1.1880796606513768, + "kl_loss": 0.06634218990802765, + "loss_ib": 0.0012454374227672815, + "step": 4132 + }, + { + "ce_ib": 3.1341171264648438, + "ce_orig": 0.5828026533126831, + "epoch": 1.1880796606513768, + "kl_loss": 0.026080846786499023, + "loss_ib": 0.0005742201465182006, + "step": 4132 + }, + { + "ce_ib": 5.190187931060791, + "ce_orig": 1.0644030570983887, + "epoch": 1.1880796606513768, + "kl_loss": 0.06787175685167313, + "loss_ib": 0.0011977363610640168, + "step": 4132 + }, + { + "ce_ib": 3.798114538192749, + "ce_orig": 0.8966249227523804, + "epoch": 1.1883672442303546, + "kl_loss": 0.07535215467214584, + "loss_ib": 0.0011333329603075981, + "step": 4133 + }, + { + "ce_ib": 4.719799041748047, + "ce_orig": 1.1518748998641968, + "epoch": 1.1883672442303546, + "kl_loss": 0.04817977547645569, + "loss_ib": 0.0009537776350043714, + "step": 4133 + }, + { + "ce_ib": 4.3787031173706055, + "ce_orig": 1.227022409439087, + "epoch": 1.1883672442303546, + "kl_loss": 0.043215490877628326, + "loss_ib": 0.0008700251346454024, + "step": 4133 + }, + { + "ce_ib": 2.818857192993164, + "ce_orig": 0.763292133808136, + "epoch": 1.1883672442303546, + "kl_loss": 0.04413185641169548, + "loss_ib": 0.000723204284440726, + "step": 4133 + }, + { + "ce_ib": 3.125790596008301, + "ce_orig": 0.6211824417114258, + "epoch": 1.188654827809332, + "kl_loss": 0.053941406309604645, + "loss_ib": 0.0008519930997863412, + "step": 4134 + }, + { + "ce_ib": 3.8716237545013428, + "ce_orig": 1.1556717157363892, + "epoch": 1.188654827809332, + "kl_loss": 0.04587564617395401, + "loss_ib": 0.0008459187811240554, + "step": 4134 + }, + { + "ce_ib": 4.217957973480225, + "ce_orig": 0.8917379975318909, + "epoch": 1.188654827809332, + "kl_loss": 0.1134573370218277, + "loss_ib": 0.0015563690103590488, + "step": 4134 + }, + { + "ce_ib": 3.5595688819885254, + "ce_orig": 0.5876014232635498, + "epoch": 1.188654827809332, + "kl_loss": 0.047059692442417145, + "loss_ib": 0.0008265538490377367, + "step": 4134 + }, + { + "epoch": 1.1889424113883098, + "grad_norm": 0.10033486783504486, + "learning_rate": 3.442350050076085e-05, + "loss": 0.8195, + "step": 4135 + }, + { + "ce_ib": 3.6424965858459473, + "ce_orig": 0.6888864040374756, + "epoch": 1.1889424113883098, + "kl_loss": 0.045082174241542816, + "loss_ib": 0.0008150713983923197, + "step": 4135 + }, + { + "ce_ib": 5.213893413543701, + "ce_orig": 1.5716687440872192, + "epoch": 1.1889424113883098, + "kl_loss": 0.03174780309200287, + "loss_ib": 0.0008388673304580152, + "step": 4135 + }, + { + "ce_ib": 3.502180576324463, + "ce_orig": 0.8205206990242004, + "epoch": 1.1889424113883098, + "kl_loss": 0.04437510296702385, + "loss_ib": 0.0007939690258353949, + "step": 4135 + }, + { + "ce_ib": 2.516106128692627, + "ce_orig": 0.5914499163627625, + "epoch": 1.1889424113883098, + "kl_loss": 0.04640788584947586, + "loss_ib": 0.0007156895007938147, + "step": 4135 + }, + { + "ce_ib": 3.1711158752441406, + "ce_orig": 0.8941069841384888, + "epoch": 1.1892299949672873, + "kl_loss": 0.04467601329088211, + "loss_ib": 0.0007638716488145292, + "step": 4136 + }, + { + "ce_ib": 3.582387685775757, + "ce_orig": 0.7720442414283752, + "epoch": 1.1892299949672873, + "kl_loss": 0.05172695964574814, + "loss_ib": 0.0008755082963034511, + "step": 4136 + }, + { + "ce_ib": 2.3047432899475098, + "ce_orig": 0.631783664226532, + "epoch": 1.1892299949672873, + "kl_loss": 0.03721814602613449, + "loss_ib": 0.0006026557530276477, + "step": 4136 + }, + { + "ce_ib": 4.362317085266113, + "ce_orig": 1.4387363195419312, + "epoch": 1.1892299949672873, + "kl_loss": 0.059522394090890884, + "loss_ib": 0.001031455583870411, + "step": 4136 + }, + { + "ce_ib": 3.594930648803711, + "ce_orig": 0.8722409605979919, + "epoch": 1.189517578546265, + "kl_loss": 0.04160648584365845, + "loss_ib": 0.0007755578844808042, + "step": 4137 + }, + { + "ce_ib": 3.9693796634674072, + "ce_orig": 1.0198239088058472, + "epoch": 1.189517578546265, + "kl_loss": 0.07633348554372787, + "loss_ib": 0.001160272746346891, + "step": 4137 + }, + { + "ce_ib": 2.663846969604492, + "ce_orig": 0.7256826758384705, + "epoch": 1.189517578546265, + "kl_loss": 0.054082948714494705, + "loss_ib": 0.0008072141208685935, + "step": 4137 + }, + { + "ce_ib": 2.5242202281951904, + "ce_orig": 0.39819756150245667, + "epoch": 1.189517578546265, + "kl_loss": 0.02537700906395912, + "loss_ib": 0.0005061920965090394, + "step": 4137 + }, + { + "ce_ib": 2.854412078857422, + "ce_orig": 0.592054009437561, + "epoch": 1.1898051621252426, + "kl_loss": 0.04193562641739845, + "loss_ib": 0.0007047975086607039, + "step": 4138 + }, + { + "ce_ib": 3.057744026184082, + "ce_orig": 0.8822559118270874, + "epoch": 1.1898051621252426, + "kl_loss": 0.05863422527909279, + "loss_ib": 0.0008921166299842298, + "step": 4138 + }, + { + "ce_ib": 3.9867329597473145, + "ce_orig": 0.7910796999931335, + "epoch": 1.1898051621252426, + "kl_loss": 0.037245798856019974, + "loss_ib": 0.0007711312500759959, + "step": 4138 + }, + { + "ce_ib": 4.00822639465332, + "ce_orig": 0.8398913741111755, + "epoch": 1.1898051621252426, + "kl_loss": 0.06392379105091095, + "loss_ib": 0.0010400605387985706, + "step": 4138 + }, + { + "ce_ib": 3.0273773670196533, + "ce_orig": 0.7032596468925476, + "epoch": 1.1900927457042203, + "kl_loss": 0.031758613884449005, + "loss_ib": 0.000620323873590678, + "step": 4139 + }, + { + "ce_ib": 4.168975353240967, + "ce_orig": 1.1863340139389038, + "epoch": 1.1900927457042203, + "kl_loss": 0.04994021728634834, + "loss_ib": 0.0009162996429949999, + "step": 4139 + }, + { + "ce_ib": 5.209795951843262, + "ce_orig": 1.2580881118774414, + "epoch": 1.1900927457042203, + "kl_loss": 0.04439668729901314, + "loss_ib": 0.0009649464045651257, + "step": 4139 + }, + { + "ce_ib": 7.875407695770264, + "ce_orig": 2.3263418674468994, + "epoch": 1.1900927457042203, + "kl_loss": 0.03134977072477341, + "loss_ib": 0.0011010384187102318, + "step": 4139 + }, + { + "epoch": 1.1903803292831978, + "grad_norm": 0.12346263229846954, + "learning_rate": 3.4387547198825186e-05, + "loss": 0.8511, + "step": 4140 + }, + { + "ce_ib": 3.6094865798950195, + "ce_orig": 0.8642986416816711, + "epoch": 1.1903803292831978, + "kl_loss": 0.03888673335313797, + "loss_ib": 0.0007498160121031106, + "step": 4140 + }, + { + "ce_ib": 2.8812341690063477, + "ce_orig": 0.668563187122345, + "epoch": 1.1903803292831978, + "kl_loss": 0.025598015636205673, + "loss_ib": 0.0005441035609692335, + "step": 4140 + }, + { + "ce_ib": 3.979548692703247, + "ce_orig": 0.9982954263687134, + "epoch": 1.1903803292831978, + "kl_loss": 0.04267660528421402, + "loss_ib": 0.0008247208315879107, + "step": 4140 + }, + { + "ce_ib": 1.096567153930664, + "ce_orig": 0.18601946532726288, + "epoch": 1.1903803292831978, + "kl_loss": 0.10853227972984314, + "loss_ib": 0.0011949795298278332, + "step": 4140 + }, + { + "ce_ib": 3.0851197242736816, + "ce_orig": 0.9249438643455505, + "epoch": 1.1906679128621755, + "kl_loss": 0.045140136033296585, + "loss_ib": 0.0007599132950417697, + "step": 4141 + }, + { + "ce_ib": 3.632523536682129, + "ce_orig": 0.938107967376709, + "epoch": 1.1906679128621755, + "kl_loss": 0.04484426975250244, + "loss_ib": 0.0008116950048133731, + "step": 4141 + }, + { + "ce_ib": 3.4287400245666504, + "ce_orig": 0.7511678338050842, + "epoch": 1.1906679128621755, + "kl_loss": 0.05582793056964874, + "loss_ib": 0.0009011533111333847, + "step": 4141 + }, + { + "ce_ib": 2.66921329498291, + "ce_orig": 0.8769998550415039, + "epoch": 1.1906679128621755, + "kl_loss": 0.02828081138432026, + "loss_ib": 0.000549729389604181, + "step": 4141 + }, + { + "ce_ib": 2.773730993270874, + "ce_orig": 0.7795428037643433, + "epoch": 1.1909554964411533, + "kl_loss": 0.04066668078303337, + "loss_ib": 0.0006840399000793695, + "step": 4142 + }, + { + "ce_ib": 3.8467185497283936, + "ce_orig": 1.0514665842056274, + "epoch": 1.1909554964411533, + "kl_loss": 0.0379781648516655, + "loss_ib": 0.0007644534925930202, + "step": 4142 + }, + { + "ce_ib": 2.028203010559082, + "ce_orig": 0.4685932397842407, + "epoch": 1.1909554964411533, + "kl_loss": 0.08435863256454468, + "loss_ib": 0.0010464065708220005, + "step": 4142 + }, + { + "ce_ib": 2.3791937828063965, + "ce_orig": 0.45562079548835754, + "epoch": 1.1909554964411533, + "kl_loss": 0.0414016954600811, + "loss_ib": 0.0006519362796097994, + "step": 4142 + }, + { + "ce_ib": 2.6976981163024902, + "ce_orig": 0.8520416021347046, + "epoch": 1.1912430800201308, + "kl_loss": 0.0593087300658226, + "loss_ib": 0.0008628570940345526, + "step": 4143 + }, + { + "ce_ib": 2.457205295562744, + "ce_orig": 0.46520742774009705, + "epoch": 1.1912430800201308, + "kl_loss": 0.06482629477977753, + "loss_ib": 0.0008939834660850465, + "step": 4143 + }, + { + "ce_ib": 2.8142731189727783, + "ce_orig": 0.7573763132095337, + "epoch": 1.1912430800201308, + "kl_loss": 0.035745665431022644, + "loss_ib": 0.0006388839101418853, + "step": 4143 + }, + { + "ce_ib": 3.289066791534424, + "ce_orig": 0.7491209506988525, + "epoch": 1.1912430800201308, + "kl_loss": 0.04813476651906967, + "loss_ib": 0.0008102543652057648, + "step": 4143 + }, + { + "ce_ib": 3.3957393169403076, + "ce_orig": 0.943698525428772, + "epoch": 1.1915306635991085, + "kl_loss": 0.07743547856807709, + "loss_ib": 0.0011139287380501628, + "step": 4144 + }, + { + "ce_ib": 2.900021553039551, + "ce_orig": 0.8574633002281189, + "epoch": 1.1915306635991085, + "kl_loss": 0.032558076083660126, + "loss_ib": 0.000615582917816937, + "step": 4144 + }, + { + "ce_ib": 2.092351198196411, + "ce_orig": 0.564410388469696, + "epoch": 1.1915306635991085, + "kl_loss": 0.03397852182388306, + "loss_ib": 0.0005490203038789332, + "step": 4144 + }, + { + "ce_ib": 3.4339098930358887, + "ce_orig": 0.8205342292785645, + "epoch": 1.1915306635991085, + "kl_loss": 0.03218643367290497, + "loss_ib": 0.0006652552983723581, + "step": 4144 + }, + { + "epoch": 1.191818247178086, + "grad_norm": 0.10431908071041107, + "learning_rate": 3.435157128010943e-05, + "loss": 0.7932, + "step": 4145 + }, + { + "ce_ib": 3.834156036376953, + "ce_orig": 0.9658108353614807, + "epoch": 1.191818247178086, + "kl_loss": 0.053625792264938354, + "loss_ib": 0.0009196734754368663, + "step": 4145 + }, + { + "ce_ib": 4.728546142578125, + "ce_orig": 1.0089792013168335, + "epoch": 1.191818247178086, + "kl_loss": 0.05536103993654251, + "loss_ib": 0.001026464975439012, + "step": 4145 + }, + { + "ce_ib": 3.170743465423584, + "ce_orig": 0.9857804775238037, + "epoch": 1.191818247178086, + "kl_loss": 0.04138536751270294, + "loss_ib": 0.0007309280335903168, + "step": 4145 + }, + { + "ce_ib": 1.4604759216308594, + "ce_orig": 0.25696930289268494, + "epoch": 1.191818247178086, + "kl_loss": 0.0935266986489296, + "loss_ib": 0.0010813145199790597, + "step": 4145 + }, + { + "ce_ib": 4.951676845550537, + "ce_orig": 1.5280487537384033, + "epoch": 1.1921058307570638, + "kl_loss": 0.056711599230766296, + "loss_ib": 0.0010622836416587234, + "step": 4146 + }, + { + "ce_ib": 3.630941390991211, + "ce_orig": 0.7652023434638977, + "epoch": 1.1921058307570638, + "kl_loss": 0.06286638230085373, + "loss_ib": 0.0009917579591274261, + "step": 4146 + }, + { + "ce_ib": 3.00393009185791, + "ce_orig": 0.39497077465057373, + "epoch": 1.1921058307570638, + "kl_loss": 0.036971528083086014, + "loss_ib": 0.0006701082456856966, + "step": 4146 + }, + { + "ce_ib": 1.50321626663208, + "ce_orig": 0.4201778769493103, + "epoch": 1.1921058307570638, + "kl_loss": 0.04994584619998932, + "loss_ib": 0.0006497800932265818, + "step": 4146 + }, + { + "ce_ib": 3.1859700679779053, + "ce_orig": 0.6679723262786865, + "epoch": 1.1923934143360415, + "kl_loss": 0.05552966520190239, + "loss_ib": 0.0008738936157897115, + "step": 4147 + }, + { + "ce_ib": 4.083553791046143, + "ce_orig": 1.2290925979614258, + "epoch": 1.1923934143360415, + "kl_loss": 0.07143595814704895, + "loss_ib": 0.001122714951634407, + "step": 4147 + }, + { + "ce_ib": 3.1189677715301514, + "ce_orig": 0.7389185428619385, + "epoch": 1.1923934143360415, + "kl_loss": 0.04036012291908264, + "loss_ib": 0.0007154979975894094, + "step": 4147 + }, + { + "ce_ib": 2.8218472003936768, + "ce_orig": 0.5769400000572205, + "epoch": 1.1923934143360415, + "kl_loss": 0.030027780681848526, + "loss_ib": 0.0005824625259265304, + "step": 4147 + }, + { + "ce_ib": 4.367647647857666, + "ce_orig": 1.0132262706756592, + "epoch": 1.192680997915019, + "kl_loss": 0.033113885670900345, + "loss_ib": 0.0007679036352783442, + "step": 4148 + }, + { + "ce_ib": 2.985915422439575, + "ce_orig": 0.7415164709091187, + "epoch": 1.192680997915019, + "kl_loss": 0.04333260655403137, + "loss_ib": 0.0007319176220335066, + "step": 4148 + }, + { + "ce_ib": 4.449913024902344, + "ce_orig": 1.359144926071167, + "epoch": 1.192680997915019, + "kl_loss": 0.031260520219802856, + "loss_ib": 0.000757596455514431, + "step": 4148 + }, + { + "ce_ib": 3.0369746685028076, + "ce_orig": 0.5860255360603333, + "epoch": 1.192680997915019, + "kl_loss": 0.09440244734287262, + "loss_ib": 0.0012477218406274915, + "step": 4148 + }, + { + "ce_ib": 2.2486815452575684, + "ce_orig": 0.5292026400566101, + "epoch": 1.1929685814939968, + "kl_loss": 0.051769379526376724, + "loss_ib": 0.0007425619405694306, + "step": 4149 + }, + { + "ce_ib": 3.311758041381836, + "ce_orig": 0.5560868978500366, + "epoch": 1.1929685814939968, + "kl_loss": 0.03875255584716797, + "loss_ib": 0.0007187013397924602, + "step": 4149 + }, + { + "ce_ib": 3.7829837799072266, + "ce_orig": 1.0766468048095703, + "epoch": 1.1929685814939968, + "kl_loss": 0.040032319724559784, + "loss_ib": 0.0007786215865053236, + "step": 4149 + }, + { + "ce_ib": 3.694082498550415, + "ce_orig": 1.1470954418182373, + "epoch": 1.1929685814939968, + "kl_loss": 0.060059718787670135, + "loss_ib": 0.000970005348790437, + "step": 4149 + }, + { + "epoch": 1.1932561650729743, + "grad_norm": 0.13121309876441956, + "learning_rate": 3.431557283128791e-05, + "loss": 0.8498, + "step": 4150 + }, + { + "ce_ib": 2.729367971420288, + "ce_orig": 0.6250098943710327, + "epoch": 1.1932561650729743, + "kl_loss": 0.03482259064912796, + "loss_ib": 0.000621162704192102, + "step": 4150 + }, + { + "ce_ib": 2.736889123916626, + "ce_orig": 0.7387199997901917, + "epoch": 1.1932561650729743, + "kl_loss": 0.03205092251300812, + "loss_ib": 0.00059419812168926, + "step": 4150 + }, + { + "ce_ib": 3.448812484741211, + "ce_orig": 0.4248059093952179, + "epoch": 1.1932561650729743, + "kl_loss": 0.05168779194355011, + "loss_ib": 0.0008617591811344028, + "step": 4150 + }, + { + "ce_ib": 3.2709624767303467, + "ce_orig": 0.6163445711135864, + "epoch": 1.1932561650729743, + "kl_loss": 0.031457412987947464, + "loss_ib": 0.0006416703690774739, + "step": 4150 + }, + { + "ce_ib": 2.2796404361724854, + "ce_orig": 0.5905858874320984, + "epoch": 1.193543748651952, + "kl_loss": 0.05082324147224426, + "loss_ib": 0.0007361964671872556, + "step": 4151 + }, + { + "ce_ib": 2.8961286544799805, + "ce_orig": 0.887712836265564, + "epoch": 1.193543748651952, + "kl_loss": 0.04426626116037369, + "loss_ib": 0.0007322754827328026, + "step": 4151 + }, + { + "ce_ib": 4.390934944152832, + "ce_orig": 0.8292094469070435, + "epoch": 1.193543748651952, + "kl_loss": 0.030441906303167343, + "loss_ib": 0.0007435124716721475, + "step": 4151 + }, + { + "ce_ib": 2.9180924892425537, + "ce_orig": 0.7260488271713257, + "epoch": 1.193543748651952, + "kl_loss": 0.030326493084430695, + "loss_ib": 0.0005950741469860077, + "step": 4151 + }, + { + "ce_ib": 2.9048538208007812, + "ce_orig": 0.7434256076812744, + "epoch": 1.1938313322309295, + "kl_loss": 0.0882345587015152, + "loss_ib": 0.0011728309327736497, + "step": 4152 + }, + { + "ce_ib": 4.9638848304748535, + "ce_orig": 1.4494773149490356, + "epoch": 1.1938313322309295, + "kl_loss": 0.04482859745621681, + "loss_ib": 0.0009446744224987924, + "step": 4152 + }, + { + "ce_ib": 3.0737695693969727, + "ce_orig": 0.6858647465705872, + "epoch": 1.1938313322309295, + "kl_loss": 0.04977384954690933, + "loss_ib": 0.0008051153854466975, + "step": 4152 + }, + { + "ce_ib": 1.5396637916564941, + "ce_orig": 0.33310142159461975, + "epoch": 1.1938313322309295, + "kl_loss": 0.04476555436849594, + "loss_ib": 0.0006016219267621636, + "step": 4152 + }, + { + "ce_ib": 3.0405352115631104, + "ce_orig": 0.8016562461853027, + "epoch": 1.1941189158099073, + "kl_loss": 0.054808344691991806, + "loss_ib": 0.0008521369891241193, + "step": 4153 + }, + { + "ce_ib": 3.234060764312744, + "ce_orig": 0.7457076907157898, + "epoch": 1.1941189158099073, + "kl_loss": 0.04848921298980713, + "loss_ib": 0.0008082981803454459, + "step": 4153 + }, + { + "ce_ib": 2.932331085205078, + "ce_orig": 0.5686230659484863, + "epoch": 1.1941189158099073, + "kl_loss": 0.11853057891130447, + "loss_ib": 0.0014785387320443988, + "step": 4153 + }, + { + "ce_ib": 4.336119174957275, + "ce_orig": 1.4291669130325317, + "epoch": 1.1941189158099073, + "kl_loss": 0.044589802622795105, + "loss_ib": 0.0008795098983682692, + "step": 4153 + }, + { + "ce_ib": 4.3534040451049805, + "ce_orig": 0.6578548550605774, + "epoch": 1.194406499388885, + "kl_loss": 0.06603051722049713, + "loss_ib": 0.0010956455953419209, + "step": 4154 + }, + { + "ce_ib": 2.789228916168213, + "ce_orig": 0.6845126748085022, + "epoch": 1.194406499388885, + "kl_loss": 0.04698992520570755, + "loss_ib": 0.0007488220580853522, + "step": 4154 + }, + { + "ce_ib": 3.5635697841644287, + "ce_orig": 0.6471850872039795, + "epoch": 1.194406499388885, + "kl_loss": 0.07701049000024796, + "loss_ib": 0.0011264618951827288, + "step": 4154 + }, + { + "ce_ib": 2.990694284439087, + "ce_orig": 0.7094593048095703, + "epoch": 1.194406499388885, + "kl_loss": 0.03843246400356293, + "loss_ib": 0.0006833940860815346, + "step": 4154 + }, + { + "epoch": 1.1946940829678625, + "grad_norm": 0.09587715566158295, + "learning_rate": 3.427955193908925e-05, + "loss": 0.809, + "step": 4155 + }, + { + "ce_ib": 5.02479887008667, + "ce_orig": 1.1061253547668457, + "epoch": 1.1946940829678625, + "kl_loss": 0.034330375492572784, + "loss_ib": 0.0008457836229354143, + "step": 4155 + }, + { + "ce_ib": 2.2072677612304688, + "ce_orig": 0.6615086197853088, + "epoch": 1.1946940829678625, + "kl_loss": 0.03477136045694351, + "loss_ib": 0.000568440358620137, + "step": 4155 + }, + { + "ce_ib": 2.172793388366699, + "ce_orig": 0.6332687139511108, + "epoch": 1.1946940829678625, + "kl_loss": 0.03058113530278206, + "loss_ib": 0.0005230906535871327, + "step": 4155 + }, + { + "ce_ib": 2.932926893234253, + "ce_orig": 0.7502961754798889, + "epoch": 1.1946940829678625, + "kl_loss": 0.05193651467561722, + "loss_ib": 0.0008126578177325428, + "step": 4155 + }, + { + "ce_ib": 3.355172872543335, + "ce_orig": 1.0534422397613525, + "epoch": 1.1949816665468402, + "kl_loss": 0.030377784743905067, + "loss_ib": 0.000639295089058578, + "step": 4156 + }, + { + "ce_ib": 3.9685730934143066, + "ce_orig": 1.0718477964401245, + "epoch": 1.1949816665468402, + "kl_loss": 0.047781802713871, + "loss_ib": 0.0008746753446757793, + "step": 4156 + }, + { + "ce_ib": 4.630876064300537, + "ce_orig": 1.2035455703735352, + "epoch": 1.1949816665468402, + "kl_loss": 0.054829612374305725, + "loss_ib": 0.0010113836033269763, + "step": 4156 + }, + { + "ce_ib": 3.153534173965454, + "ce_orig": 0.7314517498016357, + "epoch": 1.1949816665468402, + "kl_loss": 0.0652988851070404, + "loss_ib": 0.0009683422395028174, + "step": 4156 + }, + { + "ce_ib": 3.249093770980835, + "ce_orig": 0.9317795038223267, + "epoch": 1.1952692501258178, + "kl_loss": 0.06771834939718246, + "loss_ib": 0.0010020927293226123, + "step": 4157 + }, + { + "ce_ib": 3.384584665298462, + "ce_orig": 1.1089684963226318, + "epoch": 1.1952692501258178, + "kl_loss": 0.040137775242328644, + "loss_ib": 0.0007398362504318357, + "step": 4157 + }, + { + "ce_ib": 2.0865724086761475, + "ce_orig": 0.4890846312046051, + "epoch": 1.1952692501258178, + "kl_loss": 0.039101168513298035, + "loss_ib": 0.0005996688851155341, + "step": 4157 + }, + { + "ce_ib": 2.009021043777466, + "ce_orig": 0.6026872992515564, + "epoch": 1.1952692501258178, + "kl_loss": 0.03235820680856705, + "loss_ib": 0.0005244841449894011, + "step": 4157 + }, + { + "ce_ib": 1.3884148597717285, + "ce_orig": 0.34989774227142334, + "epoch": 1.1955568337047955, + "kl_loss": 0.02617279626429081, + "loss_ib": 0.0004005694354418665, + "step": 4158 + }, + { + "ce_ib": 3.4327707290649414, + "ce_orig": 0.6594461798667908, + "epoch": 1.1955568337047955, + "kl_loss": 0.05699716508388519, + "loss_ib": 0.0009132486884482205, + "step": 4158 + }, + { + "ce_ib": 3.756575107574463, + "ce_orig": 1.1413178443908691, + "epoch": 1.1955568337047955, + "kl_loss": 0.048784032464027405, + "loss_ib": 0.000863497843965888, + "step": 4158 + }, + { + "ce_ib": 2.484297752380371, + "ce_orig": 0.658859372138977, + "epoch": 1.1955568337047955, + "kl_loss": 0.0329960398375988, + "loss_ib": 0.0005783901433460414, + "step": 4158 + }, + { + "ce_ib": 3.878474712371826, + "ce_orig": 0.9128177762031555, + "epoch": 1.195844417283773, + "kl_loss": 0.05431842803955078, + "loss_ib": 0.0009310317109338939, + "step": 4159 + }, + { + "ce_ib": 2.6616222858428955, + "ce_orig": 0.39992037415504456, + "epoch": 1.195844417283773, + "kl_loss": 0.04015097767114639, + "loss_ib": 0.0006676720222458243, + "step": 4159 + }, + { + "ce_ib": 3.1120591163635254, + "ce_orig": 0.6716679930686951, + "epoch": 1.195844417283773, + "kl_loss": 0.04806753247976303, + "loss_ib": 0.0007918812334537506, + "step": 4159 + }, + { + "ce_ib": 6.648313045501709, + "ce_orig": 1.8816481828689575, + "epoch": 1.195844417283773, + "kl_loss": 0.05458299070596695, + "loss_ib": 0.0012106612557545304, + "step": 4159 + }, + { + "epoch": 1.1961320008627507, + "grad_norm": 0.10501505434513092, + "learning_rate": 3.4243508690296135e-05, + "loss": 0.8536, + "step": 4160 + }, + { + "ce_ib": 1.9130594730377197, + "ce_orig": 0.4877781867980957, + "epoch": 1.1961320008627507, + "kl_loss": 0.024744225665926933, + "loss_ib": 0.0004387482185848057, + "step": 4160 + }, + { + "ce_ib": 2.819690465927124, + "ce_orig": 0.602295994758606, + "epoch": 1.1961320008627507, + "kl_loss": 0.030950307846069336, + "loss_ib": 0.0005914721405133605, + "step": 4160 + }, + { + "ce_ib": 3.607243061065674, + "ce_orig": 1.1741869449615479, + "epoch": 1.1961320008627507, + "kl_loss": 0.03262577950954437, + "loss_ib": 0.0006869821227155626, + "step": 4160 + }, + { + "ce_ib": 3.8119590282440186, + "ce_orig": 0.9664261341094971, + "epoch": 1.1961320008627507, + "kl_loss": 0.053839512169361115, + "loss_ib": 0.0009195909951813519, + "step": 4160 + }, + { + "ce_ib": 2.9171273708343506, + "ce_orig": 0.6256864666938782, + "epoch": 1.1964195844417285, + "kl_loss": 0.03192515671253204, + "loss_ib": 0.0006109642563387752, + "step": 4161 + }, + { + "ce_ib": 3.077369451522827, + "ce_orig": 0.6763877868652344, + "epoch": 1.1964195844417285, + "kl_loss": 0.04892919212579727, + "loss_ib": 0.0007970288279466331, + "step": 4161 + }, + { + "ce_ib": 3.1599278450012207, + "ce_orig": 0.5695479512214661, + "epoch": 1.1964195844417285, + "kl_loss": 0.0690075159072876, + "loss_ib": 0.0010060679633170366, + "step": 4161 + }, + { + "ce_ib": 4.500799655914307, + "ce_orig": 1.3302468061447144, + "epoch": 1.1964195844417285, + "kl_loss": 0.03787155821919441, + "loss_ib": 0.0008287955424748361, + "step": 4161 + }, + { + "ce_ib": 2.6741445064544678, + "ce_orig": 0.5173804759979248, + "epoch": 1.196707168020706, + "kl_loss": 0.07218394428491592, + "loss_ib": 0.0009892538655549288, + "step": 4162 + }, + { + "ce_ib": 4.709879398345947, + "ce_orig": 1.0540215969085693, + "epoch": 1.196707168020706, + "kl_loss": 0.06089284271001816, + "loss_ib": 0.0010799163719639182, + "step": 4162 + }, + { + "ce_ib": 4.783022880554199, + "ce_orig": 1.4391371011734009, + "epoch": 1.196707168020706, + "kl_loss": 0.03210623562335968, + "loss_ib": 0.0007993645849637687, + "step": 4162 + }, + { + "ce_ib": 2.864063262939453, + "ce_orig": 0.6423730254173279, + "epoch": 1.196707168020706, + "kl_loss": 0.037166133522987366, + "loss_ib": 0.0006580675835721195, + "step": 4162 + }, + { + "ce_ib": 2.143961191177368, + "ce_orig": 0.2454887181520462, + "epoch": 1.1969947515996837, + "kl_loss": 0.06093332916498184, + "loss_ib": 0.0008237293804995716, + "step": 4163 + }, + { + "ce_ib": 2.590595245361328, + "ce_orig": 0.4566636085510254, + "epoch": 1.1969947515996837, + "kl_loss": 0.028967730700969696, + "loss_ib": 0.0005487368325702846, + "step": 4163 + }, + { + "ce_ib": 4.048351287841797, + "ce_orig": 0.7195188999176025, + "epoch": 1.1969947515996837, + "kl_loss": 0.04425952211022377, + "loss_ib": 0.0008474303758703172, + "step": 4163 + }, + { + "ce_ib": 2.45235013961792, + "ce_orig": 0.6547219157218933, + "epoch": 1.1969947515996837, + "kl_loss": 0.050851400941610336, + "loss_ib": 0.0007537489873357117, + "step": 4163 + }, + { + "ce_ib": 5.375039100646973, + "ce_orig": 1.5761126279830933, + "epoch": 1.1972823351786612, + "kl_loss": 0.061522357165813446, + "loss_ib": 0.001152727403678, + "step": 4164 + }, + { + "ce_ib": 2.777808904647827, + "ce_orig": 0.723708987236023, + "epoch": 1.1972823351786612, + "kl_loss": 0.03045576810836792, + "loss_ib": 0.0005823385436087847, + "step": 4164 + }, + { + "ce_ib": 2.897625207901001, + "ce_orig": 0.7818360328674316, + "epoch": 1.1972823351786612, + "kl_loss": 0.037648025900125504, + "loss_ib": 0.0006662427331320941, + "step": 4164 + }, + { + "ce_ib": 3.3420419692993164, + "ce_orig": 0.6597034335136414, + "epoch": 1.1972823351786612, + "kl_loss": 0.0329098254442215, + "loss_ib": 0.0006633023731410503, + "step": 4164 + }, + { + "epoch": 1.197569918757639, + "grad_norm": 0.10229850560426712, + "learning_rate": 3.420744317174512e-05, + "loss": 0.8178, + "step": 4165 + }, + { + "ce_ib": 3.654026508331299, + "ce_orig": 0.5937637090682983, + "epoch": 1.197569918757639, + "kl_loss": 0.048719003796577454, + "loss_ib": 0.0008525926969014108, + "step": 4165 + }, + { + "ce_ib": 3.803304672241211, + "ce_orig": 0.8707147240638733, + "epoch": 1.197569918757639, + "kl_loss": 0.04708011448383331, + "loss_ib": 0.000851131568197161, + "step": 4165 + }, + { + "ce_ib": 2.5547022819519043, + "ce_orig": 0.8464571833610535, + "epoch": 1.197569918757639, + "kl_loss": 0.03141430765390396, + "loss_ib": 0.0005696132429875433, + "step": 4165 + }, + { + "ce_ib": 4.956923484802246, + "ce_orig": 1.3218086957931519, + "epoch": 1.197569918757639, + "kl_loss": 0.047415897250175476, + "loss_ib": 0.000969851273111999, + "step": 4165 + }, + { + "ce_ib": 3.3913662433624268, + "ce_orig": 0.8009448647499084, + "epoch": 1.1978575023366167, + "kl_loss": 0.049003805965185165, + "loss_ib": 0.0008291746489703655, + "step": 4166 + }, + { + "ce_ib": 3.7558810710906982, + "ce_orig": 0.6721141338348389, + "epoch": 1.1978575023366167, + "kl_loss": 0.047287534922361374, + "loss_ib": 0.0008484634454362094, + "step": 4166 + }, + { + "ce_ib": 3.0582385063171387, + "ce_orig": 0.746864378452301, + "epoch": 1.1978575023366167, + "kl_loss": 0.05315084755420685, + "loss_ib": 0.0008373322780244052, + "step": 4166 + }, + { + "ce_ib": 2.7770943641662598, + "ce_orig": 0.6564648151397705, + "epoch": 1.1978575023366167, + "kl_loss": 0.039133332669734955, + "loss_ib": 0.0006690427544526756, + "step": 4166 + }, + { + "ce_ib": 2.250359058380127, + "ce_orig": 0.7143835425376892, + "epoch": 1.1981450859155942, + "kl_loss": 0.03741458058357239, + "loss_ib": 0.0005991816869936883, + "step": 4167 + }, + { + "ce_ib": 2.2714545726776123, + "ce_orig": 0.6694082617759705, + "epoch": 1.1981450859155942, + "kl_loss": 0.04362326115369797, + "loss_ib": 0.0006633781013078988, + "step": 4167 + }, + { + "ce_ib": 4.111244201660156, + "ce_orig": 1.0341191291809082, + "epoch": 1.1981450859155942, + "kl_loss": 0.04822080582380295, + "loss_ib": 0.0008933324716053903, + "step": 4167 + }, + { + "ce_ib": 2.6029796600341797, + "ce_orig": 0.6766178607940674, + "epoch": 1.1981450859155942, + "kl_loss": 0.044239383190870285, + "loss_ib": 0.0007026917883194983, + "step": 4167 + }, + { + "ce_ib": 4.349878787994385, + "ce_orig": 1.2908891439437866, + "epoch": 1.198432669494572, + "kl_loss": 0.046154942363500595, + "loss_ib": 0.000896537268999964, + "step": 4168 + }, + { + "ce_ib": 4.057145595550537, + "ce_orig": 0.9418500065803528, + "epoch": 1.198432669494572, + "kl_loss": 0.05099070444703102, + "loss_ib": 0.0009156215819530189, + "step": 4168 + }, + { + "ce_ib": 4.210719108581543, + "ce_orig": 1.1885995864868164, + "epoch": 1.198432669494572, + "kl_loss": 0.038625568151474, + "loss_ib": 0.0008073276258073747, + "step": 4168 + }, + { + "ce_ib": 3.241684675216675, + "ce_orig": 0.7137289047241211, + "epoch": 1.198432669494572, + "kl_loss": 0.04245012253522873, + "loss_ib": 0.0007486696122214198, + "step": 4168 + }, + { + "ce_ib": 4.385807037353516, + "ce_orig": 1.1783623695373535, + "epoch": 1.1987202530735495, + "kl_loss": 0.04093417525291443, + "loss_ib": 0.0008479224052280188, + "step": 4169 + }, + { + "ce_ib": 3.0025155544281006, + "ce_orig": 0.7999699115753174, + "epoch": 1.1987202530735495, + "kl_loss": 0.05246800184249878, + "loss_ib": 0.0008249315433204174, + "step": 4169 + }, + { + "ce_ib": 2.7176709175109863, + "ce_orig": 0.5207055807113647, + "epoch": 1.1987202530735495, + "kl_loss": 0.037918027490377426, + "loss_ib": 0.0006509473896585405, + "step": 4169 + }, + { + "ce_ib": 2.2494492530822754, + "ce_orig": 0.3534604609012604, + "epoch": 1.1987202530735495, + "kl_loss": 0.04870454967021942, + "loss_ib": 0.0007119904039427638, + "step": 4169 + }, + { + "epoch": 1.1990078366525272, + "grad_norm": 0.09462816268205643, + "learning_rate": 3.4171355470326414e-05, + "loss": 0.8513, + "step": 4170 + }, + { + "ce_ib": 2.5769565105438232, + "ce_orig": 0.6546069979667664, + "epoch": 1.1990078366525272, + "kl_loss": 0.03440394625067711, + "loss_ib": 0.0006017350824549794, + "step": 4170 + }, + { + "ce_ib": 1.435997486114502, + "ce_orig": 0.40091249346733093, + "epoch": 1.1990078366525272, + "kl_loss": 0.04974623769521713, + "loss_ib": 0.0006410620990209281, + "step": 4170 + }, + { + "ce_ib": 3.7985737323760986, + "ce_orig": 1.2527360916137695, + "epoch": 1.1990078366525272, + "kl_loss": 0.04346587508916855, + "loss_ib": 0.0008145160973072052, + "step": 4170 + }, + { + "ce_ib": 3.3671202659606934, + "ce_orig": 1.0194729566574097, + "epoch": 1.1990078366525272, + "kl_loss": 0.03128139674663544, + "loss_ib": 0.0006495259585790336, + "step": 4170 + }, + { + "ce_ib": 2.8214378356933594, + "ce_orig": 0.8478133678436279, + "epoch": 1.1992954202315047, + "kl_loss": 0.04800795763731003, + "loss_ib": 0.0007622233824804425, + "step": 4171 + }, + { + "ce_ib": 2.4172329902648926, + "ce_orig": 0.5832883715629578, + "epoch": 1.1992954202315047, + "kl_loss": 0.03167595714330673, + "loss_ib": 0.000558482832275331, + "step": 4171 + }, + { + "ce_ib": 4.282593250274658, + "ce_orig": 1.0788607597351074, + "epoch": 1.1992954202315047, + "kl_loss": 0.05532173812389374, + "loss_ib": 0.0009814766235649586, + "step": 4171 + }, + { + "ce_ib": 3.010634422302246, + "ce_orig": 0.8955910801887512, + "epoch": 1.1992954202315047, + "kl_loss": 0.03507048264145851, + "loss_ib": 0.0006517682923004031, + "step": 4171 + }, + { + "ce_ib": 3.8881726264953613, + "ce_orig": 1.1202696561813354, + "epoch": 1.1995830038104824, + "kl_loss": 0.04064565896987915, + "loss_ib": 0.0007952738087624311, + "step": 4172 + }, + { + "ce_ib": 3.7795965671539307, + "ce_orig": 0.9150530099868774, + "epoch": 1.1995830038104824, + "kl_loss": 0.08206643164157867, + "loss_ib": 0.0011986239114776254, + "step": 4172 + }, + { + "ce_ib": 2.9150290489196777, + "ce_orig": 0.6907677054405212, + "epoch": 1.1995830038104824, + "kl_loss": 0.03203955292701721, + "loss_ib": 0.0006118984310887754, + "step": 4172 + }, + { + "ce_ib": 2.790323495864868, + "ce_orig": 0.5718783736228943, + "epoch": 1.1995830038104824, + "kl_loss": 0.041060078889131546, + "loss_ib": 0.0006896330742165446, + "step": 4172 + }, + { + "ce_ib": 2.5734612941741943, + "ce_orig": 0.6807194352149963, + "epoch": 1.19987058738946, + "kl_loss": 0.04015705734491348, + "loss_ib": 0.0006589166587218642, + "step": 4173 + }, + { + "ce_ib": 4.651427745819092, + "ce_orig": 1.146695852279663, + "epoch": 1.19987058738946, + "kl_loss": 0.062013640999794006, + "loss_ib": 0.0010852791601791978, + "step": 4173 + }, + { + "ce_ib": 4.576812744140625, + "ce_orig": 1.4410868883132935, + "epoch": 1.19987058738946, + "kl_loss": 0.049619466066360474, + "loss_ib": 0.0009538758895359933, + "step": 4173 + }, + { + "ce_ib": 3.5463757514953613, + "ce_orig": 0.8773106932640076, + "epoch": 1.19987058738946, + "kl_loss": 0.07328692078590393, + "loss_ib": 0.0010875067673623562, + "step": 4173 + }, + { + "ce_ib": 3.19416880607605, + "ce_orig": 0.9175944924354553, + "epoch": 1.2001581709684377, + "kl_loss": 0.04869482293725014, + "loss_ib": 0.0008063650457188487, + "step": 4174 + }, + { + "ce_ib": 5.2234625816345215, + "ce_orig": 0.80591881275177, + "epoch": 1.2001581709684377, + "kl_loss": 0.05692581087350845, + "loss_ib": 0.0010916043538600206, + "step": 4174 + }, + { + "ce_ib": 3.486192464828491, + "ce_orig": 0.8593600392341614, + "epoch": 1.2001581709684377, + "kl_loss": 0.037004776298999786, + "loss_ib": 0.0007186669972725213, + "step": 4174 + }, + { + "ce_ib": 3.0938730239868164, + "ce_orig": 0.9494797587394714, + "epoch": 1.2001581709684377, + "kl_loss": 0.024164924398064613, + "loss_ib": 0.0005510365008376539, + "step": 4174 + }, + { + "epoch": 1.2004457545474154, + "grad_norm": 0.110223688185215, + "learning_rate": 3.413524567298366e-05, + "loss": 0.8526, + "step": 4175 + }, + { + "ce_ib": 2.037660837173462, + "ce_orig": 0.4949173033237457, + "epoch": 1.2004457545474154, + "kl_loss": 0.019841421395540237, + "loss_ib": 0.0004021802742499858, + "step": 4175 + }, + { + "ce_ib": 3.8058359622955322, + "ce_orig": 1.2394635677337646, + "epoch": 1.2004457545474154, + "kl_loss": 0.03639448806643486, + "loss_ib": 0.0007445284281857312, + "step": 4175 + }, + { + "ce_ib": 2.807175397872925, + "ce_orig": 0.9016671776771545, + "epoch": 1.2004457545474154, + "kl_loss": 0.027493298053741455, + "loss_ib": 0.0005556505057029426, + "step": 4175 + }, + { + "ce_ib": 2.852092742919922, + "ce_orig": 0.5922084450721741, + "epoch": 1.2004457545474154, + "kl_loss": 0.03599383682012558, + "loss_ib": 0.0006451476365327835, + "step": 4175 + }, + { + "ce_ib": 3.312568426132202, + "ce_orig": 1.1349462270736694, + "epoch": 1.200733338126393, + "kl_loss": 0.030776962637901306, + "loss_ib": 0.0006390264607034624, + "step": 4176 + }, + { + "ce_ib": 2.031292200088501, + "ce_orig": 0.4824226200580597, + "epoch": 1.200733338126393, + "kl_loss": 0.028999878093600273, + "loss_ib": 0.0004931279690936208, + "step": 4176 + }, + { + "ce_ib": 2.899446487426758, + "ce_orig": 0.7005216479301453, + "epoch": 1.200733338126393, + "kl_loss": 0.057818759232759476, + "loss_ib": 0.0008681322215124965, + "step": 4176 + }, + { + "ce_ib": 2.7118349075317383, + "ce_orig": 0.8549861907958984, + "epoch": 1.200733338126393, + "kl_loss": 0.040076419711112976, + "loss_ib": 0.0006719476659782231, + "step": 4176 + }, + { + "ce_ib": 2.7166666984558105, + "ce_orig": 0.8964973092079163, + "epoch": 1.2010209217053707, + "kl_loss": 0.031240517273545265, + "loss_ib": 0.0005840718513354659, + "step": 4177 + }, + { + "ce_ib": 2.980867385864258, + "ce_orig": 0.6447399258613586, + "epoch": 1.2010209217053707, + "kl_loss": 0.040138985961675644, + "loss_ib": 0.0006994765717536211, + "step": 4177 + }, + { + "ce_ib": 2.3348753452301025, + "ce_orig": 0.6942010521888733, + "epoch": 1.2010209217053707, + "kl_loss": 0.03979240357875824, + "loss_ib": 0.0006314115016721189, + "step": 4177 + }, + { + "ce_ib": 3.7158186435699463, + "ce_orig": 0.9628584980964661, + "epoch": 1.2010209217053707, + "kl_loss": 0.06702545285224915, + "loss_ib": 0.0010418363381177187, + "step": 4177 + }, + { + "ce_ib": 3.7132105827331543, + "ce_orig": 0.5306869149208069, + "epoch": 1.2013085052843482, + "kl_loss": 0.05016995221376419, + "loss_ib": 0.0008730205590836704, + "step": 4178 + }, + { + "ce_ib": 3.422705888748169, + "ce_orig": 0.8476244807243347, + "epoch": 1.2013085052843482, + "kl_loss": 0.024539466947317123, + "loss_ib": 0.0005876652430742979, + "step": 4178 + }, + { + "ce_ib": 1.6560484170913696, + "ce_orig": 0.441388338804245, + "epoch": 1.2013085052843482, + "kl_loss": 0.04021956026554108, + "loss_ib": 0.0005678004235960543, + "step": 4178 + }, + { + "ce_ib": 2.063408136367798, + "ce_orig": 0.6271644234657288, + "epoch": 1.2013085052843482, + "kl_loss": 0.020953234285116196, + "loss_ib": 0.00041587313171476126, + "step": 4178 + }, + { + "ce_ib": 4.562362194061279, + "ce_orig": 1.0456160306930542, + "epoch": 1.201596088863326, + "kl_loss": 0.04315374791622162, + "loss_ib": 0.0008877736399881542, + "step": 4179 + }, + { + "ce_ib": 1.9474128484725952, + "ce_orig": 0.5550938844680786, + "epoch": 1.201596088863326, + "kl_loss": 0.03710225597023964, + "loss_ib": 0.0005657638539560139, + "step": 4179 + }, + { + "ce_ib": 1.0666770935058594, + "ce_orig": 0.18684692680835724, + "epoch": 1.201596088863326, + "kl_loss": 0.07106594741344452, + "loss_ib": 0.0008173271780833602, + "step": 4179 + }, + { + "ce_ib": 2.4563710689544678, + "ce_orig": 0.6346617937088013, + "epoch": 1.201596088863326, + "kl_loss": 0.01483073364943266, + "loss_ib": 0.0003939444140996784, + "step": 4179 + }, + { + "epoch": 1.2018836724423037, + "grad_norm": 0.1116391122341156, + "learning_rate": 3.409911386671375e-05, + "loss": 0.8243, + "step": 4180 + }, + { + "ce_ib": 5.043065071105957, + "ce_orig": 1.3120977878570557, + "epoch": 1.2018836724423037, + "kl_loss": 0.051321715116500854, + "loss_ib": 0.0010175235802307725, + "step": 4180 + }, + { + "ce_ib": 2.6025915145874023, + "ce_orig": 0.49584174156188965, + "epoch": 1.2018836724423037, + "kl_loss": 0.028737889602780342, + "loss_ib": 0.0005476379883475602, + "step": 4180 + }, + { + "ce_ib": 2.4639065265655518, + "ce_orig": 0.5724833607673645, + "epoch": 1.2018836724423037, + "kl_loss": 0.0499059334397316, + "loss_ib": 0.0007454499718733132, + "step": 4180 + }, + { + "ce_ib": 2.315582036972046, + "ce_orig": 0.6411441564559937, + "epoch": 1.2018836724423037, + "kl_loss": 0.04870783910155296, + "loss_ib": 0.0007186365546658635, + "step": 4180 + }, + { + "ce_ib": 3.873152494430542, + "ce_orig": 0.9547771215438843, + "epoch": 1.2021712560212812, + "kl_loss": 0.05104968696832657, + "loss_ib": 0.0008978120749816298, + "step": 4181 + }, + { + "ce_ib": 2.524493932723999, + "ce_orig": 0.5885399580001831, + "epoch": 1.2021712560212812, + "kl_loss": 0.11214935779571533, + "loss_ib": 0.0013739429414272308, + "step": 4181 + }, + { + "ce_ib": 3.5142645835876465, + "ce_orig": 1.2352542877197266, + "epoch": 1.2021712560212812, + "kl_loss": 0.026401124894618988, + "loss_ib": 0.0006154376897029579, + "step": 4181 + }, + { + "ce_ib": 2.950211524963379, + "ce_orig": 0.5394836664199829, + "epoch": 1.2021712560212812, + "kl_loss": 0.06995408982038498, + "loss_ib": 0.0009945620549842715, + "step": 4181 + }, + { + "ce_ib": 3.327174663543701, + "ce_orig": 0.8103596568107605, + "epoch": 1.202458839600259, + "kl_loss": 0.06098088622093201, + "loss_ib": 0.0009425262687727809, + "step": 4182 + }, + { + "ce_ib": 2.054957866668701, + "ce_orig": 0.580076277256012, + "epoch": 1.202458839600259, + "kl_loss": 0.029999548569321632, + "loss_ib": 0.0005054912762716413, + "step": 4182 + }, + { + "ce_ib": 4.31653356552124, + "ce_orig": 1.234505534172058, + "epoch": 1.202458839600259, + "kl_loss": 0.04010548070073128, + "loss_ib": 0.0008327081450261176, + "step": 4182 + }, + { + "ce_ib": 3.2809078693389893, + "ce_orig": 1.0633429288864136, + "epoch": 1.202458839600259, + "kl_loss": 0.02609114721417427, + "loss_ib": 0.0005890022730454803, + "step": 4182 + }, + { + "ce_ib": 2.230936050415039, + "ce_orig": 0.4485999643802643, + "epoch": 1.2027464231792364, + "kl_loss": 0.028643853962421417, + "loss_ib": 0.0005095321102999151, + "step": 4183 + }, + { + "ce_ib": 2.9970436096191406, + "ce_orig": 0.7581766247749329, + "epoch": 1.2027464231792364, + "kl_loss": 0.030269861221313477, + "loss_ib": 0.0006024029571563005, + "step": 4183 + }, + { + "ce_ib": 4.292161464691162, + "ce_orig": 1.165794014930725, + "epoch": 1.2027464231792364, + "kl_loss": 0.03921108320355415, + "loss_ib": 0.00082132697571069, + "step": 4183 + }, + { + "ce_ib": 1.9174151420593262, + "ce_orig": 0.48473718762397766, + "epoch": 1.2027464231792364, + "kl_loss": 0.022999200969934464, + "loss_ib": 0.0004217335081193596, + "step": 4183 + }, + { + "ce_ib": 3.0542213916778564, + "ce_orig": 1.033617377281189, + "epoch": 1.2030340067582141, + "kl_loss": 0.04226220026612282, + "loss_ib": 0.000728044135030359, + "step": 4184 + }, + { + "ce_ib": 3.0828089714050293, + "ce_orig": 0.6342136263847351, + "epoch": 1.2030340067582141, + "kl_loss": 0.032511934638023376, + "loss_ib": 0.0006334002246148884, + "step": 4184 + }, + { + "ce_ib": 4.199602127075195, + "ce_orig": 1.029726505279541, + "epoch": 1.2030340067582141, + "kl_loss": 0.05879083648324013, + "loss_ib": 0.0010078685590997338, + "step": 4184 + }, + { + "ce_ib": 2.035742998123169, + "ce_orig": 0.6365372538566589, + "epoch": 1.2030340067582141, + "kl_loss": 0.024816539138555527, + "loss_ib": 0.0004517396737355739, + "step": 4184 + }, + { + "epoch": 1.2033215903371917, + "grad_norm": 0.10395430028438568, + "learning_rate": 3.406296013856658e-05, + "loss": 0.8326, + "step": 4185 + }, + { + "ce_ib": 5.627831935882568, + "ce_orig": 1.8212032318115234, + "epoch": 1.2033215903371917, + "kl_loss": 0.051692645996809006, + "loss_ib": 0.0010797096183523536, + "step": 4185 + }, + { + "ce_ib": 3.7148537635803223, + "ce_orig": 1.1948741674423218, + "epoch": 1.2033215903371917, + "kl_loss": 0.04864652082324028, + "loss_ib": 0.0008579505956731737, + "step": 4185 + }, + { + "ce_ib": 1.9561076164245605, + "ce_orig": 0.4286326766014099, + "epoch": 1.2033215903371917, + "kl_loss": 0.046930886805057526, + "loss_ib": 0.0006649196147918701, + "step": 4185 + }, + { + "ce_ib": 3.0287094116210938, + "ce_orig": 0.31751036643981934, + "epoch": 1.2033215903371917, + "kl_loss": 0.058160968124866486, + "loss_ib": 0.0008844805997796357, + "step": 4185 + }, + { + "ce_ib": 2.3598852157592773, + "ce_orig": 0.710410475730896, + "epoch": 1.2036091739161694, + "kl_loss": 0.026454253122210503, + "loss_ib": 0.0005005310522392392, + "step": 4186 + }, + { + "ce_ib": 1.9314628839492798, + "ce_orig": 0.4689303934574127, + "epoch": 1.2036091739161694, + "kl_loss": 0.034537963569164276, + "loss_ib": 0.0005385259282775223, + "step": 4186 + }, + { + "ce_ib": 2.9300220012664795, + "ce_orig": 0.6163293719291687, + "epoch": 1.2036091739161694, + "kl_loss": 0.038510024547576904, + "loss_ib": 0.0006781024276278913, + "step": 4186 + }, + { + "ce_ib": 3.5004303455352783, + "ce_orig": 0.8485762476921082, + "epoch": 1.2036091739161694, + "kl_loss": 0.035197123885154724, + "loss_ib": 0.0007020142511464655, + "step": 4186 + }, + { + "ce_ib": 2.8391811847686768, + "ce_orig": 0.6106915473937988, + "epoch": 1.203896757495147, + "kl_loss": 0.03475005179643631, + "loss_ib": 0.0006314186612144113, + "step": 4187 + }, + { + "ce_ib": 3.7103774547576904, + "ce_orig": 0.9809411764144897, + "epoch": 1.203896757495147, + "kl_loss": 0.034684017300605774, + "loss_ib": 0.0007178778760135174, + "step": 4187 + }, + { + "ce_ib": 3.5968823432922363, + "ce_orig": 0.7616257667541504, + "epoch": 1.203896757495147, + "kl_loss": 0.07679934799671173, + "loss_ib": 0.0011276816949248314, + "step": 4187 + }, + { + "ce_ib": 2.663444995880127, + "ce_orig": 0.5497621297836304, + "epoch": 1.203896757495147, + "kl_loss": 0.039980463683605194, + "loss_ib": 0.0006661490770056844, + "step": 4187 + }, + { + "ce_ib": 3.024305820465088, + "ce_orig": 0.8010605573654175, + "epoch": 1.2041843410741246, + "kl_loss": 0.03249489143490791, + "loss_ib": 0.000627379457000643, + "step": 4188 + }, + { + "ce_ib": 3.8253238201141357, + "ce_orig": 0.8033916354179382, + "epoch": 1.2041843410741246, + "kl_loss": 0.05766121298074722, + "loss_ib": 0.0009591444977559149, + "step": 4188 + }, + { + "ce_ib": 2.9223663806915283, + "ce_orig": 0.7153500914573669, + "epoch": 1.2041843410741246, + "kl_loss": 0.04683732986450195, + "loss_ib": 0.0007606099243275821, + "step": 4188 + }, + { + "ce_ib": 1.972919225692749, + "ce_orig": 0.26170775294303894, + "epoch": 1.2041843410741246, + "kl_loss": 0.038197316229343414, + "loss_ib": 0.0005792650626972318, + "step": 4188 + }, + { + "ce_ib": 3.3216984272003174, + "ce_orig": 0.7957494854927063, + "epoch": 1.2044719246531024, + "kl_loss": 0.05907284840941429, + "loss_ib": 0.0009228983544744551, + "step": 4189 + }, + { + "ce_ib": 4.017453193664551, + "ce_orig": 1.206176996231079, + "epoch": 1.2044719246531024, + "kl_loss": 0.024534452706575394, + "loss_ib": 0.0006470898515544832, + "step": 4189 + }, + { + "ce_ib": 3.598419427871704, + "ce_orig": 0.8495578765869141, + "epoch": 1.2044719246531024, + "kl_loss": 0.05219211056828499, + "loss_ib": 0.0008817630005069077, + "step": 4189 + }, + { + "ce_ib": 4.6056718826293945, + "ce_orig": 1.3266936540603638, + "epoch": 1.2044719246531024, + "kl_loss": 0.05399090796709061, + "loss_ib": 0.0010004761861637235, + "step": 4189 + }, + { + "epoch": 1.20475950823208, + "grad_norm": 0.11161477118730545, + "learning_rate": 3.4026784575644886e-05, + "loss": 0.7849, + "step": 4190 + }, + { + "ce_ib": 3.79962420463562, + "ce_orig": 1.2226064205169678, + "epoch": 1.20475950823208, + "kl_loss": 0.043463170528411865, + "loss_ib": 0.0008145940955728292, + "step": 4190 + }, + { + "ce_ib": 3.2487666606903076, + "ce_orig": 1.190632700920105, + "epoch": 1.20475950823208, + "kl_loss": 0.0603913888335228, + "loss_ib": 0.0009287905413657427, + "step": 4190 + }, + { + "ce_ib": 2.7707746028900146, + "ce_orig": 0.29843154549598694, + "epoch": 1.20475950823208, + "kl_loss": 0.1605796217918396, + "loss_ib": 0.001882873591966927, + "step": 4190 + }, + { + "ce_ib": 4.726302623748779, + "ce_orig": 1.3942691087722778, + "epoch": 1.20475950823208, + "kl_loss": 0.052294231951236725, + "loss_ib": 0.0009955725399777293, + "step": 4190 + }, + { + "ce_ib": 2.5047712326049805, + "ce_orig": 0.6588637828826904, + "epoch": 1.2050470918110576, + "kl_loss": 0.043551940470933914, + "loss_ib": 0.0006859965506009758, + "step": 4191 + }, + { + "ce_ib": 4.546279430389404, + "ce_orig": 1.0542900562286377, + "epoch": 1.2050470918110576, + "kl_loss": 0.04769166558980942, + "loss_ib": 0.0009315445786342025, + "step": 4191 + }, + { + "ce_ib": 2.152055501937866, + "ce_orig": 0.6430056691169739, + "epoch": 1.2050470918110576, + "kl_loss": 0.030557531863451004, + "loss_ib": 0.0005207808571867645, + "step": 4191 + }, + { + "ce_ib": 2.899980306625366, + "ce_orig": 0.7216977477073669, + "epoch": 1.2050470918110576, + "kl_loss": 0.027615973725914955, + "loss_ib": 0.0005661577451974154, + "step": 4191 + }, + { + "ce_ib": 3.2769219875335693, + "ce_orig": 0.8686284422874451, + "epoch": 1.2053346753900351, + "kl_loss": 0.019945580512285233, + "loss_ib": 0.000527147960383445, + "step": 4192 + }, + { + "ce_ib": 4.794339179992676, + "ce_orig": 1.4287960529327393, + "epoch": 1.2053346753900351, + "kl_loss": 0.03447677195072174, + "loss_ib": 0.0008242016192525625, + "step": 4192 + }, + { + "ce_ib": 2.8076555728912354, + "ce_orig": 0.6943540573120117, + "epoch": 1.2053346753900351, + "kl_loss": 0.038947537541389465, + "loss_ib": 0.0006702409591525793, + "step": 4192 + }, + { + "ce_ib": 3.2093896865844727, + "ce_orig": 0.7284190058708191, + "epoch": 1.2053346753900351, + "kl_loss": 0.0428190678358078, + "loss_ib": 0.0007491296273656189, + "step": 4192 + }, + { + "ce_ib": 2.5111773014068604, + "ce_orig": 0.6029535531997681, + "epoch": 1.2056222589690129, + "kl_loss": 0.03419848158955574, + "loss_ib": 0.0005931025370955467, + "step": 4193 + }, + { + "ce_ib": 2.642601490020752, + "ce_orig": 0.5748811364173889, + "epoch": 1.2056222589690129, + "kl_loss": 0.042374834418296814, + "loss_ib": 0.0006880084984004498, + "step": 4193 + }, + { + "ce_ib": 3.1317243576049805, + "ce_orig": 0.5882285833358765, + "epoch": 1.2056222589690129, + "kl_loss": 0.0395948700606823, + "loss_ib": 0.0007091210572980344, + "step": 4193 + }, + { + "ce_ib": 3.368208646774292, + "ce_orig": 0.838280200958252, + "epoch": 1.2056222589690129, + "kl_loss": 0.025164375081658363, + "loss_ib": 0.0005884645506739616, + "step": 4193 + }, + { + "ce_ib": 2.315920829772949, + "ce_orig": 0.7493765950202942, + "epoch": 1.2059098425479906, + "kl_loss": 0.034406762570142746, + "loss_ib": 0.0005756596801802516, + "step": 4194 + }, + { + "ce_ib": 3.837709903717041, + "ce_orig": 0.8634136319160461, + "epoch": 1.2059098425479906, + "kl_loss": 0.05932142212986946, + "loss_ib": 0.0009769852040335536, + "step": 4194 + }, + { + "ce_ib": 2.7276830673217773, + "ce_orig": 0.680753767490387, + "epoch": 1.2059098425479906, + "kl_loss": 0.047415442764759064, + "loss_ib": 0.0007469226839020848, + "step": 4194 + }, + { + "ce_ib": 4.413412570953369, + "ce_orig": 1.3628259897232056, + "epoch": 1.2059098425479906, + "kl_loss": 0.04274474456906319, + "loss_ib": 0.0008687886293046176, + "step": 4194 + }, + { + "epoch": 1.2061974261269681, + "grad_norm": 0.11244060844182968, + "learning_rate": 3.3990587265103976e-05, + "loss": 0.8576, + "step": 4195 + }, + { + "ce_ib": 2.290036201477051, + "ce_orig": 0.6711646318435669, + "epoch": 1.2061974261269681, + "kl_loss": 0.026820946484804153, + "loss_ib": 0.0004972130409441888, + "step": 4195 + }, + { + "ce_ib": 5.678807258605957, + "ce_orig": 1.5560681819915771, + "epoch": 1.2061974261269681, + "kl_loss": 0.06281176954507828, + "loss_ib": 0.0011959983967244625, + "step": 4195 + }, + { + "ce_ib": 3.8157765865325928, + "ce_orig": 1.075918197631836, + "epoch": 1.2061974261269681, + "kl_loss": 0.046042945235967636, + "loss_ib": 0.0008420071098953485, + "step": 4195 + }, + { + "ce_ib": 2.8189642429351807, + "ce_orig": 0.6977618336677551, + "epoch": 1.2061974261269681, + "kl_loss": 0.03302684426307678, + "loss_ib": 0.0006121648475527763, + "step": 4195 + }, + { + "ce_ib": 3.7556562423706055, + "ce_orig": 0.5015865564346313, + "epoch": 1.2064850097059459, + "kl_loss": 0.04219737648963928, + "loss_ib": 0.0007975393091328442, + "step": 4196 + }, + { + "ce_ib": 2.0921216011047363, + "ce_orig": 0.6865404844284058, + "epoch": 1.2064850097059459, + "kl_loss": 0.03127376735210419, + "loss_ib": 0.0005219498416408896, + "step": 4196 + }, + { + "ce_ib": 2.901073455810547, + "ce_orig": 0.6837705373764038, + "epoch": 1.2064850097059459, + "kl_loss": 0.027454258874058723, + "loss_ib": 0.0005646499339491129, + "step": 4196 + }, + { + "ce_ib": 2.8455262184143066, + "ce_orig": 0.6199031472206116, + "epoch": 1.2064850097059459, + "kl_loss": 0.05004052817821503, + "loss_ib": 0.0007849578396417201, + "step": 4196 + }, + { + "ce_ib": 3.5294694900512695, + "ce_orig": 0.8448978662490845, + "epoch": 1.2067725932849234, + "kl_loss": 0.04994470626115799, + "loss_ib": 0.0008523939177393913, + "step": 4197 + }, + { + "ce_ib": 4.37314510345459, + "ce_orig": 1.316677212715149, + "epoch": 1.2067725932849234, + "kl_loss": 0.04622773081064224, + "loss_ib": 0.0008995917742140591, + "step": 4197 + }, + { + "ce_ib": 3.0068981647491455, + "ce_orig": 0.9079075455665588, + "epoch": 1.2067725932849234, + "kl_loss": 0.030379993841052055, + "loss_ib": 0.0006044897018000484, + "step": 4197 + }, + { + "ce_ib": 3.2051491737365723, + "ce_orig": 0.8797764778137207, + "epoch": 1.2067725932849234, + "kl_loss": 0.08222424983978271, + "loss_ib": 0.0011427573626860976, + "step": 4197 + }, + { + "ce_ib": 4.102136611938477, + "ce_orig": 1.2515819072723389, + "epoch": 1.207060176863901, + "kl_loss": 0.03735566884279251, + "loss_ib": 0.0007837703451514244, + "step": 4198 + }, + { + "ce_ib": 3.7576370239257812, + "ce_orig": 1.0512219667434692, + "epoch": 1.207060176863901, + "kl_loss": 0.05361989513039589, + "loss_ib": 0.0009119626483879983, + "step": 4198 + }, + { + "ce_ib": 3.3857407569885254, + "ce_orig": 0.9892690181732178, + "epoch": 1.207060176863901, + "kl_loss": 0.03373190760612488, + "loss_ib": 0.0006758930976502597, + "step": 4198 + }, + { + "ce_ib": 4.323064804077148, + "ce_orig": 0.7833045125007629, + "epoch": 1.207060176863901, + "kl_loss": 0.052321627736091614, + "loss_ib": 0.0009555227006785572, + "step": 4198 + }, + { + "ce_ib": 4.200905799865723, + "ce_orig": 0.9083737134933472, + "epoch": 1.2073477604428786, + "kl_loss": 0.03544513136148453, + "loss_ib": 0.0007745418697595596, + "step": 4199 + }, + { + "ce_ib": 3.8516902923583984, + "ce_orig": 1.1282868385314941, + "epoch": 1.2073477604428786, + "kl_loss": 0.02842065319418907, + "loss_ib": 0.000669375469442457, + "step": 4199 + }, + { + "ce_ib": 3.3729822635650635, + "ce_orig": 0.5461291670799255, + "epoch": 1.2073477604428786, + "kl_loss": 0.06461376696825027, + "loss_ib": 0.0009834358934313059, + "step": 4199 + }, + { + "ce_ib": 3.1255316734313965, + "ce_orig": 0.7977296710014343, + "epoch": 1.2073477604428786, + "kl_loss": 0.05359618365764618, + "loss_ib": 0.0008485150174237788, + "step": 4199 + }, + { + "epoch": 1.2076353440218563, + "grad_norm": 0.1032269299030304, + "learning_rate": 3.39543682941516e-05, + "loss": 0.838, + "step": 4200 + }, + { + "ce_ib": 4.373239994049072, + "ce_orig": 1.129825234413147, + "epoch": 1.2076353440218563, + "kl_loss": 0.03693833947181702, + "loss_ib": 0.0008067073649726808, + "step": 4200 + }, + { + "ce_ib": 3.9344255924224854, + "ce_orig": 0.9400327801704407, + "epoch": 1.2076353440218563, + "kl_loss": 0.06248775124549866, + "loss_ib": 0.0010183199774473906, + "step": 4200 + }, + { + "ce_ib": 3.2274956703186035, + "ce_orig": 0.8493333458900452, + "epoch": 1.2076353440218563, + "kl_loss": 0.049867838621139526, + "loss_ib": 0.0008214279660023749, + "step": 4200 + }, + { + "ce_ib": 4.391563415527344, + "ce_orig": 1.293945074081421, + "epoch": 1.2076353440218563, + "kl_loss": 0.16236832737922668, + "loss_ib": 0.0020628394559025764, + "step": 4200 + }, + { + "ce_ib": 3.4632229804992676, + "ce_orig": 1.0207462310791016, + "epoch": 1.207922927600834, + "kl_loss": 0.05049503594636917, + "loss_ib": 0.0008512726053595543, + "step": 4201 + }, + { + "ce_ib": 4.457083225250244, + "ce_orig": 1.1734365224838257, + "epoch": 1.207922927600834, + "kl_loss": 0.05300841107964516, + "loss_ib": 0.0009757924126461148, + "step": 4201 + }, + { + "ce_ib": 3.8528199195861816, + "ce_orig": 1.0024460554122925, + "epoch": 1.207922927600834, + "kl_loss": 0.057954341173172, + "loss_ib": 0.0009648253326304257, + "step": 4201 + }, + { + "ce_ib": 4.611259937286377, + "ce_orig": 1.2780452966690063, + "epoch": 1.207922927600834, + "kl_loss": 0.0337696336209774, + "loss_ib": 0.0007988223223946989, + "step": 4201 + }, + { + "ce_ib": 3.7429583072662354, + "ce_orig": 0.9002722501754761, + "epoch": 1.2082105111798116, + "kl_loss": 0.037494778633117676, + "loss_ib": 0.0007492436561733484, + "step": 4202 + }, + { + "ce_ib": 3.272670030593872, + "ce_orig": 0.6929343938827515, + "epoch": 1.2082105111798116, + "kl_loss": 0.054494068026542664, + "loss_ib": 0.0008722076890990138, + "step": 4202 + }, + { + "ce_ib": 2.428508758544922, + "ce_orig": 0.600389838218689, + "epoch": 1.2082105111798116, + "kl_loss": 0.03664248436689377, + "loss_ib": 0.0006092757103033364, + "step": 4202 + }, + { + "ce_ib": 3.680943250656128, + "ce_orig": 0.938934862613678, + "epoch": 1.2082105111798116, + "kl_loss": 0.03461025282740593, + "loss_ib": 0.0007141968235373497, + "step": 4202 + }, + { + "ce_ib": 2.032534599304199, + "ce_orig": 0.3163171410560608, + "epoch": 1.2084980947587893, + "kl_loss": 0.034556277096271515, + "loss_ib": 0.0005488162278197706, + "step": 4203 + }, + { + "ce_ib": 4.5867838859558105, + "ce_orig": 0.7564094066619873, + "epoch": 1.2084980947587893, + "kl_loss": 0.04535536468029022, + "loss_ib": 0.0009122320334427059, + "step": 4203 + }, + { + "ce_ib": 3.410460948944092, + "ce_orig": 1.1418874263763428, + "epoch": 1.2084980947587893, + "kl_loss": 0.024869445711374283, + "loss_ib": 0.0005897405208088458, + "step": 4203 + }, + { + "ce_ib": 5.315810203552246, + "ce_orig": 1.1942623853683472, + "epoch": 1.2084980947587893, + "kl_loss": 0.06448822468519211, + "loss_ib": 0.0011764632072299719, + "step": 4203 + }, + { + "ce_ib": 3.3507513999938965, + "ce_orig": 1.0827741622924805, + "epoch": 1.2087856783377668, + "kl_loss": 0.03179614245891571, + "loss_ib": 0.0006530365208163857, + "step": 4204 + }, + { + "ce_ib": 4.6443586349487305, + "ce_orig": 1.220612645149231, + "epoch": 1.2087856783377668, + "kl_loss": 0.056017644703388214, + "loss_ib": 0.0010246123420074582, + "step": 4204 + }, + { + "ce_ib": 2.168843984603882, + "ce_orig": 0.3220323920249939, + "epoch": 1.2087856783377668, + "kl_loss": 0.08875226974487305, + "loss_ib": 0.001104407012462616, + "step": 4204 + }, + { + "ce_ib": 4.141280651092529, + "ce_orig": 0.804042398929596, + "epoch": 1.2087856783377668, + "kl_loss": 0.06426739692687988, + "loss_ib": 0.0010568019933998585, + "step": 4204 + }, + { + "epoch": 1.2090732619167446, + "grad_norm": 0.11662895232439041, + "learning_rate": 3.391812775004764e-05, + "loss": 0.8745, + "step": 4205 + }, + { + "ce_ib": 3.089460849761963, + "ce_orig": 0.7295165657997131, + "epoch": 1.2090732619167446, + "kl_loss": 0.10961385071277618, + "loss_ib": 0.0014050845056772232, + "step": 4205 + }, + { + "ce_ib": 4.100132465362549, + "ce_orig": 1.2106691598892212, + "epoch": 1.2090732619167446, + "kl_loss": 0.04079413414001465, + "loss_ib": 0.0008179545402526855, + "step": 4205 + }, + { + "ce_ib": 3.207015037536621, + "ce_orig": 0.7946054339408875, + "epoch": 1.2090732619167446, + "kl_loss": 0.04114494100213051, + "loss_ib": 0.0007321508601307869, + "step": 4205 + }, + { + "ce_ib": 3.6109180450439453, + "ce_orig": 0.8056464791297913, + "epoch": 1.2090732619167446, + "kl_loss": 0.05634237080812454, + "loss_ib": 0.0009245154797099531, + "step": 4205 + }, + { + "ce_ib": 3.4535655975341797, + "ce_orig": 1.086488962173462, + "epoch": 1.209360845495722, + "kl_loss": 0.05027608573436737, + "loss_ib": 0.0008481174008920789, + "step": 4206 + }, + { + "ce_ib": 4.606317520141602, + "ce_orig": 1.1801327466964722, + "epoch": 1.209360845495722, + "kl_loss": 0.04010777175426483, + "loss_ib": 0.0008617094135843217, + "step": 4206 + }, + { + "ce_ib": 5.786502838134766, + "ce_orig": 1.7034682035446167, + "epoch": 1.209360845495722, + "kl_loss": 0.10556667298078537, + "loss_ib": 0.001634316984564066, + "step": 4206 + }, + { + "ce_ib": 4.510365962982178, + "ce_orig": 1.1995363235473633, + "epoch": 1.209360845495722, + "kl_loss": 0.040302760899066925, + "loss_ib": 0.0008540641865693033, + "step": 4206 + }, + { + "ce_ib": 4.894195556640625, + "ce_orig": 1.267647624015808, + "epoch": 1.2096484290746998, + "kl_loss": 0.04600388556718826, + "loss_ib": 0.0009494583937339485, + "step": 4207 + }, + { + "ce_ib": 4.156529426574707, + "ce_orig": 1.3573135137557983, + "epoch": 1.2096484290746998, + "kl_loss": 0.04171886295080185, + "loss_ib": 0.0008328414987772703, + "step": 4207 + }, + { + "ce_ib": 3.245516777038574, + "ce_orig": 0.8805092573165894, + "epoch": 1.2096484290746998, + "kl_loss": 0.05455107241868973, + "loss_ib": 0.000870062387548387, + "step": 4207 + }, + { + "ce_ib": 2.6564648151397705, + "ce_orig": 0.5449243783950806, + "epoch": 1.2096484290746998, + "kl_loss": 0.06135243922472, + "loss_ib": 0.0008791708969511092, + "step": 4207 + }, + { + "ce_ib": 2.8722589015960693, + "ce_orig": 0.691188395023346, + "epoch": 1.2099360126536776, + "kl_loss": 0.038359709084033966, + "loss_ib": 0.0006708229775540531, + "step": 4208 + }, + { + "ce_ib": 2.5917809009552, + "ce_orig": 0.6322711706161499, + "epoch": 1.2099360126536776, + "kl_loss": 0.03098749928176403, + "loss_ib": 0.0005690530524589121, + "step": 4208 + }, + { + "ce_ib": 2.286721706390381, + "ce_orig": 0.8014116287231445, + "epoch": 1.2099360126536776, + "kl_loss": 0.03957189619541168, + "loss_ib": 0.0006243911338970065, + "step": 4208 + }, + { + "ce_ib": 2.7417619228363037, + "ce_orig": 0.6941072940826416, + "epoch": 1.2099360126536776, + "kl_loss": 0.06590938568115234, + "loss_ib": 0.0009332699701189995, + "step": 4208 + }, + { + "ce_ib": 3.083037853240967, + "ce_orig": 0.729606032371521, + "epoch": 1.210223596232655, + "kl_loss": 0.04707416519522667, + "loss_ib": 0.0007790454546920955, + "step": 4209 + }, + { + "ce_ib": 2.5511837005615234, + "ce_orig": 0.5402900576591492, + "epoch": 1.210223596232655, + "kl_loss": 0.030784673988819122, + "loss_ib": 0.0005629651132039726, + "step": 4209 + }, + { + "ce_ib": 4.576441287994385, + "ce_orig": 1.2322479486465454, + "epoch": 1.210223596232655, + "kl_loss": 0.05118342861533165, + "loss_ib": 0.0009694783948361874, + "step": 4209 + }, + { + "ce_ib": 3.584207057952881, + "ce_orig": 1.001083254814148, + "epoch": 1.210223596232655, + "kl_loss": 0.06215018779039383, + "loss_ib": 0.0009799225954338908, + "step": 4209 + }, + { + "epoch": 1.2105111798116328, + "grad_norm": 0.12391572445631027, + "learning_rate": 3.388186572010399e-05, + "loss": 0.8978, + "step": 4210 + }, + { + "ce_ib": 2.8710520267486572, + "ce_orig": 0.6579993367195129, + "epoch": 1.2105111798116328, + "kl_loss": 0.03737204149365425, + "loss_ib": 0.0006608255789615214, + "step": 4210 + }, + { + "ce_ib": 3.586143732070923, + "ce_orig": 0.7182517647743225, + "epoch": 1.2105111798116328, + "kl_loss": 0.04838980734348297, + "loss_ib": 0.0008425123523920774, + "step": 4210 + }, + { + "ce_ib": 3.2388505935668945, + "ce_orig": 0.7831968069076538, + "epoch": 1.2105111798116328, + "kl_loss": 0.028383128345012665, + "loss_ib": 0.0006077163270674646, + "step": 4210 + }, + { + "ce_ib": 3.396298408508301, + "ce_orig": 0.8928624987602234, + "epoch": 1.2105111798116328, + "kl_loss": 0.0388512909412384, + "loss_ib": 0.0007281426806002855, + "step": 4210 + }, + { + "ce_ib": 2.4917304515838623, + "ce_orig": 0.6301350593566895, + "epoch": 1.2107987633906103, + "kl_loss": 0.022753790020942688, + "loss_ib": 0.00047671093489043415, + "step": 4211 + }, + { + "ce_ib": 4.219542503356934, + "ce_orig": 1.2718497514724731, + "epoch": 1.2107987633906103, + "kl_loss": 0.03967893868684769, + "loss_ib": 0.0008187436033040285, + "step": 4211 + }, + { + "ce_ib": 4.447873115539551, + "ce_orig": 1.0810647010803223, + "epoch": 1.2107987633906103, + "kl_loss": 0.0478457510471344, + "loss_ib": 0.0009232448064722121, + "step": 4211 + }, + { + "ce_ib": 3.5430829524993896, + "ce_orig": 1.144195318222046, + "epoch": 1.2107987633906103, + "kl_loss": 0.028336452320218086, + "loss_ib": 0.0006376728415489197, + "step": 4211 + }, + { + "ce_ib": 4.30905294418335, + "ce_orig": 1.0515735149383545, + "epoch": 1.211086346969588, + "kl_loss": 0.04450896009802818, + "loss_ib": 0.0008759948541410267, + "step": 4212 + }, + { + "ce_ib": 2.3013861179351807, + "ce_orig": 0.4343148469924927, + "epoch": 1.211086346969588, + "kl_loss": 0.0444602333009243, + "loss_ib": 0.0006747409352101386, + "step": 4212 + }, + { + "ce_ib": 3.185903549194336, + "ce_orig": 0.843864381313324, + "epoch": 1.211086346969588, + "kl_loss": 0.02850458212196827, + "loss_ib": 0.0006036362028680742, + "step": 4212 + }, + { + "ce_ib": 2.5624947547912598, + "ce_orig": 0.7671908140182495, + "epoch": 1.211086346969588, + "kl_loss": 0.04106553643941879, + "loss_ib": 0.0006669047870673239, + "step": 4212 + }, + { + "ce_ib": 3.294809103012085, + "ce_orig": 0.42510321736335754, + "epoch": 1.2113739305485658, + "kl_loss": 0.08411664515733719, + "loss_ib": 0.0011706473305821419, + "step": 4213 + }, + { + "ce_ib": 4.518529891967773, + "ce_orig": 1.438026785850525, + "epoch": 1.2113739305485658, + "kl_loss": 0.04033467918634415, + "loss_ib": 0.0008551997598260641, + "step": 4213 + }, + { + "ce_ib": 3.784395933151245, + "ce_orig": 0.981817364692688, + "epoch": 1.2113739305485658, + "kl_loss": 0.043288592249155045, + "loss_ib": 0.0008113254443742335, + "step": 4213 + }, + { + "ce_ib": 4.062422275543213, + "ce_orig": 1.1396996974945068, + "epoch": 1.2113739305485658, + "kl_loss": 0.044708333909511566, + "loss_ib": 0.0008533255313523114, + "step": 4213 + }, + { + "ce_ib": 3.5597751140594482, + "ce_orig": 0.7762006521224976, + "epoch": 1.2116615141275433, + "kl_loss": 0.03507416322827339, + "loss_ib": 0.0007067191181704402, + "step": 4214 + }, + { + "ce_ib": 3.337362766265869, + "ce_orig": 0.9603628516197205, + "epoch": 1.2116615141275433, + "kl_loss": 0.043206587433815, + "loss_ib": 0.0007658021058887243, + "step": 4214 + }, + { + "ce_ib": 3.1759161949157715, + "ce_orig": 0.6245856881141663, + "epoch": 1.2116615141275433, + "kl_loss": 0.06248759478330612, + "loss_ib": 0.0009424675954505801, + "step": 4214 + }, + { + "ce_ib": 3.280315637588501, + "ce_orig": 1.0685420036315918, + "epoch": 1.2116615141275433, + "kl_loss": 0.03386319428682327, + "loss_ib": 0.0006666634581051767, + "step": 4214 + }, + { + "epoch": 1.211949097706521, + "grad_norm": 0.10169830173254013, + "learning_rate": 3.3845582291684304e-05, + "loss": 0.9053, + "step": 4215 + }, + { + "ce_ib": 3.267972469329834, + "ce_orig": 1.0853002071380615, + "epoch": 1.211949097706521, + "kl_loss": 0.05374406278133392, + "loss_ib": 0.0008642378379590809, + "step": 4215 + }, + { + "ce_ib": 2.324288845062256, + "ce_orig": 0.6235506534576416, + "epoch": 1.211949097706521, + "kl_loss": 0.05051583796739578, + "loss_ib": 0.0007375872228294611, + "step": 4215 + }, + { + "ce_ib": 2.350571393966675, + "ce_orig": 0.5435885190963745, + "epoch": 1.211949097706521, + "kl_loss": 0.045663848519325256, + "loss_ib": 0.0006916956626810133, + "step": 4215 + }, + { + "ce_ib": 2.3363494873046875, + "ce_orig": 0.6959490776062012, + "epoch": 1.211949097706521, + "kl_loss": 0.02697308361530304, + "loss_ib": 0.0005033657653257251, + "step": 4215 + }, + { + "ce_ib": 2.616027355194092, + "ce_orig": 0.7209497690200806, + "epoch": 1.2122366812854986, + "kl_loss": 0.04792040213942528, + "loss_ib": 0.0007408067467622459, + "step": 4216 + }, + { + "ce_ib": 3.800629138946533, + "ce_orig": 1.2919498682022095, + "epoch": 1.2122366812854986, + "kl_loss": 0.02851106971502304, + "loss_ib": 0.0006651735748164356, + "step": 4216 + }, + { + "ce_ib": 2.5089077949523926, + "ce_orig": 0.6000918745994568, + "epoch": 1.2122366812854986, + "kl_loss": 0.03075128234922886, + "loss_ib": 0.0005584035534411669, + "step": 4216 + }, + { + "ce_ib": 2.9040558338165283, + "ce_orig": 0.7805173993110657, + "epoch": 1.2122366812854986, + "kl_loss": 0.04578167200088501, + "loss_ib": 0.0007482222863472998, + "step": 4216 + }, + { + "ce_ib": 1.8541213274002075, + "ce_orig": 0.6391454935073853, + "epoch": 1.2125242648644763, + "kl_loss": 0.020246855914592743, + "loss_ib": 0.00038788068923167884, + "step": 4217 + }, + { + "ce_ib": 3.2212133407592773, + "ce_orig": 1.094280481338501, + "epoch": 1.2125242648644763, + "kl_loss": 0.027059504762291908, + "loss_ib": 0.0005927163874730468, + "step": 4217 + }, + { + "ce_ib": 3.9792795181274414, + "ce_orig": 0.9582697153091431, + "epoch": 1.2125242648644763, + "kl_loss": 0.046267252415418625, + "loss_ib": 0.0008606004412285984, + "step": 4217 + }, + { + "ce_ib": 3.499591588973999, + "ce_orig": 0.7100967168807983, + "epoch": 1.2125242648644763, + "kl_loss": 0.033643610775470734, + "loss_ib": 0.0006863952148705721, + "step": 4217 + }, + { + "ce_ib": 3.671196699142456, + "ce_orig": 1.014523983001709, + "epoch": 1.2128118484434538, + "kl_loss": 0.057314999401569366, + "loss_ib": 0.0009402696741744876, + "step": 4218 + }, + { + "ce_ib": 2.0004630088806152, + "ce_orig": 0.5675740242004395, + "epoch": 1.2128118484434538, + "kl_loss": 0.034267961978912354, + "loss_ib": 0.0005427259020507336, + "step": 4218 + }, + { + "ce_ib": 2.2037291526794434, + "ce_orig": 0.4544850289821625, + "epoch": 1.2128118484434538, + "kl_loss": 0.06776081770658493, + "loss_ib": 0.0008979810518212616, + "step": 4218 + }, + { + "ce_ib": 1.055260181427002, + "ce_orig": 0.17910705506801605, + "epoch": 1.2128118484434538, + "kl_loss": 0.06042905151844025, + "loss_ib": 0.0007098165224306285, + "step": 4218 + }, + { + "ce_ib": 2.7946925163269043, + "ce_orig": 0.7546842694282532, + "epoch": 1.2130994320224315, + "kl_loss": 0.031071597710251808, + "loss_ib": 0.0005901852273382246, + "step": 4219 + }, + { + "ce_ib": 4.569535255432129, + "ce_orig": 1.4014281034469604, + "epoch": 1.2130994320224315, + "kl_loss": 0.029677588492631912, + "loss_ib": 0.0007537294295616448, + "step": 4219 + }, + { + "ce_ib": 3.1690151691436768, + "ce_orig": 0.4632832407951355, + "epoch": 1.2130994320224315, + "kl_loss": 0.08557590842247009, + "loss_ib": 0.0011726606171578169, + "step": 4219 + }, + { + "ce_ib": 4.274866104125977, + "ce_orig": 0.5198246240615845, + "epoch": 1.2130994320224315, + "kl_loss": 0.08324268460273743, + "loss_ib": 0.0012599134352058172, + "step": 4219 + }, + { + "epoch": 1.213387015601409, + "grad_norm": 0.12960143387317657, + "learning_rate": 3.380927755220376e-05, + "loss": 0.8477, + "step": 4220 + }, + { + "ce_ib": 3.426727533340454, + "ce_orig": 1.0091909170150757, + "epoch": 1.213387015601409, + "kl_loss": 0.03490729629993439, + "loss_ib": 0.0006917457212693989, + "step": 4220 + }, + { + "ce_ib": 2.759514570236206, + "ce_orig": 0.45766681432724, + "epoch": 1.213387015601409, + "kl_loss": 0.02469039335846901, + "loss_ib": 0.0005228553782217205, + "step": 4220 + }, + { + "ce_ib": 1.6183582544326782, + "ce_orig": 0.32612115144729614, + "epoch": 1.213387015601409, + "kl_loss": 0.03658293932676315, + "loss_ib": 0.000527665251865983, + "step": 4220 + }, + { + "ce_ib": 3.900766372680664, + "ce_orig": 1.0350719690322876, + "epoch": 1.213387015601409, + "kl_loss": 0.044140104204416275, + "loss_ib": 0.0008314776350744069, + "step": 4220 + }, + { + "ce_ib": 2.3913350105285645, + "ce_orig": 0.41675999760627747, + "epoch": 1.2136745991803868, + "kl_loss": 0.017009666189551353, + "loss_ib": 0.0004092301242053509, + "step": 4221 + }, + { + "ce_ib": 2.464491128921509, + "ce_orig": 0.5218658447265625, + "epoch": 1.2136745991803868, + "kl_loss": 0.03571164608001709, + "loss_ib": 0.0006035655387677252, + "step": 4221 + }, + { + "ce_ib": 2.386260509490967, + "ce_orig": 0.5828327536582947, + "epoch": 1.2136745991803868, + "kl_loss": 0.0394417978823185, + "loss_ib": 0.000633043993730098, + "step": 4221 + }, + { + "ce_ib": 2.8151981830596924, + "ce_orig": 0.8977371454238892, + "epoch": 1.2136745991803868, + "kl_loss": 0.05115862935781479, + "loss_ib": 0.0007931060972623527, + "step": 4221 + }, + { + "ce_ib": 3.7315897941589355, + "ce_orig": 0.9277468323707581, + "epoch": 1.2139621827593645, + "kl_loss": 0.05714372918009758, + "loss_ib": 0.0009445961914025247, + "step": 4222 + }, + { + "ce_ib": 3.7790284156799316, + "ce_orig": 1.1467578411102295, + "epoch": 1.2139621827593645, + "kl_loss": 0.0332547202706337, + "loss_ib": 0.0007104499964043498, + "step": 4222 + }, + { + "ce_ib": 4.025632858276367, + "ce_orig": 0.7039053440093994, + "epoch": 1.2139621827593645, + "kl_loss": 0.04840036854147911, + "loss_ib": 0.0008865669369697571, + "step": 4222 + }, + { + "ce_ib": 2.381626844406128, + "ce_orig": 0.5551656484603882, + "epoch": 1.2139621827593645, + "kl_loss": 0.03926968574523926, + "loss_ib": 0.0006308595184236765, + "step": 4222 + }, + { + "ce_ib": 3.853815793991089, + "ce_orig": 1.1801894903182983, + "epoch": 1.214249766338342, + "kl_loss": 0.03727060928940773, + "loss_ib": 0.0007580876117572188, + "step": 4223 + }, + { + "ce_ib": 4.284759998321533, + "ce_orig": 1.11699378490448, + "epoch": 1.214249766338342, + "kl_loss": 0.03180771321058273, + "loss_ib": 0.0007465530652552843, + "step": 4223 + }, + { + "ce_ib": 2.9042396545410156, + "ce_orig": 0.7500597834587097, + "epoch": 1.214249766338342, + "kl_loss": 0.041094742715358734, + "loss_ib": 0.0007013714057393372, + "step": 4223 + }, + { + "ce_ib": 4.868821620941162, + "ce_orig": 1.4201326370239258, + "epoch": 1.214249766338342, + "kl_loss": 0.050351016223430634, + "loss_ib": 0.0009903922909870744, + "step": 4223 + }, + { + "ce_ib": 3.215100049972534, + "ce_orig": 0.7978649735450745, + "epoch": 1.2145373499173198, + "kl_loss": 0.0326559916138649, + "loss_ib": 0.0006480698939412832, + "step": 4224 + }, + { + "ce_ib": 3.7378082275390625, + "ce_orig": 0.7832819223403931, + "epoch": 1.2145373499173198, + "kl_loss": 0.039621420204639435, + "loss_ib": 0.0007699949783273041, + "step": 4224 + }, + { + "ce_ib": 3.391895055770874, + "ce_orig": 1.0244886875152588, + "epoch": 1.2145373499173198, + "kl_loss": 0.022896884009242058, + "loss_ib": 0.0005681583425030112, + "step": 4224 + }, + { + "ce_ib": 3.6783642768859863, + "ce_orig": 0.6609358191490173, + "epoch": 1.2145373499173198, + "kl_loss": 0.02500593662261963, + "loss_ib": 0.0006178957992233336, + "step": 4224 + }, + { + "epoch": 1.2148249334962973, + "grad_norm": 0.11828821897506714, + "learning_rate": 3.377295158912893e-05, + "loss": 0.7966, + "step": 4225 + }, + { + "ce_ib": 3.630596876144409, + "ce_orig": 0.5709300637245178, + "epoch": 1.2148249334962973, + "kl_loss": 0.0612824447453022, + "loss_ib": 0.0009758840897120535, + "step": 4225 + }, + { + "ce_ib": 2.1575536727905273, + "ce_orig": 0.6927780508995056, + "epoch": 1.2148249334962973, + "kl_loss": 0.021904461085796356, + "loss_ib": 0.0004347999638412148, + "step": 4225 + }, + { + "ce_ib": 2.8259799480438232, + "ce_orig": 0.7526105046272278, + "epoch": 1.2148249334962973, + "kl_loss": 0.03311288356781006, + "loss_ib": 0.0006137267919257283, + "step": 4225 + }, + { + "ce_ib": 3.8676838874816895, + "ce_orig": 0.8858317136764526, + "epoch": 1.2148249334962973, + "kl_loss": 0.04811955243349075, + "loss_ib": 0.0008679638849571347, + "step": 4225 + }, + { + "ce_ib": 2.771491765975952, + "ce_orig": 0.5734927654266357, + "epoch": 1.215112517075275, + "kl_loss": 0.05781658738851547, + "loss_ib": 0.0008553150109946728, + "step": 4226 + }, + { + "ce_ib": 2.6505465507507324, + "ce_orig": 0.6060789823532104, + "epoch": 1.215112517075275, + "kl_loss": 0.04084569960832596, + "loss_ib": 0.0006735115894116461, + "step": 4226 + }, + { + "ce_ib": 1.962286114692688, + "ce_orig": 0.572458028793335, + "epoch": 1.215112517075275, + "kl_loss": 0.0294831283390522, + "loss_ib": 0.0004910598509013653, + "step": 4226 + }, + { + "ce_ib": 2.8303089141845703, + "ce_orig": 0.9045702219009399, + "epoch": 1.215112517075275, + "kl_loss": 0.031307511031627655, + "loss_ib": 0.0005961059359833598, + "step": 4226 + }, + { + "ce_ib": 2.93074369430542, + "ce_orig": 0.6136869192123413, + "epoch": 1.2154001006542527, + "kl_loss": 0.05528146028518677, + "loss_ib": 0.0008458889788016677, + "step": 4227 + }, + { + "ce_ib": 3.9042606353759766, + "ce_orig": 1.0488823652267456, + "epoch": 1.2154001006542527, + "kl_loss": 0.052685149013996124, + "loss_ib": 0.0009172775316983461, + "step": 4227 + }, + { + "ce_ib": 4.058575630187988, + "ce_orig": 1.3901187181472778, + "epoch": 1.2154001006542527, + "kl_loss": 0.033771105110645294, + "loss_ib": 0.0007435685838572681, + "step": 4227 + }, + { + "ce_ib": 2.2559313774108887, + "ce_orig": 0.4364849030971527, + "epoch": 1.2154001006542527, + "kl_loss": 0.03766725957393646, + "loss_ib": 0.0006022657034918666, + "step": 4227 + }, + { + "ce_ib": 3.4324893951416016, + "ce_orig": 0.7780072093009949, + "epoch": 1.2156876842332303, + "kl_loss": 0.06640021502971649, + "loss_ib": 0.0010072510922327638, + "step": 4228 + }, + { + "ce_ib": 3.375311851501465, + "ce_orig": 1.275536060333252, + "epoch": 1.2156876842332303, + "kl_loss": 0.030963661149144173, + "loss_ib": 0.0006471677916124463, + "step": 4228 + }, + { + "ce_ib": 4.991764068603516, + "ce_orig": 1.2620234489440918, + "epoch": 1.2156876842332303, + "kl_loss": 0.04597385972738266, + "loss_ib": 0.0009589149849489331, + "step": 4228 + }, + { + "ce_ib": 4.210427284240723, + "ce_orig": 0.8447989225387573, + "epoch": 1.2156876842332303, + "kl_loss": 0.05602290481328964, + "loss_ib": 0.0009812717325985432, + "step": 4228 + }, + { + "ce_ib": 2.706169843673706, + "ce_orig": 0.6987024545669556, + "epoch": 1.215975267812208, + "kl_loss": 0.025353580713272095, + "loss_ib": 0.0005241527687758207, + "step": 4229 + }, + { + "ce_ib": 1.4796243906021118, + "ce_orig": 0.3949064612388611, + "epoch": 1.215975267812208, + "kl_loss": 0.01829027198255062, + "loss_ib": 0.0003308651503175497, + "step": 4229 + }, + { + "ce_ib": 3.811084032058716, + "ce_orig": 0.5854145288467407, + "epoch": 1.215975267812208, + "kl_loss": 0.04483675956726074, + "loss_ib": 0.0008294759318232536, + "step": 4229 + }, + { + "ce_ib": 3.325000286102295, + "ce_orig": 0.8964158296585083, + "epoch": 1.215975267812208, + "kl_loss": 0.04812426120042801, + "loss_ib": 0.0008137425757013261, + "step": 4229 + }, + { + "epoch": 1.2162628513911855, + "grad_norm": 0.11056017875671387, + "learning_rate": 3.3736604489977466e-05, + "loss": 0.8165, + "step": 4230 + }, + { + "ce_ib": 3.8015213012695312, + "ce_orig": 0.9458030462265015, + "epoch": 1.2162628513911855, + "kl_loss": 0.047411128878593445, + "loss_ib": 0.0008542634313926101, + "step": 4230 + }, + { + "ce_ib": 3.4917314052581787, + "ce_orig": 0.9260947108268738, + "epoch": 1.2162628513911855, + "kl_loss": 0.04987231642007828, + "loss_ib": 0.0008478963281959295, + "step": 4230 + }, + { + "ce_ib": 2.6625969409942627, + "ce_orig": 0.7922374606132507, + "epoch": 1.2162628513911855, + "kl_loss": 0.04703003540635109, + "loss_ib": 0.0007365600322373211, + "step": 4230 + }, + { + "ce_ib": 2.9193077087402344, + "ce_orig": 0.8787528872489929, + "epoch": 1.2162628513911855, + "kl_loss": 0.03323129191994667, + "loss_ib": 0.0006242436938919127, + "step": 4230 + }, + { + "ce_ib": 2.445966958999634, + "ce_orig": 0.6536558270454407, + "epoch": 1.2165504349701632, + "kl_loss": 0.025645896792411804, + "loss_ib": 0.0005010556196793914, + "step": 4231 + }, + { + "ce_ib": 2.515275716781616, + "ce_orig": 0.4875490367412567, + "epoch": 1.2165504349701632, + "kl_loss": 0.05358131602406502, + "loss_ib": 0.0007873406866565347, + "step": 4231 + }, + { + "ce_ib": 2.7981648445129395, + "ce_orig": 0.8494081497192383, + "epoch": 1.2165504349701632, + "kl_loss": 0.03070051036775112, + "loss_ib": 0.0005868215812370181, + "step": 4231 + }, + { + "ce_ib": 4.07959508895874, + "ce_orig": 1.4021393060684204, + "epoch": 1.2165504349701632, + "kl_loss": 0.04222207888960838, + "loss_ib": 0.0008301802445203066, + "step": 4231 + }, + { + "ce_ib": 3.4053378105163574, + "ce_orig": 0.6936933398246765, + "epoch": 1.2168380185491408, + "kl_loss": 0.05048777908086777, + "loss_ib": 0.0008454115595668554, + "step": 4232 + }, + { + "ce_ib": 2.5562453269958496, + "ce_orig": 0.7115767002105713, + "epoch": 1.2168380185491408, + "kl_loss": 0.03605331480503082, + "loss_ib": 0.0006161576602607965, + "step": 4232 + }, + { + "ce_ib": 3.0305864810943604, + "ce_orig": 0.6284021735191345, + "epoch": 1.2168380185491408, + "kl_loss": 0.07448329776525497, + "loss_ib": 0.0010478915646672249, + "step": 4232 + }, + { + "ce_ib": 3.362497091293335, + "ce_orig": 0.6924746036529541, + "epoch": 1.2168380185491408, + "kl_loss": 0.04933676868677139, + "loss_ib": 0.0008296173764392734, + "step": 4232 + }, + { + "ce_ib": 2.233083963394165, + "ce_orig": 0.6013701558113098, + "epoch": 1.2171256021281185, + "kl_loss": 0.03309434652328491, + "loss_ib": 0.0005542518338188529, + "step": 4233 + }, + { + "ce_ib": 4.71088981628418, + "ce_orig": 0.8630574941635132, + "epoch": 1.2171256021281185, + "kl_loss": 0.07585444301366806, + "loss_ib": 0.0012296333443373442, + "step": 4233 + }, + { + "ce_ib": 3.1249759197235107, + "ce_orig": 0.7656455039978027, + "epoch": 1.2171256021281185, + "kl_loss": 0.04713742062449455, + "loss_ib": 0.0007838717428967357, + "step": 4233 + }, + { + "ce_ib": 3.895111083984375, + "ce_orig": 1.108312964439392, + "epoch": 1.2171256021281185, + "kl_loss": 0.05804844945669174, + "loss_ib": 0.0009699955116957426, + "step": 4233 + }, + { + "ce_ib": 3.4146907329559326, + "ce_orig": 0.6050317883491516, + "epoch": 1.217413185707096, + "kl_loss": 0.05087835341691971, + "loss_ib": 0.0008502525743097067, + "step": 4234 + }, + { + "ce_ib": 4.781938552856445, + "ce_orig": 1.2136327028274536, + "epoch": 1.217413185707096, + "kl_loss": 0.03573131188750267, + "loss_ib": 0.0008355068857781589, + "step": 4234 + }, + { + "ce_ib": 2.9861230850219727, + "ce_orig": 0.653721034526825, + "epoch": 1.217413185707096, + "kl_loss": 0.02714536339044571, + "loss_ib": 0.0005700659239664674, + "step": 4234 + }, + { + "ce_ib": 3.829692840576172, + "ce_orig": 0.9556703567504883, + "epoch": 1.217413185707096, + "kl_loss": 0.02458556555211544, + "loss_ib": 0.0006288248696364462, + "step": 4234 + }, + { + "epoch": 1.2177007692860737, + "grad_norm": 0.14123816788196564, + "learning_rate": 3.3700236342317976e-05, + "loss": 0.778, + "step": 4235 + }, + { + "ce_ib": 4.062335014343262, + "ce_orig": 1.1346594095230103, + "epoch": 1.2177007692860737, + "kl_loss": 0.03990095108747482, + "loss_ib": 0.0008052429184317589, + "step": 4235 + }, + { + "ce_ib": 2.4782962799072266, + "ce_orig": 0.6035976409912109, + "epoch": 1.2177007692860737, + "kl_loss": 0.04015994444489479, + "loss_ib": 0.0006494291010312736, + "step": 4235 + }, + { + "ce_ib": 2.533864736557007, + "ce_orig": 0.4901014566421509, + "epoch": 1.2177007692860737, + "kl_loss": 0.03280455619096756, + "loss_ib": 0.0005814320174977183, + "step": 4235 + }, + { + "ce_ib": 2.4767518043518066, + "ce_orig": 0.7954332828521729, + "epoch": 1.2177007692860737, + "kl_loss": 0.03629346564412117, + "loss_ib": 0.0006106098298914731, + "step": 4235 + }, + { + "ce_ib": 2.476815700531006, + "ce_orig": 0.6894539594650269, + "epoch": 1.2179883528650515, + "kl_loss": 0.03730710223317146, + "loss_ib": 0.0006207525730133057, + "step": 4236 + }, + { + "ce_ib": 4.276527404785156, + "ce_orig": 1.222527027130127, + "epoch": 1.2179883528650515, + "kl_loss": 0.03170948475599289, + "loss_ib": 0.0007447475800290704, + "step": 4236 + }, + { + "ce_ib": 1.3058733940124512, + "ce_orig": 0.4202273488044739, + "epoch": 1.2179883528650515, + "kl_loss": 0.08253706991672516, + "loss_ib": 0.000955958035774529, + "step": 4236 + }, + { + "ce_ib": 5.078068733215332, + "ce_orig": 1.1001683473587036, + "epoch": 1.2179883528650515, + "kl_loss": 0.0553010031580925, + "loss_ib": 0.0010608169250190258, + "step": 4236 + }, + { + "ce_ib": 3.258561849594116, + "ce_orig": 0.9316918253898621, + "epoch": 1.218275936444029, + "kl_loss": 0.025957010686397552, + "loss_ib": 0.0005854262853972614, + "step": 4237 + }, + { + "ce_ib": 2.502525568008423, + "ce_orig": 0.6619834899902344, + "epoch": 1.218275936444029, + "kl_loss": 0.04495306685566902, + "loss_ib": 0.0006997832679189742, + "step": 4237 + }, + { + "ce_ib": 2.5419185161590576, + "ce_orig": 0.5070189833641052, + "epoch": 1.218275936444029, + "kl_loss": 0.040740884840488434, + "loss_ib": 0.0006616006721742451, + "step": 4237 + }, + { + "ce_ib": 2.7888543605804443, + "ce_orig": 0.9153801798820496, + "epoch": 1.218275936444029, + "kl_loss": 0.02426215447485447, + "loss_ib": 0.0005215069395489991, + "step": 4237 + }, + { + "ce_ib": 4.43393087387085, + "ce_orig": 1.0662097930908203, + "epoch": 1.2185635200230067, + "kl_loss": 0.03827783465385437, + "loss_ib": 0.0008261713664978743, + "step": 4238 + }, + { + "ce_ib": 3.1627023220062256, + "ce_orig": 0.7404085397720337, + "epoch": 1.2185635200230067, + "kl_loss": 0.03437439352273941, + "loss_ib": 0.0006600141641683877, + "step": 4238 + }, + { + "ce_ib": 3.779458522796631, + "ce_orig": 1.0114219188690186, + "epoch": 1.2185635200230067, + "kl_loss": 0.039724551141262054, + "loss_ib": 0.0007751912926323712, + "step": 4238 + }, + { + "ce_ib": 5.445449352264404, + "ce_orig": 1.6521438360214233, + "epoch": 1.2185635200230067, + "kl_loss": 0.04980438947677612, + "loss_ib": 0.0010425887303426862, + "step": 4238 + }, + { + "ce_ib": 3.4478979110717773, + "ce_orig": 0.9017735719680786, + "epoch": 1.2188511036019842, + "kl_loss": 0.040167298167943954, + "loss_ib": 0.0007464627851732075, + "step": 4239 + }, + { + "ce_ib": 4.00905179977417, + "ce_orig": 1.1028366088867188, + "epoch": 1.2188511036019842, + "kl_loss": 0.04928645119071007, + "loss_ib": 0.0008937696693465114, + "step": 4239 + }, + { + "ce_ib": 2.8445141315460205, + "ce_orig": 0.5986679792404175, + "epoch": 1.2188511036019842, + "kl_loss": 0.05904509127140045, + "loss_ib": 0.000874902296345681, + "step": 4239 + }, + { + "ce_ib": 3.754446506500244, + "ce_orig": 1.1455804109573364, + "epoch": 1.2188511036019842, + "kl_loss": 0.04536370560526848, + "loss_ib": 0.0008290816913358867, + "step": 4239 + }, + { + "epoch": 1.219138687180962, + "grad_norm": 0.09871535003185272, + "learning_rate": 3.366384723376977e-05, + "loss": 0.7984, + "step": 4240 + }, + { + "ce_ib": 5.3441267013549805, + "ce_orig": 1.5914486646652222, + "epoch": 1.219138687180962, + "kl_loss": 0.05383718013763428, + "loss_ib": 0.0010727845365181565, + "step": 4240 + }, + { + "ce_ib": 2.779433012008667, + "ce_orig": 0.6494981050491333, + "epoch": 1.219138687180962, + "kl_loss": 0.03733234480023384, + "loss_ib": 0.0006512667168863118, + "step": 4240 + }, + { + "ce_ib": 2.8849446773529053, + "ce_orig": 0.6839683651924133, + "epoch": 1.219138687180962, + "kl_loss": 0.018786804750561714, + "loss_ib": 0.0004763624747283757, + "step": 4240 + }, + { + "ce_ib": 4.898046016693115, + "ce_orig": 1.2470669746398926, + "epoch": 1.219138687180962, + "kl_loss": 0.036970190703868866, + "loss_ib": 0.0008595064864493906, + "step": 4240 + }, + { + "ce_ib": 3.1981327533721924, + "ce_orig": 0.6378026008605957, + "epoch": 1.2194262707599397, + "kl_loss": 0.043512891978025436, + "loss_ib": 0.000754942127969116, + "step": 4241 + }, + { + "ce_ib": 1.5959128141403198, + "ce_orig": 0.4425169825553894, + "epoch": 1.2194262707599397, + "kl_loss": 0.09511049836874008, + "loss_ib": 0.0011106962338089943, + "step": 4241 + }, + { + "ce_ib": 2.4734644889831543, + "ce_orig": 0.496663361787796, + "epoch": 1.2194262707599397, + "kl_loss": 0.05000973492860794, + "loss_ib": 0.0007474438170902431, + "step": 4241 + }, + { + "ce_ib": 2.772768974304199, + "ce_orig": 0.5803791880607605, + "epoch": 1.2194262707599397, + "kl_loss": 0.034155428409576416, + "loss_ib": 0.0006188311381265521, + "step": 4241 + }, + { + "ce_ib": 3.0444889068603516, + "ce_orig": 0.774455189704895, + "epoch": 1.2197138543389172, + "kl_loss": 0.04642457515001297, + "loss_ib": 0.0007686946191824973, + "step": 4242 + }, + { + "ce_ib": 1.80690598487854, + "ce_orig": 0.44127264618873596, + "epoch": 1.2197138543389172, + "kl_loss": 0.05397156625986099, + "loss_ib": 0.0007204062421806157, + "step": 4242 + }, + { + "ce_ib": 3.4493212699890137, + "ce_orig": 1.080430030822754, + "epoch": 1.2197138543389172, + "kl_loss": 0.03586314246058464, + "loss_ib": 0.0007035635062493384, + "step": 4242 + }, + { + "ce_ib": 5.750259876251221, + "ce_orig": 1.6397647857666016, + "epoch": 1.2197138543389172, + "kl_loss": 0.04621683806180954, + "loss_ib": 0.0010371942771598697, + "step": 4242 + }, + { + "ce_ib": 4.031451225280762, + "ce_orig": 0.9360681772232056, + "epoch": 1.220001437917895, + "kl_loss": 0.037640418857336044, + "loss_ib": 0.0007795493002049625, + "step": 4243 + }, + { + "ce_ib": 2.823716163635254, + "ce_orig": 0.7729347348213196, + "epoch": 1.220001437917895, + "kl_loss": 0.04312942922115326, + "loss_ib": 0.000713665911462158, + "step": 4243 + }, + { + "ce_ib": 2.0688793659210205, + "ce_orig": 0.5379595756530762, + "epoch": 1.220001437917895, + "kl_loss": 0.06683623790740967, + "loss_ib": 0.0008752503199502826, + "step": 4243 + }, + { + "ce_ib": 3.940884590148926, + "ce_orig": 1.0253902673721313, + "epoch": 1.220001437917895, + "kl_loss": 0.04046066850423813, + "loss_ib": 0.000798695080447942, + "step": 4243 + }, + { + "ce_ib": 4.823436260223389, + "ce_orig": 1.3179317712783813, + "epoch": 1.2202890214968725, + "kl_loss": 0.03104260563850403, + "loss_ib": 0.000792769598774612, + "step": 4244 + }, + { + "ce_ib": 2.7366650104522705, + "ce_orig": 0.7013314366340637, + "epoch": 1.2202890214968725, + "kl_loss": 0.0344594269990921, + "loss_ib": 0.000618260761257261, + "step": 4244 + }, + { + "ce_ib": 3.1477596759796143, + "ce_orig": 0.9234795570373535, + "epoch": 1.2202890214968725, + "kl_loss": 0.04503478854894638, + "loss_ib": 0.0007651238120160997, + "step": 4244 + }, + { + "ce_ib": 2.89243221282959, + "ce_orig": 0.7943034768104553, + "epoch": 1.2202890214968725, + "kl_loss": 0.022685734555125237, + "loss_ib": 0.0005161005537956953, + "step": 4244 + }, + { + "epoch": 1.2205766050758502, + "grad_norm": 0.12315608561038971, + "learning_rate": 3.362743725200266e-05, + "loss": 0.8521, + "step": 4245 + }, + { + "ce_ib": 3.0976145267486572, + "ce_orig": 0.9031959772109985, + "epoch": 1.2205766050758502, + "kl_loss": 0.044340118765830994, + "loss_ib": 0.0007531626615673304, + "step": 4245 + }, + { + "ce_ib": 5.4785542488098145, + "ce_orig": 1.6779810190200806, + "epoch": 1.2205766050758502, + "kl_loss": 0.03922107815742493, + "loss_ib": 0.0009400661801919341, + "step": 4245 + }, + { + "ce_ib": 2.7944045066833496, + "ce_orig": 0.6975430250167847, + "epoch": 1.2205766050758502, + "kl_loss": 0.03615309298038483, + "loss_ib": 0.000640971353277564, + "step": 4245 + }, + { + "ce_ib": 3.0447230339050293, + "ce_orig": 0.7108815312385559, + "epoch": 1.2205766050758502, + "kl_loss": 0.04098965972661972, + "loss_ib": 0.0007143688853830099, + "step": 4245 + }, + { + "ce_ib": 3.0176329612731934, + "ce_orig": 0.8893405795097351, + "epoch": 1.220864188654828, + "kl_loss": 0.046136267483234406, + "loss_ib": 0.0007631259504705667, + "step": 4246 + }, + { + "ce_ib": 2.238184690475464, + "ce_orig": 0.5678752660751343, + "epoch": 1.220864188654828, + "kl_loss": 0.041115663945674896, + "loss_ib": 0.0006349751492962241, + "step": 4246 + }, + { + "ce_ib": 2.605300188064575, + "ce_orig": 0.45833519101142883, + "epoch": 1.220864188654828, + "kl_loss": 0.03954297676682472, + "loss_ib": 0.000655959767755121, + "step": 4246 + }, + { + "ce_ib": 3.6701176166534424, + "ce_orig": 0.7911137342453003, + "epoch": 1.220864188654828, + "kl_loss": 0.039531268179416656, + "loss_ib": 0.0007623244891874492, + "step": 4246 + }, + { + "ce_ib": 2.240961790084839, + "ce_orig": 0.48700520396232605, + "epoch": 1.2211517722338054, + "kl_loss": 0.042431771755218506, + "loss_ib": 0.0006484139012172818, + "step": 4247 + }, + { + "ce_ib": 3.802748203277588, + "ce_orig": 1.3147469758987427, + "epoch": 1.2211517722338054, + "kl_loss": 0.04066590219736099, + "loss_ib": 0.0007869338151067495, + "step": 4247 + }, + { + "ce_ib": 2.599489450454712, + "ce_orig": 0.7100297808647156, + "epoch": 1.2211517722338054, + "kl_loss": 0.03396891802549362, + "loss_ib": 0.0005996380932629108, + "step": 4247 + }, + { + "ce_ib": 1.0407015085220337, + "ce_orig": 0.17344389855861664, + "epoch": 1.2211517722338054, + "kl_loss": 0.09591358155012131, + "loss_ib": 0.0010632060002535582, + "step": 4247 + }, + { + "ce_ib": 4.449863910675049, + "ce_orig": 0.6048247814178467, + "epoch": 1.2214393558127832, + "kl_loss": 0.04485509544610977, + "loss_ib": 0.0008935373043641448, + "step": 4248 + }, + { + "ce_ib": 3.0603232383728027, + "ce_orig": 0.7586648464202881, + "epoch": 1.2214393558127832, + "kl_loss": 0.03554125875234604, + "loss_ib": 0.0006614448502659798, + "step": 4248 + }, + { + "ce_ib": 3.827822208404541, + "ce_orig": 0.989824652671814, + "epoch": 1.2214393558127832, + "kl_loss": 0.07891397178173065, + "loss_ib": 0.001171921961940825, + "step": 4248 + }, + { + "ce_ib": 2.1967828273773193, + "ce_orig": 0.6761397123336792, + "epoch": 1.2214393558127832, + "kl_loss": 0.037932414561510086, + "loss_ib": 0.0005990024073980749, + "step": 4248 + }, + { + "ce_ib": 3.9698164463043213, + "ce_orig": 0.8479146361351013, + "epoch": 1.2217269393917607, + "kl_loss": 0.047625891864299774, + "loss_ib": 0.0008732405258342624, + "step": 4249 + }, + { + "ce_ib": 3.13171124458313, + "ce_orig": 0.9460443258285522, + "epoch": 1.2217269393917607, + "kl_loss": 0.031801626086235046, + "loss_ib": 0.000631187402177602, + "step": 4249 + }, + { + "ce_ib": 4.1652703285217285, + "ce_orig": 1.345066785812378, + "epoch": 1.2217269393917607, + "kl_loss": 0.029237918555736542, + "loss_ib": 0.0007089062128216028, + "step": 4249 + }, + { + "ce_ib": 2.260453224182129, + "ce_orig": 0.803157389163971, + "epoch": 1.2217269393917607, + "kl_loss": 0.025669723749160767, + "loss_ib": 0.0004827425582334399, + "step": 4249 + }, + { + "epoch": 1.2220145229707384, + "grad_norm": 0.12441294640302658, + "learning_rate": 3.3591006484736736e-05, + "loss": 0.8312, + "step": 4250 + }, + { + "ce_ib": 3.4562857151031494, + "ce_orig": 0.8537368774414062, + "epoch": 1.2220145229707384, + "kl_loss": 0.04240633547306061, + "loss_ib": 0.0007696918910369277, + "step": 4250 + }, + { + "ce_ib": 2.1779541969299316, + "ce_orig": 0.5195799469947815, + "epoch": 1.2220145229707384, + "kl_loss": 0.024969447404146194, + "loss_ib": 0.00046748988097533584, + "step": 4250 + }, + { + "ce_ib": 3.799309730529785, + "ce_orig": 0.9896774888038635, + "epoch": 1.2220145229707384, + "kl_loss": 0.0534931905567646, + "loss_ib": 0.0009148629033006728, + "step": 4250 + }, + { + "ce_ib": 2.7760283946990967, + "ce_orig": 0.7291145324707031, + "epoch": 1.2220145229707384, + "kl_loss": 0.03101539984345436, + "loss_ib": 0.0005877568619325757, + "step": 4250 + }, + { + "ce_ib": 3.9427731037139893, + "ce_orig": 0.7972798943519592, + "epoch": 1.222302106549716, + "kl_loss": 0.07607428729534149, + "loss_ib": 0.0011550200870260596, + "step": 4251 + }, + { + "ce_ib": 1.5420677661895752, + "ce_orig": 0.36957401037216187, + "epoch": 1.222302106549716, + "kl_loss": 0.04737117514014244, + "loss_ib": 0.0006279185181483626, + "step": 4251 + }, + { + "ce_ib": 4.725181579589844, + "ce_orig": 1.3524017333984375, + "epoch": 1.222302106549716, + "kl_loss": 0.04539559781551361, + "loss_ib": 0.0009264741092920303, + "step": 4251 + }, + { + "ce_ib": 2.3017640113830566, + "ce_orig": 0.373839408159256, + "epoch": 1.222302106549716, + "kl_loss": 0.06392408162355423, + "loss_ib": 0.0008694171556271613, + "step": 4251 + }, + { + "ce_ib": 3.642026662826538, + "ce_orig": 0.9667917490005493, + "epoch": 1.2225896901286937, + "kl_loss": 0.049080513417720795, + "loss_ib": 0.0008550077327527106, + "step": 4252 + }, + { + "ce_ib": 4.106884956359863, + "ce_orig": 1.1424038410186768, + "epoch": 1.2225896901286937, + "kl_loss": 0.03586804121732712, + "loss_ib": 0.000769368838518858, + "step": 4252 + }, + { + "ce_ib": 2.198669910430908, + "ce_orig": 0.6546007394790649, + "epoch": 1.2225896901286937, + "kl_loss": 0.0403689369559288, + "loss_ib": 0.0006235563778318465, + "step": 4252 + }, + { + "ce_ib": 2.8874058723449707, + "ce_orig": 0.7304720878601074, + "epoch": 1.2225896901286937, + "kl_loss": 0.038619231432676315, + "loss_ib": 0.0006749329040758312, + "step": 4252 + }, + { + "ce_ib": 2.773599863052368, + "ce_orig": 0.6545144319534302, + "epoch": 1.2228772737076712, + "kl_loss": 0.031355325132608414, + "loss_ib": 0.0005909132305532694, + "step": 4253 + }, + { + "ce_ib": 2.441906690597534, + "ce_orig": 0.5847981572151184, + "epoch": 1.2228772737076712, + "kl_loss": 0.037449855357408524, + "loss_ib": 0.0006186891696415842, + "step": 4253 + }, + { + "ce_ib": 4.253178119659424, + "ce_orig": 1.0555833578109741, + "epoch": 1.2228772737076712, + "kl_loss": 0.042670752853155136, + "loss_ib": 0.0008520252886228263, + "step": 4253 + }, + { + "ce_ib": 4.453037261962891, + "ce_orig": 1.3546110391616821, + "epoch": 1.2228772737076712, + "kl_loss": 0.05013337358832359, + "loss_ib": 0.0009466374176554382, + "step": 4253 + }, + { + "ce_ib": 2.5908870697021484, + "ce_orig": 0.7889180779457092, + "epoch": 1.223164857286649, + "kl_loss": 0.035775184631347656, + "loss_ib": 0.0006168405525386333, + "step": 4254 + }, + { + "ce_ib": 2.999285936355591, + "ce_orig": 0.8542200922966003, + "epoch": 1.223164857286649, + "kl_loss": 0.02637207880616188, + "loss_ib": 0.0005636494024656713, + "step": 4254 + }, + { + "ce_ib": 4.156745433807373, + "ce_orig": 0.9978252649307251, + "epoch": 1.223164857286649, + "kl_loss": 0.0553768016397953, + "loss_ib": 0.0009694425389170647, + "step": 4254 + }, + { + "ce_ib": 2.38561749458313, + "ce_orig": 0.6165338754653931, + "epoch": 1.223164857286649, + "kl_loss": 0.042424678802490234, + "loss_ib": 0.0006628085393458605, + "step": 4254 + }, + { + "epoch": 1.2234524408656267, + "grad_norm": 0.11672254651784897, + "learning_rate": 3.355455501974218e-05, + "loss": 0.8193, + "step": 4255 + }, + { + "ce_ib": 4.460927486419678, + "ce_orig": 1.1624681949615479, + "epoch": 1.2234524408656267, + "kl_loss": 0.035118646919727325, + "loss_ib": 0.0007972791790962219, + "step": 4255 + }, + { + "ce_ib": 1.8519682884216309, + "ce_orig": 0.4509740173816681, + "epoch": 1.2234524408656267, + "kl_loss": 0.07931564748287201, + "loss_ib": 0.0009783533168956637, + "step": 4255 + }, + { + "ce_ib": 3.377659797668457, + "ce_orig": 1.0667610168457031, + "epoch": 1.2234524408656267, + "kl_loss": 0.02997635304927826, + "loss_ib": 0.0006375294760800898, + "step": 4255 + }, + { + "ce_ib": 4.162156105041504, + "ce_orig": 1.3370919227600098, + "epoch": 1.2234524408656267, + "kl_loss": 0.04693267494440079, + "loss_ib": 0.0008855423075146973, + "step": 4255 + }, + { + "ce_ib": 2.132129669189453, + "ce_orig": 0.6444478034973145, + "epoch": 1.2237400244446042, + "kl_loss": 0.02970920503139496, + "loss_ib": 0.0005103049916215241, + "step": 4256 + }, + { + "ce_ib": 2.478900671005249, + "ce_orig": 0.698678195476532, + "epoch": 1.2237400244446042, + "kl_loss": 0.04611189290881157, + "loss_ib": 0.000709009007550776, + "step": 4256 + }, + { + "ce_ib": 2.7990386486053467, + "ce_orig": 0.5183777213096619, + "epoch": 1.2237400244446042, + "kl_loss": 0.049050211906433105, + "loss_ib": 0.0007704059826210141, + "step": 4256 + }, + { + "ce_ib": 2.5389742851257324, + "ce_orig": 0.76523756980896, + "epoch": 1.2237400244446042, + "kl_loss": 0.03534892201423645, + "loss_ib": 0.0006073866388760507, + "step": 4256 + }, + { + "ce_ib": 4.259519100189209, + "ce_orig": 1.047403335571289, + "epoch": 1.224027608023582, + "kl_loss": 0.048880353569984436, + "loss_ib": 0.0009147554519586265, + "step": 4257 + }, + { + "ce_ib": 3.0145039558410645, + "ce_orig": 0.5325751304626465, + "epoch": 1.224027608023582, + "kl_loss": 0.04927823692560196, + "loss_ib": 0.0007942327647469938, + "step": 4257 + }, + { + "ce_ib": 4.2154741287231445, + "ce_orig": 1.2819877862930298, + "epoch": 1.224027608023582, + "kl_loss": 0.043760791420936584, + "loss_ib": 0.0008591553196310997, + "step": 4257 + }, + { + "ce_ib": 2.51990008354187, + "ce_orig": 0.7260401844978333, + "epoch": 1.224027608023582, + "kl_loss": 0.020945999771356583, + "loss_ib": 0.00046144999214448035, + "step": 4257 + }, + { + "ce_ib": 5.5657830238342285, + "ce_orig": 1.198341727256775, + "epoch": 1.2243151916025594, + "kl_loss": 0.03910867124795914, + "loss_ib": 0.0009476649574935436, + "step": 4258 + }, + { + "ce_ib": 1.628114104270935, + "ce_orig": 0.4591774046421051, + "epoch": 1.2243151916025594, + "kl_loss": 0.03537813574075699, + "loss_ib": 0.0005165927577763796, + "step": 4258 + }, + { + "ce_ib": 2.3086657524108887, + "ce_orig": 0.5363439321517944, + "epoch": 1.2243151916025594, + "kl_loss": 0.01857171580195427, + "loss_ib": 0.0004165837017353624, + "step": 4258 + }, + { + "ce_ib": 4.843801021575928, + "ce_orig": 0.833835244178772, + "epoch": 1.2243151916025594, + "kl_loss": 0.089602991938591, + "loss_ib": 0.0013804100453853607, + "step": 4258 + }, + { + "ce_ib": 3.0560953617095947, + "ce_orig": 0.850704550743103, + "epoch": 1.2246027751815372, + "kl_loss": 0.037686798721551895, + "loss_ib": 0.0006824774900451303, + "step": 4259 + }, + { + "ce_ib": 3.3379178047180176, + "ce_orig": 0.8173331618309021, + "epoch": 1.2246027751815372, + "kl_loss": 0.0570162795484066, + "loss_ib": 0.0009039545548148453, + "step": 4259 + }, + { + "ce_ib": 3.8215250968933105, + "ce_orig": 0.9619072079658508, + "epoch": 1.2246027751815372, + "kl_loss": 0.03662355989217758, + "loss_ib": 0.0007483880617655814, + "step": 4259 + }, + { + "ce_ib": 3.1103968620300293, + "ce_orig": 0.8141993880271912, + "epoch": 1.2246027751815372, + "kl_loss": 0.0651378184556961, + "loss_ib": 0.0009624178637750447, + "step": 4259 + }, + { + "epoch": 1.2248903587605149, + "grad_norm": 0.09841392189264297, + "learning_rate": 3.351808294483902e-05, + "loss": 0.824, + "step": 4260 + }, + { + "ce_ib": 3.8674261569976807, + "ce_orig": 1.3185715675354004, + "epoch": 1.2248903587605149, + "kl_loss": 0.03367423266172409, + "loss_ib": 0.0007234849035739899, + "step": 4260 + }, + { + "ce_ib": 3.1168289184570312, + "ce_orig": 0.8983002305030823, + "epoch": 1.2248903587605149, + "kl_loss": 0.04621196910738945, + "loss_ib": 0.0007738025160506368, + "step": 4260 + }, + { + "ce_ib": 3.9564638137817383, + "ce_orig": 0.9472960233688354, + "epoch": 1.2248903587605149, + "kl_loss": 0.047553226351737976, + "loss_ib": 0.0008711786358617246, + "step": 4260 + }, + { + "ce_ib": 3.6301519870758057, + "ce_orig": 0.7728745937347412, + "epoch": 1.2248903587605149, + "kl_loss": 0.05552397668361664, + "loss_ib": 0.0009182549547404051, + "step": 4260 + }, + { + "ce_ib": 3.7890398502349854, + "ce_orig": 0.9257303476333618, + "epoch": 1.2251779423394924, + "kl_loss": 0.054313547909259796, + "loss_ib": 0.000922039442230016, + "step": 4261 + }, + { + "ce_ib": 3.547912120819092, + "ce_orig": 0.9832552671432495, + "epoch": 1.2251779423394924, + "kl_loss": 0.038548462092876434, + "loss_ib": 0.0007402758346870542, + "step": 4261 + }, + { + "ce_ib": 2.019198417663574, + "ce_orig": 0.6968885660171509, + "epoch": 1.2251779423394924, + "kl_loss": 0.016824083402752876, + "loss_ib": 0.00037016067653894424, + "step": 4261 + }, + { + "ce_ib": 2.026752233505249, + "ce_orig": 0.5473020076751709, + "epoch": 1.2251779423394924, + "kl_loss": 0.026573142036795616, + "loss_ib": 0.0004684066225308925, + "step": 4261 + }, + { + "ce_ib": 2.195350408554077, + "ce_orig": 0.5874295234680176, + "epoch": 1.2254655259184701, + "kl_loss": 0.04379422217607498, + "loss_ib": 0.0006574772414751351, + "step": 4262 + }, + { + "ce_ib": 2.0723679065704346, + "ce_orig": 0.46752169728279114, + "epoch": 1.2254655259184701, + "kl_loss": 0.04380803555250168, + "loss_ib": 0.0006453171372413635, + "step": 4262 + }, + { + "ce_ib": 2.6897165775299072, + "ce_orig": 0.7584757208824158, + "epoch": 1.2254655259184701, + "kl_loss": 0.04303272068500519, + "loss_ib": 0.0006992988055571914, + "step": 4262 + }, + { + "ce_ib": 3.9322755336761475, + "ce_orig": 0.6875066757202148, + "epoch": 1.2254655259184701, + "kl_loss": 0.02909059450030327, + "loss_ib": 0.0006841334397904575, + "step": 4262 + }, + { + "ce_ib": 2.5706992149353027, + "ce_orig": 0.6985669136047363, + "epoch": 1.2257531094974476, + "kl_loss": 0.032668448984622955, + "loss_ib": 0.0005837543867528439, + "step": 4263 + }, + { + "ce_ib": 3.370647430419922, + "ce_orig": 0.8350569009780884, + "epoch": 1.2257531094974476, + "kl_loss": 0.04258137196302414, + "loss_ib": 0.0007628785097040236, + "step": 4263 + }, + { + "ce_ib": 5.724857330322266, + "ce_orig": 1.8026211261749268, + "epoch": 1.2257531094974476, + "kl_loss": 0.0426819883286953, + "loss_ib": 0.0009993056301027536, + "step": 4263 + }, + { + "ce_ib": 2.922363758087158, + "ce_orig": 0.5353079438209534, + "epoch": 1.2257531094974476, + "kl_loss": 0.023006027564406395, + "loss_ib": 0.0005222966428846121, + "step": 4263 + }, + { + "ce_ib": 2.277615785598755, + "ce_orig": 0.49644047021865845, + "epoch": 1.2260406930764254, + "kl_loss": 0.035015761852264404, + "loss_ib": 0.0005779191851615906, + "step": 4264 + }, + { + "ce_ib": 3.0338611602783203, + "ce_orig": 0.7355570793151855, + "epoch": 1.2260406930764254, + "kl_loss": 0.044444091618061066, + "loss_ib": 0.0007478269981220365, + "step": 4264 + }, + { + "ce_ib": 4.379177093505859, + "ce_orig": 0.9908022880554199, + "epoch": 1.2260406930764254, + "kl_loss": 0.04875364154577255, + "loss_ib": 0.0009254540782421827, + "step": 4264 + }, + { + "ce_ib": 4.104345321655273, + "ce_orig": 1.2334978580474854, + "epoch": 1.2260406930764254, + "kl_loss": 0.047264598309993744, + "loss_ib": 0.0008830804727040231, + "step": 4264 + }, + { + "epoch": 1.226328276655403, + "grad_norm": 0.10603292286396027, + "learning_rate": 3.348159034789695e-05, + "loss": 0.8124, + "step": 4265 + }, + { + "ce_ib": 2.2790348529815674, + "ce_orig": 0.5720539093017578, + "epoch": 1.226328276655403, + "kl_loss": 0.026446694508194923, + "loss_ib": 0.000492370396386832, + "step": 4265 + }, + { + "ce_ib": 3.897230386734009, + "ce_orig": 1.0322885513305664, + "epoch": 1.226328276655403, + "kl_loss": 0.052263785153627396, + "loss_ib": 0.0009123608469963074, + "step": 4265 + }, + { + "ce_ib": 2.142702102661133, + "ce_orig": 0.23033292591571808, + "epoch": 1.226328276655403, + "kl_loss": 0.09439082443714142, + "loss_ib": 0.0011581784347072244, + "step": 4265 + }, + { + "ce_ib": 2.8544657230377197, + "ce_orig": 0.914509654045105, + "epoch": 1.226328276655403, + "kl_loss": 0.030197538435459137, + "loss_ib": 0.0005874219350516796, + "step": 4265 + }, + { + "ce_ib": 3.350201368331909, + "ce_orig": 1.1307384967803955, + "epoch": 1.2266158602343806, + "kl_loss": 0.04054621234536171, + "loss_ib": 0.0007404821808449924, + "step": 4266 + }, + { + "ce_ib": 2.143702983856201, + "ce_orig": 0.5567172765731812, + "epoch": 1.2266158602343806, + "kl_loss": 0.027327708899974823, + "loss_ib": 0.00048764736857265234, + "step": 4266 + }, + { + "ce_ib": 2.211287498474121, + "ce_orig": 0.5233232378959656, + "epoch": 1.2266158602343806, + "kl_loss": 0.05011732131242752, + "loss_ib": 0.0007223019492812455, + "step": 4266 + }, + { + "ce_ib": 3.2374937534332275, + "ce_orig": 0.7156053781509399, + "epoch": 1.2266158602343806, + "kl_loss": 0.04267774149775505, + "loss_ib": 0.0007505267858505249, + "step": 4266 + }, + { + "ce_ib": 3.7515976428985596, + "ce_orig": 0.7962868809700012, + "epoch": 1.2269034438133581, + "kl_loss": 0.03973768651485443, + "loss_ib": 0.0007725366158410907, + "step": 4267 + }, + { + "ce_ib": 2.4469900131225586, + "ce_orig": 0.7782878279685974, + "epoch": 1.2269034438133581, + "kl_loss": 0.03649313002824783, + "loss_ib": 0.0006096302531659603, + "step": 4267 + }, + { + "ce_ib": 2.772871971130371, + "ce_orig": 0.7651295065879822, + "epoch": 1.2269034438133581, + "kl_loss": 0.038945626467466354, + "loss_ib": 0.0006667434354312718, + "step": 4267 + }, + { + "ce_ib": 5.453241348266602, + "ce_orig": 1.61061429977417, + "epoch": 1.2269034438133581, + "kl_loss": 0.07180824875831604, + "loss_ib": 0.0012634065933525562, + "step": 4267 + }, + { + "ce_ib": 2.8634679317474365, + "ce_orig": 0.6825969219207764, + "epoch": 1.2271910273923359, + "kl_loss": 0.03205263614654541, + "loss_ib": 0.0006068731308914721, + "step": 4268 + }, + { + "ce_ib": 3.5921008586883545, + "ce_orig": 0.8697168827056885, + "epoch": 1.2271910273923359, + "kl_loss": 0.061556220054626465, + "loss_ib": 0.0009747722651809454, + "step": 4268 + }, + { + "ce_ib": 2.636342763900757, + "ce_orig": 0.70856773853302, + "epoch": 1.2271910273923359, + "kl_loss": 0.04159001260995865, + "loss_ib": 0.0006795343360863626, + "step": 4268 + }, + { + "ce_ib": 2.2838661670684814, + "ce_orig": 0.4844251573085785, + "epoch": 1.2271910273923359, + "kl_loss": 0.04516869783401489, + "loss_ib": 0.000680073571857065, + "step": 4268 + }, + { + "ce_ib": 3.375903844833374, + "ce_orig": 0.7327076196670532, + "epoch": 1.2274786109713136, + "kl_loss": 0.03815044090151787, + "loss_ib": 0.0007190947653725743, + "step": 4269 + }, + { + "ce_ib": 4.201056480407715, + "ce_orig": 0.9730734825134277, + "epoch": 1.2274786109713136, + "kl_loss": 0.03691738098859787, + "loss_ib": 0.0007892794674262404, + "step": 4269 + }, + { + "ce_ib": 3.2731730937957764, + "ce_orig": 0.75006103515625, + "epoch": 1.2274786109713136, + "kl_loss": 0.04267508536577225, + "loss_ib": 0.0007540681981481612, + "step": 4269 + }, + { + "ce_ib": 2.81074857711792, + "ce_orig": 0.6379601955413818, + "epoch": 1.2274786109713136, + "kl_loss": 0.040972016751766205, + "loss_ib": 0.0006907950155436993, + "step": 4269 + }, + { + "epoch": 1.2277661945502911, + "grad_norm": 0.11328030377626419, + "learning_rate": 3.344507731683513e-05, + "loss": 0.8655, + "step": 4270 + }, + { + "ce_ib": 2.263326644897461, + "ce_orig": 0.5060404539108276, + "epoch": 1.2277661945502911, + "kl_loss": 0.03514448553323746, + "loss_ib": 0.0005777775077149272, + "step": 4270 + }, + { + "ce_ib": 4.428879261016846, + "ce_orig": 1.0799404382705688, + "epoch": 1.2277661945502911, + "kl_loss": 0.035045474767684937, + "loss_ib": 0.000793342653196305, + "step": 4270 + }, + { + "ce_ib": 3.3446741104125977, + "ce_orig": 0.7198368906974792, + "epoch": 1.2277661945502911, + "kl_loss": 0.016057239845395088, + "loss_ib": 0.0004950397997163236, + "step": 4270 + }, + { + "ce_ib": 2.9428048133850098, + "ce_orig": 0.8209998607635498, + "epoch": 1.2277661945502911, + "kl_loss": 0.04539419710636139, + "loss_ib": 0.0007482224609702826, + "step": 4270 + }, + { + "ce_ib": 3.3314425945281982, + "ce_orig": 1.0235302448272705, + "epoch": 1.2280537781292689, + "kl_loss": 0.028272736817598343, + "loss_ib": 0.0006158716278150678, + "step": 4271 + }, + { + "ce_ib": 3.6959750652313232, + "ce_orig": 0.7132399678230286, + "epoch": 1.2280537781292689, + "kl_loss": 0.030974458903074265, + "loss_ib": 0.0006793420761823654, + "step": 4271 + }, + { + "ce_ib": 2.6403005123138428, + "ce_orig": 0.6122245788574219, + "epoch": 1.2280537781292689, + "kl_loss": 0.017528314143419266, + "loss_ib": 0.00043931318214163184, + "step": 4271 + }, + { + "ce_ib": 4.035080432891846, + "ce_orig": 0.6425864100456238, + "epoch": 1.2280537781292689, + "kl_loss": 0.03276544809341431, + "loss_ib": 0.0007311624940484762, + "step": 4271 + }, + { + "ce_ib": 2.6912126541137695, + "ce_orig": 0.5403556227684021, + "epoch": 1.2283413617082464, + "kl_loss": 0.06656139343976974, + "loss_ib": 0.0009347352315671742, + "step": 4272 + }, + { + "ce_ib": 4.626352787017822, + "ce_orig": 1.2819589376449585, + "epoch": 1.2283413617082464, + "kl_loss": 0.05354025214910507, + "loss_ib": 0.0009980377508327365, + "step": 4272 + }, + { + "ce_ib": 2.20306658744812, + "ce_orig": 0.7045791149139404, + "epoch": 1.2283413617082464, + "kl_loss": 0.02522820420563221, + "loss_ib": 0.0004725886683445424, + "step": 4272 + }, + { + "ce_ib": 3.0299742221832275, + "ce_orig": 0.9361693263053894, + "epoch": 1.2283413617082464, + "kl_loss": 0.04137253388762474, + "loss_ib": 0.0007167227449826896, + "step": 4272 + }, + { + "ce_ib": 2.4149956703186035, + "ce_orig": 0.4786318838596344, + "epoch": 1.228628945287224, + "kl_loss": 0.05517367646098137, + "loss_ib": 0.0007932363077998161, + "step": 4273 + }, + { + "ce_ib": 2.333130121231079, + "ce_orig": 0.5624329447746277, + "epoch": 1.228628945287224, + "kl_loss": 0.04114416986703873, + "loss_ib": 0.0006447546766139567, + "step": 4273 + }, + { + "ce_ib": 3.4334189891815186, + "ce_orig": 0.5361512899398804, + "epoch": 1.228628945287224, + "kl_loss": 0.06656941771507263, + "loss_ib": 0.0010090360883623362, + "step": 4273 + }, + { + "ce_ib": 4.349453449249268, + "ce_orig": 0.7126014232635498, + "epoch": 1.228628945287224, + "kl_loss": 0.04872018098831177, + "loss_ib": 0.0009221471846103668, + "step": 4273 + }, + { + "ce_ib": 2.868786573410034, + "ce_orig": 0.8801438808441162, + "epoch": 1.2289165288662018, + "kl_loss": 0.041382595896720886, + "loss_ib": 0.0007007045787759125, + "step": 4274 + }, + { + "ce_ib": 4.016427993774414, + "ce_orig": 1.177148461341858, + "epoch": 1.2289165288662018, + "kl_loss": 0.04613460600376129, + "loss_ib": 0.0008629888179711998, + "step": 4274 + }, + { + "ce_ib": 4.333232879638672, + "ce_orig": 1.1841343641281128, + "epoch": 1.2289165288662018, + "kl_loss": 0.053482770919799805, + "loss_ib": 0.0009681509109213948, + "step": 4274 + }, + { + "ce_ib": 4.843456268310547, + "ce_orig": 1.5440425872802734, + "epoch": 1.2289165288662018, + "kl_loss": 0.04353901371359825, + "loss_ib": 0.0009197356994263828, + "step": 4274 + }, + { + "epoch": 1.2292041124451794, + "grad_norm": 0.12063613533973694, + "learning_rate": 3.34085439396219e-05, + "loss": 0.7508, + "step": 4275 + }, + { + "ce_ib": 3.94423770904541, + "ce_orig": 0.7217889428138733, + "epoch": 1.2292041124451794, + "kl_loss": 0.05190810561180115, + "loss_ib": 0.0009135048021562397, + "step": 4275 + }, + { + "ce_ib": 4.852246284484863, + "ce_orig": 1.712747573852539, + "epoch": 1.2292041124451794, + "kl_loss": 0.030499156564474106, + "loss_ib": 0.00079021614510566, + "step": 4275 + }, + { + "ce_ib": 3.4933247566223145, + "ce_orig": 0.8038581609725952, + "epoch": 1.2292041124451794, + "kl_loss": 0.0642123892903328, + "loss_ib": 0.0009914563270285726, + "step": 4275 + }, + { + "ce_ib": 3.4785525798797607, + "ce_orig": 0.9666681289672852, + "epoch": 1.2292041124451794, + "kl_loss": 0.03349260985851288, + "loss_ib": 0.0006827813340350986, + "step": 4275 + }, + { + "ce_ib": 3.0514028072357178, + "ce_orig": 0.7543303370475769, + "epoch": 1.229491696024157, + "kl_loss": 0.035444729030132294, + "loss_ib": 0.0006595875020138919, + "step": 4276 + }, + { + "ce_ib": 4.025876045227051, + "ce_orig": 0.9122905731201172, + "epoch": 1.229491696024157, + "kl_loss": 0.044436100870370865, + "loss_ib": 0.0008469485910609365, + "step": 4276 + }, + { + "ce_ib": 3.8355984687805176, + "ce_orig": 0.8534098267555237, + "epoch": 1.229491696024157, + "kl_loss": 0.05917297303676605, + "loss_ib": 0.0009752895566634834, + "step": 4276 + }, + { + "ce_ib": 2.6161367893218994, + "ce_orig": 0.8870153427124023, + "epoch": 1.229491696024157, + "kl_loss": 0.031883835792541504, + "loss_ib": 0.0005804519751109183, + "step": 4276 + }, + { + "ce_ib": 1.9954663515090942, + "ce_orig": 0.58541339635849, + "epoch": 1.2297792796031346, + "kl_loss": 0.03273697942495346, + "loss_ib": 0.0005269164103083313, + "step": 4277 + }, + { + "ce_ib": 3.228231430053711, + "ce_orig": 1.0248503684997559, + "epoch": 1.2297792796031346, + "kl_loss": 0.035476140677928925, + "loss_ib": 0.0006775845540687442, + "step": 4277 + }, + { + "ce_ib": 2.8689346313476562, + "ce_orig": 0.4909665286540985, + "epoch": 1.2297792796031346, + "kl_loss": 0.046112097799777985, + "loss_ib": 0.0007480144267901778, + "step": 4277 + }, + { + "ce_ib": 2.4068126678466797, + "ce_orig": 0.538442075252533, + "epoch": 1.2297792796031346, + "kl_loss": 0.04458879679441452, + "loss_ib": 0.0006865691975690424, + "step": 4277 + }, + { + "ce_ib": 0.8054199814796448, + "ce_orig": 0.12005400657653809, + "epoch": 1.2300668631821123, + "kl_loss": 0.08014638721942902, + "loss_ib": 0.0008820058428682387, + "step": 4278 + }, + { + "ce_ib": 2.4183242321014404, + "ce_orig": 0.7972863912582397, + "epoch": 1.2300668631821123, + "kl_loss": 0.03438758850097656, + "loss_ib": 0.0005857083015143871, + "step": 4278 + }, + { + "ce_ib": 5.985046863555908, + "ce_orig": 1.9225250482559204, + "epoch": 1.2300668631821123, + "kl_loss": 0.054436154663562775, + "loss_ib": 0.001142866094596684, + "step": 4278 + }, + { + "ce_ib": 2.662217140197754, + "ce_orig": 0.48543673753738403, + "epoch": 1.2300668631821123, + "kl_loss": 0.042067721486091614, + "loss_ib": 0.0006868988857604563, + "step": 4278 + }, + { + "ce_ib": 2.623237133026123, + "ce_orig": 0.6813424825668335, + "epoch": 1.2303544467610898, + "kl_loss": 0.02979844994843006, + "loss_ib": 0.0005603082245215774, + "step": 4279 + }, + { + "ce_ib": 3.756476402282715, + "ce_orig": 1.0049607753753662, + "epoch": 1.2303544467610898, + "kl_loss": 0.07275483757257462, + "loss_ib": 0.0011031959438696504, + "step": 4279 + }, + { + "ce_ib": 2.0266928672790527, + "ce_orig": 0.5145630240440369, + "epoch": 1.2303544467610898, + "kl_loss": 0.05312224477529526, + "loss_ib": 0.0007338917348533869, + "step": 4279 + }, + { + "ce_ib": 4.1537394523620605, + "ce_orig": 1.5292129516601562, + "epoch": 1.2303544467610898, + "kl_loss": 0.03496157005429268, + "loss_ib": 0.0007649895851500332, + "step": 4279 + }, + { + "epoch": 1.2306420303400676, + "grad_norm": 0.10168558359146118, + "learning_rate": 3.3371990304274656e-05, + "loss": 0.923, + "step": 4280 + }, + { + "ce_ib": 1.5058319568634033, + "ce_orig": 0.2065836787223816, + "epoch": 1.2306420303400676, + "kl_loss": 0.030680647119879723, + "loss_ib": 0.00045738962944597006, + "step": 4280 + }, + { + "ce_ib": 2.5807480812072754, + "ce_orig": 0.7893373966217041, + "epoch": 1.2306420303400676, + "kl_loss": 0.02959327958524227, + "loss_ib": 0.00055400759447366, + "step": 4280 + }, + { + "ce_ib": 3.4584715366363525, + "ce_orig": 0.6862000226974487, + "epoch": 1.2306420303400676, + "kl_loss": 0.03302375599741936, + "loss_ib": 0.0006760847172699869, + "step": 4280 + }, + { + "ce_ib": 2.0915985107421875, + "ce_orig": 0.5025553107261658, + "epoch": 1.2306420303400676, + "kl_loss": 0.04423242062330246, + "loss_ib": 0.0006514840642921627, + "step": 4280 + }, + { + "ce_ib": 2.6405603885650635, + "ce_orig": 0.4534105062484741, + "epoch": 1.2309296139190453, + "kl_loss": 0.030154570937156677, + "loss_ib": 0.0005656017456203699, + "step": 4281 + }, + { + "ce_ib": 2.1642119884490967, + "ce_orig": 0.4206337332725525, + "epoch": 1.2309296139190453, + "kl_loss": 0.049794264137744904, + "loss_ib": 0.0007143638213165104, + "step": 4281 + }, + { + "ce_ib": 2.452265739440918, + "ce_orig": 0.44951197504997253, + "epoch": 1.2309296139190453, + "kl_loss": 0.030504116788506508, + "loss_ib": 0.0005502676940523088, + "step": 4281 + }, + { + "ce_ib": 4.020281791687012, + "ce_orig": 1.088588833808899, + "epoch": 1.2309296139190453, + "kl_loss": 0.052434541285037994, + "loss_ib": 0.0009263735846616328, + "step": 4281 + }, + { + "ce_ib": 3.2941031455993652, + "ce_orig": 0.8985931277275085, + "epoch": 1.2312171974980228, + "kl_loss": 0.06281477212905884, + "loss_ib": 0.0009575579897500575, + "step": 4282 + }, + { + "ce_ib": 2.5544841289520264, + "ce_orig": 0.5605454444885254, + "epoch": 1.2312171974980228, + "kl_loss": 0.030608482658863068, + "loss_ib": 0.0005615332047455013, + "step": 4282 + }, + { + "ce_ib": 2.327893018722534, + "ce_orig": 0.5379874110221863, + "epoch": 1.2312171974980228, + "kl_loss": 0.03633947670459747, + "loss_ib": 0.0005961840506643057, + "step": 4282 + }, + { + "ce_ib": 2.2402758598327637, + "ce_orig": 0.5681865811347961, + "epoch": 1.2312171974980228, + "kl_loss": 0.033176518976688385, + "loss_ib": 0.0005557927652262151, + "step": 4282 + }, + { + "ce_ib": 2.957197427749634, + "ce_orig": 0.911383867263794, + "epoch": 1.2315047810770006, + "kl_loss": 0.038847506046295166, + "loss_ib": 0.0006841947906650603, + "step": 4283 + }, + { + "ce_ib": 3.198132038116455, + "ce_orig": 0.9673892259597778, + "epoch": 1.2315047810770006, + "kl_loss": 0.04090261086821556, + "loss_ib": 0.0007288393098860979, + "step": 4283 + }, + { + "ce_ib": 2.9049863815307617, + "ce_orig": 0.6835054159164429, + "epoch": 1.2315047810770006, + "kl_loss": 0.0461851991713047, + "loss_ib": 0.0007523506064899266, + "step": 4283 + }, + { + "ce_ib": 2.882138967514038, + "ce_orig": 0.6818674206733704, + "epoch": 1.2315047810770006, + "kl_loss": 0.047264713793992996, + "loss_ib": 0.0007608610321767628, + "step": 4283 + }, + { + "ce_ib": 2.268883466720581, + "ce_orig": 0.5553695559501648, + "epoch": 1.231792364655978, + "kl_loss": 0.0499146431684494, + "loss_ib": 0.0007260347483679652, + "step": 4284 + }, + { + "ce_ib": 2.325960636138916, + "ce_orig": 0.4793991446495056, + "epoch": 1.231792364655978, + "kl_loss": 0.024871956557035446, + "loss_ib": 0.0004813156265299767, + "step": 4284 + }, + { + "ce_ib": 2.5487217903137207, + "ce_orig": 0.771268367767334, + "epoch": 1.231792364655978, + "kl_loss": 0.022564910352230072, + "loss_ib": 0.00048052126658149064, + "step": 4284 + }, + { + "ce_ib": 2.8669817447662354, + "ce_orig": 0.6773555278778076, + "epoch": 1.231792364655978, + "kl_loss": 0.039099328219890594, + "loss_ib": 0.0006776914815418422, + "step": 4284 + }, + { + "epoch": 1.2320799482349558, + "grad_norm": 0.11021935939788818, + "learning_rate": 3.333541649885959e-05, + "loss": 0.7755, + "step": 4285 + }, + { + "ce_ib": 2.720881938934326, + "ce_orig": 0.46892496943473816, + "epoch": 1.2320799482349558, + "kl_loss": 0.04343218356370926, + "loss_ib": 0.0007064100354909897, + "step": 4285 + }, + { + "ce_ib": 2.7914137840270996, + "ce_orig": 0.36839714646339417, + "epoch": 1.2320799482349558, + "kl_loss": 0.08077394217252731, + "loss_ib": 0.0010868808021768928, + "step": 4285 + }, + { + "ce_ib": 2.270601987838745, + "ce_orig": 0.7308744788169861, + "epoch": 1.2320799482349558, + "kl_loss": 0.023199710994958878, + "loss_ib": 0.00045905730803497136, + "step": 4285 + }, + { + "ce_ib": 4.7461137771606445, + "ce_orig": 1.3539677858352661, + "epoch": 1.2320799482349558, + "kl_loss": 0.07156506925821304, + "loss_ib": 0.0011902620317414403, + "step": 4285 + }, + { + "ce_ib": 2.9825127124786377, + "ce_orig": 0.43719345331192017, + "epoch": 1.2323675318139333, + "kl_loss": 0.060057833790779114, + "loss_ib": 0.0008988296031020582, + "step": 4286 + }, + { + "ce_ib": 4.628408908843994, + "ce_orig": 0.7680606842041016, + "epoch": 1.2323675318139333, + "kl_loss": 0.032851442694664, + "loss_ib": 0.0007913552690297365, + "step": 4286 + }, + { + "ce_ib": 3.9250853061676025, + "ce_orig": 0.7709358930587769, + "epoch": 1.2323675318139333, + "kl_loss": 0.05170033872127533, + "loss_ib": 0.0009095118730328977, + "step": 4286 + }, + { + "ce_ib": 3.46876859664917, + "ce_orig": 0.9389262199401855, + "epoch": 1.2323675318139333, + "kl_loss": 0.035641226917505264, + "loss_ib": 0.0007032891153357923, + "step": 4286 + }, + { + "ce_ib": 2.325817108154297, + "ce_orig": 0.5428375601768494, + "epoch": 1.232655115392911, + "kl_loss": 0.02879868447780609, + "loss_ib": 0.0005205685738474131, + "step": 4287 + }, + { + "ce_ib": 2.427255868911743, + "ce_orig": 0.6825093030929565, + "epoch": 1.232655115392911, + "kl_loss": 0.03872678056359291, + "loss_ib": 0.0006299933884292841, + "step": 4287 + }, + { + "ce_ib": 2.5802788734436035, + "ce_orig": 0.4550608694553375, + "epoch": 1.232655115392911, + "kl_loss": 0.03411944583058357, + "loss_ib": 0.0005992223159410059, + "step": 4287 + }, + { + "ce_ib": 3.9687016010284424, + "ce_orig": 1.0089807510375977, + "epoch": 1.232655115392911, + "kl_loss": 0.041918061673641205, + "loss_ib": 0.0008160507422871888, + "step": 4287 + }, + { + "ce_ib": 5.387452125549316, + "ce_orig": 1.4680249691009521, + "epoch": 1.2329426989718888, + "kl_loss": 0.04999445378780365, + "loss_ib": 0.0010386897483840585, + "step": 4288 + }, + { + "ce_ib": 2.9901819229125977, + "ce_orig": 0.7164698243141174, + "epoch": 1.2329426989718888, + "kl_loss": 0.043099068105220795, + "loss_ib": 0.0007300088182091713, + "step": 4288 + }, + { + "ce_ib": 3.667466640472412, + "ce_orig": 1.2360398769378662, + "epoch": 1.2329426989718888, + "kl_loss": 0.038703009486198425, + "loss_ib": 0.0007537766941823065, + "step": 4288 + }, + { + "ce_ib": 3.3672943115234375, + "ce_orig": 1.108206033706665, + "epoch": 1.2329426989718888, + "kl_loss": 0.028337502852082253, + "loss_ib": 0.0006201044307090342, + "step": 4288 + }, + { + "ce_ib": 2.4163055419921875, + "ce_orig": 0.7446650266647339, + "epoch": 1.2332302825508663, + "kl_loss": 0.03239407390356064, + "loss_ib": 0.0005655712448060513, + "step": 4289 + }, + { + "ce_ib": 2.3547184467315674, + "ce_orig": 0.6327084302902222, + "epoch": 1.2332302825508663, + "kl_loss": 0.033646561205387115, + "loss_ib": 0.0005719374166801572, + "step": 4289 + }, + { + "ce_ib": 2.883737087249756, + "ce_orig": 0.6392406821250916, + "epoch": 1.2332302825508663, + "kl_loss": 0.04831523820757866, + "loss_ib": 0.000771526072639972, + "step": 4289 + }, + { + "ce_ib": 3.7947511672973633, + "ce_orig": 1.0863189697265625, + "epoch": 1.2332302825508663, + "kl_loss": 0.04130285978317261, + "loss_ib": 0.0007925036479718983, + "step": 4289 + }, + { + "epoch": 1.233517866129844, + "grad_norm": 0.11387728899717331, + "learning_rate": 3.329882261149148e-05, + "loss": 0.832, + "step": 4290 + }, + { + "ce_ib": 2.877307176589966, + "ce_orig": 0.7327218651771545, + "epoch": 1.233517866129844, + "kl_loss": 0.055865578353405, + "loss_ib": 0.0008463864796794951, + "step": 4290 + }, + { + "ce_ib": 4.774861812591553, + "ce_orig": 1.2274848222732544, + "epoch": 1.233517866129844, + "kl_loss": 0.04596890136599541, + "loss_ib": 0.000937175122089684, + "step": 4290 + }, + { + "ce_ib": 2.732151746749878, + "ce_orig": 0.9161856770515442, + "epoch": 1.233517866129844, + "kl_loss": 0.04874322935938835, + "loss_ib": 0.0007606474682688713, + "step": 4290 + }, + { + "ce_ib": 4.420731544494629, + "ce_orig": 1.0737919807434082, + "epoch": 1.233517866129844, + "kl_loss": 0.06167173385620117, + "loss_ib": 0.0010587904835119843, + "step": 4290 + }, + { + "ce_ib": 3.335930824279785, + "ce_orig": 0.9775473475456238, + "epoch": 1.2338054497088216, + "kl_loss": 0.04618380218744278, + "loss_ib": 0.0007954311440698802, + "step": 4291 + }, + { + "ce_ib": 2.269205093383789, + "ce_orig": 0.6954864263534546, + "epoch": 1.2338054497088216, + "kl_loss": 0.027699660509824753, + "loss_ib": 0.0005039171082898974, + "step": 4291 + }, + { + "ce_ib": 0.7473326325416565, + "ce_orig": 0.12267546355724335, + "epoch": 1.2338054497088216, + "kl_loss": 0.13196662068367004, + "loss_ib": 0.001394399325363338, + "step": 4291 + }, + { + "ce_ib": 2.7571563720703125, + "ce_orig": 0.6256929039955139, + "epoch": 1.2338054497088216, + "kl_loss": 0.05372755974531174, + "loss_ib": 0.0008129912312142551, + "step": 4291 + }, + { + "ce_ib": 3.2654130458831787, + "ce_orig": 0.47133803367614746, + "epoch": 1.2340930332877993, + "kl_loss": 0.03462022542953491, + "loss_ib": 0.0006727435393258929, + "step": 4292 + }, + { + "ce_ib": 3.02937388420105, + "ce_orig": 0.8083590865135193, + "epoch": 1.2340930332877993, + "kl_loss": 0.040491558611392975, + "loss_ib": 0.0007078529451973736, + "step": 4292 + }, + { + "ce_ib": 2.9644668102264404, + "ce_orig": 0.610980212688446, + "epoch": 1.2340930332877993, + "kl_loss": 0.03247401863336563, + "loss_ib": 0.000621186860371381, + "step": 4292 + }, + { + "ce_ib": 3.697622299194336, + "ce_orig": 1.1447314023971558, + "epoch": 1.2340930332877993, + "kl_loss": 0.052850786596536636, + "loss_ib": 0.000898270052857697, + "step": 4292 + }, + { + "ce_ib": 2.043231725692749, + "ce_orig": 0.561307430267334, + "epoch": 1.234380616866777, + "kl_loss": 0.02260751463472843, + "loss_ib": 0.00043039830052293837, + "step": 4293 + }, + { + "ce_ib": 4.910340785980225, + "ce_orig": 1.5927692651748657, + "epoch": 1.234380616866777, + "kl_loss": 0.030567143112421036, + "loss_ib": 0.0007967054843902588, + "step": 4293 + }, + { + "ce_ib": 2.6521246433258057, + "ce_orig": 0.5637821555137634, + "epoch": 1.234380616866777, + "kl_loss": 0.033204540610313416, + "loss_ib": 0.0005972578073851764, + "step": 4293 + }, + { + "ce_ib": 4.2894086837768555, + "ce_orig": 1.1909873485565186, + "epoch": 1.234380616866777, + "kl_loss": 0.045974113047122955, + "loss_ib": 0.0008886819705367088, + "step": 4293 + }, + { + "ce_ib": 2.733891010284424, + "ce_orig": 0.8707348108291626, + "epoch": 1.2346682004457545, + "kl_loss": 0.0451120063662529, + "loss_ib": 0.0007245091255754232, + "step": 4294 + }, + { + "ce_ib": 2.331460952758789, + "ce_orig": 0.4173871576786041, + "epoch": 1.2346682004457545, + "kl_loss": 0.03701789677143097, + "loss_ib": 0.0006033250829204917, + "step": 4294 + }, + { + "ce_ib": 4.396769046783447, + "ce_orig": 0.9927931427955627, + "epoch": 1.2346682004457545, + "kl_loss": 0.03230509161949158, + "loss_ib": 0.0007627278682775795, + "step": 4294 + }, + { + "ce_ib": 4.1516194343566895, + "ce_orig": 0.9789978265762329, + "epoch": 1.2346682004457545, + "kl_loss": 0.04446309059858322, + "loss_ib": 0.000859792809933424, + "step": 4294 + }, + { + "epoch": 1.2349557840247323, + "grad_norm": 0.10901052504777908, + "learning_rate": 3.326220873033351e-05, + "loss": 0.8374, + "step": 4295 + }, + { + "ce_ib": 3.204383373260498, + "ce_orig": 0.3667798638343811, + "epoch": 1.2349557840247323, + "kl_loss": 0.05075627565383911, + "loss_ib": 0.0008280010079033673, + "step": 4295 + }, + { + "ce_ib": 2.147343873977661, + "ce_orig": 0.45991629362106323, + "epoch": 1.2349557840247323, + "kl_loss": 0.052897270768880844, + "loss_ib": 0.0007437071180902421, + "step": 4295 + }, + { + "ce_ib": 3.630215883255005, + "ce_orig": 1.009810447692871, + "epoch": 1.2349557840247323, + "kl_loss": 0.025962773710489273, + "loss_ib": 0.000622649269644171, + "step": 4295 + }, + { + "ce_ib": 3.218397378921509, + "ce_orig": 0.7224206924438477, + "epoch": 1.2349557840247323, + "kl_loss": 0.029285689815878868, + "loss_ib": 0.0006146965897642076, + "step": 4295 + }, + { + "ce_ib": 2.4492335319519043, + "ce_orig": 0.5923131704330444, + "epoch": 1.2352433676037098, + "kl_loss": 0.023566827178001404, + "loss_ib": 0.00048059161053970456, + "step": 4296 + }, + { + "ce_ib": 4.66945743560791, + "ce_orig": 1.203882098197937, + "epoch": 1.2352433676037098, + "kl_loss": 0.0902133509516716, + "loss_ib": 0.0013690792256966233, + "step": 4296 + }, + { + "ce_ib": 3.5900793075561523, + "ce_orig": 0.9396870732307434, + "epoch": 1.2352433676037098, + "kl_loss": 0.03515442833304405, + "loss_ib": 0.0007105521508492529, + "step": 4296 + }, + { + "ce_ib": 5.119722366333008, + "ce_orig": 1.6775455474853516, + "epoch": 1.2352433676037098, + "kl_loss": 0.03012148290872574, + "loss_ib": 0.000813187041785568, + "step": 4296 + }, + { + "ce_ib": 4.84269380569458, + "ce_orig": 1.489667534828186, + "epoch": 1.2355309511826875, + "kl_loss": 0.050183385610580444, + "loss_ib": 0.0009861032012850046, + "step": 4297 + }, + { + "ce_ib": 2.4217629432678223, + "ce_orig": 0.5772871375083923, + "epoch": 1.2355309511826875, + "kl_loss": 0.03685547038912773, + "loss_ib": 0.0006107310182414949, + "step": 4297 + }, + { + "ce_ib": 3.432474136352539, + "ce_orig": 1.1958173513412476, + "epoch": 1.2355309511826875, + "kl_loss": 0.034524284303188324, + "loss_ib": 0.0006884902832098305, + "step": 4297 + }, + { + "ce_ib": 3.3837859630584717, + "ce_orig": 0.7094699740409851, + "epoch": 1.2355309511826875, + "kl_loss": 0.03400071710348129, + "loss_ib": 0.0006783857825212181, + "step": 4297 + }, + { + "ce_ib": 2.6201820373535156, + "ce_orig": 0.5595741271972656, + "epoch": 1.235818534761665, + "kl_loss": 0.03437265008687973, + "loss_ib": 0.0006057447171770036, + "step": 4298 + }, + { + "ce_ib": 4.2437639236450195, + "ce_orig": 1.186962366104126, + "epoch": 1.235818534761665, + "kl_loss": 0.030520759522914886, + "loss_ib": 0.0007295839604921639, + "step": 4298 + }, + { + "ce_ib": 1.8800510168075562, + "ce_orig": 0.6042175889015198, + "epoch": 1.235818534761665, + "kl_loss": 0.023676779121160507, + "loss_ib": 0.00042477285023778677, + "step": 4298 + }, + { + "ce_ib": 1.8705127239227295, + "ce_orig": 0.4980851113796234, + "epoch": 1.235818534761665, + "kl_loss": 0.031487591564655304, + "loss_ib": 0.0005019271629862487, + "step": 4298 + }, + { + "ce_ib": 2.200178623199463, + "ce_orig": 0.6822667717933655, + "epoch": 1.2361061183406428, + "kl_loss": 0.0295072291046381, + "loss_ib": 0.0005150901270098984, + "step": 4299 + }, + { + "ce_ib": 3.331746816635132, + "ce_orig": 1.1176207065582275, + "epoch": 1.2361061183406428, + "kl_loss": 0.042360611259937286, + "loss_ib": 0.0007567807915620506, + "step": 4299 + }, + { + "ce_ib": 3.9454030990600586, + "ce_orig": 1.1553831100463867, + "epoch": 1.2361061183406428, + "kl_loss": 0.05780891329050064, + "loss_ib": 0.0009726294665597379, + "step": 4299 + }, + { + "ce_ib": 4.9387359619140625, + "ce_orig": 1.4334875345230103, + "epoch": 1.2361061183406428, + "kl_loss": 0.03821318596601486, + "loss_ib": 0.000876005447935313, + "step": 4299 + }, + { + "epoch": 1.2363937019196203, + "grad_norm": 0.13057924807071686, + "learning_rate": 3.3225574943597005e-05, + "loss": 0.837, + "step": 4300 + }, + { + "ce_ib": 4.713967800140381, + "ce_orig": 1.3586485385894775, + "epoch": 1.2363937019196203, + "kl_loss": 0.04990258812904358, + "loss_ib": 0.0009704225813038647, + "step": 4300 + }, + { + "ce_ib": 2.577444553375244, + "ce_orig": 0.8194641470909119, + "epoch": 1.2363937019196203, + "kl_loss": 0.03948291391134262, + "loss_ib": 0.00065257353708148, + "step": 4300 + }, + { + "ce_ib": 3.846269369125366, + "ce_orig": 0.717339277267456, + "epoch": 1.2363937019196203, + "kl_loss": 0.06148876994848251, + "loss_ib": 0.000999514595605433, + "step": 4300 + }, + { + "ce_ib": 4.2721123695373535, + "ce_orig": 0.9374851584434509, + "epoch": 1.2363937019196203, + "kl_loss": 0.04111192375421524, + "loss_ib": 0.0008383304229937494, + "step": 4300 + }, + { + "ce_ib": 3.971973419189453, + "ce_orig": 0.7996195554733276, + "epoch": 1.236681285498598, + "kl_loss": 0.05150308459997177, + "loss_ib": 0.0009122281917370856, + "step": 4301 + }, + { + "ce_ib": 4.410249710083008, + "ce_orig": 1.3421534299850464, + "epoch": 1.236681285498598, + "kl_loss": 0.05792300030589104, + "loss_ib": 0.001020254916511476, + "step": 4301 + }, + { + "ce_ib": 3.7820844650268555, + "ce_orig": 0.8741791248321533, + "epoch": 1.236681285498598, + "kl_loss": 0.04856352508068085, + "loss_ib": 0.0008638437138870358, + "step": 4301 + }, + { + "ce_ib": 2.0076000690460205, + "ce_orig": 0.577593207359314, + "epoch": 1.236681285498598, + "kl_loss": 0.037826456129550934, + "loss_ib": 0.0005790245486423373, + "step": 4301 + }, + { + "ce_ib": 4.371280670166016, + "ce_orig": 1.3231652975082397, + "epoch": 1.2369688690775758, + "kl_loss": 0.04849831759929657, + "loss_ib": 0.0009221112704835832, + "step": 4302 + }, + { + "ce_ib": 3.5324511528015137, + "ce_orig": 0.7712406516075134, + "epoch": 1.2369688690775758, + "kl_loss": 0.048917755484580994, + "loss_ib": 0.0008424226543866098, + "step": 4302 + }, + { + "ce_ib": 4.124956130981445, + "ce_orig": 1.0595614910125732, + "epoch": 1.2369688690775758, + "kl_loss": 0.05991528183221817, + "loss_ib": 0.0010116484481841326, + "step": 4302 + }, + { + "ce_ib": 2.3137495517730713, + "ce_orig": 0.5648394823074341, + "epoch": 1.2369688690775758, + "kl_loss": 0.03435461223125458, + "loss_ib": 0.0005749210831709206, + "step": 4302 + }, + { + "ce_ib": 3.9026782512664795, + "ce_orig": 1.0191737413406372, + "epoch": 1.2372564526565533, + "kl_loss": 0.038059547543525696, + "loss_ib": 0.0007708632620051503, + "step": 4303 + }, + { + "ce_ib": 5.417357921600342, + "ce_orig": 1.6051075458526611, + "epoch": 1.2372564526565533, + "kl_loss": 0.04088030010461807, + "loss_ib": 0.0009505387861281633, + "step": 4303 + }, + { + "ce_ib": 3.8503990173339844, + "ce_orig": 1.0991884469985962, + "epoch": 1.2372564526565533, + "kl_loss": 0.0319204106926918, + "loss_ib": 0.0007042440120130777, + "step": 4303 + }, + { + "ce_ib": 3.5064797401428223, + "ce_orig": 0.9669780135154724, + "epoch": 1.2372564526565533, + "kl_loss": 0.03416324779391289, + "loss_ib": 0.00069228041684255, + "step": 4303 + }, + { + "ce_ib": 2.468095541000366, + "ce_orig": 0.5807396173477173, + "epoch": 1.237544036235531, + "kl_loss": 0.023907311260700226, + "loss_ib": 0.00048588268691673875, + "step": 4304 + }, + { + "ce_ib": 2.3889429569244385, + "ce_orig": 0.6584697365760803, + "epoch": 1.237544036235531, + "kl_loss": 0.041330885142087936, + "loss_ib": 0.0006522031035274267, + "step": 4304 + }, + { + "ce_ib": 5.870662689208984, + "ce_orig": 1.3117691278457642, + "epoch": 1.237544036235531, + "kl_loss": 0.0352693647146225, + "loss_ib": 0.0009397598914802074, + "step": 4304 + }, + { + "ce_ib": 4.6814165115356445, + "ce_orig": 0.5777747631072998, + "epoch": 1.237544036235531, + "kl_loss": 0.028789950534701347, + "loss_ib": 0.0007560410886071622, + "step": 4304 + }, + { + "epoch": 1.2378316198145085, + "grad_norm": 0.10608385503292084, + "learning_rate": 3.318892133954127e-05, + "loss": 0.8678, + "step": 4305 + }, + { + "ce_ib": 4.198281288146973, + "ce_orig": 0.8937285542488098, + "epoch": 1.2378316198145085, + "kl_loss": 0.05326233059167862, + "loss_ib": 0.000952451373450458, + "step": 4305 + }, + { + "ce_ib": 2.2503161430358887, + "ce_orig": 0.6115522384643555, + "epoch": 1.2378316198145085, + "kl_loss": 0.029888441786170006, + "loss_ib": 0.0005239160382188857, + "step": 4305 + }, + { + "ce_ib": 3.693148374557495, + "ce_orig": 0.8259866833686829, + "epoch": 1.2378316198145085, + "kl_loss": 0.029732046648859978, + "loss_ib": 0.0006666352273896337, + "step": 4305 + }, + { + "ce_ib": 2.933122396469116, + "ce_orig": 0.6478883624076843, + "epoch": 1.2378316198145085, + "kl_loss": 0.03189016506075859, + "loss_ib": 0.0006122139166109264, + "step": 4305 + }, + { + "ce_ib": 2.2587528228759766, + "ce_orig": 0.6187869310379028, + "epoch": 1.2381192033934862, + "kl_loss": 0.029002398252487183, + "loss_ib": 0.0005158992717042565, + "step": 4306 + }, + { + "ce_ib": 3.710707426071167, + "ce_orig": 0.9976998567581177, + "epoch": 1.2381192033934862, + "kl_loss": 0.035600028932094574, + "loss_ib": 0.0007270710193552077, + "step": 4306 + }, + { + "ce_ib": 2.4202558994293213, + "ce_orig": 0.6447434425354004, + "epoch": 1.2381192033934862, + "kl_loss": 0.03719146549701691, + "loss_ib": 0.000613940239418298, + "step": 4306 + }, + { + "ce_ib": 2.891235828399658, + "ce_orig": 0.4556446075439453, + "epoch": 1.2381192033934862, + "kl_loss": 0.035593800246715546, + "loss_ib": 0.0006450615474022925, + "step": 4306 + }, + { + "ce_ib": 2.2527952194213867, + "ce_orig": 0.6499141454696655, + "epoch": 1.238406786972464, + "kl_loss": 0.028078973293304443, + "loss_ib": 0.0005060692201368511, + "step": 4307 + }, + { + "ce_ib": 3.8041131496429443, + "ce_orig": 0.93196702003479, + "epoch": 1.238406786972464, + "kl_loss": 0.03743116185069084, + "loss_ib": 0.0007547229179181159, + "step": 4307 + }, + { + "ce_ib": 3.3418021202087402, + "ce_orig": 0.5866404175758362, + "epoch": 1.238406786972464, + "kl_loss": 0.040272392332553864, + "loss_ib": 0.0007369041559286416, + "step": 4307 + }, + { + "ce_ib": 3.36344838142395, + "ce_orig": 0.5313479900360107, + "epoch": 1.238406786972464, + "kl_loss": 0.05262388288974762, + "loss_ib": 0.0008625836344435811, + "step": 4307 + }, + { + "ce_ib": 4.059891223907471, + "ce_orig": 1.0041613578796387, + "epoch": 1.2386943705514415, + "kl_loss": 0.05538851022720337, + "loss_ib": 0.0009598742472007871, + "step": 4308 + }, + { + "ce_ib": 3.2033815383911133, + "ce_orig": 0.694891631603241, + "epoch": 1.2386943705514415, + "kl_loss": 0.08176600933074951, + "loss_ib": 0.0011379981879144907, + "step": 4308 + }, + { + "ce_ib": 3.056469440460205, + "ce_orig": 0.5563850402832031, + "epoch": 1.2386943705514415, + "kl_loss": 0.02606061100959778, + "loss_ib": 0.0005662530311383307, + "step": 4308 + }, + { + "ce_ib": 2.855637311935425, + "ce_orig": 0.4828340709209442, + "epoch": 1.2386943705514415, + "kl_loss": 0.03627641126513481, + "loss_ib": 0.0006483278120867908, + "step": 4308 + }, + { + "ce_ib": 3.2173006534576416, + "ce_orig": 0.678649365901947, + "epoch": 1.2389819541304192, + "kl_loss": 0.034155577421188354, + "loss_ib": 0.0006632858421653509, + "step": 4309 + }, + { + "ce_ib": 4.460118293762207, + "ce_orig": 1.452803373336792, + "epoch": 1.2389819541304192, + "kl_loss": 0.04063744470477104, + "loss_ib": 0.0008523862343281507, + "step": 4309 + }, + { + "ce_ib": 2.7330119609832764, + "ce_orig": 0.5943870544433594, + "epoch": 1.2389819541304192, + "kl_loss": 0.043176256120204926, + "loss_ib": 0.0007050637505017221, + "step": 4309 + }, + { + "ce_ib": 2.2143046855926514, + "ce_orig": 0.5961117148399353, + "epoch": 1.2389819541304192, + "kl_loss": 0.03362686559557915, + "loss_ib": 0.0005576991243287921, + "step": 4309 + }, + { + "epoch": 1.2392695377093967, + "grad_norm": 0.11760246008634567, + "learning_rate": 3.315224800647333e-05, + "loss": 0.8316, + "step": 4310 + }, + { + "ce_ib": 4.259821891784668, + "ce_orig": 1.3011255264282227, + "epoch": 1.2392695377093967, + "kl_loss": 0.036552414298057556, + "loss_ib": 0.000791506317909807, + "step": 4310 + }, + { + "ce_ib": 3.5682308673858643, + "ce_orig": 1.0077890157699585, + "epoch": 1.2392695377093967, + "kl_loss": 0.05566608905792236, + "loss_ib": 0.0009134839056059718, + "step": 4310 + }, + { + "ce_ib": 3.08955979347229, + "ce_orig": 0.9682716131210327, + "epoch": 1.2392695377093967, + "kl_loss": 0.04035407304763794, + "loss_ib": 0.0007124966941773891, + "step": 4310 + }, + { + "ce_ib": 4.338375568389893, + "ce_orig": 1.321935772895813, + "epoch": 1.2392695377093967, + "kl_loss": 0.057490695267915726, + "loss_ib": 0.0010087444679811597, + "step": 4310 + }, + { + "ce_ib": 2.631669282913208, + "ce_orig": 0.6564859747886658, + "epoch": 1.2395571212883745, + "kl_loss": 0.04345015808939934, + "loss_ib": 0.0006976684671826661, + "step": 4311 + }, + { + "ce_ib": 5.498257637023926, + "ce_orig": 1.5530996322631836, + "epoch": 1.2395571212883745, + "kl_loss": 0.04939790442585945, + "loss_ib": 0.0010438048047944903, + "step": 4311 + }, + { + "ce_ib": 3.614795684814453, + "ce_orig": 0.8317360877990723, + "epoch": 1.2395571212883745, + "kl_loss": 0.029976969584822655, + "loss_ib": 0.0006612492143176496, + "step": 4311 + }, + { + "ce_ib": 3.0128886699676514, + "ce_orig": 0.9664864540100098, + "epoch": 1.2395571212883745, + "kl_loss": 0.03737686201930046, + "loss_ib": 0.000675057468470186, + "step": 4311 + }, + { + "ce_ib": 1.090239405632019, + "ce_orig": 0.24221785366535187, + "epoch": 1.239844704867352, + "kl_loss": 0.08808295428752899, + "loss_ib": 0.0009898534044623375, + "step": 4312 + }, + { + "ce_ib": 2.5355987548828125, + "ce_orig": 0.5008035898208618, + "epoch": 1.239844704867352, + "kl_loss": 0.04141681268811226, + "loss_ib": 0.0006677280180156231, + "step": 4312 + }, + { + "ce_ib": 1.8173481225967407, + "ce_orig": 0.3203515410423279, + "epoch": 1.239844704867352, + "kl_loss": 0.09676925837993622, + "loss_ib": 0.0011494273785501719, + "step": 4312 + }, + { + "ce_ib": 4.034792900085449, + "ce_orig": 1.1527892351150513, + "epoch": 1.239844704867352, + "kl_loss": 0.049101732671260834, + "loss_ib": 0.0008944965666159987, + "step": 4312 + }, + { + "ce_ib": 1.9589509963989258, + "ce_orig": 0.3693467676639557, + "epoch": 1.2401322884463297, + "kl_loss": 0.04390842095017433, + "loss_ib": 0.000634979282040149, + "step": 4313 + }, + { + "ce_ib": 3.344982862472534, + "ce_orig": 0.8787932395935059, + "epoch": 1.2401322884463297, + "kl_loss": 0.045987531542778015, + "loss_ib": 0.0007943735690787435, + "step": 4313 + }, + { + "ce_ib": 2.3572466373443604, + "ce_orig": 0.6900548338890076, + "epoch": 1.2401322884463297, + "kl_loss": 0.04167243093252182, + "loss_ib": 0.0006524489726871252, + "step": 4313 + }, + { + "ce_ib": 2.6852200031280518, + "ce_orig": 0.7115756869316101, + "epoch": 1.2401322884463297, + "kl_loss": 0.02748890593647957, + "loss_ib": 0.000543411064427346, + "step": 4313 + }, + { + "ce_ib": 2.1124627590179443, + "ce_orig": 0.7167018055915833, + "epoch": 1.2404198720253072, + "kl_loss": 0.03640047460794449, + "loss_ib": 0.0005752510041929781, + "step": 4314 + }, + { + "ce_ib": 3.884269952774048, + "ce_orig": 0.7909151315689087, + "epoch": 1.2404198720253072, + "kl_loss": 0.039598092436790466, + "loss_ib": 0.0007844078936614096, + "step": 4314 + }, + { + "ce_ib": 3.9153685569763184, + "ce_orig": 1.1631598472595215, + "epoch": 1.2404198720253072, + "kl_loss": 0.034828998148441315, + "loss_ib": 0.0007398268207907677, + "step": 4314 + }, + { + "ce_ib": 4.296024799346924, + "ce_orig": 1.107012391090393, + "epoch": 1.2404198720253072, + "kl_loss": 0.03981415182352066, + "loss_ib": 0.0008277439628727734, + "step": 4314 + }, + { + "epoch": 1.240707455604285, + "grad_norm": 0.11885323375463486, + "learning_rate": 3.3115555032747766e-05, + "loss": 0.8804, + "step": 4315 + }, + { + "ce_ib": 2.445275068283081, + "ce_orig": 0.41829800605773926, + "epoch": 1.240707455604285, + "kl_loss": 0.049904365092515945, + "loss_ib": 0.0007435711449943483, + "step": 4315 + }, + { + "ce_ib": 2.190845489501953, + "ce_orig": 0.5039660930633545, + "epoch": 1.240707455604285, + "kl_loss": 0.02646665833890438, + "loss_ib": 0.00048375115147791803, + "step": 4315 + }, + { + "ce_ib": 2.841562509536743, + "ce_orig": 0.655648410320282, + "epoch": 1.240707455604285, + "kl_loss": 0.04576386511325836, + "loss_ib": 0.000741794821806252, + "step": 4315 + }, + { + "ce_ib": 4.708279132843018, + "ce_orig": 1.2666594982147217, + "epoch": 1.240707455604285, + "kl_loss": 0.05660349503159523, + "loss_ib": 0.0010368628427386284, + "step": 4315 + }, + { + "ce_ib": 4.127355575561523, + "ce_orig": 0.7817568182945251, + "epoch": 1.2409950391832627, + "kl_loss": 0.05233805626630783, + "loss_ib": 0.0009361160919070244, + "step": 4316 + }, + { + "ce_ib": 4.1886067390441895, + "ce_orig": 1.1460437774658203, + "epoch": 1.2409950391832627, + "kl_loss": 0.04797481745481491, + "loss_ib": 0.0008986088214442134, + "step": 4316 + }, + { + "ce_ib": 3.4730122089385986, + "ce_orig": 0.6056907773017883, + "epoch": 1.2409950391832627, + "kl_loss": 0.03689035773277283, + "loss_ib": 0.0007162047550082207, + "step": 4316 + }, + { + "ce_ib": 3.7957348823547363, + "ce_orig": 0.9346227645874023, + "epoch": 1.2409950391832627, + "kl_loss": 0.036393266171216965, + "loss_ib": 0.000743506068829447, + "step": 4316 + }, + { + "ce_ib": 4.246911525726318, + "ce_orig": 1.0694447755813599, + "epoch": 1.2412826227622402, + "kl_loss": 0.04933079332113266, + "loss_ib": 0.0009179990738630295, + "step": 4317 + }, + { + "ce_ib": 4.6556572914123535, + "ce_orig": 1.4254459142684937, + "epoch": 1.2412826227622402, + "kl_loss": 0.07862222194671631, + "loss_ib": 0.0012517879949882627, + "step": 4317 + }, + { + "ce_ib": 2.825639486312866, + "ce_orig": 0.6975762844085693, + "epoch": 1.2412826227622402, + "kl_loss": 0.02985038422048092, + "loss_ib": 0.0005810678121633828, + "step": 4317 + }, + { + "ce_ib": 4.468008995056152, + "ce_orig": 0.9714134931564331, + "epoch": 1.2412826227622402, + "kl_loss": 0.059684932231903076, + "loss_ib": 0.0010436502052471042, + "step": 4317 + }, + { + "ce_ib": 3.5070137977600098, + "ce_orig": 1.1361066102981567, + "epoch": 1.241570206341218, + "kl_loss": 0.03677454963326454, + "loss_ib": 0.0007184467976912856, + "step": 4318 + }, + { + "ce_ib": 4.3701677322387695, + "ce_orig": 0.9174346327781677, + "epoch": 1.241570206341218, + "kl_loss": 0.05713322386145592, + "loss_ib": 0.0010083490051329136, + "step": 4318 + }, + { + "ce_ib": 4.080385208129883, + "ce_orig": 1.1041514873504639, + "epoch": 1.241570206341218, + "kl_loss": 0.02588711306452751, + "loss_ib": 0.0006669096765108407, + "step": 4318 + }, + { + "ce_ib": 4.588955879211426, + "ce_orig": 0.9621594548225403, + "epoch": 1.241570206341218, + "kl_loss": 0.044174328446388245, + "loss_ib": 0.0009006388136185706, + "step": 4318 + }, + { + "ce_ib": 3.115964889526367, + "ce_orig": 0.6707367300987244, + "epoch": 1.2418577899201955, + "kl_loss": 0.04436958581209183, + "loss_ib": 0.0007552923052571714, + "step": 4319 + }, + { + "ce_ib": 2.639112710952759, + "ce_orig": 0.38779085874557495, + "epoch": 1.2418577899201955, + "kl_loss": 0.06831458956003189, + "loss_ib": 0.0009470571530982852, + "step": 4319 + }, + { + "ce_ib": 2.990586519241333, + "ce_orig": 1.0229307413101196, + "epoch": 1.2418577899201955, + "kl_loss": 0.025388453155755997, + "loss_ib": 0.0005529432091861963, + "step": 4319 + }, + { + "ce_ib": 4.230472564697266, + "ce_orig": 1.1057617664337158, + "epoch": 1.2418577899201955, + "kl_loss": 0.03432466834783554, + "loss_ib": 0.0007662939024157822, + "step": 4319 + }, + { + "epoch": 1.2421453734991732, + "grad_norm": 0.10557665675878525, + "learning_rate": 3.3078842506766484e-05, + "loss": 0.841, + "step": 4320 + }, + { + "ce_ib": 3.654700517654419, + "ce_orig": 1.0780147314071655, + "epoch": 1.2421453734991732, + "kl_loss": 0.04929471015930176, + "loss_ib": 0.0008584171300753951, + "step": 4320 + }, + { + "ce_ib": 2.535130023956299, + "ce_orig": 0.45572230219841003, + "epoch": 1.2421453734991732, + "kl_loss": 0.04828834533691406, + "loss_ib": 0.0007363964687101543, + "step": 4320 + }, + { + "ce_ib": 3.449955940246582, + "ce_orig": 0.8056680560112, + "epoch": 1.2421453734991732, + "kl_loss": 0.033421799540519714, + "loss_ib": 0.0006792135536670685, + "step": 4320 + }, + { + "ce_ib": 3.82047176361084, + "ce_orig": 1.1373211145401, + "epoch": 1.2421453734991732, + "kl_loss": 0.02557331696152687, + "loss_ib": 0.0006377803510986269, + "step": 4320 + }, + { + "ce_ib": 3.357665777206421, + "ce_orig": 0.7268592119216919, + "epoch": 1.242432957078151, + "kl_loss": 0.03594140708446503, + "loss_ib": 0.0006951805553399026, + "step": 4321 + }, + { + "ce_ib": 4.602231025695801, + "ce_orig": 1.4055730104446411, + "epoch": 1.242432957078151, + "kl_loss": 0.03613751381635666, + "loss_ib": 0.0008215982234105468, + "step": 4321 + }, + { + "ce_ib": 3.815263032913208, + "ce_orig": 0.9539876580238342, + "epoch": 1.242432957078151, + "kl_loss": 0.04072832688689232, + "loss_ib": 0.0007888094987720251, + "step": 4321 + }, + { + "ce_ib": 4.056264877319336, + "ce_orig": 1.1985821723937988, + "epoch": 1.242432957078151, + "kl_loss": 0.03313564881682396, + "loss_ib": 0.0007369829691015184, + "step": 4321 + }, + { + "ce_ib": 4.085426330566406, + "ce_orig": 0.7170079946517944, + "epoch": 1.2427205406571284, + "kl_loss": 0.0694395899772644, + "loss_ib": 0.0011029384331777692, + "step": 4322 + }, + { + "ce_ib": 1.611340880393982, + "ce_orig": 0.27595391869544983, + "epoch": 1.2427205406571284, + "kl_loss": 0.023173969238996506, + "loss_ib": 0.00039287377148866653, + "step": 4322 + }, + { + "ce_ib": 4.534134864807129, + "ce_orig": 0.9750275611877441, + "epoch": 1.2427205406571284, + "kl_loss": 0.05479489266872406, + "loss_ib": 0.00100136233959347, + "step": 4322 + }, + { + "ce_ib": 6.138675212860107, + "ce_orig": 1.5709712505340576, + "epoch": 1.2427205406571284, + "kl_loss": 0.05647828057408333, + "loss_ib": 0.0011786501854658127, + "step": 4322 + }, + { + "ce_ib": 4.989274024963379, + "ce_orig": 1.4149328470230103, + "epoch": 1.2430081242361062, + "kl_loss": 0.05134256184101105, + "loss_ib": 0.0010123529937118292, + "step": 4323 + }, + { + "ce_ib": 4.037515640258789, + "ce_orig": 1.3123607635498047, + "epoch": 1.2430081242361062, + "kl_loss": 0.02864486537873745, + "loss_ib": 0.0006902001914568245, + "step": 4323 + }, + { + "ce_ib": 2.05928635597229, + "ce_orig": 0.4123592972755432, + "epoch": 1.2430081242361062, + "kl_loss": 0.029934057965874672, + "loss_ib": 0.0005052692140452564, + "step": 4323 + }, + { + "ce_ib": 4.55168342590332, + "ce_orig": 0.8681050539016724, + "epoch": 1.2430081242361062, + "kl_loss": 0.039106305688619614, + "loss_ib": 0.0008462314144708216, + "step": 4323 + }, + { + "ce_ib": 3.2593159675598145, + "ce_orig": 0.8689188361167908, + "epoch": 1.2432957078150837, + "kl_loss": 0.04258572682738304, + "loss_ib": 0.0007517888443544507, + "step": 4324 + }, + { + "ce_ib": 3.833101987838745, + "ce_orig": 0.8764129877090454, + "epoch": 1.2432957078150837, + "kl_loss": 0.056254804134368896, + "loss_ib": 0.0009458581916987896, + "step": 4324 + }, + { + "ce_ib": 3.154097318649292, + "ce_orig": 0.7078856825828552, + "epoch": 1.2432957078150837, + "kl_loss": 0.04757144674658775, + "loss_ib": 0.00079112418461591, + "step": 4324 + }, + { + "ce_ib": 2.504565954208374, + "ce_orig": 0.5676838159561157, + "epoch": 1.2432957078150837, + "kl_loss": 0.030852409079670906, + "loss_ib": 0.0005589806823991239, + "step": 4324 + }, + { + "epoch": 1.2435832913940614, + "grad_norm": 0.11231398582458496, + "learning_rate": 3.304211051697844e-05, + "loss": 0.864, + "step": 4325 + }, + { + "ce_ib": 3.2376229763031006, + "ce_orig": 0.6956800222396851, + "epoch": 1.2435832913940614, + "kl_loss": 0.02434074878692627, + "loss_ib": 0.0005671698017977178, + "step": 4325 + }, + { + "ce_ib": 2.6427314281463623, + "ce_orig": 0.6201412677764893, + "epoch": 1.2435832913940614, + "kl_loss": 0.053103119134902954, + "loss_ib": 0.0007953043095767498, + "step": 4325 + }, + { + "ce_ib": 4.137721538543701, + "ce_orig": 0.4104962944984436, + "epoch": 1.2435832913940614, + "kl_loss": 0.03361589089035988, + "loss_ib": 0.0007499310304410756, + "step": 4325 + }, + { + "ce_ib": 2.8101513385772705, + "ce_orig": 0.8962716460227966, + "epoch": 1.2435832913940614, + "kl_loss": 0.04145604372024536, + "loss_ib": 0.0006955755525268614, + "step": 4325 + }, + { + "ce_ib": 3.5858027935028076, + "ce_orig": 0.8425741791725159, + "epoch": 1.243870874973039, + "kl_loss": 0.04505367577075958, + "loss_ib": 0.0008091169875115156, + "step": 4326 + }, + { + "ce_ib": 2.6573574542999268, + "ce_orig": 0.5906660556793213, + "epoch": 1.243870874973039, + "kl_loss": 0.04111417010426521, + "loss_ib": 0.0006768773891963065, + "step": 4326 + }, + { + "ce_ib": 2.3042774200439453, + "ce_orig": 0.5367829203605652, + "epoch": 1.243870874973039, + "kl_loss": 0.023610565811395645, + "loss_ib": 0.0004665333544835448, + "step": 4326 + }, + { + "ce_ib": 1.6987866163253784, + "ce_orig": 0.3788645565509796, + "epoch": 1.243870874973039, + "kl_loss": 0.026701783761382103, + "loss_ib": 0.00043689648737199605, + "step": 4326 + }, + { + "ce_ib": 1.4954296350479126, + "ce_orig": 0.13862551748752594, + "epoch": 1.2441584585520167, + "kl_loss": 0.040481336414813995, + "loss_ib": 0.0005543563165701926, + "step": 4327 + }, + { + "ce_ib": 2.70324444770813, + "ce_orig": 0.7748600244522095, + "epoch": 1.2441584585520167, + "kl_loss": 0.024451186880469322, + "loss_ib": 0.0005148362834006548, + "step": 4327 + }, + { + "ce_ib": 5.157174110412598, + "ce_orig": 1.7121347188949585, + "epoch": 1.2441584585520167, + "kl_loss": 0.04801825061440468, + "loss_ib": 0.0009958998998627067, + "step": 4327 + }, + { + "ce_ib": 4.189245700836182, + "ce_orig": 1.293172836303711, + "epoch": 1.2441584585520167, + "kl_loss": 0.016182325780391693, + "loss_ib": 0.0005807478446513414, + "step": 4327 + }, + { + "ce_ib": 2.2617034912109375, + "ce_orig": 0.6690163612365723, + "epoch": 1.2444460421309944, + "kl_loss": 0.03166725113987923, + "loss_ib": 0.0005428428412415087, + "step": 4328 + }, + { + "ce_ib": 4.301599502563477, + "ce_orig": 0.9099486470222473, + "epoch": 1.2444460421309944, + "kl_loss": 0.05022866278886795, + "loss_ib": 0.0009324465645477176, + "step": 4328 + }, + { + "ce_ib": 4.460276126861572, + "ce_orig": 1.306626796722412, + "epoch": 1.2444460421309944, + "kl_loss": 0.041200026869773865, + "loss_ib": 0.0008580278372392058, + "step": 4328 + }, + { + "ce_ib": 3.4430952072143555, + "ce_orig": 0.9134397506713867, + "epoch": 1.2444460421309944, + "kl_loss": 0.04116305708885193, + "loss_ib": 0.0007559400401078165, + "step": 4328 + }, + { + "ce_ib": 2.789224863052368, + "ce_orig": 0.5664234161376953, + "epoch": 1.244733625709972, + "kl_loss": 0.04080918803811073, + "loss_ib": 0.0006870143697597086, + "step": 4329 + }, + { + "ce_ib": 6.132214546203613, + "ce_orig": 1.6930112838745117, + "epoch": 1.244733625709972, + "kl_loss": 0.05131305009126663, + "loss_ib": 0.0011263518827036023, + "step": 4329 + }, + { + "ce_ib": 3.7639379501342773, + "ce_orig": 0.6755872964859009, + "epoch": 1.244733625709972, + "kl_loss": 0.047892820090055466, + "loss_ib": 0.0008553219959139824, + "step": 4329 + }, + { + "ce_ib": 3.903093099594116, + "ce_orig": 0.9489759206771851, + "epoch": 1.244733625709972, + "kl_loss": 0.02567390166223049, + "loss_ib": 0.0006470483494922519, + "step": 4329 + }, + { + "epoch": 1.2450212092889497, + "grad_norm": 0.09919138997793198, + "learning_rate": 3.300535915187957e-05, + "loss": 0.7687, + "step": 4330 + }, + { + "ce_ib": 4.256085395812988, + "ce_orig": 1.0341546535491943, + "epoch": 1.2450212092889497, + "kl_loss": 0.03659610450267792, + "loss_ib": 0.000791569531429559, + "step": 4330 + }, + { + "ce_ib": 3.271038293838501, + "ce_orig": 1.1016682386398315, + "epoch": 1.2450212092889497, + "kl_loss": 0.026639185845851898, + "loss_ib": 0.0005934956716373563, + "step": 4330 + }, + { + "ce_ib": 2.421772003173828, + "ce_orig": 0.576144814491272, + "epoch": 1.2450212092889497, + "kl_loss": 0.06020553782582283, + "loss_ib": 0.0008442325633950531, + "step": 4330 + }, + { + "ce_ib": 4.051109790802002, + "ce_orig": 1.1318871974945068, + "epoch": 1.2450212092889497, + "kl_loss": 0.036683592945337296, + "loss_ib": 0.0007719469140283763, + "step": 4330 + }, + { + "ce_ib": 4.491608619689941, + "ce_orig": 1.4384610652923584, + "epoch": 1.2453087928679272, + "kl_loss": 0.03460341691970825, + "loss_ib": 0.0007951949955895543, + "step": 4331 + }, + { + "ce_ib": 2.813318967819214, + "ce_orig": 0.7975696325302124, + "epoch": 1.2453087928679272, + "kl_loss": 0.025305986404418945, + "loss_ib": 0.0005343917873688042, + "step": 4331 + }, + { + "ce_ib": 4.112937927246094, + "ce_orig": 1.1913098096847534, + "epoch": 1.2453087928679272, + "kl_loss": 0.036530960351228714, + "loss_ib": 0.000776603352278471, + "step": 4331 + }, + { + "ce_ib": 3.2232251167297363, + "ce_orig": 0.7780563235282898, + "epoch": 1.2453087928679272, + "kl_loss": 0.04117518663406372, + "loss_ib": 0.0007340743904933333, + "step": 4331 + }, + { + "ce_ib": 3.4144809246063232, + "ce_orig": 0.8404524326324463, + "epoch": 1.245596376446905, + "kl_loss": 0.04259072616696358, + "loss_ib": 0.0007673553191125393, + "step": 4332 + }, + { + "ce_ib": 3.158054828643799, + "ce_orig": 0.6074140667915344, + "epoch": 1.245596376446905, + "kl_loss": 0.04601580649614334, + "loss_ib": 0.0007759634754620492, + "step": 4332 + }, + { + "ce_ib": 1.9068692922592163, + "ce_orig": 0.5859753489494324, + "epoch": 1.245596376446905, + "kl_loss": 0.020670615136623383, + "loss_ib": 0.00039739307248964906, + "step": 4332 + }, + { + "ce_ib": 4.017156600952148, + "ce_orig": 1.1627081632614136, + "epoch": 1.245596376446905, + "kl_loss": 0.04664718359708786, + "loss_ib": 0.0008681874605827034, + "step": 4332 + }, + { + "ce_ib": 2.9520061016082764, + "ce_orig": 1.0033056735992432, + "epoch": 1.2458839600258824, + "kl_loss": 0.02326609380543232, + "loss_ib": 0.0005278615280985832, + "step": 4333 + }, + { + "ce_ib": 4.512933254241943, + "ce_orig": 1.2172597646713257, + "epoch": 1.2458839600258824, + "kl_loss": 0.041951484978199005, + "loss_ib": 0.0008708082023076713, + "step": 4333 + }, + { + "ce_ib": 3.138828992843628, + "ce_orig": 0.5454142093658447, + "epoch": 1.2458839600258824, + "kl_loss": 0.04295089468359947, + "loss_ib": 0.0007433918071910739, + "step": 4333 + }, + { + "ce_ib": 2.3291852474212646, + "ce_orig": 0.6874583959579468, + "epoch": 1.2458839600258824, + "kl_loss": 0.017909811809659004, + "loss_ib": 0.0004120166413486004, + "step": 4333 + }, + { + "ce_ib": 6.385437488555908, + "ce_orig": 1.7932950258255005, + "epoch": 1.2461715436048602, + "kl_loss": 0.07512638717889786, + "loss_ib": 0.0013898075558245182, + "step": 4334 + }, + { + "ce_ib": 2.3496265411376953, + "ce_orig": 0.6907625794410706, + "epoch": 1.2461715436048602, + "kl_loss": 0.018534895032644272, + "loss_ib": 0.00042031161137856543, + "step": 4334 + }, + { + "ce_ib": 2.717586040496826, + "ce_orig": 0.5290762186050415, + "epoch": 1.2461715436048602, + "kl_loss": 0.035062000155448914, + "loss_ib": 0.0006223785458132625, + "step": 4334 + }, + { + "ce_ib": 2.5696568489074707, + "ce_orig": 0.5752684473991394, + "epoch": 1.2461715436048602, + "kl_loss": 0.039723142981529236, + "loss_ib": 0.0006541971233673394, + "step": 4334 + }, + { + "epoch": 1.246459127183838, + "grad_norm": 0.11837595701217651, + "learning_rate": 3.29685885000124e-05, + "loss": 0.806, + "step": 4335 + }, + { + "ce_ib": 3.5849990844726562, + "ce_orig": 0.6159544587135315, + "epoch": 1.246459127183838, + "kl_loss": 0.04560413211584091, + "loss_ib": 0.0008145411848090589, + "step": 4335 + }, + { + "ce_ib": 1.8633536100387573, + "ce_orig": 0.6062930822372437, + "epoch": 1.246459127183838, + "kl_loss": 0.03317953646183014, + "loss_ib": 0.0005181307205930352, + "step": 4335 + }, + { + "ce_ib": 3.134099245071411, + "ce_orig": 0.8480399250984192, + "epoch": 1.246459127183838, + "kl_loss": 0.03260425478219986, + "loss_ib": 0.000639452482573688, + "step": 4335 + }, + { + "ce_ib": 4.325756549835205, + "ce_orig": 1.19683837890625, + "epoch": 1.246459127183838, + "kl_loss": 0.05362332984805107, + "loss_ib": 0.0009688089485280216, + "step": 4335 + }, + { + "ce_ib": 4.337467193603516, + "ce_orig": 1.2929463386535645, + "epoch": 1.2467467107628154, + "kl_loss": 0.04802261292934418, + "loss_ib": 0.0009139728499576449, + "step": 4336 + }, + { + "ce_ib": 2.3551628589630127, + "ce_orig": 0.3092690110206604, + "epoch": 1.2467467107628154, + "kl_loss": 0.05505364388227463, + "loss_ib": 0.0007860527257435024, + "step": 4336 + }, + { + "ce_ib": 2.8124213218688965, + "ce_orig": 0.5608478784561157, + "epoch": 1.2467467107628154, + "kl_loss": 0.039665453135967255, + "loss_ib": 0.0006778966635465622, + "step": 4336 + }, + { + "ce_ib": 2.595223903656006, + "ce_orig": 0.4720524847507477, + "epoch": 1.2467467107628154, + "kl_loss": 0.03423847630620003, + "loss_ib": 0.0006019071443006396, + "step": 4336 + }, + { + "ce_ib": 2.5443837642669678, + "ce_orig": 0.683111846446991, + "epoch": 1.2470342943417931, + "kl_loss": 0.019829630851745605, + "loss_ib": 0.0004527346754912287, + "step": 4337 + }, + { + "ce_ib": 2.6151669025421143, + "ce_orig": 0.5740978717803955, + "epoch": 1.2470342943417931, + "kl_loss": 0.045287519693374634, + "loss_ib": 0.0007143918774090707, + "step": 4337 + }, + { + "ce_ib": 2.062392234802246, + "ce_orig": 0.3958171308040619, + "epoch": 1.2470342943417931, + "kl_loss": 0.04012412577867508, + "loss_ib": 0.0006074804696254432, + "step": 4337 + }, + { + "ce_ib": 4.9373779296875, + "ce_orig": 1.4525312185287476, + "epoch": 1.2470342943417931, + "kl_loss": 0.030371172353625298, + "loss_ib": 0.0007974494947120547, + "step": 4337 + }, + { + "ce_ib": 4.9106926918029785, + "ce_orig": 1.2041205167770386, + "epoch": 1.2473218779207706, + "kl_loss": 0.026829630136489868, + "loss_ib": 0.000759365560952574, + "step": 4338 + }, + { + "ce_ib": 2.6493079662323, + "ce_orig": 0.5723951458930969, + "epoch": 1.2473218779207706, + "kl_loss": 0.043665818870067596, + "loss_ib": 0.0007015889859758317, + "step": 4338 + }, + { + "ce_ib": 3.5289947986602783, + "ce_orig": 0.8387235999107361, + "epoch": 1.2473218779207706, + "kl_loss": 0.032306428998708725, + "loss_ib": 0.0006759637617506087, + "step": 4338 + }, + { + "ce_ib": 3.4625585079193115, + "ce_orig": 1.021998643875122, + "epoch": 1.2473218779207706, + "kl_loss": 0.039654143154621124, + "loss_ib": 0.0007427972159348428, + "step": 4338 + }, + { + "ce_ib": 2.3320963382720947, + "ce_orig": 0.6435001492500305, + "epoch": 1.2476094614997484, + "kl_loss": 0.026091858744621277, + "loss_ib": 0.0004941282095387578, + "step": 4339 + }, + { + "ce_ib": 4.025104999542236, + "ce_orig": 1.0656324625015259, + "epoch": 1.2476094614997484, + "kl_loss": 0.034953244030475616, + "loss_ib": 0.0007520429790019989, + "step": 4339 + }, + { + "ce_ib": 2.2818477153778076, + "ce_orig": 0.49799516797065735, + "epoch": 1.2476094614997484, + "kl_loss": 0.030710507184267044, + "loss_ib": 0.0005352898151613772, + "step": 4339 + }, + { + "ce_ib": 2.929800271987915, + "ce_orig": 0.6985006332397461, + "epoch": 1.2476094614997484, + "kl_loss": 0.04735535383224487, + "loss_ib": 0.000766533543355763, + "step": 4339 + }, + { + "epoch": 1.2478970450787261, + "grad_norm": 0.13924916088581085, + "learning_rate": 3.293179864996599e-05, + "loss": 0.8479, + "step": 4340 + }, + { + "ce_ib": 3.5019848346710205, + "ce_orig": 1.1032718420028687, + "epoch": 1.2478970450787261, + "kl_loss": 0.027662036940455437, + "loss_ib": 0.0006268188008107245, + "step": 4340 + }, + { + "ce_ib": 1.8490327596664429, + "ce_orig": 0.4195340573787689, + "epoch": 1.2478970450787261, + "kl_loss": 0.02714666724205017, + "loss_ib": 0.00045636994764208794, + "step": 4340 + }, + { + "ce_ib": 1.9561724662780762, + "ce_orig": 0.5448063611984253, + "epoch": 1.2478970450787261, + "kl_loss": 0.038821347057819366, + "loss_ib": 0.0005838306969963014, + "step": 4340 + }, + { + "ce_ib": 2.4509589672088623, + "ce_orig": 0.814144492149353, + "epoch": 1.2478970450787261, + "kl_loss": 0.026019951328635216, + "loss_ib": 0.0005052954074926674, + "step": 4340 + }, + { + "ce_ib": 3.3304965496063232, + "ce_orig": 0.8476390838623047, + "epoch": 1.2481846286577036, + "kl_loss": 0.06167395040392876, + "loss_ib": 0.0009497891296632588, + "step": 4341 + }, + { + "ce_ib": 3.404106378555298, + "ce_orig": 0.8958142399787903, + "epoch": 1.2481846286577036, + "kl_loss": 0.04908263310790062, + "loss_ib": 0.0008312369463965297, + "step": 4341 + }, + { + "ce_ib": 2.701690912246704, + "ce_orig": 0.6159617900848389, + "epoch": 1.2481846286577036, + "kl_loss": 0.0420558862388134, + "loss_ib": 0.000690727960318327, + "step": 4341 + }, + { + "ce_ib": 3.250714063644409, + "ce_orig": 0.7431774735450745, + "epoch": 1.2481846286577036, + "kl_loss": 0.05684333294630051, + "loss_ib": 0.0008935047662816942, + "step": 4341 + }, + { + "ce_ib": 2.891423225402832, + "ce_orig": 0.8275752663612366, + "epoch": 1.2484722122366814, + "kl_loss": 0.0287493783980608, + "loss_ib": 0.000576636113692075, + "step": 4342 + }, + { + "ce_ib": 3.5276272296905518, + "ce_orig": 1.1368930339813232, + "epoch": 1.2484722122366814, + "kl_loss": 0.03468424826860428, + "loss_ib": 0.000699605152476579, + "step": 4342 + }, + { + "ce_ib": 3.1539313793182373, + "ce_orig": 0.49673697352409363, + "epoch": 1.2484722122366814, + "kl_loss": 0.05211711302399635, + "loss_ib": 0.000836564286146313, + "step": 4342 + }, + { + "ce_ib": 3.466299533843994, + "ce_orig": 0.9810696244239807, + "epoch": 1.2484722122366814, + "kl_loss": 0.05717740207910538, + "loss_ib": 0.0009184039663523436, + "step": 4342 + }, + { + "ce_ib": 5.588840484619141, + "ce_orig": 1.487582802772522, + "epoch": 1.2487597958156589, + "kl_loss": 0.04611755907535553, + "loss_ib": 0.0010200595716014504, + "step": 4343 + }, + { + "ce_ib": 5.779508590698242, + "ce_orig": 1.72333562374115, + "epoch": 1.2487597958156589, + "kl_loss": 0.04527392238378525, + "loss_ib": 0.001030690036714077, + "step": 4343 + }, + { + "ce_ib": 3.8876686096191406, + "ce_orig": 0.9855862855911255, + "epoch": 1.2487597958156589, + "kl_loss": 0.0439722016453743, + "loss_ib": 0.000828488904517144, + "step": 4343 + }, + { + "ce_ib": 1.7066177129745483, + "ce_orig": 0.5339191555976868, + "epoch": 1.2487597958156589, + "kl_loss": 0.028516365215182304, + "loss_ib": 0.000455825385870412, + "step": 4343 + }, + { + "ce_ib": 3.273784875869751, + "ce_orig": 0.6024976968765259, + "epoch": 1.2490473793946366, + "kl_loss": 0.036326561123132706, + "loss_ib": 0.0006906440830789506, + "step": 4344 + }, + { + "ce_ib": 2.522347927093506, + "ce_orig": 0.6146305799484253, + "epoch": 1.2490473793946366, + "kl_loss": 0.03572464734315872, + "loss_ib": 0.0006094812415540218, + "step": 4344 + }, + { + "ce_ib": 3.20220947265625, + "ce_orig": 0.7661081552505493, + "epoch": 1.2490473793946366, + "kl_loss": 0.03701595962047577, + "loss_ib": 0.0006903804605826735, + "step": 4344 + }, + { + "ce_ib": 2.028961658477783, + "ce_orig": 0.6010376811027527, + "epoch": 1.2490473793946366, + "kl_loss": 0.0246998630464077, + "loss_ib": 0.000449894811026752, + "step": 4344 + }, + { + "epoch": 1.2493349629736141, + "grad_norm": 0.11626764386892319, + "learning_rate": 3.2894989690375626e-05, + "loss": 0.8349, + "step": 4345 + }, + { + "ce_ib": 2.771796703338623, + "ce_orig": 0.5789204239845276, + "epoch": 1.2493349629736141, + "kl_loss": 0.051389180123806, + "loss_ib": 0.0007910715066827834, + "step": 4345 + }, + { + "ce_ib": 1.5257419347763062, + "ce_orig": 0.4559953808784485, + "epoch": 1.2493349629736141, + "kl_loss": 0.024838633835315704, + "loss_ib": 0.00040096050361171365, + "step": 4345 + }, + { + "ce_ib": 2.466301679611206, + "ce_orig": 0.6293806433677673, + "epoch": 1.2493349629736141, + "kl_loss": 0.037775736302137375, + "loss_ib": 0.0006243875250220299, + "step": 4345 + }, + { + "ce_ib": 5.045629024505615, + "ce_orig": 1.430849313735962, + "epoch": 1.2493349629736141, + "kl_loss": 0.04918234795331955, + "loss_ib": 0.0009963863994926214, + "step": 4345 + }, + { + "ce_ib": 3.6236329078674316, + "ce_orig": 1.0598942041397095, + "epoch": 1.2496225465525919, + "kl_loss": 0.03201381862163544, + "loss_ib": 0.0006825014716014266, + "step": 4346 + }, + { + "ce_ib": 1.9678130149841309, + "ce_orig": 0.6002795100212097, + "epoch": 1.2496225465525919, + "kl_loss": 0.02291964367032051, + "loss_ib": 0.00042597769061103463, + "step": 4346 + }, + { + "ce_ib": 2.4178807735443115, + "ce_orig": 0.4253583550453186, + "epoch": 1.2496225465525919, + "kl_loss": 0.03882285952568054, + "loss_ib": 0.0006300166132859886, + "step": 4346 + }, + { + "ce_ib": 3.6066324710845947, + "ce_orig": 1.1085119247436523, + "epoch": 1.2496225465525919, + "kl_loss": 0.03859324008226395, + "loss_ib": 0.000746595615055412, + "step": 4346 + }, + { + "ce_ib": 2.766690969467163, + "ce_orig": 0.7568559050559998, + "epoch": 1.2499101301315694, + "kl_loss": 0.023850545287132263, + "loss_ib": 0.000515174528118223, + "step": 4347 + }, + { + "ce_ib": 4.294102191925049, + "ce_orig": 1.1992484331130981, + "epoch": 1.2499101301315694, + "kl_loss": 0.04581558704376221, + "loss_ib": 0.0008875660132616758, + "step": 4347 + }, + { + "ce_ib": 4.447693824768066, + "ce_orig": 1.4010940790176392, + "epoch": 1.2499101301315694, + "kl_loss": 0.04734381288290024, + "loss_ib": 0.0009182074572890997, + "step": 4347 + }, + { + "ce_ib": 2.3337202072143555, + "ce_orig": 0.5973346829414368, + "epoch": 1.2499101301315694, + "kl_loss": 0.0331638865172863, + "loss_ib": 0.0005650108796544373, + "step": 4347 + }, + { + "ce_ib": 2.4613685607910156, + "ce_orig": 0.6620900630950928, + "epoch": 1.250197713710547, + "kl_loss": 0.03819073736667633, + "loss_ib": 0.0006280442466959357, + "step": 4348 + }, + { + "ce_ib": 3.2021708488464355, + "ce_orig": 0.874103307723999, + "epoch": 1.250197713710547, + "kl_loss": 0.03485472872853279, + "loss_ib": 0.0006687644054181874, + "step": 4348 + }, + { + "ce_ib": 3.0105974674224854, + "ce_orig": 0.7320840954780579, + "epoch": 1.250197713710547, + "kl_loss": 0.04146488010883331, + "loss_ib": 0.0007157085929065943, + "step": 4348 + }, + { + "ce_ib": 2.094118356704712, + "ce_orig": 0.6478078961372375, + "epoch": 1.250197713710547, + "kl_loss": 0.01907194033265114, + "loss_ib": 0.0004001312190666795, + "step": 4348 + }, + { + "ce_ib": 5.259263038635254, + "ce_orig": 1.447972059249878, + "epoch": 1.2504852972895248, + "kl_loss": 0.02975347638130188, + "loss_ib": 0.0008234610431827605, + "step": 4349 + }, + { + "ce_ib": 2.7445709705352783, + "ce_orig": 0.8576162457466125, + "epoch": 1.2504852972895248, + "kl_loss": 0.04210933297872543, + "loss_ib": 0.0006955504068173468, + "step": 4349 + }, + { + "ce_ib": 4.407168865203857, + "ce_orig": 1.1428827047348022, + "epoch": 1.2504852972895248, + "kl_loss": 0.0385642945766449, + "loss_ib": 0.0008263598429039121, + "step": 4349 + }, + { + "ce_ib": 3.872048854827881, + "ce_orig": 1.1766459941864014, + "epoch": 1.2504852972895248, + "kl_loss": 0.05022836849093437, + "loss_ib": 0.0008894886123016477, + "step": 4349 + }, + { + "epoch": 1.2507728808685024, + "grad_norm": 0.13662484288215637, + "learning_rate": 3.285816170992263e-05, + "loss": 0.7873, + "step": 4350 + }, + { + "ce_ib": 4.447669982910156, + "ce_orig": 1.0165499448776245, + "epoch": 1.2507728808685024, + "kl_loss": 0.05455603078007698, + "loss_ib": 0.000990327331237495, + "step": 4350 + }, + { + "ce_ib": 3.2183585166931152, + "ce_orig": 0.9411856532096863, + "epoch": 1.2507728808685024, + "kl_loss": 0.052029095590114594, + "loss_ib": 0.0008421267266385257, + "step": 4350 + }, + { + "ce_ib": 2.7760426998138428, + "ce_orig": 0.4877295196056366, + "epoch": 1.2507728808685024, + "kl_loss": 0.02496085688471794, + "loss_ib": 0.0005272128037177026, + "step": 4350 + }, + { + "ce_ib": 2.5228235721588135, + "ce_orig": 0.5217838287353516, + "epoch": 1.2507728808685024, + "kl_loss": 0.044805869460105896, + "loss_ib": 0.000700341013725847, + "step": 4350 + }, + { + "ce_ib": 5.3900628089904785, + "ce_orig": 1.4431794881820679, + "epoch": 1.25106046444748, + "kl_loss": 0.05093192681670189, + "loss_ib": 0.0010483254445716739, + "step": 4351 + }, + { + "ce_ib": 3.37117338180542, + "ce_orig": 0.7587767243385315, + "epoch": 1.25106046444748, + "kl_loss": 0.04983040690422058, + "loss_ib": 0.000835421378724277, + "step": 4351 + }, + { + "ce_ib": 4.925459861755371, + "ce_orig": 1.0079307556152344, + "epoch": 1.25106046444748, + "kl_loss": 0.04544004797935486, + "loss_ib": 0.0009469464421272278, + "step": 4351 + }, + { + "ce_ib": 4.490858554840088, + "ce_orig": 0.9978848099708557, + "epoch": 1.25106046444748, + "kl_loss": 0.047703951597213745, + "loss_ib": 0.0009261253289878368, + "step": 4351 + }, + { + "ce_ib": 2.8107197284698486, + "ce_orig": 0.6395078897476196, + "epoch": 1.2513480480264576, + "kl_loss": 0.031081022694706917, + "loss_ib": 0.000591882155276835, + "step": 4352 + }, + { + "ce_ib": 4.503235816955566, + "ce_orig": 0.9143180847167969, + "epoch": 1.2513480480264576, + "kl_loss": 0.05927707999944687, + "loss_ib": 0.0010430943220853806, + "step": 4352 + }, + { + "ce_ib": 3.614014148712158, + "ce_orig": 0.9033962488174438, + "epoch": 1.2513480480264576, + "kl_loss": 0.04095115512609482, + "loss_ib": 0.0007709129131399095, + "step": 4352 + }, + { + "ce_ib": 4.267055988311768, + "ce_orig": 1.0096848011016846, + "epoch": 1.2513480480264576, + "kl_loss": 0.03896302729845047, + "loss_ib": 0.000816335785202682, + "step": 4352 + }, + { + "ce_ib": 2.363988161087036, + "ce_orig": 0.6991767883300781, + "epoch": 1.2516356316054353, + "kl_loss": 0.028003038838505745, + "loss_ib": 0.0005164291942492127, + "step": 4353 + }, + { + "ce_ib": 2.9034242630004883, + "ce_orig": 0.7287737727165222, + "epoch": 1.2516356316054353, + "kl_loss": 0.04647781327366829, + "loss_ib": 0.0007551204762421548, + "step": 4353 + }, + { + "ce_ib": 2.781825304031372, + "ce_orig": 0.5749319195747375, + "epoch": 1.2516356316054353, + "kl_loss": 0.033635422587394714, + "loss_ib": 0.0006145367515273392, + "step": 4353 + }, + { + "ce_ib": 4.330816745758057, + "ce_orig": 1.0716878175735474, + "epoch": 1.2516356316054353, + "kl_loss": 0.044660523533821106, + "loss_ib": 0.0008796869078651071, + "step": 4353 + }, + { + "ce_ib": 2.3019425868988037, + "ce_orig": 0.6227008104324341, + "epoch": 1.251923215184413, + "kl_loss": 0.03573628515005112, + "loss_ib": 0.0005875570932403207, + "step": 4354 + }, + { + "ce_ib": 3.353712558746338, + "ce_orig": 0.7141209244728088, + "epoch": 1.251923215184413, + "kl_loss": 0.034292757511138916, + "loss_ib": 0.0006782987620681524, + "step": 4354 + }, + { + "ce_ib": 2.7541277408599854, + "ce_orig": 0.5901636481285095, + "epoch": 1.251923215184413, + "kl_loss": 0.06414015591144562, + "loss_ib": 0.0009168143151327968, + "step": 4354 + }, + { + "ce_ib": 2.1773276329040527, + "ce_orig": 0.6112444996833801, + "epoch": 1.251923215184413, + "kl_loss": 0.03193805366754532, + "loss_ib": 0.0005371132865548134, + "step": 4354 + }, + { + "epoch": 1.2522107987633906, + "grad_norm": 0.1101294457912445, + "learning_rate": 3.282131479733416e-05, + "loss": 0.8126, + "step": 4355 + }, + { + "ce_ib": 3.5598549842834473, + "ce_orig": 0.8902134895324707, + "epoch": 1.2522107987633906, + "kl_loss": 0.03198287636041641, + "loss_ib": 0.0006758142844773829, + "step": 4355 + }, + { + "ce_ib": 5.343376159667969, + "ce_orig": 1.6976802349090576, + "epoch": 1.2522107987633906, + "kl_loss": 0.061139293015003204, + "loss_ib": 0.0011457304935902357, + "step": 4355 + }, + { + "ce_ib": 3.1083524227142334, + "ce_orig": 0.7075733542442322, + "epoch": 1.2522107987633906, + "kl_loss": 0.043331701308488846, + "loss_ib": 0.0007441521738655865, + "step": 4355 + }, + { + "ce_ib": 2.9257524013519287, + "ce_orig": 0.7298928499221802, + "epoch": 1.2522107987633906, + "kl_loss": 0.05792830139398575, + "loss_ib": 0.0008718582103028893, + "step": 4355 + }, + { + "ce_ib": 2.530499219894409, + "ce_orig": 0.604814887046814, + "epoch": 1.2524983823423683, + "kl_loss": 0.03545811027288437, + "loss_ib": 0.0006076309946365654, + "step": 4356 + }, + { + "ce_ib": 1.7324000597000122, + "ce_orig": 0.4287906289100647, + "epoch": 1.2524983823423683, + "kl_loss": 0.03094184771180153, + "loss_ib": 0.0004826584772672504, + "step": 4356 + }, + { + "ce_ib": 3.7623891830444336, + "ce_orig": 0.6933006048202515, + "epoch": 1.2524983823423683, + "kl_loss": 0.058088868856430054, + "loss_ib": 0.0009571275440976024, + "step": 4356 + }, + { + "ce_ib": 2.927618980407715, + "ce_orig": 0.5780041217803955, + "epoch": 1.2524983823423683, + "kl_loss": 0.027053281664848328, + "loss_ib": 0.0005632946849800646, + "step": 4356 + }, + { + "ce_ib": 4.222033977508545, + "ce_orig": 1.1297208070755005, + "epoch": 1.2527859659213458, + "kl_loss": 0.05041927099227905, + "loss_ib": 0.0009263960528187454, + "step": 4357 + }, + { + "ce_ib": 3.3701727390289307, + "ce_orig": 0.9152921438217163, + "epoch": 1.2527859659213458, + "kl_loss": 0.048530369997024536, + "loss_ib": 0.0008223209297284484, + "step": 4357 + }, + { + "ce_ib": 2.8735995292663574, + "ce_orig": 0.8556289672851562, + "epoch": 1.2527859659213458, + "kl_loss": 0.04782877489924431, + "loss_ib": 0.0007656477391719818, + "step": 4357 + }, + { + "ce_ib": 2.3236799240112305, + "ce_orig": 0.753394365310669, + "epoch": 1.2527859659213458, + "kl_loss": 0.034792885184288025, + "loss_ib": 0.000580296793486923, + "step": 4357 + }, + { + "ce_ib": 3.8382935523986816, + "ce_orig": 1.0765223503112793, + "epoch": 1.2530735495003236, + "kl_loss": 0.0389232262969017, + "loss_ib": 0.0007730616489425302, + "step": 4358 + }, + { + "ce_ib": 2.8082103729248047, + "ce_orig": 0.9302999973297119, + "epoch": 1.2530735495003236, + "kl_loss": 0.03815305978059769, + "loss_ib": 0.0006623516092076898, + "step": 4358 + }, + { + "ce_ib": 2.5005199909210205, + "ce_orig": 0.712388813495636, + "epoch": 1.2530735495003236, + "kl_loss": 0.04557475447654724, + "loss_ib": 0.0007057994953356683, + "step": 4358 + }, + { + "ce_ib": 2.3124613761901855, + "ce_orig": 0.6309823989868164, + "epoch": 1.2530735495003236, + "kl_loss": 0.04377736896276474, + "loss_ib": 0.0006690198206342757, + "step": 4358 + }, + { + "ce_ib": 1.4968611001968384, + "ce_orig": 0.34067681431770325, + "epoch": 1.2533611330793013, + "kl_loss": 0.07800927758216858, + "loss_ib": 0.0009297788492403924, + "step": 4359 + }, + { + "ce_ib": 1.9890257120132446, + "ce_orig": 0.5579976439476013, + "epoch": 1.2533611330793013, + "kl_loss": 0.02584655210375786, + "loss_ib": 0.0004573680635076016, + "step": 4359 + }, + { + "ce_ib": 1.8774000406265259, + "ce_orig": 0.36109307408332825, + "epoch": 1.2533611330793013, + "kl_loss": 0.035177018493413925, + "loss_ib": 0.0005395101616159081, + "step": 4359 + }, + { + "ce_ib": 3.7173357009887695, + "ce_orig": 0.9105441570281982, + "epoch": 1.2533611330793013, + "kl_loss": 0.05281313136219978, + "loss_ib": 0.0008998648845590651, + "step": 4359 + }, + { + "epoch": 1.2536487166582788, + "grad_norm": 0.11347527801990509, + "learning_rate": 3.278444904138297e-05, + "loss": 0.7973, + "step": 4360 + }, + { + "ce_ib": 2.3391690254211426, + "ce_orig": 0.7149328589439392, + "epoch": 1.2536487166582788, + "kl_loss": 0.031056800857186317, + "loss_ib": 0.0005444849375635386, + "step": 4360 + }, + { + "ce_ib": 4.260293006896973, + "ce_orig": 1.0501697063446045, + "epoch": 1.2536487166582788, + "kl_loss": 0.04327846318483353, + "loss_ib": 0.0008588139316998422, + "step": 4360 + }, + { + "ce_ib": 3.696725845336914, + "ce_orig": 0.9318695664405823, + "epoch": 1.2536487166582788, + "kl_loss": 0.05491359531879425, + "loss_ib": 0.0009188085678033531, + "step": 4360 + }, + { + "ce_ib": 3.5946719646453857, + "ce_orig": 0.9659897685050964, + "epoch": 1.2536487166582788, + "kl_loss": 0.04875180497765541, + "loss_ib": 0.0008469852618873119, + "step": 4360 + }, + { + "ce_ib": 2.38240385055542, + "ce_orig": 0.6007758378982544, + "epoch": 1.2539363002372563, + "kl_loss": 0.03825851157307625, + "loss_ib": 0.0006208255072124302, + "step": 4361 + }, + { + "ce_ib": 2.3821659088134766, + "ce_orig": 0.38488149642944336, + "epoch": 1.2539363002372563, + "kl_loss": 0.030871614813804626, + "loss_ib": 0.0005469327443279326, + "step": 4361 + }, + { + "ce_ib": 3.457575559616089, + "ce_orig": 1.1058672666549683, + "epoch": 1.2539363002372563, + "kl_loss": 0.03875720128417015, + "loss_ib": 0.0007333295652642846, + "step": 4361 + }, + { + "ce_ib": 3.21343994140625, + "ce_orig": 0.9036383628845215, + "epoch": 1.2539363002372563, + "kl_loss": 0.045938074588775635, + "loss_ib": 0.000780724745709449, + "step": 4361 + }, + { + "ce_ib": 3.0839943885803223, + "ce_orig": 0.9210638999938965, + "epoch": 1.254223883816234, + "kl_loss": 0.03448382765054703, + "loss_ib": 0.0006532376864925027, + "step": 4362 + }, + { + "ce_ib": 2.1096341609954834, + "ce_orig": 0.6928640604019165, + "epoch": 1.254223883816234, + "kl_loss": 0.032456908375024796, + "loss_ib": 0.0005355324828997254, + "step": 4362 + }, + { + "ce_ib": 4.69873571395874, + "ce_orig": 1.3026243448257446, + "epoch": 1.254223883816234, + "kl_loss": 0.04217041656374931, + "loss_ib": 0.0008915777434594929, + "step": 4362 + }, + { + "ce_ib": 2.950834035873413, + "ce_orig": 0.9562898278236389, + "epoch": 1.254223883816234, + "kl_loss": 0.04019477218389511, + "loss_ib": 0.0006970310932956636, + "step": 4362 + }, + { + "ce_ib": 3.7716004848480225, + "ce_orig": 0.7964085340499878, + "epoch": 1.2545114673952118, + "kl_loss": 0.050555549561977386, + "loss_ib": 0.0008827155688777566, + "step": 4363 + }, + { + "ce_ib": 2.869417905807495, + "ce_orig": 0.7699050307273865, + "epoch": 1.2545114673952118, + "kl_loss": 0.05802469700574875, + "loss_ib": 0.0008671886753290892, + "step": 4363 + }, + { + "ce_ib": 2.070605754852295, + "ce_orig": 0.5408931970596313, + "epoch": 1.2545114673952118, + "kl_loss": 0.03400636091828346, + "loss_ib": 0.000547124189324677, + "step": 4363 + }, + { + "ce_ib": 4.606869697570801, + "ce_orig": 1.2551651000976562, + "epoch": 1.2545114673952118, + "kl_loss": 0.08268755674362183, + "loss_ib": 0.0012875625398010015, + "step": 4363 + }, + { + "ce_ib": 3.395965576171875, + "ce_orig": 0.571077823638916, + "epoch": 1.2547990509741893, + "kl_loss": 0.07407223433256149, + "loss_ib": 0.001080318819731474, + "step": 4364 + }, + { + "ce_ib": 2.7973670959472656, + "ce_orig": 0.5862576961517334, + "epoch": 1.2547990509741893, + "kl_loss": 0.035978563129901886, + "loss_ib": 0.0006395223317667842, + "step": 4364 + }, + { + "ce_ib": 2.9063937664031982, + "ce_orig": 0.7628955841064453, + "epoch": 1.2547990509741893, + "kl_loss": 0.0411800891160965, + "loss_ib": 0.0007024402730166912, + "step": 4364 + }, + { + "ce_ib": 4.97173547744751, + "ce_orig": 1.1124640703201294, + "epoch": 1.2547990509741893, + "kl_loss": 0.12191151082515717, + "loss_ib": 0.0017162886215373874, + "step": 4364 + }, + { + "epoch": 1.255086634553167, + "grad_norm": 0.09365641325712204, + "learning_rate": 3.274756453088723e-05, + "loss": 0.8188, + "step": 4365 + }, + { + "ce_ib": 4.552379131317139, + "ce_orig": 1.487311601638794, + "epoch": 1.255086634553167, + "kl_loss": 0.05971131473779678, + "loss_ib": 0.0010523509699851274, + "step": 4365 + }, + { + "ce_ib": 4.250067710876465, + "ce_orig": 1.338928461074829, + "epoch": 1.255086634553167, + "kl_loss": 0.040869079530239105, + "loss_ib": 0.0008336975006386638, + "step": 4365 + }, + { + "ce_ib": 5.287361145019531, + "ce_orig": 1.2977962493896484, + "epoch": 1.255086634553167, + "kl_loss": 0.04118039086461067, + "loss_ib": 0.0009405399905517697, + "step": 4365 + }, + { + "ce_ib": 3.5819592475891113, + "ce_orig": 0.7614784836769104, + "epoch": 1.255086634553167, + "kl_loss": 0.05343199521303177, + "loss_ib": 0.0008925158181227744, + "step": 4365 + }, + { + "ce_ib": 5.135771751403809, + "ce_orig": 1.675308346748352, + "epoch": 1.2553742181321446, + "kl_loss": 0.06885774433612823, + "loss_ib": 0.0012021545553579926, + "step": 4366 + }, + { + "ce_ib": 4.3677191734313965, + "ce_orig": 0.961846649646759, + "epoch": 1.2553742181321446, + "kl_loss": 0.053183022886514664, + "loss_ib": 0.000968602136708796, + "step": 4366 + }, + { + "ce_ib": 2.938076972961426, + "ce_orig": 0.7775267958641052, + "epoch": 1.2553742181321446, + "kl_loss": 0.03331027179956436, + "loss_ib": 0.0006269103614613414, + "step": 4366 + }, + { + "ce_ib": 3.133293390274048, + "ce_orig": 0.768174946308136, + "epoch": 1.2553742181321446, + "kl_loss": 0.03365972638130188, + "loss_ib": 0.000649926601909101, + "step": 4366 + }, + { + "ce_ib": 2.8236658573150635, + "ce_orig": 0.7013083696365356, + "epoch": 1.2556618017111223, + "kl_loss": 0.03802367299795151, + "loss_ib": 0.0006626032991334796, + "step": 4367 + }, + { + "ce_ib": 2.2917113304138184, + "ce_orig": 0.57365483045578, + "epoch": 1.2556618017111223, + "kl_loss": 0.03123481385409832, + "loss_ib": 0.0005415192572399974, + "step": 4367 + }, + { + "ce_ib": 1.7147375345230103, + "ce_orig": 0.4846704304218292, + "epoch": 1.2556618017111223, + "kl_loss": 0.027427880093455315, + "loss_ib": 0.00044575255014933646, + "step": 4367 + }, + { + "ce_ib": 2.8221476078033447, + "ce_orig": 0.7713122367858887, + "epoch": 1.2556618017111223, + "kl_loss": 0.05217558145523071, + "loss_ib": 0.0008039704989641905, + "step": 4367 + }, + { + "ce_ib": 2.9132614135742188, + "ce_orig": 0.6223223209381104, + "epoch": 1.2559493852901, + "kl_loss": 0.04949883371591568, + "loss_ib": 0.0007863144273869693, + "step": 4368 + }, + { + "ce_ib": 4.802818298339844, + "ce_orig": 1.1331557035446167, + "epoch": 1.2559493852901, + "kl_loss": 0.03904426842927933, + "loss_ib": 0.0008707244414836168, + "step": 4368 + }, + { + "ce_ib": 1.845137596130371, + "ce_orig": 0.4177876114845276, + "epoch": 1.2559493852901, + "kl_loss": 0.046055980026721954, + "loss_ib": 0.0006450735381804407, + "step": 4368 + }, + { + "ce_ib": 4.437448024749756, + "ce_orig": 1.4432967901229858, + "epoch": 1.2559493852901, + "kl_loss": 0.041882917284965515, + "loss_ib": 0.0008625739719718695, + "step": 4368 + }, + { + "ce_ib": 2.9572956562042236, + "ce_orig": 0.8543957471847534, + "epoch": 1.2562369688690775, + "kl_loss": 0.05152904614806175, + "loss_ib": 0.0008110199705697596, + "step": 4369 + }, + { + "ce_ib": 1.8478716611862183, + "ce_orig": 0.3807485103607178, + "epoch": 1.2562369688690775, + "kl_loss": 0.020169014111161232, + "loss_ib": 0.00038647730252705514, + "step": 4369 + }, + { + "ce_ib": 5.4518141746521, + "ce_orig": 1.5308336019515991, + "epoch": 1.2562369688690775, + "kl_loss": 0.04248220846056938, + "loss_ib": 0.0009700034279376268, + "step": 4369 + }, + { + "ce_ib": 3.0276424884796143, + "ce_orig": 0.7489144802093506, + "epoch": 1.2562369688690775, + "kl_loss": 0.03588103502988815, + "loss_ib": 0.0006615745951421559, + "step": 4369 + }, + { + "epoch": 1.2565245524480553, + "grad_norm": 0.09643363207578659, + "learning_rate": 3.271066135471029e-05, + "loss": 0.8563, + "step": 4370 + }, + { + "ce_ib": 3.659430742263794, + "ce_orig": 1.242215633392334, + "epoch": 1.2565245524480553, + "kl_loss": 0.02556523121893406, + "loss_ib": 0.0006215953617356718, + "step": 4370 + }, + { + "ce_ib": 4.605731010437012, + "ce_orig": 0.9758405089378357, + "epoch": 1.2565245524480553, + "kl_loss": 0.04468079283833504, + "loss_ib": 0.0009073810069821775, + "step": 4370 + }, + { + "ce_ib": 3.190058708190918, + "ce_orig": 0.806190550327301, + "epoch": 1.2565245524480553, + "kl_loss": 0.04245499521493912, + "loss_ib": 0.0007435557781718671, + "step": 4370 + }, + { + "ce_ib": 5.391535758972168, + "ce_orig": 1.5598551034927368, + "epoch": 1.2565245524480553, + "kl_loss": 0.035790424793958664, + "loss_ib": 0.0008970577619038522, + "step": 4370 + }, + { + "ce_ib": 2.165501832962036, + "ce_orig": 0.4338909089565277, + "epoch": 1.2568121360270328, + "kl_loss": 0.033186882734298706, + "loss_ib": 0.0005484189605340362, + "step": 4371 + }, + { + "ce_ib": 4.475053787231445, + "ce_orig": 0.9093128442764282, + "epoch": 1.2568121360270328, + "kl_loss": 0.06342069059610367, + "loss_ib": 0.0010817121947184205, + "step": 4371 + }, + { + "ce_ib": 1.9486533403396606, + "ce_orig": 0.5719766616821289, + "epoch": 1.2568121360270328, + "kl_loss": 0.020929638296365738, + "loss_ib": 0.00040416172123514116, + "step": 4371 + }, + { + "ce_ib": 2.69364595413208, + "ce_orig": 0.8443297147750854, + "epoch": 1.2568121360270328, + "kl_loss": 0.03153450787067413, + "loss_ib": 0.0005847096326760948, + "step": 4371 + }, + { + "ce_ib": 2.832754135131836, + "ce_orig": 0.698772132396698, + "epoch": 1.2570997196060105, + "kl_loss": 0.06634856015443802, + "loss_ib": 0.0009467609925195575, + "step": 4372 + }, + { + "ce_ib": 2.927907943725586, + "ce_orig": 0.9495781660079956, + "epoch": 1.2570997196060105, + "kl_loss": 0.02098044939339161, + "loss_ib": 0.0005025952705182135, + "step": 4372 + }, + { + "ce_ib": 3.974158525466919, + "ce_orig": 1.0595420598983765, + "epoch": 1.2570997196060105, + "kl_loss": 0.039980486035346985, + "loss_ib": 0.0007972206803970039, + "step": 4372 + }, + { + "ce_ib": 4.729122638702393, + "ce_orig": 1.4219907522201538, + "epoch": 1.2570997196060105, + "kl_loss": 0.07776185870170593, + "loss_ib": 0.0012505307095125318, + "step": 4372 + }, + { + "ce_ib": 3.1090376377105713, + "ce_orig": 0.5915253162384033, + "epoch": 1.2573873031849883, + "kl_loss": 0.07250116020441055, + "loss_ib": 0.001035915338434279, + "step": 4373 + }, + { + "ce_ib": 3.2439095973968506, + "ce_orig": 0.9418063163757324, + "epoch": 1.2573873031849883, + "kl_loss": 0.04668763279914856, + "loss_ib": 0.0007912672590464354, + "step": 4373 + }, + { + "ce_ib": 1.9669455289840698, + "ce_orig": 0.5781437754631042, + "epoch": 1.2573873031849883, + "kl_loss": 0.025014666840434074, + "loss_ib": 0.0004468411789275706, + "step": 4373 + }, + { + "ce_ib": 3.19258713722229, + "ce_orig": 0.8873891830444336, + "epoch": 1.2573873031849883, + "kl_loss": 0.03924081474542618, + "loss_ib": 0.0007116668275557458, + "step": 4373 + }, + { + "ce_ib": 3.920607089996338, + "ce_orig": 1.0247550010681152, + "epoch": 1.2576748867639658, + "kl_loss": 0.03224966302514076, + "loss_ib": 0.0007145573035813868, + "step": 4374 + }, + { + "ce_ib": 3.289022207260132, + "ce_orig": 1.1300514936447144, + "epoch": 1.2576748867639658, + "kl_loss": 0.04664548859000206, + "loss_ib": 0.0007953570457175374, + "step": 4374 + }, + { + "ce_ib": 5.495182514190674, + "ce_orig": 1.5786340236663818, + "epoch": 1.2576748867639658, + "kl_loss": 0.0423852875828743, + "loss_ib": 0.0009733710903674364, + "step": 4374 + }, + { + "ce_ib": 2.3229758739471436, + "ce_orig": 0.6241907477378845, + "epoch": 1.2576748867639658, + "kl_loss": 0.02583673596382141, + "loss_ib": 0.0004906649119220674, + "step": 4374 + }, + { + "epoch": 1.2579624703429433, + "grad_norm": 0.12879309058189392, + "learning_rate": 3.267373960176047e-05, + "loss": 0.8685, + "step": 4375 + }, + { + "ce_ib": 2.0995547771453857, + "ce_orig": 0.3616024851799011, + "epoch": 1.2579624703429433, + "kl_loss": 0.08051493763923645, + "loss_ib": 0.0010151048190891743, + "step": 4375 + }, + { + "ce_ib": 2.4267499446868896, + "ce_orig": 0.6901453733444214, + "epoch": 1.2579624703429433, + "kl_loss": 0.03953006491065025, + "loss_ib": 0.0006379756378009915, + "step": 4375 + }, + { + "ce_ib": 4.69392204284668, + "ce_orig": 1.3171980381011963, + "epoch": 1.2579624703429433, + "kl_loss": 0.03498159348964691, + "loss_ib": 0.0008192081586457789, + "step": 4375 + }, + { + "ce_ib": 2.844032049179077, + "ce_orig": 0.8413372039794922, + "epoch": 1.2579624703429433, + "kl_loss": 0.042800989001989365, + "loss_ib": 0.0007124130497686565, + "step": 4375 + }, + { + "ce_ib": 4.1246843338012695, + "ce_orig": 1.0864871740341187, + "epoch": 1.258250053921921, + "kl_loss": 0.04920237883925438, + "loss_ib": 0.000904492218978703, + "step": 4376 + }, + { + "ce_ib": 3.6330759525299072, + "ce_orig": 0.9873692989349365, + "epoch": 1.258250053921921, + "kl_loss": 0.04151034727692604, + "loss_ib": 0.0007784109911881387, + "step": 4376 + }, + { + "ce_ib": 3.2331368923187256, + "ce_orig": 0.9192548394203186, + "epoch": 1.258250053921921, + "kl_loss": 0.037467196583747864, + "loss_ib": 0.0006979856407269835, + "step": 4376 + }, + { + "ce_ib": 1.939968466758728, + "ce_orig": 0.5903986096382141, + "epoch": 1.258250053921921, + "kl_loss": 0.02614104002714157, + "loss_ib": 0.00045540722203440964, + "step": 4376 + }, + { + "ce_ib": 3.5922696590423584, + "ce_orig": 0.8418840169906616, + "epoch": 1.2585376375008988, + "kl_loss": 0.05451120436191559, + "loss_ib": 0.0009043389582075179, + "step": 4377 + }, + { + "ce_ib": 3.446929931640625, + "ce_orig": 1.0221072435379028, + "epoch": 1.2585376375008988, + "kl_loss": 0.031340956687927246, + "loss_ib": 0.0006581025663763285, + "step": 4377 + }, + { + "ce_ib": 2.2637219429016113, + "ce_orig": 0.6580826640129089, + "epoch": 1.2585376375008988, + "kl_loss": 0.025609396398067474, + "loss_ib": 0.00048246613005176187, + "step": 4377 + }, + { + "ce_ib": 1.9265282154083252, + "ce_orig": 0.42959320545196533, + "epoch": 1.2585376375008988, + "kl_loss": 0.027847466990351677, + "loss_ib": 0.0004711274814326316, + "step": 4377 + }, + { + "ce_ib": 2.6078760623931885, + "ce_orig": 0.5746787786483765, + "epoch": 1.2588252210798763, + "kl_loss": 0.039279550313949585, + "loss_ib": 0.0006535830907523632, + "step": 4378 + }, + { + "ce_ib": 1.618629813194275, + "ce_orig": 0.3085295855998993, + "epoch": 1.2588252210798763, + "kl_loss": 0.04236559197306633, + "loss_ib": 0.0005855188937857747, + "step": 4378 + }, + { + "ce_ib": 4.303234100341797, + "ce_orig": 1.1491425037384033, + "epoch": 1.2588252210798763, + "kl_loss": 0.04511383920907974, + "loss_ib": 0.0008814617758616805, + "step": 4378 + }, + { + "ce_ib": 2.239658832550049, + "ce_orig": 0.7089115381240845, + "epoch": 1.2588252210798763, + "kl_loss": 0.031330909579992294, + "loss_ib": 0.000537274987436831, + "step": 4378 + }, + { + "ce_ib": 3.923182487487793, + "ce_orig": 1.1081247329711914, + "epoch": 1.259112804658854, + "kl_loss": 0.03338705748319626, + "loss_ib": 0.0007261888240464032, + "step": 4379 + }, + { + "ce_ib": 2.6782054901123047, + "ce_orig": 0.7116192579269409, + "epoch": 1.259112804658854, + "kl_loss": 0.04634946584701538, + "loss_ib": 0.0007313151727430522, + "step": 4379 + }, + { + "ce_ib": 2.9762256145477295, + "ce_orig": 0.798417329788208, + "epoch": 1.259112804658854, + "kl_loss": 0.034896120429039, + "loss_ib": 0.0006465837359428406, + "step": 4379 + }, + { + "ce_ib": 2.8075876235961914, + "ce_orig": 0.7810253500938416, + "epoch": 1.259112804658854, + "kl_loss": 0.07811121642589569, + "loss_ib": 0.001061870832927525, + "step": 4379 + }, + { + "epoch": 1.2594003882378315, + "grad_norm": 0.10441810637712479, + "learning_rate": 3.2636799360990835e-05, + "loss": 0.8606, + "step": 4380 + }, + { + "ce_ib": 4.288107395172119, + "ce_orig": 1.0947461128234863, + "epoch": 1.2594003882378315, + "kl_loss": 0.05379196256399155, + "loss_ib": 0.0009667302947491407, + "step": 4380 + }, + { + "ce_ib": 2.662630319595337, + "ce_orig": 0.5554064512252808, + "epoch": 1.2594003882378315, + "kl_loss": 0.03969043865799904, + "loss_ib": 0.000663167389575392, + "step": 4380 + }, + { + "ce_ib": 3.0411431789398193, + "ce_orig": 0.8656868934631348, + "epoch": 1.2594003882378315, + "kl_loss": 0.0689462274312973, + "loss_ib": 0.0009935765992850065, + "step": 4380 + }, + { + "ce_ib": 5.256511688232422, + "ce_orig": 1.3569964170455933, + "epoch": 1.2594003882378315, + "kl_loss": 0.05543120950460434, + "loss_ib": 0.0010799632873386145, + "step": 4380 + }, + { + "ce_ib": 2.6110105514526367, + "ce_orig": 0.376024067401886, + "epoch": 1.2596879718168092, + "kl_loss": 0.05416275933384895, + "loss_ib": 0.0008027286385186017, + "step": 4381 + }, + { + "ce_ib": 3.7195754051208496, + "ce_orig": 0.7252327799797058, + "epoch": 1.2596879718168092, + "kl_loss": 0.0814438909292221, + "loss_ib": 0.001186396460980177, + "step": 4381 + }, + { + "ce_ib": 4.354588508605957, + "ce_orig": 1.227679967880249, + "epoch": 1.2596879718168092, + "kl_loss": 0.05448142811655998, + "loss_ib": 0.0009802731219679117, + "step": 4381 + }, + { + "ce_ib": 2.5714452266693115, + "ce_orig": 1.0284217596054077, + "epoch": 1.2596879718168092, + "kl_loss": 0.022740373387932777, + "loss_ib": 0.00048454824718646705, + "step": 4381 + }, + { + "ce_ib": 3.393467903137207, + "ce_orig": 1.2724766731262207, + "epoch": 1.259975555395787, + "kl_loss": 0.029986396431922913, + "loss_ib": 0.0006392108043655753, + "step": 4382 + }, + { + "ce_ib": 3.6688714027404785, + "ce_orig": 0.8981284499168396, + "epoch": 1.259975555395787, + "kl_loss": 0.043337382376194, + "loss_ib": 0.0008002608665265143, + "step": 4382 + }, + { + "ce_ib": 4.884104251861572, + "ce_orig": 1.4132020473480225, + "epoch": 1.259975555395787, + "kl_loss": 0.03844679892063141, + "loss_ib": 0.0008728784159757197, + "step": 4382 + }, + { + "ce_ib": 4.183985710144043, + "ce_orig": 1.3546175956726074, + "epoch": 1.259975555395787, + "kl_loss": 0.03906858712434769, + "loss_ib": 0.000809084449429065, + "step": 4382 + }, + { + "ce_ib": 5.933824062347412, + "ce_orig": 1.7549829483032227, + "epoch": 1.2602631389747645, + "kl_loss": 0.03011495992541313, + "loss_ib": 0.0008945319568738341, + "step": 4383 + }, + { + "ce_ib": 4.491677761077881, + "ce_orig": 1.2313427925109863, + "epoch": 1.2602631389747645, + "kl_loss": 0.05469304323196411, + "loss_ib": 0.000996098155155778, + "step": 4383 + }, + { + "ce_ib": 3.151392936706543, + "ce_orig": 0.6174677014350891, + "epoch": 1.2602631389747645, + "kl_loss": 0.050146810710430145, + "loss_ib": 0.0008166073821485043, + "step": 4383 + }, + { + "ce_ib": 3.041409969329834, + "ce_orig": 0.7165460586547852, + "epoch": 1.2602631389747645, + "kl_loss": 0.014539605006575584, + "loss_ib": 0.00044953703763894737, + "step": 4383 + }, + { + "ce_ib": 3.7577943801879883, + "ce_orig": 1.1845279932022095, + "epoch": 1.2605507225537422, + "kl_loss": 0.025395281612873077, + "loss_ib": 0.0006297322106547654, + "step": 4384 + }, + { + "ce_ib": 2.8660287857055664, + "ce_orig": 0.5646254420280457, + "epoch": 1.2605507225537422, + "kl_loss": 0.03712117671966553, + "loss_ib": 0.0006578146712854505, + "step": 4384 + }, + { + "ce_ib": 4.80979061126709, + "ce_orig": 1.004356026649475, + "epoch": 1.2605507225537422, + "kl_loss": 0.046449076384305954, + "loss_ib": 0.000945469771977514, + "step": 4384 + }, + { + "ce_ib": 3.042694568634033, + "ce_orig": 0.8649472594261169, + "epoch": 1.2605507225537422, + "kl_loss": 0.029528271406888962, + "loss_ib": 0.0005995521787554026, + "step": 4384 + }, + { + "epoch": 1.2608383061327197, + "grad_norm": 0.11042038351297379, + "learning_rate": 3.259984072139899e-05, + "loss": 0.8195, + "step": 4385 + }, + { + "ce_ib": 4.955777645111084, + "ce_orig": 1.5290294885635376, + "epoch": 1.2608383061327197, + "kl_loss": 0.04181869700551033, + "loss_ib": 0.0009137646411545575, + "step": 4385 + }, + { + "ce_ib": 2.8633410930633545, + "ce_orig": 0.6286666989326477, + "epoch": 1.2608383061327197, + "kl_loss": 0.051935017108917236, + "loss_ib": 0.0008056842489168048, + "step": 4385 + }, + { + "ce_ib": 3.937359571456909, + "ce_orig": 0.9135238528251648, + "epoch": 1.2608383061327197, + "kl_loss": 0.0559840090572834, + "loss_ib": 0.0009535760036669672, + "step": 4385 + }, + { + "ce_ib": 3.6948347091674805, + "ce_orig": 0.7818248867988586, + "epoch": 1.2608383061327197, + "kl_loss": 0.03273189067840576, + "loss_ib": 0.0006968023371882737, + "step": 4385 + }, + { + "ce_ib": 2.854630708694458, + "ce_orig": 0.724092960357666, + "epoch": 1.2611258897116975, + "kl_loss": 0.03159784525632858, + "loss_ib": 0.0006014414830133319, + "step": 4386 + }, + { + "ce_ib": 2.9456043243408203, + "ce_orig": 0.8957991600036621, + "epoch": 1.2611258897116975, + "kl_loss": 0.034141480922698975, + "loss_ib": 0.0006359752151183784, + "step": 4386 + }, + { + "ce_ib": 2.4556467533111572, + "ce_orig": 0.6531681418418884, + "epoch": 1.2611258897116975, + "kl_loss": 0.027127094566822052, + "loss_ib": 0.0005168356001377106, + "step": 4386 + }, + { + "ce_ib": 4.36328125, + "ce_orig": 1.300465703010559, + "epoch": 1.2611258897116975, + "kl_loss": 0.03625961393117905, + "loss_ib": 0.0007989242440089583, + "step": 4386 + }, + { + "ce_ib": 3.3985111713409424, + "ce_orig": 0.7014503479003906, + "epoch": 1.2614134732906752, + "kl_loss": 0.03899022936820984, + "loss_ib": 0.0007297533447854221, + "step": 4387 + }, + { + "ce_ib": 4.744250297546387, + "ce_orig": 1.2274049520492554, + "epoch": 1.2614134732906752, + "kl_loss": 0.038267530500888824, + "loss_ib": 0.0008571002981625497, + "step": 4387 + }, + { + "ce_ib": 3.1413185596466064, + "ce_orig": 0.9637088179588318, + "epoch": 1.2614134732906752, + "kl_loss": 0.031377069652080536, + "loss_ib": 0.0006279025692492723, + "step": 4387 + }, + { + "ce_ib": 5.91068696975708, + "ce_orig": 1.8165020942687988, + "epoch": 1.2614134732906752, + "kl_loss": 0.044107869267463684, + "loss_ib": 0.0010321474401280284, + "step": 4387 + }, + { + "ce_ib": 3.2560644149780273, + "ce_orig": 0.6576510071754456, + "epoch": 1.2617010568696527, + "kl_loss": 0.050251640379428864, + "loss_ib": 0.0008281227783299983, + "step": 4388 + }, + { + "ce_ib": 4.808323860168457, + "ce_orig": 1.135523796081543, + "epoch": 1.2617010568696527, + "kl_loss": 0.04215473681688309, + "loss_ib": 0.0009023797465488315, + "step": 4388 + }, + { + "ce_ib": 2.7859983444213867, + "ce_orig": 0.6144617199897766, + "epoch": 1.2617010568696527, + "kl_loss": 0.06759246438741684, + "loss_ib": 0.0009545243810862303, + "step": 4388 + }, + { + "ce_ib": 1.5408098697662354, + "ce_orig": 0.46856921911239624, + "epoch": 1.2617010568696527, + "kl_loss": 0.0340116024017334, + "loss_ib": 0.0004941970109939575, + "step": 4388 + }, + { + "ce_ib": 2.7334229946136475, + "ce_orig": 0.6236959099769592, + "epoch": 1.2619886404486305, + "kl_loss": 0.03168615698814392, + "loss_ib": 0.000590203853789717, + "step": 4389 + }, + { + "ce_ib": 2.7237229347229004, + "ce_orig": 0.757580041885376, + "epoch": 1.2619886404486305, + "kl_loss": 0.025727946311235428, + "loss_ib": 0.0005296517629176378, + "step": 4389 + }, + { + "ce_ib": 5.429747581481934, + "ce_orig": 1.5783387422561646, + "epoch": 1.2619886404486305, + "kl_loss": 0.049247220158576965, + "loss_ib": 0.0010354469995945692, + "step": 4389 + }, + { + "ce_ib": 1.9570571184158325, + "ce_orig": 0.4156891703605652, + "epoch": 1.2619886404486305, + "kl_loss": 0.03556283563375473, + "loss_ib": 0.0005513340584002435, + "step": 4389 + }, + { + "epoch": 1.262276224027608, + "grad_norm": 0.10181885957717896, + "learning_rate": 3.25628637720269e-05, + "loss": 0.8896, + "step": 4390 + }, + { + "ce_ib": 4.165914058685303, + "ce_orig": 1.157598853111267, + "epoch": 1.262276224027608, + "kl_loss": 0.031542833894491196, + "loss_ib": 0.0007320196600630879, + "step": 4390 + }, + { + "ce_ib": 2.8058793544769287, + "ce_orig": 0.6084126830101013, + "epoch": 1.262276224027608, + "kl_loss": 0.030210979282855988, + "loss_ib": 0.0005826976848766208, + "step": 4390 + }, + { + "ce_ib": 4.154483318328857, + "ce_orig": 0.9078108072280884, + "epoch": 1.262276224027608, + "kl_loss": 0.034198977053165436, + "loss_ib": 0.0007574380724690855, + "step": 4390 + }, + { + "ce_ib": 2.0215494632720947, + "ce_orig": 0.4843139946460724, + "epoch": 1.262276224027608, + "kl_loss": 0.033325839787721634, + "loss_ib": 0.0005354133318178356, + "step": 4390 + }, + { + "ce_ib": 4.507707118988037, + "ce_orig": 1.3689779043197632, + "epoch": 1.2625638076065857, + "kl_loss": 0.05327489972114563, + "loss_ib": 0.0009835197124630213, + "step": 4391 + }, + { + "ce_ib": 3.871894598007202, + "ce_orig": 0.9283878207206726, + "epoch": 1.2625638076065857, + "kl_loss": 0.05499499291181564, + "loss_ib": 0.0009371393825858831, + "step": 4391 + }, + { + "ce_ib": 2.508241891860962, + "ce_orig": 0.7304095029830933, + "epoch": 1.2625638076065857, + "kl_loss": 0.03337496519088745, + "loss_ib": 0.0005845738342031837, + "step": 4391 + }, + { + "ce_ib": 4.731241703033447, + "ce_orig": 1.260901689529419, + "epoch": 1.2625638076065857, + "kl_loss": 0.046788573265075684, + "loss_ib": 0.0009410099009983242, + "step": 4391 + }, + { + "ce_ib": 3.5957655906677246, + "ce_orig": 1.1309449672698975, + "epoch": 1.2628513911855632, + "kl_loss": 0.03170909732580185, + "loss_ib": 0.0006766675505787134, + "step": 4392 + }, + { + "ce_ib": 2.2708077430725098, + "ce_orig": 0.30406951904296875, + "epoch": 1.2628513911855632, + "kl_loss": 0.06693023443222046, + "loss_ib": 0.0008963830769062042, + "step": 4392 + }, + { + "ce_ib": 2.3550660610198975, + "ce_orig": 0.444292813539505, + "epoch": 1.2628513911855632, + "kl_loss": 0.0910358801484108, + "loss_ib": 0.0011458654189482331, + "step": 4392 + }, + { + "ce_ib": 4.987854957580566, + "ce_orig": 1.4548557996749878, + "epoch": 1.2628513911855632, + "kl_loss": 0.030066225677728653, + "loss_ib": 0.0007994477055035532, + "step": 4392 + }, + { + "ce_ib": 3.6793019771575928, + "ce_orig": 0.8714795112609863, + "epoch": 1.263138974764541, + "kl_loss": 0.03693586587905884, + "loss_ib": 0.0007372888503596187, + "step": 4393 + }, + { + "ce_ib": 3.343327522277832, + "ce_orig": 0.6849318742752075, + "epoch": 1.263138974764541, + "kl_loss": 0.039372727274894714, + "loss_ib": 0.0007280599675141275, + "step": 4393 + }, + { + "ce_ib": 5.2238287925720215, + "ce_orig": 1.672032117843628, + "epoch": 1.263138974764541, + "kl_loss": 0.055024154484272, + "loss_ib": 0.0010726243490353227, + "step": 4393 + }, + { + "ce_ib": 4.377386093139648, + "ce_orig": 1.20264732837677, + "epoch": 1.263138974764541, + "kl_loss": 0.03206295520067215, + "loss_ib": 0.0007583681144751608, + "step": 4393 + }, + { + "ce_ib": 3.8596301078796387, + "ce_orig": 0.7008557319641113, + "epoch": 1.2634265583435185, + "kl_loss": 0.046081021428108215, + "loss_ib": 0.0008467732113786042, + "step": 4394 + }, + { + "ce_ib": 3.027474880218506, + "ce_orig": 0.7755725383758545, + "epoch": 1.2634265583435185, + "kl_loss": 0.048946306109428406, + "loss_ib": 0.0007922105141915381, + "step": 4394 + }, + { + "ce_ib": 2.297098398208618, + "ce_orig": 0.45318469405174255, + "epoch": 1.2634265583435185, + "kl_loss": 0.025621209293603897, + "loss_ib": 0.0004859219479840249, + "step": 4394 + }, + { + "ce_ib": 3.525705337524414, + "ce_orig": 0.8967520594596863, + "epoch": 1.2634265583435185, + "kl_loss": 0.036349304020404816, + "loss_ib": 0.0007160634850151837, + "step": 4394 + }, + { + "epoch": 1.2637141419224962, + "grad_norm": 0.15080858767032623, + "learning_rate": 3.25258686019606e-05, + "loss": 0.8288, + "step": 4395 + }, + { + "ce_ib": 2.1043126583099365, + "ce_orig": 0.6480749845504761, + "epoch": 1.2637141419224962, + "kl_loss": 0.03409944474697113, + "loss_ib": 0.0005514256772585213, + "step": 4395 + }, + { + "ce_ib": 3.0565764904022217, + "ce_orig": 0.7464945912361145, + "epoch": 1.2637141419224962, + "kl_loss": 0.052201371639966965, + "loss_ib": 0.0008276713197119534, + "step": 4395 + }, + { + "ce_ib": 0.8639447093009949, + "ce_orig": 0.14557448029518127, + "epoch": 1.2637141419224962, + "kl_loss": 0.07015097141265869, + "loss_ib": 0.000787904195021838, + "step": 4395 + }, + { + "ce_ib": 3.87298846244812, + "ce_orig": 1.0311685800552368, + "epoch": 1.2637141419224962, + "kl_loss": 0.061481211334466934, + "loss_ib": 0.0010021110065281391, + "step": 4395 + }, + { + "ce_ib": 4.142095565795898, + "ce_orig": 1.3519009351730347, + "epoch": 1.264001725501474, + "kl_loss": 0.05951709672808647, + "loss_ib": 0.0010093804448843002, + "step": 4396 + }, + { + "ce_ib": 1.663159728050232, + "ce_orig": 0.19505201280117035, + "epoch": 1.264001725501474, + "kl_loss": 0.10484408587217331, + "loss_ib": 0.001214756746776402, + "step": 4396 + }, + { + "ce_ib": 3.8816397190093994, + "ce_orig": 1.2493983507156372, + "epoch": 1.264001725501474, + "kl_loss": 0.042426176369190216, + "loss_ib": 0.0008124257437884808, + "step": 4396 + }, + { + "ce_ib": 2.0782511234283447, + "ce_orig": 0.6257206201553345, + "epoch": 1.264001725501474, + "kl_loss": 0.021181900054216385, + "loss_ib": 0.00041964411502704024, + "step": 4396 + }, + { + "ce_ib": 3.6257989406585693, + "ce_orig": 0.9233943223953247, + "epoch": 1.2642893090804515, + "kl_loss": 0.0753956288099289, + "loss_ib": 0.0011165362084284425, + "step": 4397 + }, + { + "ce_ib": 2.89680552482605, + "ce_orig": 1.0362221002578735, + "epoch": 1.2642893090804515, + "kl_loss": 0.16026724874973297, + "loss_ib": 0.0018923529423773289, + "step": 4397 + }, + { + "ce_ib": 4.840130805969238, + "ce_orig": 1.2289925813674927, + "epoch": 1.2642893090804515, + "kl_loss": 0.04575641453266144, + "loss_ib": 0.0009415772510692477, + "step": 4397 + }, + { + "ce_ib": 2.824370861053467, + "ce_orig": 0.49788159132003784, + "epoch": 1.2642893090804515, + "kl_loss": 0.03464028984308243, + "loss_ib": 0.0006288399454206228, + "step": 4397 + }, + { + "ce_ib": 2.1076624393463135, + "ce_orig": 0.39262932538986206, + "epoch": 1.2645768926594292, + "kl_loss": 0.03357316553592682, + "loss_ib": 0.0005464978748932481, + "step": 4398 + }, + { + "ce_ib": 2.902637481689453, + "ce_orig": 0.543433427810669, + "epoch": 1.2645768926594292, + "kl_loss": 0.05054536461830139, + "loss_ib": 0.0007957173511385918, + "step": 4398 + }, + { + "ce_ib": 4.370688438415527, + "ce_orig": 1.0228724479675293, + "epoch": 1.2645768926594292, + "kl_loss": 0.0362352654337883, + "loss_ib": 0.0007994215120561421, + "step": 4398 + }, + { + "ce_ib": 5.020813465118408, + "ce_orig": 1.2925361394882202, + "epoch": 1.2645768926594292, + "kl_loss": 0.038472987711429596, + "loss_ib": 0.0008868111181072891, + "step": 4398 + }, + { + "ce_ib": 2.1455037593841553, + "ce_orig": 0.6004530191421509, + "epoch": 1.2648644762384067, + "kl_loss": 0.048751458525657654, + "loss_ib": 0.0007020648918114603, + "step": 4399 + }, + { + "ce_ib": 2.442003011703491, + "ce_orig": 0.6421102285385132, + "epoch": 1.2648644762384067, + "kl_loss": 0.054000914096832275, + "loss_ib": 0.0007842094055376947, + "step": 4399 + }, + { + "ce_ib": 3.594906806945801, + "ce_orig": 0.7831500768661499, + "epoch": 1.2648644762384067, + "kl_loss": 0.05324510112404823, + "loss_ib": 0.000891941599547863, + "step": 4399 + }, + { + "ce_ib": 3.4999349117279053, + "ce_orig": 0.8561927080154419, + "epoch": 1.2648644762384067, + "kl_loss": 0.05527849122881889, + "loss_ib": 0.0009027783526107669, + "step": 4399 + }, + { + "epoch": 1.2651520598173844, + "grad_norm": 0.0958116427063942, + "learning_rate": 3.248885530033004e-05, + "loss": 0.7893, + "step": 4400 + }, + { + "ce_ib": 4.82877779006958, + "ce_orig": 1.3695422410964966, + "epoch": 1.2651520598173844, + "kl_loss": 0.05864996463060379, + "loss_ib": 0.0010693772928789258, + "step": 4400 + }, + { + "ce_ib": 4.789294719696045, + "ce_orig": 1.1382180452346802, + "epoch": 1.2651520598173844, + "kl_loss": 0.05417530611157417, + "loss_ib": 0.0010206825099885464, + "step": 4400 + }, + { + "ce_ib": 2.909536600112915, + "ce_orig": 0.6881528496742249, + "epoch": 1.2651520598173844, + "kl_loss": 0.04226599633693695, + "loss_ib": 0.0007136136409826577, + "step": 4400 + }, + { + "ce_ib": 3.508859872817993, + "ce_orig": 0.6323538422584534, + "epoch": 1.2651520598173844, + "kl_loss": 0.04851751774549484, + "loss_ib": 0.0008360611391253769, + "step": 4400 + }, + { + "ce_ib": 2.1971137523651123, + "ce_orig": 0.5863850116729736, + "epoch": 1.2654396433963622, + "kl_loss": 0.046075768768787384, + "loss_ib": 0.0006804690347053111, + "step": 4401 + }, + { + "ce_ib": 2.2615301609039307, + "ce_orig": 0.4700341522693634, + "epoch": 1.2654396433963622, + "kl_loss": 0.03887740522623062, + "loss_ib": 0.000614927033893764, + "step": 4401 + }, + { + "ce_ib": 3.9156692028045654, + "ce_orig": 0.624037504196167, + "epoch": 1.2654396433963622, + "kl_loss": 0.03438673913478851, + "loss_ib": 0.0007354342960752547, + "step": 4401 + }, + { + "ce_ib": 5.243454456329346, + "ce_orig": 1.6528112888336182, + "epoch": 1.2654396433963622, + "kl_loss": 0.054445914924144745, + "loss_ib": 0.0010688045294955373, + "step": 4401 + }, + { + "ce_ib": 3.6847126483917236, + "ce_orig": 0.9546472430229187, + "epoch": 1.2657272269753397, + "kl_loss": 0.04471983015537262, + "loss_ib": 0.0008156695985235274, + "step": 4402 + }, + { + "ce_ib": 2.2187981605529785, + "ce_orig": 0.5499052405357361, + "epoch": 1.2657272269753397, + "kl_loss": 0.03882107511162758, + "loss_ib": 0.000610090559348464, + "step": 4402 + }, + { + "ce_ib": 2.7854983806610107, + "ce_orig": 0.6651802659034729, + "epoch": 1.2657272269753397, + "kl_loss": 0.047412432730197906, + "loss_ib": 0.0007526741828769445, + "step": 4402 + }, + { + "ce_ib": 3.1164608001708984, + "ce_orig": 0.9249817728996277, + "epoch": 1.2657272269753397, + "kl_loss": 0.0190032497048378, + "loss_ib": 0.0005016785580664873, + "step": 4402 + }, + { + "ce_ib": 3.7053656578063965, + "ce_orig": 0.9462273120880127, + "epoch": 1.2660148105543174, + "kl_loss": 0.04007008671760559, + "loss_ib": 0.0007712374208495021, + "step": 4403 + }, + { + "ce_ib": 3.1920394897460938, + "ce_orig": 0.34103652834892273, + "epoch": 1.2660148105543174, + "kl_loss": 0.05432936176657677, + "loss_ib": 0.0008624975453130901, + "step": 4403 + }, + { + "ce_ib": 2.5615105628967285, + "ce_orig": 0.8398652672767639, + "epoch": 1.2660148105543174, + "kl_loss": 0.032580625265836716, + "loss_ib": 0.0005819572834298015, + "step": 4403 + }, + { + "ce_ib": 3.218980312347412, + "ce_orig": 0.8008548617362976, + "epoch": 1.2660148105543174, + "kl_loss": 0.05735297501087189, + "loss_ib": 0.0008954277145676315, + "step": 4403 + }, + { + "ce_ib": 1.8855522871017456, + "ce_orig": 0.6497148275375366, + "epoch": 1.266302394133295, + "kl_loss": 0.019342824816703796, + "loss_ib": 0.00038198346737772226, + "step": 4404 + }, + { + "ce_ib": 2.852731466293335, + "ce_orig": 0.7170611023902893, + "epoch": 1.266302394133295, + "kl_loss": 0.0439094640314579, + "loss_ib": 0.0007243677973747253, + "step": 4404 + }, + { + "ce_ib": 2.1157023906707764, + "ce_orig": 0.38382360339164734, + "epoch": 1.266302394133295, + "kl_loss": 0.039201926440000534, + "loss_ib": 0.0006035894621163607, + "step": 4404 + }, + { + "ce_ib": 3.823162317276001, + "ce_orig": 1.1296675205230713, + "epoch": 1.266302394133295, + "kl_loss": 0.03949123993515968, + "loss_ib": 0.0007772286189720035, + "step": 4404 + }, + { + "epoch": 1.2665899777122727, + "grad_norm": 0.11017212271690369, + "learning_rate": 3.245182395630886e-05, + "loss": 0.8488, + "step": 4405 + }, + { + "ce_ib": 3.348923444747925, + "ce_orig": 0.8190807700157166, + "epoch": 1.2665899777122727, + "kl_loss": 0.028702374547719955, + "loss_ib": 0.000621916085947305, + "step": 4405 + }, + { + "ce_ib": 3.100412607192993, + "ce_orig": 0.6520056128501892, + "epoch": 1.2665899777122727, + "kl_loss": 0.04869776591658592, + "loss_ib": 0.0007970189326442778, + "step": 4405 + }, + { + "ce_ib": 3.074678897857666, + "ce_orig": 0.787074863910675, + "epoch": 1.2665899777122727, + "kl_loss": 0.05480661243200302, + "loss_ib": 0.0008555339882150292, + "step": 4405 + }, + { + "ce_ib": 2.955143690109253, + "ce_orig": 0.7613470554351807, + "epoch": 1.2665899777122727, + "kl_loss": 0.023335136473178864, + "loss_ib": 0.0005288657266646624, + "step": 4405 + }, + { + "ce_ib": 2.1060378551483154, + "ce_orig": 0.46470049023628235, + "epoch": 1.2668775612912504, + "kl_loss": 0.059482429176568985, + "loss_ib": 0.0008054280187934637, + "step": 4406 + }, + { + "ce_ib": 2.4312357902526855, + "ce_orig": 0.7164291143417358, + "epoch": 1.2668775612912504, + "kl_loss": 0.059610478579998016, + "loss_ib": 0.0008392283343710005, + "step": 4406 + }, + { + "ce_ib": 4.006764888763428, + "ce_orig": 0.774300754070282, + "epoch": 1.2668775612912504, + "kl_loss": 0.05687551200389862, + "loss_ib": 0.000969431537669152, + "step": 4406 + }, + { + "ce_ib": 2.647205352783203, + "ce_orig": 0.5628796815872192, + "epoch": 1.2668775612912504, + "kl_loss": 0.02283964306116104, + "loss_ib": 0.0004931169678457081, + "step": 4406 + }, + { + "ce_ib": 3.9149253368377686, + "ce_orig": 1.077172040939331, + "epoch": 1.267165144870228, + "kl_loss": 0.028965173289179802, + "loss_ib": 0.0006811441853642464, + "step": 4407 + }, + { + "ce_ib": 3.0385992527008057, + "ce_orig": 0.6969709992408752, + "epoch": 1.267165144870228, + "kl_loss": 0.03764001280069351, + "loss_ib": 0.0006802600692026317, + "step": 4407 + }, + { + "ce_ib": 1.5797584056854248, + "ce_orig": 0.4813395142555237, + "epoch": 1.267165144870228, + "kl_loss": 0.02203516475856304, + "loss_ib": 0.00037832747329957783, + "step": 4407 + }, + { + "ce_ib": 2.3960142135620117, + "ce_orig": 0.6296178102493286, + "epoch": 1.267165144870228, + "kl_loss": 0.035440459847450256, + "loss_ib": 0.0005940060364082456, + "step": 4407 + }, + { + "ce_ib": 2.525825023651123, + "ce_orig": 0.9200817942619324, + "epoch": 1.2674527284492054, + "kl_loss": 0.027327194809913635, + "loss_ib": 0.000525854469742626, + "step": 4408 + }, + { + "ce_ib": 3.4238903522491455, + "ce_orig": 0.8940367698669434, + "epoch": 1.2674527284492054, + "kl_loss": 0.040741465985774994, + "loss_ib": 0.000749803613871336, + "step": 4408 + }, + { + "ce_ib": 3.5783159732818604, + "ce_orig": 1.0808910131454468, + "epoch": 1.2674527284492054, + "kl_loss": 0.030222052708268166, + "loss_ib": 0.0006600521155633032, + "step": 4408 + }, + { + "ce_ib": 2.6497445106506348, + "ce_orig": 0.6289002895355225, + "epoch": 1.2674527284492054, + "kl_loss": 0.04285344108939171, + "loss_ib": 0.0006935088313184679, + "step": 4408 + }, + { + "ce_ib": 4.711726188659668, + "ce_orig": 0.6746043562889099, + "epoch": 1.2677403120281832, + "kl_loss": 0.06788960844278336, + "loss_ib": 0.0011500685941427946, + "step": 4409 + }, + { + "ce_ib": 2.0658748149871826, + "ce_orig": 0.2718374729156494, + "epoch": 1.2677403120281832, + "kl_loss": 0.023484643548727036, + "loss_ib": 0.0004414339200593531, + "step": 4409 + }, + { + "ce_ib": 2.6960337162017822, + "ce_orig": 0.6395667791366577, + "epoch": 1.2677403120281832, + "kl_loss": 0.04742240905761719, + "loss_ib": 0.0007438274333253503, + "step": 4409 + }, + { + "ce_ib": 2.019019365310669, + "ce_orig": 0.5653956532478333, + "epoch": 1.2677403120281832, + "kl_loss": 0.015081159770488739, + "loss_ib": 0.000352713541360572, + "step": 4409 + }, + { + "epoch": 1.268027895607161, + "grad_norm": 0.11229785531759262, + "learning_rate": 3.241477465911418e-05, + "loss": 0.8075, + "step": 4410 + }, + { + "ce_ib": 1.9792073965072632, + "ce_orig": 0.2959616780281067, + "epoch": 1.268027895607161, + "kl_loss": 0.02786855399608612, + "loss_ib": 0.0004766062484122813, + "step": 4410 + }, + { + "ce_ib": 2.1666390895843506, + "ce_orig": 0.558937132358551, + "epoch": 1.268027895607161, + "kl_loss": 0.027822725474834442, + "loss_ib": 0.0004948911955580115, + "step": 4410 + }, + { + "ce_ib": 2.3423025608062744, + "ce_orig": 0.6726953387260437, + "epoch": 1.268027895607161, + "kl_loss": 0.02860531583428383, + "loss_ib": 0.000520283414516598, + "step": 4410 + }, + { + "ce_ib": 3.138547658920288, + "ce_orig": 1.1241953372955322, + "epoch": 1.268027895607161, + "kl_loss": 0.03735293447971344, + "loss_ib": 0.0006873841630294919, + "step": 4410 + }, + { + "ce_ib": 2.0776281356811523, + "ce_orig": 0.4886261522769928, + "epoch": 1.2683154791861384, + "kl_loss": 0.03219747170805931, + "loss_ib": 0.0005297375610098243, + "step": 4411 + }, + { + "ce_ib": 3.187504529953003, + "ce_orig": 0.751419723033905, + "epoch": 1.2683154791861384, + "kl_loss": 0.04223349690437317, + "loss_ib": 0.0007410853868350387, + "step": 4411 + }, + { + "ce_ib": 2.86386775970459, + "ce_orig": 0.7934455275535583, + "epoch": 1.2683154791861384, + "kl_loss": 0.025644900277256966, + "loss_ib": 0.0005428357399068773, + "step": 4411 + }, + { + "ce_ib": 2.7140421867370605, + "ce_orig": 0.6516506671905518, + "epoch": 1.2683154791861384, + "kl_loss": 0.033196233212947845, + "loss_ib": 0.0006033665267750621, + "step": 4411 + }, + { + "ce_ib": 3.2780776023864746, + "ce_orig": 0.715265691280365, + "epoch": 1.2686030627651161, + "kl_loss": 0.06742483377456665, + "loss_ib": 0.0010020560584962368, + "step": 4412 + }, + { + "ce_ib": 3.47309947013855, + "ce_orig": 0.7473452091217041, + "epoch": 1.2686030627651161, + "kl_loss": 0.02466627210378647, + "loss_ib": 0.0005939726252108812, + "step": 4412 + }, + { + "ce_ib": 3.4365055561065674, + "ce_orig": 0.7892213463783264, + "epoch": 1.2686030627651161, + "kl_loss": 0.027630668133497238, + "loss_ib": 0.000619957223534584, + "step": 4412 + }, + { + "ce_ib": 2.1865012645721436, + "ce_orig": 0.6520018577575684, + "epoch": 1.2686030627651161, + "kl_loss": 0.029515596106648445, + "loss_ib": 0.0005138060660101473, + "step": 4412 + }, + { + "ce_ib": 2.8002800941467285, + "ce_orig": 0.6755026578903198, + "epoch": 1.2688906463440937, + "kl_loss": 0.14684492349624634, + "loss_ib": 0.0017484772251918912, + "step": 4413 + }, + { + "ce_ib": 1.4419790506362915, + "ce_orig": 0.29443785548210144, + "epoch": 1.2688906463440937, + "kl_loss": 0.06511077284812927, + "loss_ib": 0.0007953056483529508, + "step": 4413 + }, + { + "ce_ib": 3.4361350536346436, + "ce_orig": 0.9534095525741577, + "epoch": 1.2688906463440937, + "kl_loss": 0.06593531370162964, + "loss_ib": 0.0010029665427282453, + "step": 4413 + }, + { + "ce_ib": 3.226714611053467, + "ce_orig": 0.5326805114746094, + "epoch": 1.2688906463440937, + "kl_loss": 0.03477685526013374, + "loss_ib": 0.0006704400293529034, + "step": 4413 + }, + { + "ce_ib": 3.536954402923584, + "ce_orig": 0.7160307168960571, + "epoch": 1.2691782299230714, + "kl_loss": 0.04814377427101135, + "loss_ib": 0.0008351331925950944, + "step": 4414 + }, + { + "ce_ib": 3.1578116416931152, + "ce_orig": 0.3482239246368408, + "epoch": 1.2691782299230714, + "kl_loss": 0.07416577637195587, + "loss_ib": 0.0010574389016255736, + "step": 4414 + }, + { + "ce_ib": 2.674123764038086, + "ce_orig": 0.6068398952484131, + "epoch": 1.2691782299230714, + "kl_loss": 0.03333812206983566, + "loss_ib": 0.0006007935735397041, + "step": 4414 + }, + { + "ce_ib": 1.7328293323516846, + "ce_orig": 0.4610806405544281, + "epoch": 1.2691782299230714, + "kl_loss": 0.03411489725112915, + "loss_ib": 0.000514431856572628, + "step": 4414 + }, + { + "epoch": 1.2694658135020491, + "grad_norm": 0.10571768134832382, + "learning_rate": 3.2377707498006326e-05, + "loss": 0.7964, + "step": 4415 + }, + { + "ce_ib": 2.077040433883667, + "ce_orig": 0.650888979434967, + "epoch": 1.2694658135020491, + "kl_loss": 0.015882913023233414, + "loss_ib": 0.00036653317511081696, + "step": 4415 + }, + { + "ce_ib": 3.779869318008423, + "ce_orig": 0.7859389781951904, + "epoch": 1.2694658135020491, + "kl_loss": 0.043708719313144684, + "loss_ib": 0.0008150741341523826, + "step": 4415 + }, + { + "ce_ib": 1.8963305950164795, + "ce_orig": 0.36389321088790894, + "epoch": 1.2694658135020491, + "kl_loss": 0.033932074904441833, + "loss_ib": 0.0005289537948556244, + "step": 4415 + }, + { + "ce_ib": 1.9751639366149902, + "ce_orig": 0.4016041159629822, + "epoch": 1.2694658135020491, + "kl_loss": 0.04543624818325043, + "loss_ib": 0.0006518788286484778, + "step": 4415 + }, + { + "ce_ib": 3.534773111343384, + "ce_orig": 1.0300639867782593, + "epoch": 1.2697533970810266, + "kl_loss": 0.028821662068367004, + "loss_ib": 0.0006416938849724829, + "step": 4416 + }, + { + "ce_ib": 3.0709340572357178, + "ce_orig": 0.69557785987854, + "epoch": 1.2697533970810266, + "kl_loss": 0.02621529996395111, + "loss_ib": 0.0005692464183084667, + "step": 4416 + }, + { + "ce_ib": 2.5227739810943604, + "ce_orig": 0.6359072923660278, + "epoch": 1.2697533970810266, + "kl_loss": 0.03878585994243622, + "loss_ib": 0.0006401360151357949, + "step": 4416 + }, + { + "ce_ib": 3.4059925079345703, + "ce_orig": 0.950096845626831, + "epoch": 1.2697533970810266, + "kl_loss": 0.04218669980764389, + "loss_ib": 0.0007624661666341126, + "step": 4416 + }, + { + "ce_ib": 4.364383220672607, + "ce_orig": 1.2712810039520264, + "epoch": 1.2700409806600044, + "kl_loss": 0.03939708322286606, + "loss_ib": 0.0008304091170430183, + "step": 4417 + }, + { + "ce_ib": 3.352350950241089, + "ce_orig": 0.5526295900344849, + "epoch": 1.2700409806600044, + "kl_loss": 0.03592735528945923, + "loss_ib": 0.0006945086061023176, + "step": 4417 + }, + { + "ce_ib": 5.365078449249268, + "ce_orig": 0.9826311469078064, + "epoch": 1.2700409806600044, + "kl_loss": 0.029896730557084084, + "loss_ib": 0.0008354751043953001, + "step": 4417 + }, + { + "ce_ib": 2.1720783710479736, + "ce_orig": 0.6676583290100098, + "epoch": 1.2700409806600044, + "kl_loss": 0.025960709899663925, + "loss_ib": 0.0004768149519804865, + "step": 4417 + }, + { + "ce_ib": 3.1472129821777344, + "ce_orig": 0.9479552507400513, + "epoch": 1.2703285642389819, + "kl_loss": 0.035808660089969635, + "loss_ib": 0.0006728078005835414, + "step": 4418 + }, + { + "ce_ib": 1.725308895111084, + "ce_orig": 0.4791623055934906, + "epoch": 1.2703285642389819, + "kl_loss": 0.02233041077852249, + "loss_ib": 0.00039583496982231736, + "step": 4418 + }, + { + "ce_ib": 3.382964611053467, + "ce_orig": 1.1617085933685303, + "epoch": 1.2703285642389819, + "kl_loss": 0.03714580088853836, + "loss_ib": 0.0007097544148564339, + "step": 4418 + }, + { + "ce_ib": 1.2795546054840088, + "ce_orig": 0.23148953914642334, + "epoch": 1.2703285642389819, + "kl_loss": 0.05478420481085777, + "loss_ib": 0.0006757975206710398, + "step": 4418 + }, + { + "ce_ib": 3.1468515396118164, + "ce_orig": 0.8625646829605103, + "epoch": 1.2706161478179596, + "kl_loss": 0.04727664589881897, + "loss_ib": 0.0007874515722505748, + "step": 4419 + }, + { + "ce_ib": 3.507063865661621, + "ce_orig": 0.9535219073295593, + "epoch": 1.2706161478179596, + "kl_loss": 0.04498092830181122, + "loss_ib": 0.0008005156996659935, + "step": 4419 + }, + { + "ce_ib": 3.9808132648468018, + "ce_orig": 0.6759607195854187, + "epoch": 1.2706161478179596, + "kl_loss": 0.07089782506227493, + "loss_ib": 0.0011070595355704427, + "step": 4419 + }, + { + "ce_ib": 3.082399368286133, + "ce_orig": 0.4412170350551605, + "epoch": 1.2706161478179596, + "kl_loss": 0.048323098570108414, + "loss_ib": 0.0007914709276519716, + "step": 4419 + }, + { + "epoch": 1.2709037313969374, + "grad_norm": 0.11691539734601974, + "learning_rate": 3.2340622562288714e-05, + "loss": 0.8589, + "step": 4420 + }, + { + "ce_ib": 3.8156588077545166, + "ce_orig": 0.8438808917999268, + "epoch": 1.2709037313969374, + "kl_loss": 0.04876352474093437, + "loss_ib": 0.0008692010887898505, + "step": 4420 + }, + { + "ce_ib": 2.675229072570801, + "ce_orig": 0.6775104403495789, + "epoch": 1.2709037313969374, + "kl_loss": 0.0385693795979023, + "loss_ib": 0.0006532166735269129, + "step": 4420 + }, + { + "ce_ib": 2.261655569076538, + "ce_orig": 0.6155389547348022, + "epoch": 1.2709037313969374, + "kl_loss": 0.02345697395503521, + "loss_ib": 0.00046073528937995434, + "step": 4420 + }, + { + "ce_ib": 2.259036064147949, + "ce_orig": 0.4974336624145508, + "epoch": 1.2709037313969374, + "kl_loss": 0.03516630828380585, + "loss_ib": 0.0005775666795670986, + "step": 4420 + }, + { + "ce_ib": 1.9536865949630737, + "ce_orig": 0.6090062856674194, + "epoch": 1.2711913149759149, + "kl_loss": 0.04264692962169647, + "loss_ib": 0.0006218379712663591, + "step": 4421 + }, + { + "ce_ib": 3.9085869789123535, + "ce_orig": 0.9978996515274048, + "epoch": 1.2711913149759149, + "kl_loss": 0.04485681653022766, + "loss_ib": 0.0008394268224947155, + "step": 4421 + }, + { + "ce_ib": 1.8867305517196655, + "ce_orig": 0.47193652391433716, + "epoch": 1.2711913149759149, + "kl_loss": 0.0186898335814476, + "loss_ib": 0.00037557139876298606, + "step": 4421 + }, + { + "ce_ib": 3.3550243377685547, + "ce_orig": 0.8761637806892395, + "epoch": 1.2711913149759149, + "kl_loss": 0.04663601517677307, + "loss_ib": 0.0008018626249395311, + "step": 4421 + }, + { + "ce_ib": 3.029712200164795, + "ce_orig": 0.7492466568946838, + "epoch": 1.2714788985548924, + "kl_loss": 0.06631658971309662, + "loss_ib": 0.0009661371004767716, + "step": 4422 + }, + { + "ce_ib": 2.6748850345611572, + "ce_orig": 0.5062928795814514, + "epoch": 1.2714788985548924, + "kl_loss": 0.05586380511522293, + "loss_ib": 0.0008261265465989709, + "step": 4422 + }, + { + "ce_ib": 2.8049399852752686, + "ce_orig": 0.6567248106002808, + "epoch": 1.2714788985548924, + "kl_loss": 0.035995371639728546, + "loss_ib": 0.0006404477171599865, + "step": 4422 + }, + { + "ce_ib": 2.39663028717041, + "ce_orig": 0.5140756368637085, + "epoch": 1.2714788985548924, + "kl_loss": 0.03806628659367561, + "loss_ib": 0.000620325852651149, + "step": 4422 + }, + { + "ce_ib": 4.23832893371582, + "ce_orig": 1.47096848487854, + "epoch": 1.2717664821338701, + "kl_loss": 0.034702692180871964, + "loss_ib": 0.0007708597695454955, + "step": 4423 + }, + { + "ce_ib": 3.5684573650360107, + "ce_orig": 0.7788079977035522, + "epoch": 1.2717664821338701, + "kl_loss": 0.04752858355641365, + "loss_ib": 0.0008321315399371088, + "step": 4423 + }, + { + "ce_ib": 4.333921432495117, + "ce_orig": 1.186807632446289, + "epoch": 1.2717664821338701, + "kl_loss": 0.0348246693611145, + "loss_ib": 0.0007816387806087732, + "step": 4423 + }, + { + "ce_ib": 2.2407076358795166, + "ce_orig": 0.5206801891326904, + "epoch": 1.2717664821338701, + "kl_loss": 0.03434734046459198, + "loss_ib": 0.0005675441934727132, + "step": 4423 + }, + { + "ce_ib": 3.5134522914886475, + "ce_orig": 0.8020669221878052, + "epoch": 1.2720540657128478, + "kl_loss": 0.03171803429722786, + "loss_ib": 0.0006685255211777985, + "step": 4424 + }, + { + "ce_ib": 4.012816905975342, + "ce_orig": 1.00221586227417, + "epoch": 1.2720540657128478, + "kl_loss": 0.044610388576984406, + "loss_ib": 0.0008473854977637529, + "step": 4424 + }, + { + "ce_ib": 2.8678295612335205, + "ce_orig": 0.7391777038574219, + "epoch": 1.2720540657128478, + "kl_loss": 0.040396057069301605, + "loss_ib": 0.0006907434435561299, + "step": 4424 + }, + { + "ce_ib": 3.000438928604126, + "ce_orig": 0.9948460459709167, + "epoch": 1.2720540657128478, + "kl_loss": 0.03188392519950867, + "loss_ib": 0.0006188831175677478, + "step": 4424 + }, + { + "epoch": 1.2723416492918254, + "grad_norm": 0.10533622652292252, + "learning_rate": 3.230351994130755e-05, + "loss": 0.8829, + "step": 4425 + }, + { + "ce_ib": 2.2220547199249268, + "ce_orig": 0.48067528009414673, + "epoch": 1.2723416492918254, + "kl_loss": 0.042133621871471405, + "loss_ib": 0.00064354162896052, + "step": 4425 + }, + { + "ce_ib": 3.2153677940368652, + "ce_orig": 0.6670027375221252, + "epoch": 1.2723416492918254, + "kl_loss": 0.05365804582834244, + "loss_ib": 0.000858117185998708, + "step": 4425 + }, + { + "ce_ib": 3.588984727859497, + "ce_orig": 0.778231680393219, + "epoch": 1.2723416492918254, + "kl_loss": 0.03308529406785965, + "loss_ib": 0.0006897513521835208, + "step": 4425 + }, + { + "ce_ib": 4.708016872406006, + "ce_orig": 1.271942377090454, + "epoch": 1.2723416492918254, + "kl_loss": 0.03136717155575752, + "loss_ib": 0.0007844733772799373, + "step": 4425 + }, + { + "ce_ib": 2.3414623737335205, + "ce_orig": 0.47243112325668335, + "epoch": 1.272629232870803, + "kl_loss": 0.058484070003032684, + "loss_ib": 0.0008189868531189859, + "step": 4426 + }, + { + "ce_ib": 2.3712034225463867, + "ce_orig": 0.6499794721603394, + "epoch": 1.272629232870803, + "kl_loss": 0.04297493398189545, + "loss_ib": 0.0006668696296401322, + "step": 4426 + }, + { + "ce_ib": 3.587113380432129, + "ce_orig": 1.0222434997558594, + "epoch": 1.272629232870803, + "kl_loss": 0.044906534254550934, + "loss_ib": 0.0008077766397036612, + "step": 4426 + }, + { + "ce_ib": 2.445939302444458, + "ce_orig": 0.5623234510421753, + "epoch": 1.272629232870803, + "kl_loss": 0.03830450400710106, + "loss_ib": 0.0006276390049606562, + "step": 4426 + }, + { + "ce_ib": 3.494636058807373, + "ce_orig": 1.0948601961135864, + "epoch": 1.2729168164497806, + "kl_loss": 0.0435064435005188, + "loss_ib": 0.000784528034273535, + "step": 4427 + }, + { + "ce_ib": 2.2065954208374023, + "ce_orig": 0.6618528962135315, + "epoch": 1.2729168164497806, + "kl_loss": 0.03094113990664482, + "loss_ib": 0.0005300709744915366, + "step": 4427 + }, + { + "ce_ib": 4.936227798461914, + "ce_orig": 1.3060152530670166, + "epoch": 1.2729168164497806, + "kl_loss": 0.03171325474977493, + "loss_ib": 0.0008107553003355861, + "step": 4427 + }, + { + "ce_ib": 2.7380130290985107, + "ce_orig": 0.7410396337509155, + "epoch": 1.2729168164497806, + "kl_loss": 0.038118161261081696, + "loss_ib": 0.0006549829267896712, + "step": 4427 + }, + { + "ce_ib": 2.5383026599884033, + "ce_orig": 0.6187870502471924, + "epoch": 1.2732044000287583, + "kl_loss": 0.031067105010151863, + "loss_ib": 0.0005645012715831399, + "step": 4428 + }, + { + "ce_ib": 2.984553098678589, + "ce_orig": 0.5707635879516602, + "epoch": 1.2732044000287583, + "kl_loss": 0.10864841938018799, + "loss_ib": 0.001384939532727003, + "step": 4428 + }, + { + "ce_ib": 2.076463222503662, + "ce_orig": 0.6557684540748596, + "epoch": 1.2732044000287583, + "kl_loss": 0.02822474017739296, + "loss_ib": 0.0004898937186226249, + "step": 4428 + }, + { + "ce_ib": 3.672882556915283, + "ce_orig": 0.9520159959793091, + "epoch": 1.2732044000287583, + "kl_loss": 0.05034466087818146, + "loss_ib": 0.0008707348606549203, + "step": 4428 + }, + { + "ce_ib": 2.4736592769622803, + "ce_orig": 0.7363133430480957, + "epoch": 1.273491983607736, + "kl_loss": 0.029251696541905403, + "loss_ib": 0.0005398828652687371, + "step": 4429 + }, + { + "ce_ib": 1.8479108810424805, + "ce_orig": 0.510345995426178, + "epoch": 1.273491983607736, + "kl_loss": 0.02045282907783985, + "loss_ib": 0.0003893193497788161, + "step": 4429 + }, + { + "ce_ib": 2.806095838546753, + "ce_orig": 0.7149112820625305, + "epoch": 1.273491983607736, + "kl_loss": 0.04386453330516815, + "loss_ib": 0.0007192548946477473, + "step": 4429 + }, + { + "ce_ib": 2.2497024536132812, + "ce_orig": 0.6669415235519409, + "epoch": 1.273491983607736, + "kl_loss": 0.021058272570371628, + "loss_ib": 0.00043555296724662185, + "step": 4429 + }, + { + "epoch": 1.2737795671867136, + "grad_norm": 0.11510134488344193, + "learning_rate": 3.226639972445167e-05, + "loss": 0.7907, + "step": 4430 + }, + { + "ce_ib": 4.6652092933654785, + "ce_orig": 1.4613696336746216, + "epoch": 1.2737795671867136, + "kl_loss": 0.02782490663230419, + "loss_ib": 0.0007447699899785221, + "step": 4430 + }, + { + "ce_ib": 2.754326105117798, + "ce_orig": 0.700066864490509, + "epoch": 1.2737795671867136, + "kl_loss": 0.058701660484075546, + "loss_ib": 0.000862449174746871, + "step": 4430 + }, + { + "ce_ib": 2.9540042877197266, + "ce_orig": 0.8314077854156494, + "epoch": 1.2737795671867136, + "kl_loss": 0.04362993687391281, + "loss_ib": 0.0007316997507587075, + "step": 4430 + }, + { + "ce_ib": 3.9696011543273926, + "ce_orig": 1.1394377946853638, + "epoch": 1.2737795671867136, + "kl_loss": 0.05717713385820389, + "loss_ib": 0.0009687314741313457, + "step": 4430 + }, + { + "ce_ib": 2.4229350090026855, + "ce_orig": 0.7435259819030762, + "epoch": 1.2740671507656913, + "kl_loss": 0.020995263010263443, + "loss_ib": 0.00045224613859318197, + "step": 4431 + }, + { + "ce_ib": 3.575688362121582, + "ce_orig": 0.7625448703765869, + "epoch": 1.2740671507656913, + "kl_loss": 0.04388361796736717, + "loss_ib": 0.0007964049582369626, + "step": 4431 + }, + { + "ce_ib": 3.3341023921966553, + "ce_orig": 1.1867421865463257, + "epoch": 1.2740671507656913, + "kl_loss": 0.03870069608092308, + "loss_ib": 0.0007204171852208674, + "step": 4431 + }, + { + "ce_ib": 3.419938564300537, + "ce_orig": 0.9346266388893127, + "epoch": 1.2740671507656913, + "kl_loss": 0.04244421795010567, + "loss_ib": 0.000766435987316072, + "step": 4431 + }, + { + "ce_ib": 2.5373878479003906, + "ce_orig": 0.6651410460472107, + "epoch": 1.2743547343446688, + "kl_loss": 0.02255862206220627, + "loss_ib": 0.00047932498273439705, + "step": 4432 + }, + { + "ce_ib": 3.43269944190979, + "ce_orig": 0.9709749817848206, + "epoch": 1.2743547343446688, + "kl_loss": 0.040022559463977814, + "loss_ib": 0.0007434955332428217, + "step": 4432 + }, + { + "ce_ib": 3.6345038414001465, + "ce_orig": 0.7824410796165466, + "epoch": 1.2743547343446688, + "kl_loss": 0.025380313396453857, + "loss_ib": 0.0006172534776851535, + "step": 4432 + }, + { + "ce_ib": 2.64506459236145, + "ce_orig": 0.6944803595542908, + "epoch": 1.2743547343446688, + "kl_loss": 0.039973922073841095, + "loss_ib": 0.0006642456282861531, + "step": 4432 + }, + { + "ce_ib": 4.006752967834473, + "ce_orig": 1.0063177347183228, + "epoch": 1.2746423179236466, + "kl_loss": 0.059750281274318695, + "loss_ib": 0.0009981781477108598, + "step": 4433 + }, + { + "ce_ib": 2.780534267425537, + "ce_orig": 0.7970536351203918, + "epoch": 1.2746423179236466, + "kl_loss": 0.028024224564433098, + "loss_ib": 0.0005582956364378333, + "step": 4433 + }, + { + "ce_ib": 3.8025147914886475, + "ce_orig": 1.1532251834869385, + "epoch": 1.2746423179236466, + "kl_loss": 0.033824410289525986, + "loss_ib": 0.0007184955175034702, + "step": 4433 + }, + { + "ce_ib": 4.35082483291626, + "ce_orig": 0.9196264743804932, + "epoch": 1.2746423179236466, + "kl_loss": 0.05165531486272812, + "loss_ib": 0.0009516355930827558, + "step": 4433 + }, + { + "ce_ib": 3.073517084121704, + "ce_orig": 0.7193306684494019, + "epoch": 1.2749299015026243, + "kl_loss": 0.03981180861592293, + "loss_ib": 0.0007054697489365935, + "step": 4434 + }, + { + "ce_ib": 3.9623801708221436, + "ce_orig": 1.2085111141204834, + "epoch": 1.2749299015026243, + "kl_loss": 0.04848919063806534, + "loss_ib": 0.0008811299339868128, + "step": 4434 + }, + { + "ce_ib": 3.151926279067993, + "ce_orig": 0.961897611618042, + "epoch": 1.2749299015026243, + "kl_loss": 0.03388511389493942, + "loss_ib": 0.0006540436879731715, + "step": 4434 + }, + { + "ce_ib": 4.0121750831604, + "ce_orig": 1.245322346687317, + "epoch": 1.2749299015026243, + "kl_loss": 0.045468032360076904, + "loss_ib": 0.0008558977860957384, + "step": 4434 + }, + { + "epoch": 1.2752174850816018, + "grad_norm": 0.10754955559968948, + "learning_rate": 3.2229262001152286e-05, + "loss": 0.8555, + "step": 4435 + }, + { + "ce_ib": 3.1380481719970703, + "ce_orig": 0.8650233149528503, + "epoch": 1.2752174850816018, + "kl_loss": 0.02941848523914814, + "loss_ib": 0.0006079896120354533, + "step": 4435 + }, + { + "ce_ib": 2.827397346496582, + "ce_orig": 0.9488897323608398, + "epoch": 1.2752174850816018, + "kl_loss": 0.0307028666138649, + "loss_ib": 0.0005897684022784233, + "step": 4435 + }, + { + "ce_ib": 3.3227288722991943, + "ce_orig": 1.0795507431030273, + "epoch": 1.2752174850816018, + "kl_loss": 0.03266434371471405, + "loss_ib": 0.0006589163094758987, + "step": 4435 + }, + { + "ce_ib": 5.218452453613281, + "ce_orig": 1.4940862655639648, + "epoch": 1.2752174850816018, + "kl_loss": 0.04832505062222481, + "loss_ib": 0.001005095662549138, + "step": 4435 + }, + { + "ce_ib": 2.762178659439087, + "ce_orig": 0.6175373196601868, + "epoch": 1.2755050686605796, + "kl_loss": 0.029252372682094574, + "loss_ib": 0.0005687415250577033, + "step": 4436 + }, + { + "ce_ib": 2.0104598999023438, + "ce_orig": 0.38239169120788574, + "epoch": 1.2755050686605796, + "kl_loss": 0.03784281015396118, + "loss_ib": 0.000579474086407572, + "step": 4436 + }, + { + "ce_ib": 4.706171989440918, + "ce_orig": 1.3522038459777832, + "epoch": 1.2755050686605796, + "kl_loss": 0.046327341347932816, + "loss_ib": 0.0009338906384073198, + "step": 4436 + }, + { + "ce_ib": 3.5857036113739014, + "ce_orig": 0.6785675287246704, + "epoch": 1.2755050686605796, + "kl_loss": 0.05674765259027481, + "loss_ib": 0.0009260468650609255, + "step": 4436 + }, + { + "ce_ib": 3.0405595302581787, + "ce_orig": 0.5801121592521667, + "epoch": 1.275792652239557, + "kl_loss": 0.045408815145492554, + "loss_ib": 0.0007581440731883049, + "step": 4437 + }, + { + "ce_ib": 3.815199136734009, + "ce_orig": 1.1183574199676514, + "epoch": 1.275792652239557, + "kl_loss": 0.03020121529698372, + "loss_ib": 0.0006835320382378995, + "step": 4437 + }, + { + "ce_ib": 4.705102443695068, + "ce_orig": 1.2609186172485352, + "epoch": 1.275792652239557, + "kl_loss": 0.055504970252513885, + "loss_ib": 0.0010255598463118076, + "step": 4437 + }, + { + "ce_ib": 2.6985905170440674, + "ce_orig": 0.8033261895179749, + "epoch": 1.275792652239557, + "kl_loss": 0.04179643839597702, + "loss_ib": 0.0006878234562464058, + "step": 4437 + }, + { + "ce_ib": 3.068077564239502, + "ce_orig": 1.099273920059204, + "epoch": 1.2760802358185348, + "kl_loss": 0.025776907801628113, + "loss_ib": 0.0005645768251270056, + "step": 4438 + }, + { + "ce_ib": 2.837117910385132, + "ce_orig": 0.8104127049446106, + "epoch": 1.2760802358185348, + "kl_loss": 0.0287068672478199, + "loss_ib": 0.0005707804230041802, + "step": 4438 + }, + { + "ce_ib": 5.696500301361084, + "ce_orig": 1.3636339902877808, + "epoch": 1.2760802358185348, + "kl_loss": 0.05493108928203583, + "loss_ib": 0.0011189609067514539, + "step": 4438 + }, + { + "ce_ib": 3.852658271789551, + "ce_orig": 0.8427087664604187, + "epoch": 1.2760802358185348, + "kl_loss": 0.06266139447689056, + "loss_ib": 0.0010118797654286027, + "step": 4438 + }, + { + "ce_ib": 1.5757380723953247, + "ce_orig": 0.368777334690094, + "epoch": 1.2763678193975125, + "kl_loss": 0.02064613625407219, + "loss_ib": 0.0003640351351350546, + "step": 4439 + }, + { + "ce_ib": 2.936253309249878, + "ce_orig": 0.955931544303894, + "epoch": 1.2763678193975125, + "kl_loss": 0.019308676943182945, + "loss_ib": 0.0004867120587732643, + "step": 4439 + }, + { + "ce_ib": 4.023527145385742, + "ce_orig": 1.1727344989776611, + "epoch": 1.2763678193975125, + "kl_loss": 0.03741917014122009, + "loss_ib": 0.0007765443297103047, + "step": 4439 + }, + { + "ce_ib": 1.1185693740844727, + "ce_orig": 0.17090600728988647, + "epoch": 1.2763678193975125, + "kl_loss": 0.03933755308389664, + "loss_ib": 0.00050523248501122, + "step": 4439 + }, + { + "epoch": 1.27665540297649, + "grad_norm": 0.10946527123451233, + "learning_rate": 3.219210686088278e-05, + "loss": 0.8348, + "step": 4440 + }, + { + "ce_ib": 3.4305412769317627, + "ce_orig": 0.950636088848114, + "epoch": 1.27665540297649, + "kl_loss": 0.034872982650995255, + "loss_ib": 0.0006917839054949582, + "step": 4440 + }, + { + "ce_ib": 4.7100725173950195, + "ce_orig": 1.2761359214782715, + "epoch": 1.27665540297649, + "kl_loss": 0.043099213391542435, + "loss_ib": 0.000901999301277101, + "step": 4440 + }, + { + "ce_ib": 3.1456923484802246, + "ce_orig": 0.6950331926345825, + "epoch": 1.27665540297649, + "kl_loss": 0.04919920116662979, + "loss_ib": 0.000806561263743788, + "step": 4440 + }, + { + "ce_ib": 3.0220961570739746, + "ce_orig": 0.7309277057647705, + "epoch": 1.27665540297649, + "kl_loss": 0.026407985016703606, + "loss_ib": 0.0005662894691340625, + "step": 4440 + }, + { + "ce_ib": 1.910854697227478, + "ce_orig": 0.6513403058052063, + "epoch": 1.2769429865554676, + "kl_loss": 0.015222798101603985, + "loss_ib": 0.00034331344068050385, + "step": 4441 + }, + { + "ce_ib": 2.340829610824585, + "ce_orig": 0.6122603416442871, + "epoch": 1.2769429865554676, + "kl_loss": 0.04575099050998688, + "loss_ib": 0.0006915928097441792, + "step": 4441 + }, + { + "ce_ib": 3.216324806213379, + "ce_orig": 0.514555811882019, + "epoch": 1.2769429865554676, + "kl_loss": 0.04861357808113098, + "loss_ib": 0.0008077682578004897, + "step": 4441 + }, + { + "ce_ib": 3.1931190490722656, + "ce_orig": 0.6878113746643066, + "epoch": 1.2769429865554676, + "kl_loss": 0.04547744616866112, + "loss_ib": 0.0007740863366052508, + "step": 4441 + }, + { + "ce_ib": 3.4186105728149414, + "ce_orig": 0.9896261692047119, + "epoch": 1.2772305701344453, + "kl_loss": 0.0420956052839756, + "loss_ib": 0.0007628171006217599, + "step": 4442 + }, + { + "ce_ib": 3.3182032108306885, + "ce_orig": 0.7753350734710693, + "epoch": 1.2772305701344453, + "kl_loss": 0.0563599057495594, + "loss_ib": 0.0008954193908721209, + "step": 4442 + }, + { + "ce_ib": 4.390225410461426, + "ce_orig": 1.2905082702636719, + "epoch": 1.2772305701344453, + "kl_loss": 0.028806958347558975, + "loss_ib": 0.0007270920905284584, + "step": 4442 + }, + { + "ce_ib": 2.3208205699920654, + "ce_orig": 0.6390832662582397, + "epoch": 1.2772305701344453, + "kl_loss": 0.0444553941488266, + "loss_ib": 0.0006766359438188374, + "step": 4442 + }, + { + "ce_ib": 5.120141983032227, + "ce_orig": 1.809262990951538, + "epoch": 1.277518153713423, + "kl_loss": 0.04552745074033737, + "loss_ib": 0.0009672886808402836, + "step": 4443 + }, + { + "ce_ib": 1.7450695037841797, + "ce_orig": 0.48868805170059204, + "epoch": 1.277518153713423, + "kl_loss": 0.037252068519592285, + "loss_ib": 0.0005470276228152215, + "step": 4443 + }, + { + "ce_ib": 2.6189069747924805, + "ce_orig": 0.7243270874023438, + "epoch": 1.277518153713423, + "kl_loss": 0.04489700496196747, + "loss_ib": 0.0007108607096597552, + "step": 4443 + }, + { + "ce_ib": 2.9056057929992676, + "ce_orig": 0.7972027063369751, + "epoch": 1.277518153713423, + "kl_loss": 0.0341770201921463, + "loss_ib": 0.0006323307752609253, + "step": 4443 + }, + { + "ce_ib": 3.5958774089813232, + "ce_orig": 1.108939290046692, + "epoch": 1.2778057372924005, + "kl_loss": 0.03127817064523697, + "loss_ib": 0.0006723694386892021, + "step": 4444 + }, + { + "ce_ib": 3.294086456298828, + "ce_orig": 0.654003381729126, + "epoch": 1.2778057372924005, + "kl_loss": 0.03910928592085838, + "loss_ib": 0.000720501528121531, + "step": 4444 + }, + { + "ce_ib": 2.9346654415130615, + "ce_orig": 0.6132904887199402, + "epoch": 1.2778057372924005, + "kl_loss": 0.024167388677597046, + "loss_ib": 0.0005351403960958123, + "step": 4444 + }, + { + "ce_ib": 2.664518356323242, + "ce_orig": 0.6286492943763733, + "epoch": 1.2778057372924005, + "kl_loss": 0.049600593745708466, + "loss_ib": 0.0007624577847309411, + "step": 4444 + }, + { + "epoch": 1.2780933208713783, + "grad_norm": 0.11688470840454102, + "learning_rate": 3.2154934393158534e-05, + "loss": 0.8314, + "step": 4445 + }, + { + "ce_ib": 2.723773956298828, + "ce_orig": 0.48024430871009827, + "epoch": 1.2780933208713783, + "kl_loss": 0.1567523181438446, + "loss_ib": 0.0018399006221443415, + "step": 4445 + }, + { + "ce_ib": 3.5264036655426025, + "ce_orig": 1.086857795715332, + "epoch": 1.2780933208713783, + "kl_loss": 0.04484479874372482, + "loss_ib": 0.0008010883466340601, + "step": 4445 + }, + { + "ce_ib": 1.7159087657928467, + "ce_orig": 0.2810918986797333, + "epoch": 1.2780933208713783, + "kl_loss": 0.05532391741871834, + "loss_ib": 0.0007248300244100392, + "step": 4445 + }, + { + "ce_ib": 3.988217353820801, + "ce_orig": 1.168535590171814, + "epoch": 1.2780933208713783, + "kl_loss": 0.037807170301675797, + "loss_ib": 0.0007768934010528028, + "step": 4445 + }, + { + "ce_ib": 3.509591579437256, + "ce_orig": 0.8848838806152344, + "epoch": 1.2783809044503558, + "kl_loss": 0.04463446885347366, + "loss_ib": 0.0007973038009367883, + "step": 4446 + }, + { + "ce_ib": 5.045328140258789, + "ce_orig": 1.5341882705688477, + "epoch": 1.2783809044503558, + "kl_loss": 0.03693704307079315, + "loss_ib": 0.0008739031618461013, + "step": 4446 + }, + { + "ce_ib": 2.5055456161499023, + "ce_orig": 0.7665419578552246, + "epoch": 1.2783809044503558, + "kl_loss": 0.025768136605620384, + "loss_ib": 0.0005082358838990331, + "step": 4446 + }, + { + "ce_ib": 2.8275907039642334, + "ce_orig": 0.5921735763549805, + "epoch": 1.2783809044503558, + "kl_loss": 0.04728055000305176, + "loss_ib": 0.0007555645424872637, + "step": 4446 + }, + { + "ce_ib": 4.228602886199951, + "ce_orig": 1.1326420307159424, + "epoch": 1.2786684880293335, + "kl_loss": 0.040263354778289795, + "loss_ib": 0.0008254937711171806, + "step": 4447 + }, + { + "ce_ib": 2.924999475479126, + "ce_orig": 0.8593735694885254, + "epoch": 1.2786684880293335, + "kl_loss": 0.03123769350349903, + "loss_ib": 0.0006048768409527838, + "step": 4447 + }, + { + "ce_ib": 2.137730836868286, + "ce_orig": 0.6894881725311279, + "epoch": 1.2786684880293335, + "kl_loss": 0.14661937952041626, + "loss_ib": 0.00167996680829674, + "step": 4447 + }, + { + "ce_ib": 3.8815054893493652, + "ce_orig": 1.231552004814148, + "epoch": 1.2786684880293335, + "kl_loss": 0.05605582520365715, + "loss_ib": 0.0009487087954767048, + "step": 4447 + }, + { + "ce_ib": 1.9959118366241455, + "ce_orig": 0.6416285634040833, + "epoch": 1.2789560716083113, + "kl_loss": 0.02597212605178356, + "loss_ib": 0.00045931240310892463, + "step": 4448 + }, + { + "ce_ib": 4.450961589813232, + "ce_orig": 1.3108769655227661, + "epoch": 1.2789560716083113, + "kl_loss": 0.041645653545856476, + "loss_ib": 0.0008615526603534818, + "step": 4448 + }, + { + "ce_ib": 2.5121405124664307, + "ce_orig": 0.6400777697563171, + "epoch": 1.2789560716083113, + "kl_loss": 0.04582800716161728, + "loss_ib": 0.0007094941101968288, + "step": 4448 + }, + { + "ce_ib": 2.4505152702331543, + "ce_orig": 0.5459441542625427, + "epoch": 1.2789560716083113, + "kl_loss": 0.04832877963781357, + "loss_ib": 0.0007283393060788512, + "step": 4448 + }, + { + "ce_ib": 4.198945999145508, + "ce_orig": 1.1829427480697632, + "epoch": 1.2792436551872888, + "kl_loss": 0.03966297209262848, + "loss_ib": 0.0008165243198163807, + "step": 4449 + }, + { + "ce_ib": 0.8533406853675842, + "ce_orig": 0.19134335219860077, + "epoch": 1.2792436551872888, + "kl_loss": 0.07600194215774536, + "loss_ib": 0.0008453534101136029, + "step": 4449 + }, + { + "ce_ib": 2.3607418537139893, + "ce_orig": 0.7122697830200195, + "epoch": 1.2792436551872888, + "kl_loss": 0.03701798990368843, + "loss_ib": 0.0006062540924176574, + "step": 4449 + }, + { + "ce_ib": 3.795440912246704, + "ce_orig": 1.0010173320770264, + "epoch": 1.2792436551872888, + "kl_loss": 0.03637702018022537, + "loss_ib": 0.0007433143327943981, + "step": 4449 + }, + { + "epoch": 1.2795312387662665, + "grad_norm": 0.12408386915922165, + "learning_rate": 3.2117744687536636e-05, + "loss": 0.8195, + "step": 4450 + }, + { + "ce_ib": 2.127194881439209, + "ce_orig": 0.5459045171737671, + "epoch": 1.2795312387662665, + "kl_loss": 0.04072178155183792, + "loss_ib": 0.0006199372583068907, + "step": 4450 + }, + { + "ce_ib": 2.8073267936706543, + "ce_orig": 0.6685857772827148, + "epoch": 1.2795312387662665, + "kl_loss": 0.0737786591053009, + "loss_ib": 0.0010185192804783583, + "step": 4450 + }, + { + "ce_ib": 3.7751448154449463, + "ce_orig": 0.8064935803413391, + "epoch": 1.2795312387662665, + "kl_loss": 0.040349334478378296, + "loss_ib": 0.0007810078677721322, + "step": 4450 + }, + { + "ce_ib": 2.3497514724731445, + "ce_orig": 0.528609573841095, + "epoch": 1.2795312387662665, + "kl_loss": 0.036885522305965424, + "loss_ib": 0.0006038303254172206, + "step": 4450 + }, + { + "ce_ib": 4.01030969619751, + "ce_orig": 1.232796549797058, + "epoch": 1.279818822345244, + "kl_loss": 0.04610808193683624, + "loss_ib": 0.0008621117449365556, + "step": 4451 + }, + { + "ce_ib": 0.8409925103187561, + "ce_orig": 0.14256255328655243, + "epoch": 1.279818822345244, + "kl_loss": 0.07785426080226898, + "loss_ib": 0.0008626418421044946, + "step": 4451 + }, + { + "ce_ib": 2.032302141189575, + "ce_orig": 0.4494565725326538, + "epoch": 1.279818822345244, + "kl_loss": 0.032113514840602875, + "loss_ib": 0.0005243653431534767, + "step": 4451 + }, + { + "ce_ib": 2.5040946006774902, + "ce_orig": 0.8828515410423279, + "epoch": 1.279818822345244, + "kl_loss": 0.029695473611354828, + "loss_ib": 0.0005473641795106232, + "step": 4451 + }, + { + "ce_ib": 3.0607800483703613, + "ce_orig": 0.7500450015068054, + "epoch": 1.2801064059242218, + "kl_loss": 0.033646710216999054, + "loss_ib": 0.0006425450555980206, + "step": 4452 + }, + { + "ce_ib": 3.0113682746887207, + "ce_orig": 0.7379306554794312, + "epoch": 1.2801064059242218, + "kl_loss": 0.027602335438132286, + "loss_ib": 0.000577160157263279, + "step": 4452 + }, + { + "ce_ib": 2.0414657592773438, + "ce_orig": 0.4635581970214844, + "epoch": 1.2801064059242218, + "kl_loss": 0.03904718533158302, + "loss_ib": 0.0005946184392087162, + "step": 4452 + }, + { + "ce_ib": 3.4275434017181396, + "ce_orig": 0.7030160427093506, + "epoch": 1.2801064059242218, + "kl_loss": 0.056987177580595016, + "loss_ib": 0.0009126260410994291, + "step": 4452 + }, + { + "ce_ib": 4.689213752746582, + "ce_orig": 1.0908700227737427, + "epoch": 1.2803939895031995, + "kl_loss": 0.09607739746570587, + "loss_ib": 0.0014296952867880464, + "step": 4453 + }, + { + "ce_ib": 3.4047670364379883, + "ce_orig": 0.6747841835021973, + "epoch": 1.2803939895031995, + "kl_loss": 0.02461141347885132, + "loss_ib": 0.0005865908460691571, + "step": 4453 + }, + { + "ce_ib": 3.045952796936035, + "ce_orig": 0.6870266795158386, + "epoch": 1.2803939895031995, + "kl_loss": 0.02665700577199459, + "loss_ib": 0.00057116529205814, + "step": 4453 + }, + { + "ce_ib": 2.9515655040740967, + "ce_orig": 0.6503739356994629, + "epoch": 1.2803939895031995, + "kl_loss": 0.036629945039749146, + "loss_ib": 0.0006614559679292142, + "step": 4453 + }, + { + "ce_ib": 3.6585123538970947, + "ce_orig": 1.1903719902038574, + "epoch": 1.280681573082177, + "kl_loss": 0.04420677572488785, + "loss_ib": 0.0008079189574345946, + "step": 4454 + }, + { + "ce_ib": 3.4751691818237305, + "ce_orig": 0.5864376425743103, + "epoch": 1.280681573082177, + "kl_loss": 0.057735294103622437, + "loss_ib": 0.0009248698479495943, + "step": 4454 + }, + { + "ce_ib": 4.71577787399292, + "ce_orig": 0.8550558686256409, + "epoch": 1.280681573082177, + "kl_loss": 0.050362780690193176, + "loss_ib": 0.0009752055630087852, + "step": 4454 + }, + { + "ce_ib": 3.303412675857544, + "ce_orig": 0.6883498430252075, + "epoch": 1.280681573082177, + "kl_loss": 0.037403710186481476, + "loss_ib": 0.0007043782970868051, + "step": 4454 + }, + { + "epoch": 1.2809691566611545, + "grad_norm": 0.11022134870290756, + "learning_rate": 3.208053783361573e-05, + "loss": 0.7833, + "step": 4455 + }, + { + "ce_ib": 2.688687562942505, + "ce_orig": 0.5819972157478333, + "epoch": 1.2809691566611545, + "kl_loss": 0.03627400100231171, + "loss_ib": 0.0006316087092272937, + "step": 4455 + }, + { + "ce_ib": 3.0069637298583984, + "ce_orig": 0.7059890627861023, + "epoch": 1.2809691566611545, + "kl_loss": 0.0316983163356781, + "loss_ib": 0.0006176794995553792, + "step": 4455 + }, + { + "ce_ib": 5.234445571899414, + "ce_orig": 1.3498204946517944, + "epoch": 1.2809691566611545, + "kl_loss": 0.03720325976610184, + "loss_ib": 0.000895477132871747, + "step": 4455 + }, + { + "ce_ib": 2.259401321411133, + "ce_orig": 0.6483685374259949, + "epoch": 1.2809691566611545, + "kl_loss": 0.01774773746728897, + "loss_ib": 0.00040341747808270156, + "step": 4455 + }, + { + "ce_ib": 0.9009489417076111, + "ce_orig": 0.13226646184921265, + "epoch": 1.2812567402401323, + "kl_loss": 0.06364449858665466, + "loss_ib": 0.0007265398744493723, + "step": 4456 + }, + { + "ce_ib": 2.963714599609375, + "ce_orig": 0.9361583590507507, + "epoch": 1.2812567402401323, + "kl_loss": 0.02805948257446289, + "loss_ib": 0.0005769662675447762, + "step": 4456 + }, + { + "ce_ib": 4.252908706665039, + "ce_orig": 1.016947865486145, + "epoch": 1.2812567402401323, + "kl_loss": 0.05583242326974869, + "loss_ib": 0.0009836150566115975, + "step": 4456 + }, + { + "ce_ib": 4.448090553283691, + "ce_orig": 0.7417461276054382, + "epoch": 1.2812567402401323, + "kl_loss": 0.07856225967407227, + "loss_ib": 0.0012304316041991115, + "step": 4456 + }, + { + "ce_ib": 3.3435730934143066, + "ce_orig": 0.7882320284843445, + "epoch": 1.28154432381911, + "kl_loss": 0.1324523687362671, + "loss_ib": 0.0016588809667155147, + "step": 4457 + }, + { + "ce_ib": 2.844146966934204, + "ce_orig": 0.6831561923027039, + "epoch": 1.28154432381911, + "kl_loss": 0.05394928157329559, + "loss_ib": 0.0008239074377343059, + "step": 4457 + }, + { + "ce_ib": 2.9380905628204346, + "ce_orig": 0.7919028401374817, + "epoch": 1.28154432381911, + "kl_loss": 0.02552146464586258, + "loss_ib": 0.0005490236799232662, + "step": 4457 + }, + { + "ce_ib": 1.7394793033599854, + "ce_orig": 0.5232303738594055, + "epoch": 1.28154432381911, + "kl_loss": 0.024764398112893105, + "loss_ib": 0.0004215919179841876, + "step": 4457 + }, + { + "ce_ib": 5.574509620666504, + "ce_orig": 1.4520906209945679, + "epoch": 1.2818319073980875, + "kl_loss": 0.044987764209508896, + "loss_ib": 0.0010073286248371005, + "step": 4458 + }, + { + "ce_ib": 2.2561075687408447, + "ce_orig": 0.534507691860199, + "epoch": 1.2818319073980875, + "kl_loss": 0.029850736260414124, + "loss_ib": 0.0005241180770099163, + "step": 4458 + }, + { + "ce_ib": 2.444014549255371, + "ce_orig": 0.7895253300666809, + "epoch": 1.2818319073980875, + "kl_loss": 0.033379748463630676, + "loss_ib": 0.0005781989311799407, + "step": 4458 + }, + { + "ce_ib": 2.295620918273926, + "ce_orig": 0.7483843564987183, + "epoch": 1.2818319073980875, + "kl_loss": 0.03927627205848694, + "loss_ib": 0.0006223247619345784, + "step": 4458 + }, + { + "ce_ib": 2.2054905891418457, + "ce_orig": 0.571591854095459, + "epoch": 1.2821194909770652, + "kl_loss": 0.02233176678419113, + "loss_ib": 0.000443866680143401, + "step": 4459 + }, + { + "ce_ib": 3.4209978580474854, + "ce_orig": 0.8229070901870728, + "epoch": 1.2821194909770652, + "kl_loss": 0.05559943616390228, + "loss_ib": 0.0008980941493064165, + "step": 4459 + }, + { + "ce_ib": 4.34708309173584, + "ce_orig": 1.1655341386795044, + "epoch": 1.2821194909770652, + "kl_loss": 0.03694980964064598, + "loss_ib": 0.0008042064146138728, + "step": 4459 + }, + { + "ce_ib": 4.125705718994141, + "ce_orig": 1.3604320287704468, + "epoch": 1.2821194909770652, + "kl_loss": 0.03649958223104477, + "loss_ib": 0.0007775663980282843, + "step": 4459 + }, + { + "epoch": 1.2824070745560427, + "grad_norm": 0.12074330449104309, + "learning_rate": 3.2043313921035743e-05, + "loss": 0.8305, + "step": 4460 + }, + { + "ce_ib": 3.261138677597046, + "ce_orig": 0.8333669900894165, + "epoch": 1.2824070745560427, + "kl_loss": 0.04281948134303093, + "loss_ib": 0.0007543087122030556, + "step": 4460 + }, + { + "ce_ib": 4.090667724609375, + "ce_orig": 1.1492620706558228, + "epoch": 1.2824070745560427, + "kl_loss": 0.047630779445171356, + "loss_ib": 0.0008853745530359447, + "step": 4460 + }, + { + "ce_ib": 2.942680835723877, + "ce_orig": 0.7359097599983215, + "epoch": 1.2824070745560427, + "kl_loss": 0.04958441108465195, + "loss_ib": 0.0007901121280156076, + "step": 4460 + }, + { + "ce_ib": 3.453341245651245, + "ce_orig": 0.8115570545196533, + "epoch": 1.2824070745560427, + "kl_loss": 0.02724583074450493, + "loss_ib": 0.0006177924224175513, + "step": 4460 + }, + { + "ce_ib": 2.1542880535125732, + "ce_orig": 0.6517717242240906, + "epoch": 1.2826946581350205, + "kl_loss": 0.0563068613409996, + "loss_ib": 0.0007784974295645952, + "step": 4461 + }, + { + "ce_ib": 3.009824514389038, + "ce_orig": 0.6378299593925476, + "epoch": 1.2826946581350205, + "kl_loss": 0.054275304079055786, + "loss_ib": 0.0008437354699708521, + "step": 4461 + }, + { + "ce_ib": 3.101815700531006, + "ce_orig": 0.8161347508430481, + "epoch": 1.2826946581350205, + "kl_loss": 0.062359392642974854, + "loss_ib": 0.000933775445446372, + "step": 4461 + }, + { + "ce_ib": 2.6351447105407715, + "ce_orig": 0.6833446621894836, + "epoch": 1.2826946581350205, + "kl_loss": 0.03384635969996452, + "loss_ib": 0.0006019780412316322, + "step": 4461 + }, + { + "ce_ib": 2.8730075359344482, + "ce_orig": 0.7425087690353394, + "epoch": 1.2829822417139982, + "kl_loss": 0.03833547234535217, + "loss_ib": 0.0006706554559059441, + "step": 4462 + }, + { + "ce_ib": 3.278998851776123, + "ce_orig": 0.9666758179664612, + "epoch": 1.2829822417139982, + "kl_loss": 0.03729577362537384, + "loss_ib": 0.000700857606716454, + "step": 4462 + }, + { + "ce_ib": 3.8538286685943604, + "ce_orig": 1.0433076620101929, + "epoch": 1.2829822417139982, + "kl_loss": 0.035263799130916595, + "loss_ib": 0.0007380208116956055, + "step": 4462 + }, + { + "ce_ib": 2.446106433868408, + "ce_orig": 0.7127349972724915, + "epoch": 1.2829822417139982, + "kl_loss": 0.03141739219427109, + "loss_ib": 0.0005587845807895064, + "step": 4462 + }, + { + "ce_ib": 1.885520100593567, + "ce_orig": 0.4243689775466919, + "epoch": 1.2832698252929757, + "kl_loss": 0.06598114967346191, + "loss_ib": 0.0008483634446747601, + "step": 4463 + }, + { + "ce_ib": 4.049178600311279, + "ce_orig": 1.1085598468780518, + "epoch": 1.2832698252929757, + "kl_loss": 0.03753449395298958, + "loss_ib": 0.0007802628097124398, + "step": 4463 + }, + { + "ce_ib": 2.5862579345703125, + "ce_orig": 0.6714182496070862, + "epoch": 1.2832698252929757, + "kl_loss": 0.027089975774288177, + "loss_ib": 0.0005295255105011165, + "step": 4463 + }, + { + "ce_ib": 3.2712881565093994, + "ce_orig": 0.8278493881225586, + "epoch": 1.2832698252929757, + "kl_loss": 0.02901652082800865, + "loss_ib": 0.0006172939902171493, + "step": 4463 + }, + { + "ce_ib": 3.083165407180786, + "ce_orig": 0.6516585946083069, + "epoch": 1.2835574088719535, + "kl_loss": 0.03748304396867752, + "loss_ib": 0.000683146994560957, + "step": 4464 + }, + { + "ce_ib": 1.7062207460403442, + "ce_orig": 0.3850683867931366, + "epoch": 1.2835574088719535, + "kl_loss": 0.03103744238615036, + "loss_ib": 0.00048099650302901864, + "step": 4464 + }, + { + "ce_ib": 4.062726974487305, + "ce_orig": 1.2440141439437866, + "epoch": 1.2835574088719535, + "kl_loss": 0.05447714775800705, + "loss_ib": 0.0009510440868325531, + "step": 4464 + }, + { + "ce_ib": 4.945428848266602, + "ce_orig": 1.5452152490615845, + "epoch": 1.2835574088719535, + "kl_loss": 0.0847644954919815, + "loss_ib": 0.0013421877520158887, + "step": 4464 + }, + { + "epoch": 1.283844992450931, + "grad_norm": 0.10694213211536407, + "learning_rate": 3.2006073039477744e-05, + "loss": 0.7726, + "step": 4465 + }, + { + "ce_ib": 4.577495574951172, + "ce_orig": 1.123873233795166, + "epoch": 1.283844992450931, + "kl_loss": 0.04100329428911209, + "loss_ib": 0.0008677825098857284, + "step": 4465 + }, + { + "ce_ib": 3.7110793590545654, + "ce_orig": 0.9507580399513245, + "epoch": 1.283844992450931, + "kl_loss": 0.03692235052585602, + "loss_ib": 0.0007403314230032265, + "step": 4465 + }, + { + "ce_ib": 2.406301975250244, + "ce_orig": 0.5968446135520935, + "epoch": 1.283844992450931, + "kl_loss": 0.04153277724981308, + "loss_ib": 0.0006559579633176327, + "step": 4465 + }, + { + "ce_ib": 4.33561372756958, + "ce_orig": 0.5524322390556335, + "epoch": 1.283844992450931, + "kl_loss": 0.05886632949113846, + "loss_ib": 0.0010222246637567878, + "step": 4465 + }, + { + "ce_ib": 2.375108242034912, + "ce_orig": 0.6062129735946655, + "epoch": 1.2841325760299087, + "kl_loss": 0.02098485641181469, + "loss_ib": 0.0004473593726288527, + "step": 4466 + }, + { + "ce_ib": 2.215773582458496, + "ce_orig": 0.48332786560058594, + "epoch": 1.2841325760299087, + "kl_loss": 0.048390716314315796, + "loss_ib": 0.0007054845336824656, + "step": 4466 + }, + { + "ce_ib": 1.6714966297149658, + "ce_orig": 0.4250623285770416, + "epoch": 1.2841325760299087, + "kl_loss": 0.04086660221219063, + "loss_ib": 0.0005758156767114997, + "step": 4466 + }, + { + "ce_ib": 4.043229579925537, + "ce_orig": 1.2272629737854004, + "epoch": 1.2841325760299087, + "kl_loss": 0.043494828045368195, + "loss_ib": 0.0008392712334170938, + "step": 4466 + }, + { + "ce_ib": 3.0699539184570312, + "ce_orig": 0.6381486058235168, + "epoch": 1.2844201596088864, + "kl_loss": 0.027099333703517914, + "loss_ib": 0.0005779886851087213, + "step": 4467 + }, + { + "ce_ib": 2.5791432857513428, + "ce_orig": 0.7741358876228333, + "epoch": 1.2844201596088864, + "kl_loss": 0.019428400322794914, + "loss_ib": 0.0004521982918959111, + "step": 4467 + }, + { + "ce_ib": 3.102980136871338, + "ce_orig": 0.863409698009491, + "epoch": 1.2844201596088864, + "kl_loss": 0.04125518724322319, + "loss_ib": 0.0007228498579934239, + "step": 4467 + }, + { + "ce_ib": 4.033925533294678, + "ce_orig": 1.1289575099945068, + "epoch": 1.2844201596088864, + "kl_loss": 0.06418062746524811, + "loss_ib": 0.001045198761858046, + "step": 4467 + }, + { + "ce_ib": 1.8794877529144287, + "ce_orig": 0.5619112253189087, + "epoch": 1.284707743187864, + "kl_loss": 0.037441931664943695, + "loss_ib": 0.0005623680772259831, + "step": 4468 + }, + { + "ce_ib": 4.036344528198242, + "ce_orig": 1.137084722518921, + "epoch": 1.284707743187864, + "kl_loss": 0.11774466186761856, + "loss_ib": 0.0015810810727998614, + "step": 4468 + }, + { + "ce_ib": 1.7413185834884644, + "ce_orig": 0.3625618815422058, + "epoch": 1.284707743187864, + "kl_loss": 0.03398176655173302, + "loss_ib": 0.000513949547894299, + "step": 4468 + }, + { + "ce_ib": 4.37086820602417, + "ce_orig": 1.1085798740386963, + "epoch": 1.284707743187864, + "kl_loss": 0.0680762231349945, + "loss_ib": 0.0011178490240126848, + "step": 4468 + }, + { + "ce_ib": 3.438615560531616, + "ce_orig": 1.0805072784423828, + "epoch": 1.2849953267668417, + "kl_loss": 0.036753930151462555, + "loss_ib": 0.0007114008185453713, + "step": 4469 + }, + { + "ce_ib": 3.142242193222046, + "ce_orig": 0.5885537266731262, + "epoch": 1.2849953267668417, + "kl_loss": 0.046690165996551514, + "loss_ib": 0.0007811258547008038, + "step": 4469 + }, + { + "ce_ib": 2.686016082763672, + "ce_orig": 0.6488931179046631, + "epoch": 1.2849953267668417, + "kl_loss": 0.044489581137895584, + "loss_ib": 0.0007134974002838135, + "step": 4469 + }, + { + "ce_ib": 2.8948371410369873, + "ce_orig": 1.0106455087661743, + "epoch": 1.2849953267668417, + "kl_loss": 0.03408542275428772, + "loss_ib": 0.00063033786136657, + "step": 4469 + }, + { + "epoch": 1.2852829103458192, + "grad_norm": 0.11002914607524872, + "learning_rate": 3.1968815278663634e-05, + "loss": 0.7966, + "step": 4470 + }, + { + "ce_ib": 5.303455352783203, + "ce_orig": 1.4332906007766724, + "epoch": 1.2852829103458192, + "kl_loss": 0.027689162641763687, + "loss_ib": 0.0008072371711023152, + "step": 4470 + }, + { + "ce_ib": 4.237806797027588, + "ce_orig": 1.3341491222381592, + "epoch": 1.2852829103458192, + "kl_loss": 0.031870365142822266, + "loss_ib": 0.000742484291549772, + "step": 4470 + }, + { + "ce_ib": 2.726163148880005, + "ce_orig": 0.8146499991416931, + "epoch": 1.2852829103458192, + "kl_loss": 0.036343760788440704, + "loss_ib": 0.0006360539118759334, + "step": 4470 + }, + { + "ce_ib": 4.986202239990234, + "ce_orig": 1.3920984268188477, + "epoch": 1.2852829103458192, + "kl_loss": 0.06424155831336975, + "loss_ib": 0.001141035812906921, + "step": 4470 + }, + { + "ce_ib": 2.2951180934906006, + "ce_orig": 0.704685628414154, + "epoch": 1.285570493924797, + "kl_loss": 0.031915079802274704, + "loss_ib": 0.0005486626178026199, + "step": 4471 + }, + { + "ce_ib": 3.472719669342041, + "ce_orig": 1.1347613334655762, + "epoch": 1.285570493924797, + "kl_loss": 0.061436161398887634, + "loss_ib": 0.0009616335737518966, + "step": 4471 + }, + { + "ce_ib": 2.986281394958496, + "ce_orig": 0.8750795722007751, + "epoch": 1.285570493924797, + "kl_loss": 0.013765904121100903, + "loss_ib": 0.00043628716957755387, + "step": 4471 + }, + { + "ce_ib": 1.6222835779190063, + "ce_orig": 0.38565126061439514, + "epoch": 1.285570493924797, + "kl_loss": 0.03644601255655289, + "loss_ib": 0.0005266884691081941, + "step": 4471 + }, + { + "ce_ib": 3.121488332748413, + "ce_orig": 0.898102343082428, + "epoch": 1.2858580775037745, + "kl_loss": 0.02043793722987175, + "loss_ib": 0.0005165282054804265, + "step": 4472 + }, + { + "ce_ib": 2.0409159660339355, + "ce_orig": 0.5713332891464233, + "epoch": 1.2858580775037745, + "kl_loss": 0.0904606282711029, + "loss_ib": 0.0011086978483945131, + "step": 4472 + }, + { + "ce_ib": 2.339909791946411, + "ce_orig": 0.536328911781311, + "epoch": 1.2858580775037745, + "kl_loss": 0.13381314277648926, + "loss_ib": 0.0015721223317086697, + "step": 4472 + }, + { + "ce_ib": 3.3657686710357666, + "ce_orig": 0.5676456689834595, + "epoch": 1.2858580775037745, + "kl_loss": 0.027500707656145096, + "loss_ib": 0.0006115839350968599, + "step": 4472 + }, + { + "ce_ib": 3.0753591060638428, + "ce_orig": 0.9528391361236572, + "epoch": 1.2861456610827522, + "kl_loss": 0.027500569820404053, + "loss_ib": 0.0005825415719300508, + "step": 4473 + }, + { + "ce_ib": 3.720235824584961, + "ce_orig": 0.9489082098007202, + "epoch": 1.2861456610827522, + "kl_loss": 0.03756367415189743, + "loss_ib": 0.0007476602913811803, + "step": 4473 + }, + { + "ce_ib": 3.112757682800293, + "ce_orig": 0.8641070127487183, + "epoch": 1.2861456610827522, + "kl_loss": 0.042848341166973114, + "loss_ib": 0.0007397591834887862, + "step": 4473 + }, + { + "ce_ib": 3.1764559745788574, + "ce_orig": 0.8972547054290771, + "epoch": 1.2861456610827522, + "kl_loss": 0.042997099459171295, + "loss_ib": 0.0007476165774278343, + "step": 4473 + }, + { + "ce_ib": 2.3971545696258545, + "ce_orig": 0.7590101361274719, + "epoch": 1.2864332446617297, + "kl_loss": 0.032219186425209045, + "loss_ib": 0.0005619073053821921, + "step": 4474 + }, + { + "ce_ib": 3.532986879348755, + "ce_orig": 0.46207472681999207, + "epoch": 1.2864332446617297, + "kl_loss": 0.06320511549711227, + "loss_ib": 0.0009853498777374625, + "step": 4474 + }, + { + "ce_ib": 4.006943702697754, + "ce_orig": 0.8469406366348267, + "epoch": 1.2864332446617297, + "kl_loss": 0.043046802282333374, + "loss_ib": 0.0008311623241752386, + "step": 4474 + }, + { + "ce_ib": 4.753377437591553, + "ce_orig": 1.4303148984909058, + "epoch": 1.2864332446617297, + "kl_loss": 0.11187499761581421, + "loss_ib": 0.0015940876910462976, + "step": 4474 + }, + { + "epoch": 1.2867208282407074, + "grad_norm": 0.11156878620386124, + "learning_rate": 3.1931540728356035e-05, + "loss": 0.8656, + "step": 4475 + }, + { + "ce_ib": 2.310821533203125, + "ce_orig": 0.5868425369262695, + "epoch": 1.2867208282407074, + "kl_loss": 0.04321115091443062, + "loss_ib": 0.0006631935830228031, + "step": 4475 + }, + { + "ce_ib": 2.301854133605957, + "ce_orig": 0.7024844884872437, + "epoch": 1.2867208282407074, + "kl_loss": 0.030076775699853897, + "loss_ib": 0.0005309531698003411, + "step": 4475 + }, + { + "ce_ib": 2.597884178161621, + "ce_orig": 0.5755831599235535, + "epoch": 1.2867208282407074, + "kl_loss": 0.05809813737869263, + "loss_ib": 0.000840769789647311, + "step": 4475 + }, + { + "ce_ib": 4.501324653625488, + "ce_orig": 1.280886173248291, + "epoch": 1.2867208282407074, + "kl_loss": 0.03806980699300766, + "loss_ib": 0.0008308304823003709, + "step": 4475 + }, + { + "ce_ib": 3.3323583602905273, + "ce_orig": 0.313508540391922, + "epoch": 1.2870084118196852, + "kl_loss": 0.09443721175193787, + "loss_ib": 0.0012776079820469022, + "step": 4476 + }, + { + "ce_ib": 2.7422935962677, + "ce_orig": 0.7225409746170044, + "epoch": 1.2870084118196852, + "kl_loss": 0.03654935210943222, + "loss_ib": 0.0006397228571586311, + "step": 4476 + }, + { + "ce_ib": 2.0687978267669678, + "ce_orig": 0.6855192184448242, + "epoch": 1.2870084118196852, + "kl_loss": 0.028121378272771835, + "loss_ib": 0.0004880935593973845, + "step": 4476 + }, + { + "ce_ib": 4.216102123260498, + "ce_orig": 1.2857571840286255, + "epoch": 1.2870084118196852, + "kl_loss": 0.05668532848358154, + "loss_ib": 0.000988463405519724, + "step": 4476 + }, + { + "ce_ib": 1.586550235748291, + "ce_orig": 0.545640230178833, + "epoch": 1.2872959953986627, + "kl_loss": 0.02153334952890873, + "loss_ib": 0.0003739885287359357, + "step": 4477 + }, + { + "ce_ib": 4.550764560699463, + "ce_orig": 0.7703331708908081, + "epoch": 1.2872959953986627, + "kl_loss": 0.057809069752693176, + "loss_ib": 0.0010331671219319105, + "step": 4477 + }, + { + "ce_ib": 2.485783576965332, + "ce_orig": 0.5258970856666565, + "epoch": 1.2872959953986627, + "kl_loss": 0.04462669789791107, + "loss_ib": 0.0006948452792130411, + "step": 4477 + }, + { + "ce_ib": 4.743536472320557, + "ce_orig": 1.0832582712173462, + "epoch": 1.2872959953986627, + "kl_loss": 0.03372354432940483, + "loss_ib": 0.0008115890668705106, + "step": 4477 + }, + { + "ce_ib": 2.474778175354004, + "ce_orig": 0.6416388154029846, + "epoch": 1.2875835789776404, + "kl_loss": 0.02831398695707321, + "loss_ib": 0.0005306176608428359, + "step": 4478 + }, + { + "ce_ib": 2.797407865524292, + "ce_orig": 0.4456813335418701, + "epoch": 1.2875835789776404, + "kl_loss": 0.09741461277008057, + "loss_ib": 0.0012538869632408023, + "step": 4478 + }, + { + "ce_ib": 2.3825185298919678, + "ce_orig": 0.7076461911201477, + "epoch": 1.2875835789776404, + "kl_loss": 0.05789049714803696, + "loss_ib": 0.0008171568042598665, + "step": 4478 + }, + { + "ce_ib": 4.764573097229004, + "ce_orig": 0.8666495084762573, + "epoch": 1.2875835789776404, + "kl_loss": 0.04363986849784851, + "loss_ib": 0.0009128560195676982, + "step": 4478 + }, + { + "ce_ib": 3.430774450302124, + "ce_orig": 1.0297455787658691, + "epoch": 1.287871162556618, + "kl_loss": 0.04531841725111008, + "loss_ib": 0.0007962615345604718, + "step": 4479 + }, + { + "ce_ib": 3.543407917022705, + "ce_orig": 1.0003173351287842, + "epoch": 1.287871162556618, + "kl_loss": 0.07676513493061066, + "loss_ib": 0.0011219921289011836, + "step": 4479 + }, + { + "ce_ib": 4.212738513946533, + "ce_orig": 1.186429738998413, + "epoch": 1.287871162556618, + "kl_loss": 0.053580090403556824, + "loss_ib": 0.0009570747497491539, + "step": 4479 + }, + { + "ce_ib": 4.073374271392822, + "ce_orig": 0.7411481142044067, + "epoch": 1.287871162556618, + "kl_loss": 0.037414662539958954, + "loss_ib": 0.0007814840646460652, + "step": 4479 + }, + { + "epoch": 1.2881587461355957, + "grad_norm": 0.10457353293895721, + "learning_rate": 3.1894249478357965e-05, + "loss": 0.9048, + "step": 4480 + }, + { + "ce_ib": 2.4190921783447266, + "ce_orig": 0.675700306892395, + "epoch": 1.2881587461355957, + "kl_loss": 0.04337586462497711, + "loss_ib": 0.0006756678340025246, + "step": 4480 + }, + { + "ce_ib": 4.225837707519531, + "ce_orig": 1.0837070941925049, + "epoch": 1.2881587461355957, + "kl_loss": 0.04833969846367836, + "loss_ib": 0.0009059807634912431, + "step": 4480 + }, + { + "ce_ib": 2.126775026321411, + "ce_orig": 0.5802807211875916, + "epoch": 1.2881587461355957, + "kl_loss": 0.03706823289394379, + "loss_ib": 0.0005833597970195115, + "step": 4480 + }, + { + "ce_ib": 2.581416606903076, + "ce_orig": 0.8645727038383484, + "epoch": 1.2881587461355957, + "kl_loss": 0.0624404214322567, + "loss_ib": 0.0008825458353385329, + "step": 4480 + }, + { + "ce_ib": 5.746756553649902, + "ce_orig": 1.894531488418579, + "epoch": 1.2884463297145734, + "kl_loss": 0.14185592532157898, + "loss_ib": 0.0019932349678128958, + "step": 4481 + }, + { + "ce_ib": 3.1364047527313232, + "ce_orig": 1.130078911781311, + "epoch": 1.2884463297145734, + "kl_loss": 0.045884694904088974, + "loss_ib": 0.0007724873721599579, + "step": 4481 + }, + { + "ce_ib": 2.370957136154175, + "ce_orig": 0.7606921792030334, + "epoch": 1.2884463297145734, + "kl_loss": 0.026489542797207832, + "loss_ib": 0.0005019911332055926, + "step": 4481 + }, + { + "ce_ib": 6.050169467926025, + "ce_orig": 1.3988375663757324, + "epoch": 1.2884463297145734, + "kl_loss": 0.045799627900123596, + "loss_ib": 0.0010630132164806128, + "step": 4481 + }, + { + "ce_ib": 1.9772294759750366, + "ce_orig": 0.5300948023796082, + "epoch": 1.288733913293551, + "kl_loss": 0.0310884527862072, + "loss_ib": 0.0005086074816063046, + "step": 4482 + }, + { + "ce_ib": 2.7817654609680176, + "ce_orig": 0.5121303200721741, + "epoch": 1.288733913293551, + "kl_loss": 0.039173588156700134, + "loss_ib": 0.0006699123769067228, + "step": 4482 + }, + { + "ce_ib": 1.3164528608322144, + "ce_orig": 0.22281618416309357, + "epoch": 1.288733913293551, + "kl_loss": 0.08174499869346619, + "loss_ib": 0.0009490952361375093, + "step": 4482 + }, + { + "ce_ib": 5.02006196975708, + "ce_orig": 1.5773972272872925, + "epoch": 1.288733913293551, + "kl_loss": 0.07582014799118042, + "loss_ib": 0.0012602076167240739, + "step": 4482 + }, + { + "ce_ib": 2.224698781967163, + "ce_orig": 0.7774945497512817, + "epoch": 1.2890214968725286, + "kl_loss": 0.022100776433944702, + "loss_ib": 0.0004434776201378554, + "step": 4483 + }, + { + "ce_ib": 4.675645351409912, + "ce_orig": 1.2760671377182007, + "epoch": 1.2890214968725286, + "kl_loss": 0.06179624795913696, + "loss_ib": 0.0010855270083993673, + "step": 4483 + }, + { + "ce_ib": 3.9778196811676025, + "ce_orig": 1.2226697206497192, + "epoch": 1.2890214968725286, + "kl_loss": 0.02943136915564537, + "loss_ib": 0.0006920956075191498, + "step": 4483 + }, + { + "ce_ib": 4.475436210632324, + "ce_orig": 1.143258810043335, + "epoch": 1.2890214968725286, + "kl_loss": 0.03428565710783005, + "loss_ib": 0.0007904001977294683, + "step": 4483 + }, + { + "ce_ib": 2.8296210765838623, + "ce_orig": 0.7117740511894226, + "epoch": 1.2893090804515062, + "kl_loss": 0.030381595715880394, + "loss_ib": 0.0005867780419066548, + "step": 4484 + }, + { + "ce_ib": 2.1595711708068848, + "ce_orig": 0.5276721119880676, + "epoch": 1.2893090804515062, + "kl_loss": 0.04821952432394028, + "loss_ib": 0.0006981522892601788, + "step": 4484 + }, + { + "ce_ib": 2.5285139083862305, + "ce_orig": 0.38180381059646606, + "epoch": 1.2893090804515062, + "kl_loss": 0.02007252350449562, + "loss_ib": 0.0004535765910986811, + "step": 4484 + }, + { + "ce_ib": 3.062255620956421, + "ce_orig": 0.33830973505973816, + "epoch": 1.2893090804515062, + "kl_loss": 0.03831818699836731, + "loss_ib": 0.0006894074031151831, + "step": 4484 + }, + { + "epoch": 1.289596664030484, + "grad_norm": 0.11203812807798386, + "learning_rate": 3.185694161851272e-05, + "loss": 0.8361, + "step": 4485 + }, + { + "ce_ib": 1.9920316934585571, + "ce_orig": 0.45547547936439514, + "epoch": 1.289596664030484, + "kl_loss": 0.036927297711372375, + "loss_ib": 0.0005684760981239378, + "step": 4485 + }, + { + "ce_ib": 3.1049935817718506, + "ce_orig": 0.9395639896392822, + "epoch": 1.289596664030484, + "kl_loss": 0.03291202709078789, + "loss_ib": 0.0006396195967681706, + "step": 4485 + }, + { + "ce_ib": 4.419310092926025, + "ce_orig": 1.0366010665893555, + "epoch": 1.289596664030484, + "kl_loss": 0.03590425103902817, + "loss_ib": 0.000800973444711417, + "step": 4485 + }, + { + "ce_ib": 2.5654263496398926, + "ce_orig": 0.7582240104675293, + "epoch": 1.289596664030484, + "kl_loss": 0.035052116960287094, + "loss_ib": 0.0006070638191886246, + "step": 4485 + }, + { + "ce_ib": 3.9971559047698975, + "ce_orig": 0.9380460381507874, + "epoch": 1.2898842476094616, + "kl_loss": 0.04464618116617203, + "loss_ib": 0.000846177339553833, + "step": 4486 + }, + { + "ce_ib": 3.1983540058135986, + "ce_orig": 1.0282353162765503, + "epoch": 1.2898842476094616, + "kl_loss": 0.0268529262393713, + "loss_ib": 0.0005883646663278341, + "step": 4486 + }, + { + "ce_ib": 2.4491183757781982, + "ce_orig": 0.28267356753349304, + "epoch": 1.2898842476094616, + "kl_loss": 0.03221581131219864, + "loss_ib": 0.0005670699174515903, + "step": 4486 + }, + { + "ce_ib": 3.9707534313201904, + "ce_orig": 1.068291187286377, + "epoch": 1.2898842476094616, + "kl_loss": 0.0388597771525383, + "loss_ib": 0.0007856730953790247, + "step": 4486 + }, + { + "ce_ib": 3.3415403366088867, + "ce_orig": 0.6335703134536743, + "epoch": 1.2901718311884391, + "kl_loss": 0.022137269377708435, + "loss_ib": 0.0005555266980081797, + "step": 4487 + }, + { + "ce_ib": 2.2111306190490723, + "ce_orig": 0.6335045099258423, + "epoch": 1.2901718311884391, + "kl_loss": 0.031330641359090805, + "loss_ib": 0.000534419494215399, + "step": 4487 + }, + { + "ce_ib": 2.401289463043213, + "ce_orig": 0.5729602575302124, + "epoch": 1.2901718311884391, + "kl_loss": 0.03296877443790436, + "loss_ib": 0.0005698166787624359, + "step": 4487 + }, + { + "ce_ib": 2.839254140853882, + "ce_orig": 0.4862714409828186, + "epoch": 1.2901718311884391, + "kl_loss": 0.06415453553199768, + "loss_ib": 0.000925470725633204, + "step": 4487 + }, + { + "ce_ib": 5.676782131195068, + "ce_orig": 1.1319011449813843, + "epoch": 1.2904594147674167, + "kl_loss": 0.053209155797958374, + "loss_ib": 0.0010997697245329618, + "step": 4488 + }, + { + "ce_ib": 4.560790061950684, + "ce_orig": 1.1500035524368286, + "epoch": 1.2904594147674167, + "kl_loss": 0.026761973276734352, + "loss_ib": 0.000723698700312525, + "step": 4488 + }, + { + "ce_ib": 2.4346742630004883, + "ce_orig": 0.5999580025672913, + "epoch": 1.2904594147674167, + "kl_loss": 0.05036918818950653, + "loss_ib": 0.0007471592398360372, + "step": 4488 + }, + { + "ce_ib": 3.921022415161133, + "ce_orig": 1.2465065717697144, + "epoch": 1.2904594147674167, + "kl_loss": 0.03141438588500023, + "loss_ib": 0.0007062460063025355, + "step": 4488 + }, + { + "ce_ib": 4.673959732055664, + "ce_orig": 1.2129408121109009, + "epoch": 1.2907469983463944, + "kl_loss": 0.06752467155456543, + "loss_ib": 0.0011426425771787763, + "step": 4489 + }, + { + "ce_ib": 2.4175548553466797, + "ce_orig": 0.643172025680542, + "epoch": 1.2907469983463944, + "kl_loss": 0.043519556522369385, + "loss_ib": 0.000676951021887362, + "step": 4489 + }, + { + "ce_ib": 4.502527236938477, + "ce_orig": 0.9569980502128601, + "epoch": 1.2907469983463944, + "kl_loss": 0.05462898313999176, + "loss_ib": 0.0009965425124391913, + "step": 4489 + }, + { + "ce_ib": 3.6634767055511475, + "ce_orig": 0.6308883428573608, + "epoch": 1.2907469983463944, + "kl_loss": 0.042295362800359726, + "loss_ib": 0.0007893012370914221, + "step": 4489 + }, + { + "epoch": 1.2910345819253721, + "grad_norm": 0.09802980720996857, + "learning_rate": 3.1819617238703584e-05, + "loss": 0.8924, + "step": 4490 + }, + { + "ce_ib": 2.9894561767578125, + "ce_orig": 0.641133725643158, + "epoch": 1.2910345819253721, + "kl_loss": 0.019231850281357765, + "loss_ib": 0.0004912641015835106, + "step": 4490 + }, + { + "ce_ib": 2.7161080837249756, + "ce_orig": 0.8568962812423706, + "epoch": 1.2910345819253721, + "kl_loss": 0.03227044269442558, + "loss_ib": 0.0005943152355030179, + "step": 4490 + }, + { + "ce_ib": 5.023597717285156, + "ce_orig": 1.1758760213851929, + "epoch": 1.2910345819253721, + "kl_loss": 0.0482168048620224, + "loss_ib": 0.0009845277527347207, + "step": 4490 + }, + { + "ce_ib": 3.5346407890319824, + "ce_orig": 1.168505311012268, + "epoch": 1.2910345819253721, + "kl_loss": 0.05089876055717468, + "loss_ib": 0.0008624516194686294, + "step": 4490 + }, + { + "ce_ib": 1.9439641237258911, + "ce_orig": 0.5200515985488892, + "epoch": 1.2913221655043496, + "kl_loss": 0.03478347510099411, + "loss_ib": 0.0005422311369329691, + "step": 4491 + }, + { + "ce_ib": 4.760715961456299, + "ce_orig": 1.263006329536438, + "epoch": 1.2913221655043496, + "kl_loss": 0.048738449811935425, + "loss_ib": 0.0009634559974074364, + "step": 4491 + }, + { + "ce_ib": 2.570913791656494, + "ce_orig": 0.8564876317977905, + "epoch": 1.2913221655043496, + "kl_loss": 0.020803291350603104, + "loss_ib": 0.0004651242634281516, + "step": 4491 + }, + { + "ce_ib": 3.1371402740478516, + "ce_orig": 0.3816436529159546, + "epoch": 1.2913221655043496, + "kl_loss": 0.13173237442970276, + "loss_ib": 0.001631037681363523, + "step": 4491 + }, + { + "ce_ib": 4.077648162841797, + "ce_orig": 0.9721534848213196, + "epoch": 1.2916097490833274, + "kl_loss": 0.04050035774707794, + "loss_ib": 0.0008127682958729565, + "step": 4492 + }, + { + "ce_ib": 4.567760944366455, + "ce_orig": 1.6268888711929321, + "epoch": 1.2916097490833274, + "kl_loss": 0.04031633213162422, + "loss_ib": 0.000859939435031265, + "step": 4492 + }, + { + "ce_ib": 3.0713891983032227, + "ce_orig": 0.9795765280723572, + "epoch": 1.2916097490833274, + "kl_loss": 0.028705967590212822, + "loss_ib": 0.0005941985873505473, + "step": 4492 + }, + { + "ce_ib": 3.186990976333618, + "ce_orig": 0.7890920639038086, + "epoch": 1.2916097490833274, + "kl_loss": 0.04977232217788696, + "loss_ib": 0.0008164222817867994, + "step": 4492 + }, + { + "ce_ib": 4.139065742492676, + "ce_orig": 1.1903825998306274, + "epoch": 1.2918973326623049, + "kl_loss": 0.03949768841266632, + "loss_ib": 0.0008088834583759308, + "step": 4493 + }, + { + "ce_ib": 1.8850935697555542, + "ce_orig": 0.42663389444351196, + "epoch": 1.2918973326623049, + "kl_loss": 0.032655246555805206, + "loss_ib": 0.0005150618380866945, + "step": 4493 + }, + { + "ce_ib": 5.2221221923828125, + "ce_orig": 1.4240803718566895, + "epoch": 1.2918973326623049, + "kl_loss": 0.041618410497903824, + "loss_ib": 0.0009383962606079876, + "step": 4493 + }, + { + "ce_ib": 3.178375005722046, + "ce_orig": 0.8560095429420471, + "epoch": 1.2918973326623049, + "kl_loss": 0.028012976050376892, + "loss_ib": 0.0005979672423563898, + "step": 4493 + }, + { + "ce_ib": 4.237872123718262, + "ce_orig": 0.6036153435707092, + "epoch": 1.2921849162412826, + "kl_loss": 0.052998095750808716, + "loss_ib": 0.0009537681471556425, + "step": 4494 + }, + { + "ce_ib": 2.217503547668457, + "ce_orig": 0.4171827435493469, + "epoch": 1.2921849162412826, + "kl_loss": 0.038554638624191284, + "loss_ib": 0.0006072967080399394, + "step": 4494 + }, + { + "ce_ib": 1.8077309131622314, + "ce_orig": 0.17442339658737183, + "epoch": 1.2921849162412826, + "kl_loss": 0.10832002758979797, + "loss_ib": 0.0012639734195545316, + "step": 4494 + }, + { + "ce_ib": 3.178846597671509, + "ce_orig": 0.7738553881645203, + "epoch": 1.2921849162412826, + "kl_loss": 0.033294428139925, + "loss_ib": 0.0006508289370685816, + "step": 4494 + }, + { + "epoch": 1.2924724998202604, + "grad_norm": 0.10839655250310898, + "learning_rate": 3.178227642885366e-05, + "loss": 0.8278, + "step": 4495 + }, + { + "ce_ib": 4.614302635192871, + "ce_orig": 1.1398338079452515, + "epoch": 1.2924724998202604, + "kl_loss": 0.03544868156313896, + "loss_ib": 0.0008159170974977314, + "step": 4495 + }, + { + "ce_ib": 4.164463520050049, + "ce_orig": 1.2010037899017334, + "epoch": 1.2924724998202604, + "kl_loss": 0.043080735951662064, + "loss_ib": 0.0008472536574117839, + "step": 4495 + }, + { + "ce_ib": 2.005864381790161, + "ce_orig": 0.512371301651001, + "epoch": 1.2924724998202604, + "kl_loss": 0.03028562106192112, + "loss_ib": 0.0005034426576457918, + "step": 4495 + }, + { + "ce_ib": 3.6071577072143555, + "ce_orig": 1.0133695602416992, + "epoch": 1.2924724998202604, + "kl_loss": 0.04911772161722183, + "loss_ib": 0.00085189298260957, + "step": 4495 + }, + { + "ce_ib": 2.858706474304199, + "ce_orig": 0.5236147046089172, + "epoch": 1.2927600833992379, + "kl_loss": 0.049687907099723816, + "loss_ib": 0.0007827497320249677, + "step": 4496 + }, + { + "ce_ib": 2.714970827102661, + "ce_orig": 0.6505854725837708, + "epoch": 1.2927600833992379, + "kl_loss": 0.04329342395067215, + "loss_ib": 0.0007044312660582364, + "step": 4496 + }, + { + "ce_ib": 2.056239366531372, + "ce_orig": 0.5257994532585144, + "epoch": 1.2927600833992379, + "kl_loss": 0.06154298037290573, + "loss_ib": 0.0008210537489503622, + "step": 4496 + }, + { + "ce_ib": 2.211102247238159, + "ce_orig": 0.3463180661201477, + "epoch": 1.2927600833992379, + "kl_loss": 0.06778542697429657, + "loss_ib": 0.0008989644702523947, + "step": 4496 + }, + { + "ce_ib": 3.7409985065460205, + "ce_orig": 0.8131685256958008, + "epoch": 1.2930476669782156, + "kl_loss": 0.03656570613384247, + "loss_ib": 0.0007397568551823497, + "step": 4497 + }, + { + "ce_ib": 2.402454137802124, + "ce_orig": 0.6943317651748657, + "epoch": 1.2930476669782156, + "kl_loss": 0.031253475695848465, + "loss_ib": 0.0005527801695279777, + "step": 4497 + }, + { + "ce_ib": 2.1678519248962402, + "ce_orig": 0.6343862414360046, + "epoch": 1.2930476669782156, + "kl_loss": 0.022292591631412506, + "loss_ib": 0.0004397110897116363, + "step": 4497 + }, + { + "ce_ib": 3.3402931690216064, + "ce_orig": 0.8002315759658813, + "epoch": 1.2930476669782156, + "kl_loss": 0.02210250124335289, + "loss_ib": 0.000555054284632206, + "step": 4497 + }, + { + "ce_ib": 3.327126979827881, + "ce_orig": 0.750221312046051, + "epoch": 1.2933352505571931, + "kl_loss": 0.03658457100391388, + "loss_ib": 0.0006985583459027112, + "step": 4498 + }, + { + "ce_ib": 1.5786821842193604, + "ce_orig": 0.4134250283241272, + "epoch": 1.2933352505571931, + "kl_loss": 0.01736411824822426, + "loss_ib": 0.00033150939270853996, + "step": 4498 + }, + { + "ce_ib": 1.728885293006897, + "ce_orig": 0.35575538873672485, + "epoch": 1.2933352505571931, + "kl_loss": 0.03928196430206299, + "loss_ib": 0.0005657081492245197, + "step": 4498 + }, + { + "ce_ib": 2.7119271755218506, + "ce_orig": 0.7274483442306519, + "epoch": 1.2933352505571931, + "kl_loss": 0.06715098768472672, + "loss_ib": 0.0009427025797776878, + "step": 4498 + }, + { + "ce_ib": 3.213263988494873, + "ce_orig": 0.8937224745750427, + "epoch": 1.2936228341361709, + "kl_loss": 0.04731803387403488, + "loss_ib": 0.0007945067482069135, + "step": 4499 + }, + { + "ce_ib": 2.8701090812683105, + "ce_orig": 0.7630175352096558, + "epoch": 1.2936228341361709, + "kl_loss": 0.03623736649751663, + "loss_ib": 0.0006493845721706748, + "step": 4499 + }, + { + "ce_ib": 3.303105592727661, + "ce_orig": 0.9751455783843994, + "epoch": 1.2936228341361709, + "kl_loss": 0.025869611650705338, + "loss_ib": 0.0005890066386200488, + "step": 4499 + }, + { + "ce_ib": 2.4134647846221924, + "ce_orig": 0.7153543829917908, + "epoch": 1.2936228341361709, + "kl_loss": 0.04359178990125656, + "loss_ib": 0.0006772642955183983, + "step": 4499 + }, + { + "epoch": 1.2939104177151486, + "grad_norm": 0.13232430815696716, + "learning_rate": 3.1744919278925605e-05, + "loss": 0.8122, + "step": 4500 + } + ], + "logging_steps": 5, + "max_steps": 10434, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}